5637
by pjrm
utf8-to-roff: work around what's arguably a bug in perl 5.10 |
1 |
if (true || '\'); then exec perl -CI "$0" "$@"; fi # ') {}
# The line above is valid both as shell script and as perl.  It uses the fact
# that backslash isn't special inside single quotes in a shell script, whereas
# in perl it escapes the following single quote.  So the shell sees a complete
# `if ...; then exec perl -CI "$0" "$@"; fi' followed by a comment, while perl
# sees one long single-quoted string inside an `if (...) {}' with an empty body.
#
# The problem it tries to solve is that we want perl to be run with the -CI flag
# (to have stdin interpreted as utf-8), so we would use `#! /usr/bin/perl -CI',
# except that if we do that then perl 5.10 complains about it being too late
# to apply -CI if the script is run with `perl -CI ./utf8-to-roff', as we want
# to do from the Makefile.  The reason we don't do `./utf8-to-roff' from the
# Makefile is that then we would require the #! line to have the right location
# of perl instead of just consulting the path.  (Similarly, we could use
# `#! /usr/bin/env perl -CI', though that still requires that /usr/bin/env
# exist.)  The reason we don't just remove the `-CI' from the #! line is that
# then the script couldn't be run correctly with ./utf8-to-roff.
|
|
15 |
||
16 |
||
1
by mental
moving trunk for module inkscape |
17 |
# Converts a man page from utf8 (not understood by groff 1.18) to groff escapes.
|
18 |
# I couldn't find any existing tool to convert from utf8 to groff, though I
|
|
19 |
# seem to recall seeing some code to do so somewhere.
|
|
20 |
#
|
|
21 |
# Hereby released into public domain by Peter Moulder.
|
|
5637
by pjrm
utf8-to-roff: work around what's arguably a bug in perl 5.10 |
22 |
use warnings; |
1
by mental
moving trunk for module inkscape |
23 |
use strict; |
24 |
||
25 |
# Table generated automatically using:
|
|
26 |
# zcat /usr/share/man/man7/groff_char.7.gz |groff -man -Tutf8| col -pb | grep '\\\['|
|
|
27 |
# perl -CI -nae 'my ($ch, $seq) = @F; if (ord($ch) >= 128) { printf(" 0x\%x, q{\%s},\n", ord($ch), $seq); }'
|
|
6885
by Ted Gould
From trunk |
28 |
# with č (0x10d) manually translated as cˇ (c\[ah]). (Anyone have a better translation, e.g. using
|
1
by mental
moving trunk for module inkscape |
29 |
# overprint? \[vc] doesn't work, btw.)
|
6885
by Ted Gould
From trunk |
30 |
# Similarly, ć (0x107) has been manually translated as c´ (c\[aa]), and ń (0x144) as n´ (n\[aa]).
|
1
by mental
moving trunk for module inkscape |
31 |
# Maps a Unicode code point (>= 128) to the groff escape sequence that is
# printed in its place.  Keys are numeric code points, values are literal
# groff input text.
#
# Each code point appears exactly once.  Where groff offers several names for
# the same character, the alias that is not used is noted in a trailing
# comment.  Entries marked "corrected" had a wrong code point in the
# automatically generated table.
my %map = (
    0xd0   => q{\[-D]},
    0xf0   => q{\[Sd]},
    0xde   => q{\[TP]},
    0xfe   => q{\[Tp]},
    0xdf   => q{\[ss]},
    0xfb00 => q{\[ff]},
    0xfb01 => q{\[fi]},
    0xfb02 => q{\[fl]},
    0xfb03 => q{\[Fi]},
    0xfb04 => q{\[Fl]},
    0xc6   => q{\[AE]},
    0xe6   => q{\[ae]},
    0x152  => q{\[OE]},
    0x153  => q{\[oe]},
    0x131  => q{\[.i]},
    0xc1   => q{\['A]},
    0xc9   => q{\['E]},
    0xcd   => q{\['I]},
    0xd3   => q{\['O]},
    0xda   => q{\['U]},
    0xdd   => q{\['Y]},
    0xe1   => q{\['a]},
    0xe9   => q{\['e]},
    0xed   => q{\['i]},
    0xf3   => q{\['o]},
    0xfa   => q{\['u]},
    0xfd   => q{\['y]},
    0xc4   => q{\[:A]},
    0xcb   => q{\[:E]},
    0xcf   => q{\[:I]},
    0xd6   => q{\[:O]},
    0xdc   => q{\[:U]},
    0x178  => q{\[:Y]},
    0xe4   => q{\[:a]},
    0xeb   => q{\[:e]},
    0xef   => q{\[:i]},
    0xf6   => q{\[:o]},
    0xfc   => q{\[:u]},
    0xff   => q{\[:y]},
    0xc2   => q{\[^A]},
    0xca   => q{\[^E]},
    0xce   => q{\[^I]},
    0xd4   => q{\[^O]},
    0xdb   => q{\[^U]},
    0xe2   => q{\[^a]},
    0xea   => q{\[^e]},
    0xee   => q{\[^i]},
    0xf4   => q{\[^o]},
    0xfb   => q{\[^u]},
    0xc0   => q{\[`A]},
    0xc8   => q{\[`E]},
    0xcc   => q{\[`I]},
    0xd2   => q{\[`O]},
    0xd9   => q{\[`U]},
    0xe0   => q{\[`a]},
    0xe8   => q{\[`e]},
    0xec   => q{\[`i]},
    0xf2   => q{\[`o]},
    0xf9   => q{\[`u]},
    0xc3   => q{\[~A]},
    0xd1   => q{\[~N]},
    0xd5   => q{\[~O]},
    0xe3   => q{\[~a]},
    0xf1   => q{\[~n]},
    0xf5   => q{\[~o]},
    0x107  => q{c\[aa]},    # Added manually: c followed by an acute accent.
    0x10d  => q{c\[ah]},    # Added manually: c followed by a caron.
    0x160  => q{\[vS]},
    0x161  => q{\[vs]},
    0x17d  => q{\[vZ]},
    0x17e  => q{\[vz]},
    0xc7   => q{\[,C]},
    0xe7   => q{\[,c]},
    0x141  => q{\[/L]},
    0x142  => q{\[/l]},
    0x144  => q{n\[aa]},    # Added manually: n followed by an acute accent.
    0xd8   => q{\[/O]},
    0xf8   => q{\[/o]},
    0xc5   => q{\[oA]},
    0xe5   => q{\[oa]},
    0x2dd  => q{\[a"]},
    0xaf   => q{\[a-]},
    0x2d9  => q{\[a.]},
    0xb4   => q{\[aa]},
    0x2d8  => q{\[ab]},
    0xb8   => q{\[ac]},
    0xa8   => q{\[ad]},
    0x2c7  => q{\[ah]},
    0x2da  => q{\[ao]},
    0x2db  => q{\[ho]},
    0x201e => q{\[Bq]},
    0x201a => q{\[bq]},
    0x201c => q{\[lq]},
    0x201d => q{\[rq]},
    0x2018 => q{\[oq]},
    0x2019 => q{\[cq]},
    0xab   => q{\[Fo]},
    0xbb   => q{\[Fc]},
    0x2039 => q{\[fo]},
    0x203a => q{\[fc]},
    0xa1   => q{\[r!]},
    0xbf   => q{\[r?]},
    0x2014 => q{\[em]},
    0x2013 => q{\[en]},
    0x2010 => q{\[hy]},
    0x2329 => q{\[la]},
    0x232a => q{\[ra]},
    0x2190 => q{\[<-]},
    0x2192 => q{\[->]},
    0x2194 => q{\[<>]},
    0x2193 => q{\[da]},
    0x2191 => q{\[ua]},     # Corrected: was keyed on 0x21d1 (the double arrow).
    0x21d0 => q{\[lA]},
    0x21d2 => q{\[rA]},
    0x21d4 => q{\[hA]},
    0x21d3 => q{\[dA]},
    0x21d1 => q{\[uA]},     # Corrected: a stray 0x21d1 => \[Do] entry used to override this.
    0x2500 => q{\[an]},
    0x2502 => q{\[bv]},     # Alias not used: \[br].
    0xa6   => q{\[bb]},
    0x25ef => q{\[ci]},
    0x2022 => q{\[bu]},     # Corrected: was keyed on 0xb7, where \[pc] overrode it.
    0x2021 => q{\[dd]},
    0x2020 => q{\[dg]},
    0x25ca => q{\[lz]},
    0x25a1 => q{\[sq]},
    0xb6   => q{\[ps]},
    0xa7   => q{\[sc]},
    0x261c => q{\[lh]},
    0x261e => q{\[rh]},
    0x240d => q{\[CR]},
    0xa9   => q{\[co]},
    0xae   => q{\[rg]},
    0x2122 => q{\[tm]},
    0xa2   => q{\[ct]},
    0x20ac => q{\[Eu]},     # Alias not used: \[eu].
    0xa5   => q{\[Ye]},
    0xa3   => q{\[Po]},
    0xa4   => q{\[Cs]},
    0x192  => q{\[Fn]},
    0xb0   => q{\[de]},
    0x2030 => q{\[%0]},
    0x2032 => q{\[fm]},
    0x2033 => q{\[sd]},
    0xb5   => q{\[mc]},
    0xaa   => q{\[Of]},
    0xba   => q{\[Om]},
    0x2227 => q{\[AN]},
    0x2228 => q{\[OR]},
    0xac   => q{\[no]},
    0x2203 => q{\[te]},
    0x2200 => q{\[fa]},
    0x220b => q{\[st]},
    0x2234 => q{\[tf]},     # Alias not used: \[3d].
    0xbd   => q{\[12]},
    0xbc   => q{\[14]},
    0xbe   => q{\[34]},
    0xb9   => q{\[S1]},
    0xb2   => q{\[S2]},
    0xb3   => q{\[S3]},
    0xb1   => q{\[t+-]},    # Alias not used: \[+-].
    0xb7   => q{\[pc]},
    0x22c5 => q{\[md]},
    0xd7   => q{\[tmu]},    # Alias not used: \[mu].
    0x2297 => q{\[c*]},
    0x2295 => q{\[c+]},
    0xf7   => q{\[tdi]},    # Alias not used: \[di].
    0x2044 => q{\[f/]},
    0x2217 => q{\[**]},
    0x2264 => q{\[<=]},
    0x2265 => q{\[>=]},
    0x2260 => q{\[!=]},
    0x2261 => q{\[==]},
    0x2245 => q{\[=~]},
    0x223c => q{\[ap]},     # Alias not used: \[ti].
    0x2248 => q{\[~=]},     # Alias not used: \[~~].
    0x221d => q{\[pt]},
    0x2205 => q{\[es]},
    0x2208 => q{\[mo]},
    0x2209 => q{\[nm]},
    0x2284 => q{\[nb]},
    0x2282 => q{\[sb]},
    0x2283 => q{\[sp]},
    0x2286 => q{\[ib]},
    0x2287 => q{\[ip]},
    0x2229 => q{\[ca]},
    0x222a => q{\[cu]},
    0x2220 => q{\[/_]},
    0x22a5 => q{\[pp]},
    0x222b => q{\[is]},
    0x2211 => q{\[sum]},
    0x220f => q{\[product]},
    0x2207 => q{\[gr]},
    0x221a => q{\[sr]},
    0x203e => q{\[rn]},
    0x221e => q{\[if]},
    0x2135 => q{\[Ah]},
    0x2111 => q{\[Im]},
    0x211c => q{\[Re]},
    0x2118 => q{\[wp]},
    0x2202 => q{\[pd]},
    0x391  => q{\[*A]},
    0x392  => q{\[*B]},
    0x39e  => q{\[*C]},
    0x394  => q{\[*D]},
    0x395  => q{\[*E]},
    0x3a6  => q{\[*F]},
    0x393  => q{\[*G]},
    0x398  => q{\[*H]},
    0x399  => q{\[*I]},
    0x39a  => q{\[*K]},
    0x39b  => q{\[*L]},
    0x39c  => q{\[*M]},
    0x39d  => q{\[*N]},
    0x39f  => q{\[*O]},
    0x3a0  => q{\[*P]},
    0x3a8  => q{\[*Q]},
    0x3a1  => q{\[*R]},
    0x3a3  => q{\[*S]},
    0x3a4  => q{\[*T]},
    0x3a5  => q{\[*U]},
    0x3a9  => q{\[*W]},
    0x3a7  => q{\[*X]},
    0x397  => q{\[*Y]},
    0x396  => q{\[*Z]},
    0x3b1  => q{\[*a]},
    0x3b2  => q{\[*b]},
    0x3be  => q{\[*c]},
    0x3b4  => q{\[*d]},
    0x3b5  => q{\[*e]},
    0x3c6  => q{\[*f]},
    0x3d5  => q{\[+f]},
    0x3b3  => q{\[*g]},
    0x3b8  => q{\[*h]},
    0x3d1  => q{\[+h]},
    0x3b9  => q{\[*i]},
    0x3ba  => q{\[*k]},
    0x3bb  => q{\[*l]},
    0x3bc  => q{\[*m]},
    0x3bd  => q{\[*n]},
    0x3bf  => q{\[*o]},
    0x3c0  => q{\[*p]},
    0x3d6  => q{\[+p]},
    0x3c8  => q{\[*q]},
    0x3c1  => q{\[*r]},
    0x3c3  => q{\[*s]},
    0x3c4  => q{\[*t]},
    0x3c5  => q{\[*u]},
    0x3c9  => q{\[*w]},
    0x3c7  => q{\[*x]},
    0x3b7  => q{\[*y]},
    0x3b6  => q{\[*z]},
    0x3c2  => q{\[ts]},
    0x2663 => q{\[CL]},
    0x2660 => q{\[SP]},
    0x2665 => q{\[HE]},
    0x2666 => q{\[DI]},
);
|
|
299 |
||
300 |
#while(<>) {
|
|
301 |
# s/([^ -~])/(ord($1) < 128 ? $1 : defined($map{$1}) ? $map{$1} : sprintf("\\u%4x", $1))/ge;
|
|
302 |
# print;
|
|
303 |
#}
|
|
304 |
#exit 0;
|
|
305 |
||
306 |
# Copy stdin to stdout one character at a time.  ASCII characters are passed
# through unchanged; every other character is replaced by its groff escape
# from %map.  A character with no entry in %map aborts the run.
# (Assumes perl was started with -CI so that getc returns whole characters
# rather than individual bytes.)
my $ch;
CHAR:
while (defined($ch = getc(STDIN))) {
    my $codepoint = ord($ch);

    # Plain ASCII needs no translation.
    if ($codepoint <= 127) {
        print $ch;
        next CHAR;
    }

    my $escape = $map{$codepoint};
    defined($escape)
        or die "Untranslatable character \\u" . sprintf("%X", $codepoint) . " / `$ch'";

    print $escape;
}
|