~inkscape.dev/inkscape/trunk

5637 by pjrm
utf8-to-roff: work around what's arguably a bug in perl 5.10
1
if (true || '\'); then exec perl -CI "$0" "$@"; fi  # ') {}
2
# The above uses the fact that backslash isn't special in single quotes in
# shell script, whereas in perl it escapes the following single quote.
#
# The problem it tries to solve is that we want perl to be run with the -CI flag
# (to have stdin interpreted as utf-8), so we would use `#! /usr/bin/perl -CI',
# except that if we do that then perl 5.10 complains about it being too late
# to apply -CI if the script is run with `perl -CI ./utf8-to-roff', as we want
# to do from the Makefile.  The reason we don't do `./utf8-to-roff' from the Makefile
# is that then we require the #! line to have the right location of perl instead of
# just consulting the path.  (Similarly, we could use `#! /usr/bin/env perl -CI',
# though that still requires that /usr/bin/env exist.)  The reason we don't just
# remove the `-CI' from the #! line is that then the script couldn't be run correctly
# with ./utf8-to-roff.
15
16
1 by mental
moving trunk for module inkscape
17
# Converts a man page from utf8 (not understood by groff 1.18) to groff escapes.
18
# I couldn't find any existing tool to convert from utf8 to groff, though I
19
# seem to recall seeing some code to do so somewhere.
20
#
21
# Hereby released into public domain by Peter Moulder.
5637 by pjrm
utf8-to-roff: work around what's arguably a bug in perl 5.10
22
use warnings;
1 by mental
moving trunk for module inkscape
23
use strict;
24
25
# Table generated automatically using:
#  zcat /usr/share/man/man7/groff_char.7.gz |groff -man -Tutf8| col -pb | grep '\\\['|
#   perl -CI -nae 'my ($ch, $seq) = @F; if (ord($ch) >= 128) { printf(" 0x\%x, q{\%s},\n", ord($ch), $seq); }'
# with č (0x10d) manually translated as cˇ (c\[ah]).  (Does anyone have a better translation, e.g. using
# overprint?  \[vc] doesn't work, btw.)
# Similarly, ć (0x107) has been manually translated as c´ (c\[aa]), and ń (0x144) as n´ (n\[aa]).
1 by mental
moving trunk for module inkscape
31
# Translation table: Unicode code point (integer key) => groff escape sequence.
#
# Each code point appears exactly once.  A Perl hash built from a list keeps
# only the LAST value given for a repeated key, so repeated keys silently
# discard earlier entries.  Where groff offers several glyph names for the same
# character, only the name that was already taking effect is kept and the
# aliases are mentioned in a trailing comment.
#
# Hand corrections to the generated table:
#  - \[ua] is keyed on 0x2191 (upwards arrow); it used to share 0x21d1 with \[uA].
#  - \[Do] is dropped: groff_char(7) documents it as the dollar sign, which is
#    ASCII and therefore never looked up here, yet it was keyed on 0x21d1 and
#    overrode \[uA].
#  - \[bu] is keyed on 0x2022 (bullet); it used to share 0xb7 with \[pc].
my %map = (
 0xd0, q{\[-D]},
 0xf0, q{\[Sd]},
 0xde, q{\[TP]},
 0xfe, q{\[Tp]},
 0xdf, q{\[ss]},
 0xfb00, q{\[ff]},
 0xfb01, q{\[fi]},
 0xfb02, q{\[fl]},
 0xfb03, q{\[Fi]},
 0xfb04, q{\[Fl]},
 0xc6, q{\[AE]},
 0xe6, q{\[ae]},
 0x152, q{\[OE]},
 0x153, q{\[oe]},
 0x131, q{\[.i]},
 0xc1, q{\['A]},
 0xc9, q{\['E]},
 0xcd, q{\['I]},
 0xd3, q{\['O]},
 0xda, q{\['U]},
 0xdd, q{\['Y]},
 0xe1, q{\['a]},
 0xe9, q{\['e]},
 0xed, q{\['i]},
 0xf3, q{\['o]},
 0xfa, q{\['u]},
 0xfd, q{\['y]},
 0xc4, q{\[:A]},
 0xcb, q{\[:E]},
 0xcf, q{\[:I]},
 0xd6, q{\[:O]},
 0xdc, q{\[:U]},
 0x178, q{\[:Y]},
 0xe4, q{\[:a]},
 0xeb, q{\[:e]},
 0xef, q{\[:i]},
 0xf6, q{\[:o]},
 0xfc, q{\[:u]},
 0xff, q{\[:y]},
 0xc2, q{\[^A]},
 0xca, q{\[^E]},
 0xce, q{\[^I]},
 0xd4, q{\[^O]},
 0xdb, q{\[^U]},
 0xe2, q{\[^a]},
 0xea, q{\[^e]},
 0xee, q{\[^i]},
 0xf4, q{\[^o]},
 0xfb, q{\[^u]},
 0xc0, q{\[`A]},
 0xc8, q{\[`E]},
 0xcc, q{\[`I]},
 0xd2, q{\[`O]},
 0xd9, q{\[`U]},
 0xe0, q{\[`a]},
 0xe8, q{\[`e]},
 0xec, q{\[`i]},
 0xf2, q{\[`o]},
 0xf9, q{\[`u]},
 0xc3, q{\[~A]},
 0xd1, q{\[~N]},
 0xd5, q{\[~O]},
 0xe3, q{\[~a]},
 0xf1, q{\[~n]},
 0xf5, q{\[~o]},
 0x107, q{c\[aa]}, # Added manually: plain c followed by an acute accent.
 0x10d, q{c\[ah]}, # Added manually: plain c followed by a caron.
 0x160, q{\[vS]},
 0x161, q{\[vs]},
 0x17d, q{\[vZ]},
 0x17e, q{\[vz]},
 0xc7, q{\[,C]},
 0xe7, q{\[,c]},
 0x141, q{\[/L]},
 0x142, q{\[/l]},
 0x144, q{n\[aa]}, # Added manually: plain n followed by an acute accent.
 0xd8, q{\[/O]},
 0xf8, q{\[/o]},
 0xc5, q{\[oA]},
 0xe5, q{\[oa]},
 0x2dd, q{\[a"]},
 0xaf, q{\[a-]},
 0x2d9, q{\[a.]},
 0xb4, q{\[aa]},
 0x2d8, q{\[ab]},
 0xb8, q{\[ac]},
 0xa8, q{\[ad]},
 0x2c7, q{\[ah]},
 0x2da, q{\[ao]},
 0x2db, q{\[ho]},
 0x201e, q{\[Bq]},
 0x201a, q{\[bq]},
 0x201c, q{\[lq]},
 0x201d, q{\[rq]},
 0x2018, q{\[oq]},
 0x2019, q{\[cq]},
 0xab, q{\[Fo]},
 0xbb, q{\[Fc]},
 0x2039, q{\[fo]},
 0x203a, q{\[fc]},
 0xa1, q{\[r!]},
 0xbf, q{\[r?]},
 0x2014, q{\[em]},
 0x2013, q{\[en]},
 0x2010, q{\[hy]},
 0x2329, q{\[la]},
 0x232a, q{\[ra]},
 0x2190, q{\[<-]},
 0x2192, q{\[->]},
 0x2194, q{\[<>]},
 0x2193, q{\[da]},
 0x2191, q{\[ua]}, # Was keyed on 0x21d1, where \[uA] and \[Do] overrode it.
 0x21d0, q{\[lA]},
 0x21d2, q{\[rA]},
 0x21d4, q{\[hA]},
 0x21d3, q{\[dA]},
 0x21d1, q{\[uA]},
 0x2500, q{\[an]},
 0x2502, q{\[bv]}, # \[br] was also listed for this code point.
 0xa6, q{\[bb]},
 0x25ef, q{\[ci]},
 0x2022, q{\[bu]}, # Was keyed on 0xb7, where \[pc] overrode it.
 0x2021, q{\[dd]},
 0x2020, q{\[dg]},
 0x25ca, q{\[lz]},
 0x25a1, q{\[sq]},
 0xb6, q{\[ps]},
 0xa7, q{\[sc]},
 0x261c, q{\[lh]},
 0x261e, q{\[rh]},
 0x240d, q{\[CR]},
 0xa9, q{\[co]},
 0xae, q{\[rg]},
 0x2122, q{\[tm]},
 0xa2, q{\[ct]},
 0x20ac, q{\[Eu]}, # \[eu] was also listed for this code point.
 0xa5, q{\[Ye]},
 0xa3, q{\[Po]},
 0xa4, q{\[Cs]},
 0x192, q{\[Fn]},
 0xb0, q{\[de]},
 0x2030, q{\[%0]},
 0x2032, q{\[fm]},
 0x2033, q{\[sd]},
 0xb5, q{\[mc]},
 0xaa, q{\[Of]},
 0xba, q{\[Om]},
 0x2227, q{\[AN]},
 0x2228, q{\[OR]},
 0xac, q{\[no]},
 0x2203, q{\[te]},
 0x2200, q{\[fa]},
 0x220b, q{\[st]},
 0x2234, q{\[tf]}, # \[3d] was also listed for this code point.
 0xbd, q{\[12]},
 0xbc, q{\[14]},
 0xbe, q{\[34]},
 0xb9, q{\[S1]},
 0xb2, q{\[S2]},
 0xb3, q{\[S3]},
 0xb1, q{\[t+-]}, # \[+-] was also listed for this code point.
 0xb7, q{\[pc]},
 0x22c5, q{\[md]},
 0xd7, q{\[tmu]}, # \[mu] was also listed for this code point.
 0x2297, q{\[c*]},
 0x2295, q{\[c+]},
 0xf7, q{\[tdi]}, # \[di] was also listed for this code point.
 0x2044, q{\[f/]},
 0x2217, q{\[**]},
 0x2264, q{\[<=]},
 0x2265, q{\[>=]},
 0x2260, q{\[!=]},
 0x2261, q{\[==]},
 0x2245, q{\[=~]},
 0x223c, q{\[ap]}, # \[ti] was also listed for this code point.
 0x2248, q{\[~=]}, # \[~~] was also listed for this code point.
 0x221d, q{\[pt]},
 0x2205, q{\[es]},
 0x2208, q{\[mo]},
 0x2209, q{\[nm]},
 0x2284, q{\[nb]},
 0x2282, q{\[sb]},
 0x2283, q{\[sp]},
 0x2286, q{\[ib]},
 0x2287, q{\[ip]},
 0x2229, q{\[ca]},
 0x222a, q{\[cu]},
 0x2220, q{\[/_]},
 0x22a5, q{\[pp]},
 0x222b, q{\[is]},
 0x2211, q{\[sum]},
 0x220f, q{\[product]},
 0x2207, q{\[gr]},
 0x221a, q{\[sr]},
 0x203e, q{\[rn]},
 0x221e, q{\[if]},
 0x2135, q{\[Ah]},
 0x2111, q{\[Im]},
 0x211c, q{\[Re]},
 0x2118, q{\[wp]},
 0x2202, q{\[pd]},
 0x391, q{\[*A]},
 0x392, q{\[*B]},
 0x39e, q{\[*C]},
 0x394, q{\[*D]},
 0x395, q{\[*E]},
 0x3a6, q{\[*F]},
 0x393, q{\[*G]},
 0x398, q{\[*H]},
 0x399, q{\[*I]},
 0x39a, q{\[*K]},
 0x39b, q{\[*L]},
 0x39c, q{\[*M]},
 0x39d, q{\[*N]},
 0x39f, q{\[*O]},
 0x3a0, q{\[*P]},
 0x3a8, q{\[*Q]},
 0x3a1, q{\[*R]},
 0x3a3, q{\[*S]},
 0x3a4, q{\[*T]},
 0x3a5, q{\[*U]},
 0x3a9, q{\[*W]},
 0x3a7, q{\[*X]},
 0x397, q{\[*Y]},
 0x396, q{\[*Z]},
 0x3b1, q{\[*a]},
 0x3b2, q{\[*b]},
 0x3be, q{\[*c]},
 0x3b4, q{\[*d]},
 0x3b5, q{\[*e]},
 0x3c6, q{\[*f]},
 0x3d5, q{\[+f]},
 0x3b3, q{\[*g]},
 0x3b8, q{\[*h]},
 0x3d1, q{\[+h]},
 0x3b9, q{\[*i]},
 0x3ba, q{\[*k]},
 0x3bb, q{\[*l]},
 0x3bc, q{\[*m]},
 0x3bd, q{\[*n]},
 0x3bf, q{\[*o]},
 0x3c0, q{\[*p]},
 0x3d6, q{\[+p]},
 0x3c8, q{\[*q]},
 0x3c1, q{\[*r]},
 0x3c3, q{\[*s]},
 0x3c4, q{\[*t]},
 0x3c5, q{\[*u]},
 0x3c9, q{\[*w]},
 0x3c7, q{\[*x]},
 0x3b7, q{\[*y]},
 0x3b6, q{\[*z]},
 0x3c2, q{\[ts]},
 0x2663, q{\[CL]},
 0x2660, q{\[SP]},
 0x2665, q{\[HE]},
 0x2666, q{\[DI]},
);
299
300
#while(<>) {
301
#  s/([^ -~])/(ord($1) < 128 ? $1 : defined($map{$1}) ? $map{$1} : sprintf("\\u%4x", $1))/ge;
302
#  print;
303
#}
304
#exit 0;
305
306
# Copy stdin to stdout one character at a time.  Characters below 128 pass
# through untouched; every other character is replaced by the escape stored
# for its code point in %map.  A character with no entry in %map aborts the
# run with a message naming the code point (in hex) and the character itself.
my $ch;
while (defined($ch = getc(STDIN))) {
	my $code = ord($ch);

	# ASCII needs no translation.
	if ($code < 128) {
		print $ch;
		next;
	}

	my $escape = $map{$code};
	die "Untranslatable character \\u" . sprintf("%X", $code) . " / `$ch'"
		unless defined($escape);
	print $escape;
}