4
* Copyright © 2004 Keith Packard
6
* Permission to use, copy, modify, distribute, and sell this software and its
7
* documentation for any purpose is hereby granted without fee, provided that
8
* the above copyright notice appear in all copies and that both that
9
* copyright notice and this permission notice appear in supporting
10
* documentation, and that the name of Keith Packard not be used in
11
* advertising or publicity pertaining to distribution of the software without
12
* specific, written prior permission. Keith Packard makes no
13
* representations about the suitability of this software for any purpose. It
14
* is provided "as is" without express or implied warranty.
16
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22
* PERFORMANCE OF THIS SOFTWARE.
31
/*
 * Class of a case folding entry.  The caseFoldClassMap table in this file
 * maps the tags "C", "F", "S" and "T" to these values, in this order.
 */
typedef enum _caseFoldClass {
    CaseFoldCommon,
    CaseFoldFull,
    CaseFoldSimple,
    CaseFoldTurkic
} CaseFoldClass;
33
/*
 * One row of the class lookup table.
 * NOTE(review): the member declarations are not visible in this excerpt;
 * later code reads members called name and class from the table rows.
 */
typedef struct _caseFoldClassMap {
38
/*
 * Lookup table from the one-letter class tag to its CaseFoldClass value.
 * The lookup loop later in this file stops at a row whose name is null, so
 * the table presumably ends with such a sentinel row (not visible here).
 */
static const CaseFoldClassMap caseFoldClassMap[] = {
39
{ "C", CaseFoldCommon },
40
{ "F", CaseFoldFull },
41
{ "S", CaseFoldSimple },
42
{ "T", CaseFoldTurkic },
46
/*
 * One parsed input entry.
 * NOTE(review): only the lower array is visible in this excerpt; other code
 * in this file also uses members called upper, class and nout.
 */
typedef struct _caseFoldRaw {
50
/* Result code points of the entry; the parser stores at most MAX_OUT. */
FcChar32 lower[MAX_OUT];
54
/*
 * Report a fatal condition on stderr, prefixed with the tool name.
 * NOTE(review): the remainder of the body is not visible in this excerpt;
 * callers continue as if this never returns, so confirm that it terminates
 * the process.
 */
panic (const char *reason)
56
fprintf (stderr, "fc-case: panic %s\n", reason);
61
/* Heap array of fold records; nfolds is used below as its element count. */
static FcCaseFold *folds;
68
/*
 * NOTE(review): the statements below look like the body of the helper that
 * appends one record; its header and the branch conditions are not visible
 * in this excerpt.  Presumably realloc is used once the array exists and
 * malloc for the very first record.
 */
/* Grow the array by exactly one element. */
folds = realloc (folds, (nfolds + 1) * sizeof (FcCaseFold));
70
/* Allocate room for a single record. */
folds = malloc (sizeof (FcCaseFold));
72
panic ("out of memory");
73
/* Hand back the slot at index nfolds, then count it as used. */
return &folds[nfolds++];
77
/*
 * Encode the code point ucs4 as UTF-8 bytes written through dest.
 * NOTE(review): the declarations and the return statement are not visible
 * in this excerpt; callers use the result as a byte length.
 */
ucs4_to_utf8 (FcChar32 ucs4,
78
FcChar8 dest[FC_UTF8_MAX_LEN])
83
/*
 * Write the lead byte and set bits to the shift of the first continuation
 * byte.  Values below 0x80 are stored as is and bits starts negative, so
 * the continuation loop below does not run for them.
 */
if (ucs4 < 0x80) { *d++= ucs4; bits= -6; }
84
else if (ucs4 < 0x800) { *d++= ((ucs4 >> 6) & 0x1F) | 0xC0; bits= 0; }
85
else if (ucs4 < 0x10000) { *d++= ((ucs4 >> 12) & 0x0F) | 0xE0; bits= 6; }
86
else if (ucs4 < 0x200000) { *d++= ((ucs4 >> 18) & 0x07) | 0xF0; bits= 12; }
87
else if (ucs4 < 0x4000000) { *d++= ((ucs4 >> 24) & 0x03) | 0xF8; bits= 18; }
88
else if (ucs4 < 0x80000000) { *d++= ((ucs4 >> 30) & 0x01) | 0xFC; bits= 24; }
91
/* Write the continuation bytes: six payload bits each, highest group first. */
for ( ; bits >= 0; bits-= 6) {
92
*d++= ((ucs4 >> bits) & 0x3F) | 0x80;
98
/*
 * Return whatever ucs4_to_utf8 returns for ucs4, encoding into a scratch
 * buffer that is then discarded.  Callers in this file subtract these
 * results from each other to measure growth in encoded length.
 */
utf8_size (FcChar32 ucs4)
100
/* Scratch output; only the return value of the encoder is kept. */
FcChar8 utf8[FC_UTF8_MAX_LEN];
101
return ucs4_to_utf8 (ucs4, utf8 );
104
/* Heap buffer of UTF-8 bytes; filled by the append code below. */
static FcChar8 *foldChars;
105
/* Index at which the next byte is written into foldChars. */
static int nfoldChars;
106
/* Largest count recorded by main for an FC_CASE_FOLD_FULL record. */
static int maxFoldChars;
107
/* Both bounds are assigned from the upper value of entries read in main. */
static FcChar32 minFoldChar;
108
static FcChar32 maxFoldChar;
113
/*
 * NOTE(review): the header of this function is not visible in this excerpt;
 * it is presumably the addChar helper that main calls.  The visible part
 * encodes the code point c as UTF-8 and copies the bytes to the end of
 * foldChars.
 */
FcChar8 utf8[FC_UTF8_MAX_LEN];
117
len = ucs4_to_utf8 (c, utf8);
119
/* Make room for len more bytes after the nfoldChars already stored. */
foldChars = realloc (foldChars, (nfoldChars + len) * sizeof (FcChar8));
121
/* Allocation sized for len bytes only; presumably the first-use branch. */
foldChars = malloc (sizeof (FcChar8) * len);
123
panic ("out of memory");
124
/* Copy the encoded bytes in behind the existing contents. */
for (i = 0; i < len; i++)
125
foldChars[nfoldChars + i] = utf8[i];
130
/*
 * Test whether the entry raw continues the run described by fold.  main
 * widens fold->count when the result is non-zero.
 * NOTE(review): the statements guarded by each check and the return values
 * are not visible in this excerpt; presumably a failed check rejects the
 * extension.
 */
foldExtends (FcCaseFold *fold, CaseFoldRaw *raw)
132
switch (fold->method) {
133
case FC_CASE_FOLD_RANGE:
/* The distance from upper to its first result must equal the run offset. */
134
if ((short) (raw->lower[0] - raw->upper) != fold->offset)
136
/* The entry must sit directly after the last code point of the run. */
if (raw->upper != fold->upper + fold->count)
139
case FC_CASE_FOLD_EVEN_ODD:
/* Each entry in such a run maps to the code point right after it. */
140
if ((short) (raw->lower[0] - raw->upper) != 1)
142
/* Compared with the range case, the expected position is one further on. */
if (raw->upper != fold->upper + fold->count + 1)
145
case FC_CASE_FOLD_FULL:
152
/*
 * Map a fold method constant to the text printed for it in the generated
 * table.  The known names carry a trailing comma because the table row
 * format prints this string through a padded field and adds no comma of
 * its own after it; the fallback string has no comma.
 * NOTE(review): the switch header is not visible in this excerpt.
 */
case_fold_method_name (FcChar16 method)
155
case FC_CASE_FOLD_RANGE: return "FC_CASE_FOLD_RANGE,";
156
case FC_CASE_FOLD_EVEN_ODD: return "FC_CASE_FOLD_EVEN_ODD,";
157
case FC_CASE_FOLD_FULL: return "FC_CASE_FOLD_FULL,";
158
default: return "unknown";
167
/*
 * NOTE(review): the header of the enclosing function is not visible in this
 * excerpt.  The visible statements print the generated C text to stdout:
 * size and limit macros first, then the fold record table, then the byte
 * table.
 */
/* Macros carrying the collected counts and limits. */
printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds);
168
printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars);
169
printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars);
170
printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand);
171
printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar);
172
printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar);
178
/* One initializer row per fold record: upper, method name, count, offset. */
printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n");
179
for (i = 0; i < nfolds; i++)
181
printf (" { 0x%08x, %-22s 0x%04x, %6d },\n",
182
folds[i].upper, case_fold_method_name (folds[i].method),
183
folds[i].count, folds[i].offset);
188
* Dump out "other" values
191
/* Every byte of foldChars as a two-digit hex literal. */
printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n");
192
for (i = 0; i < nfoldChars; i++)
194
printf ("0x%02x", foldChars[i]);
195
/* True for every byte except the last; presumably guards a separator. */
if (i != nfoldChars - 1)
197
/* True on every sixteenth byte; presumably guards a line break. */
if ((i & 0xf) == 0xf)
207
* Read the standard Unicode CaseFolding.txt file
212
/*
 * Parse one text line into raw.  The line is split in place with strtok, so
 * its contents are modified.  Fields are read in order: the upper code
 * point in hex, the class tag, then up to MAX_OUT result code points in
 * hex.
 * NOTE(review): the statements guarded by the checks below and the return
 * values are not visible in this excerpt.
 */
parseRaw (char *line, CaseFoldRaw *raw)
217
/*
 * Test whether the line starts with a hex digit.
 * NOTE(review): isxdigit receives a plain char here; where char is signed a
 * byte above 0x7f becomes a negative argument.  Consider a cast to
 * unsigned char.
 */
if (!isxdigit (line[0]))
220
* Get upper case value
222
tok = strtok (line, SEP);
223
/* A missing field or one starting with the hash sign ends the data. */
if (!tok || tok[0] == '#')
225
raw->upper = strtol (tok, &end, 16);
231
/* Second field: the class tag. */
tok = strtok (NULL, SEP);
232
if (!tok || tok[0] == '#')
234
/* Search the table until a tag matches or the null-name row is reached. */
for (i = 0; caseFoldClassMap[i].name; i++)
235
if (!strcmp (tok, caseFoldClassMap[i].name))
237
raw->class = caseFoldClassMap[i].class;
240
/* Tests for the null-name row, which the search reaches when no tag matches. */
if (!caseFoldClassMap[i].name)
244
* Get list of result characters
246
for (i = 0; i < MAX_OUT; i++)
248
tok = strtok (NULL, SEP);
249
if (!tok || tok[0] == '#')
251
raw->lower[i] = strtol (tok, &end, 16);
262
/*
 * Read text lines from in and hand them to parseRaw.  main loops while this
 * returns non-zero.
 * NOTE(review): the loop structure and the return statements are not
 * visible in this excerpt.
 */
caseFoldReadRaw (FILE *in, CaseFoldRaw *raw)
268
/*
 * NOTE(review): fgets already reserves one byte for the terminator, so the
 * minus one only shortens the usable buffer by a byte.
 */
if (!fgets (line, sizeof (line) - 1, in))
270
if (parseRaw (line, raw))
276
/*
 * Program entry.  Reads the case folding file named by argv[1], builds the
 * fold records, then copies stdin to stdout around a marker line.
 * NOTE(review): many declarations, conditions and braces of this function
 * are not visible in this excerpt; the comments below describe only the
 * visible statements.
 */
main (int argc, char **argv)
278
/* Record currently being extended; null until one has been created. */
FcCaseFold *fold = 0;
286
panic ("usage: fc-case CaseFolding.txt");
287
caseFile = fopen (argv[1], "r");
289
panic ("can't open case folding file");
291
/* One iteration per entry delivered by caseFoldReadRaw. */
while (caseFoldReadRaw (caseFile, &raw))
294
/* Track the bounds of the upper values; the guarding conditions are not visible. */
minFoldChar = raw.upper;
295
maxFoldChar = raw.upper;
301
/* Entry continues the current run: widen the run to include it. */
if (fold && foldExtends (fold, &raw))
302
fold->count = raw.upper - fold->upper + 1;
306
/* Start of a record whose offset is the distance to the first result. */
fold->upper = raw.upper;
307
fold->offset = raw.lower[0] - raw.upper;
308
/* Offset one selects the even/odd method; the other assignment is presumably the else branch. */
if (fold->offset == 1)
309
fold->method = FC_CASE_FOLD_EVEN_ODD;
311
fold->method = FC_CASE_FOLD_RANGE;
314
/* Growth in encoded length from the upper value to its first result. */
expand = utf8_size (raw.lower[0]) - utf8_size(raw.upper);
319
/* Full record: offset is the start index in foldChars, count the bytes added. */
fold->upper = raw.upper;
320
fold->method = FC_CASE_FOLD_FULL;
321
fold->offset = nfoldChars;
322
for (i = 0; i < raw.nout; i++)
323
addChar (raw.lower[i]);
324
fold->count = nfoldChars - fold->offset;
325
if (fold->count > maxFoldChars)
326
maxFoldChars = fold->count;
327
expand = fold->count - utf8_size (raw.upper);
329
/* Compare against the largest growth seen; the update itself is not visible. */
if (expand > maxExpand)
339
* Scan the input until the marker is found
342
while (fgets (line, sizeof (line), stdin))
344
/* The marker is a line that begins with three at signs. */
if (!strncmp (line, "@@@", 3))
346
fputs (line, stdout);
355
* And flush out the rest of the input file
358
while (fgets (line, sizeof (line), stdin))
359
fputs (line, stdout);
362
/* Exit status is non-zero when stdout has its error indicator set. */
exit (ferror (stdout));