5
/*
 * Allocate room for n objects of `type` with GC_MALLOC and cast the result
 * to `type *`.
 * NOTE(review): the macro itself neither checks (n)*sizeof(type) for
 * overflow nor tests the returned pointer; whether callers need either
 * depends on GC_MALLOC's contract -- confirm.
 */
#define New_N(type,n) ((type*)GC_MALLOC((n)*sizeof(type)))
9
/*
 * File-level locale hint, initialised to 0.  wc_locale_to_ces() assigns it
 * one of the WC_LOCALE_* values; the WC_LOCALE_* case labels in
 * wc_charset_to_ces() presumably switch on it (the switch head is not
 * visible in this excerpt -- TODO confirm).
 */
wc_locale WcLocale = 0;
14
} lang_ces_table[] = {
/*
 * Language-to-encoding fallback table.  wc_locale_to_ces() walks it in
 * order, compares only the first two characters of the lowercased locale
 * against .lang, and returns .ces of the first match.  The walk stops at
 * an entry whose .lang is NULL; the struct header and that terminating
 * entry are outside this excerpt.
 */
15
{ "cs", WC_CES_ISO_8859_2 }, /* cs_CZ */
16
{ "el", WC_CES_ISO_8859_7 }, /* el_GR */
17
{ "iw", WC_CES_ISO_8859_8 }, /* iw_IL */
18
{ "ja", WC_CES_EUC_JP }, /* ja_JP */
19
{ "ko", WC_CES_EUC_KR }, /* ko_KR */
20
{ "hu", WC_CES_ISO_8859_2 }, /* hu_HU */
21
{ "pl", WC_CES_ISO_8859_2 }, /* pl_PL */
22
{ "ro", WC_CES_ISO_8859_2 }, /* ro_RO */
23
{ "ru", WC_CES_ISO_8859_5 }, /* ru_SU */
24
{ "sk", WC_CES_ISO_8859_2 }, /* sk_SK */
25
{ "sl", WC_CES_ISO_8859_2 }, /* sl_CS */
26
{ "tr", WC_CES_ISO_8859_9 }, /* tr_TR */
27
{ "zh", WC_CES_EUC_CN }, /* zh_CN */
32
/*
 * Resolve a charset name to a wc_ces, keeping the caller's value as the
 * fallback.
 *
 * charset: charset name to look up; may be NULL or empty.
 * orig:    value returned when the lookup yields 0.
 *
 * Only some lines of this function are visible in this excerpt.
 */
wc_guess_charset(char *charset, wc_ces orig)
36
/* NULL/empty guard; the guarded statement is not visible here
 * (presumably `return orig;` -- TODO confirm). */
if (charset == NULL || *charset == '\0')
38
guess = wc_charset_to_ces(charset);
39
/* A zero result from the lookup means "unknown": fall back to orig. */
return guess ? guess : orig;
43
/*
 * Same contract as a plain charset lookup with fallback, but the name is
 * resolved through wc_charset_short_to_ces().
 *
 * charset: charset name (or short alias) to look up; may be NULL or empty.
 * orig:    value returned when the lookup yields 0.
 *
 * Only some lines of this function are visible in this excerpt.
 */
wc_guess_charset_short(char *charset, wc_ces orig)
47
/* NULL/empty guard; the guarded statement is not visible here
 * (presumably `return orig;` -- TODO confirm). */
if (charset == NULL || *charset == '\0')
49
guess = wc_charset_short_to_ces(charset);
50
/* A zero result from the lookup means "unknown": fall back to orig. */
return guess ? guess : orig;
54
/*
 * Resolve a locale string to a wc_ces through wc_locale_to_ces(), keeping
 * the caller's value as the fallback.
 *
 * locale: locale string to look up; may be NULL or empty.
 * orig:   value returned when the lookup yields 0.
 *
 * Only some lines of this function are visible in this excerpt.
 */
wc_guess_locale_charset(char *locale, wc_ces orig)
58
/* NULL/empty guard; the guarded statement is not visible here
 * (presumably `return orig;` -- TODO confirm). */
if (locale == NULL || *locale == '\0')
60
guess = wc_locale_to_ces(locale);
61
/* A zero result from the lookup means "unknown": fall back to orig. */
return guess ? guess : orig;
65
/*
 * Map a charset name to a wc_ces by prefix-matching a normalised copy of
 * the name against known spellings.
 *
 * charset: the name to resolve.
 *
 * Only part of this function is visible in this excerpt: declarations,
 * switch heads, several return statements and the final fallback are
 * missing, so the comments below describe the visible lines only.
 */
wc_charset_to_ces(char *charset)
71
/* Detect a leading "x-" (the 'x' case-insensitively); the action taken is
 * not visible (presumably the prefix is skipped -- TODO confirm). */
if (tolower(*p) == 'x' && *(p+1) == '-')
73
/*
 * Normalise the name: keep at most 15 characters, lowercased, dropping
 * bytes <= 0x20 as well as '_' and '-'.
 * NOTE(review): the range test casts *p to unsigned char but the tolower()
 * calls do not.  If p is a plain char pointer, bytes >= 0x80 can reach
 * tolower() as negative values, which <ctype.h> does not define --
 * confirm and cast.
 */
for (n = 0; *p && n < 15; p++) {
74
if ((unsigned char)*p > 0x20 && *p != '_' && *p != '-')
75
buf[n++] = tolower(*p);
81
/* --- "euc..." names --- */
if (! strncmp(p, "euc", 3)) {
84
/* Variant chosen by a single letter (switch expression not visible). */
case 'j': return WC_CES_EUC_JP;
85
case 'c': return WC_CES_EUC_CN;
86
case 't': return WC_CES_EUC_TW;
87
case 'k': return WC_CES_EUC_KR;
90
/* Variant chosen from a WC_LOCALE_* value, presumably WcLocale, when the
 * name itself did not decide -- TODO confirm.  Note ZH_HK maps to EUC-CN. */
case WC_LOCALE_JA_JP: return WC_CES_EUC_JP;
91
case WC_LOCALE_ZH_CN: return WC_CES_EUC_CN;
92
case WC_LOCALE_ZH_TW: return WC_CES_EUC_TW;
93
case WC_LOCALE_ZH_HK: return WC_CES_EUC_CN;
94
case WC_LOCALE_KO_KR: return WC_CES_EUC_KR;
100
/* --- "iso2022..." names --- */
if (! strncmp(p, "iso2022", 7)) {
104
/* "jp2"/"jp3" select the extended Japanese variants; otherwise plain
 * ISO-2022-JP. */
if (! strncmp(p, "jp2", 3))
105
return WC_CES_ISO_2022_JP_2;
106
if (! strncmp(p, "jp3", 3))
107
return WC_CES_ISO_2022_JP_3;
108
return WC_CES_ISO_2022_JP;
109
case 'c': return WC_CES_ISO_2022_CN;
110
case 'k': return WC_CES_ISO_2022_KR;
112
return WC_CES_ISO_2022_JP;
113
/* --- "iso8859..." names --- */
} else if (! strncmp(p, "iso8859", 7)) {
115
/* n is reused here as the part number (how it is computed is not visible).
 * Parts 1..16 except 12 are OR-ed into WC_CES_E_ISO_8859; anything else
 * falls back to ISO-8859-1. */
if (n >= 1 && n <= 16 && n != 12)
116
return (WC_CES_E_ISO_8859 | n);
117
return WC_CES_ISO_8859_1;
121
if (! strncmp(p, "johab", 5))
123
if (! strncmp(p, "jis", 3))
124
return WC_CES_ISO_2022_JP;
127
/* All tests are prefix matches, so the longer "...x0213" spellings must be
 * tested before the plain Shift_JIS ones. */
if (! strncmp(p, "shiftjisx0213", 13) ||
128
! strncmp(p, "sjisx0213", 9))
129
return WC_CES_SHIFT_JISX0213;
130
if (! strncmp(p, "shiftjis", 8) ||
131
! strncmp(p, "sjis", 4))
132
return WC_CES_SHIFT_JIS;
135
/* Same ordering constraint: "gbk2k" must be tested before "gbk". */
if (! strncmp(p, "gb18030", 7) ||
136
! strncmp(p, "gbk2k", 5))
137
return WC_CES_GB18030;
138
if (! strncmp(p, "gbk", 3))
140
if (! strncmp(p, "gb2312", 6))
141
return WC_CES_EUC_CN;
144
/* Same ordering constraint: "big5hkscs" must be tested before "big5". */
if (! strncmp(p, "big5hkscs", 9))
146
if (! strncmp(p, "big5", 4))
150
if (! strncmp(p, "hz", 2))
151
return WC_CES_HZ_GB_2312;
152
if (! strncmp(p, "hkscs", 5))
156
if (! strncmp(p, "koi8r", 5))
157
return WC_CES_KOI8_R;
158
if (! strncmp(p, "koi8u", 5))
159
return WC_CES_KOI8_U;
160
/* Both Korean standard names resolve to EUC-KR. */
if (! strncmp(p, "ksx1001", 7))
161
return WC_CES_EUC_KR;
162
if (! strncmp(p, "ksc5601", 7))
163
return WC_CES_EUC_KR;
166
if (! strncmp(p, "tis620", 6))
167
return WC_CES_TIS_620;
168
if (! strncmp(p, "tcvn", 4))
169
return WC_CES_TCVN_5712;
172
if (! strncmp(p, "next", 4))
173
return WC_CES_NEXTSTEP;
176
/* "viet..." names; the lines between this test and the nested "tcvn"
 * test are not visible. */
if (! strncmp(p, "viet", 4)) {
178
if (! strncmp(p, "tcvn", 4))
179
return WC_CES_TCVN_5712;
181
if (! strncmp(p, "viscii", 6))
182
return WC_CES_VISCII_11;
183
if (! strncmp(p, "vps", 3))
188
if (! strncmp(p, "utf8", 4))
190
if (! strncmp(p, "utf7", 4))
193
if (! strncmp(p, "uhc", 3))
195
if (! strncmp(p, "ujis", 4))
196
return WC_CES_EUC_JP;
197
if (! strncmp(p, "usascii", 7))
198
return WC_CES_US_ASCII;
201
if (! strncmp(p, "ascii", 5))
202
return WC_CES_US_ASCII;
205
if (! strncmp(p, "cngb", 4))
206
return WC_CES_EUC_CN;
211
/* Numeric code-page dispatch (switch expression not visible). */
case 437: return WC_CES_CP437;
212
case 737: return WC_CES_CP737;
213
case 775: return WC_CES_CP775;
214
case 850: return WC_CES_CP850;
215
case 852: return WC_CES_CP852;
216
case 855: return WC_CES_CP855;
217
case 856: return WC_CES_CP856;
218
case 857: return WC_CES_CP857;
219
case 860: return WC_CES_CP860;
220
case 861: return WC_CES_CP861;
221
case 862: return WC_CES_CP862;
222
case 863: return WC_CES_CP863;
223
case 864: return WC_CES_CP864;
224
case 865: return WC_CES_CP865;
225
case 866: return WC_CES_CP866;
226
case 869: return WC_CES_CP869;
227
case 874: return WC_CES_CP874;
228
case 932: return WC_CES_CP932; /* CP932 = Shift_JIS */
229
case 936: return WC_CES_CP936; /* CP936 = GBK > EUC_CN */
230
case 949: return WC_CES_CP949; /* CP949 = UHC > EUC_KR */
231
case 950: return WC_CES_CP950; /* CP950 = Big5 */
232
case 1006: return WC_CES_CP1006;
233
case 1250: return WC_CES_CP1250;
234
case 1251: return WC_CES_CP1251;
235
case 1252: return WC_CES_CP1252;
236
case 1253: return WC_CES_CP1253;
237
case 1254: return WC_CES_CP1254;
238
case 1255: return WC_CES_CP1255;
239
case 1256: return WC_CES_CP1256;
240
case 1257: return WC_CES_CP1257;
241
case 1258: return WC_CES_CP1258;
245
/* Unlike the tests above, this strncmp is NOT negated: the guarded
 * statement (not visible) runs when the name does not start with
 * "windows" -- presumably an early exit; TODO confirm. */
if (strncmp(p, "windows", 7))
247
if (! strncmp(p, "31j", 3))
251
/* Second numeric dispatch, limited to 1250..1258 (switch expression not
 * visible). */
case 1250: return WC_CES_CP1250;
252
case 1251: return WC_CES_CP1251;
253
case 1252: return WC_CES_CP1252;
254
case 1253: return WC_CES_CP1253;
255
case 1254: return WC_CES_CP1254;
256
case 1255: return WC_CES_CP1255;
257
case 1256: return WC_CES_CP1256;
258
case 1257: return WC_CES_CP1257;
259
case 1258: return WC_CES_CP1258;
267
/*
 * Resolve a charset name or short alias to a wc_ces.
 *
 * charset: the name to resolve.
 *
 * The full-name lookup wc_charset_to_ces() is tried first; how its result
 * is used is not visible in this excerpt (presumably a non-zero result is
 * returned directly -- TODO confirm).  The tests and switch heads that
 * select each return below are also missing, so only the set of possible
 * results can be read from here.
 */
wc_charset_short_to_ces(char *charset)
274
ces = wc_charset_to_ces(charset);
278
/*
 * Normalise the name: keep at most 15 characters, lowercased, dropping
 * bytes <= 0x20 as well as '_' and '-'.
 * NOTE(review): tolower() receives *p without the unsigned char cast used
 * in the range test; if p is a plain char pointer, bytes >= 0x80 may be
 * passed as negative values -- confirm and cast.
 */
for (n = 0; *p && n < 15; p++) {
279
if ((unsigned char)*p > 0x20 && *p != '_' && *p != '-')
280
buf[n++] = tolower(*p);
287
/* EUC variant chosen by a single letter (switch expression not visible);
 * the unconditional EUC-JP return after the cases looks like the default. */
case 'j': return WC_CES_EUC_JP;
288
case 'c': return WC_CES_EUC_CN;
289
case 't': return WC_CES_EUC_TW;
290
case 'k': return WC_CES_EUC_KR;
292
return WC_CES_EUC_JP;
300
/* The conditions guarding the following returns are not visible. */
return WC_CES_ISO_2022_JP_2;
302
return WC_CES_ISO_2022_JP_3;
303
return WC_CES_ISO_2022_JP;
305
return WC_CES_SHIFT_JIS;
307
return WC_CES_EUC_CN;
313
return WC_CES_HZ_GB_2312;
316
return WC_CES_KOI8_R;
317
return WC_CES_ISO_2022_KR;
320
/* n is reused as an ISO-8859 part number (its computation is not visible).
 * Parts 1..16 except 12 are OR-ed into WC_CES_E_ISO_8859; anything else
 * falls back to ISO-8859-1. */
if (n >= 1 && n <= 16 && n != 12)
321
return (WC_CES_E_ISO_8859 | n);
322
return WC_CES_ISO_8859_1;
325
return WC_CES_TCVN_5712;
326
return WC_CES_TIS_620;
328
return WC_CES_NEXTSTEP;
332
return WC_CES_VISCII_11;
340
return WC_CES_US_ASCII;
342
return WC_CES_ISO_2022_CN;
346
/* Numeric dispatch for 1250..1258 (switch expression not visible). */
case 1250: return WC_CES_CP1250;
347
case 1251: return WC_CES_CP1251;
348
case 1252: return WC_CES_CP1252;
349
case 1253: return WC_CES_CP1253;
350
case 1254: return WC_CES_CP1254;
351
case 1255: return WC_CES_CP1255;
352
case 1256: return WC_CES_CP1256;
353
case 1257: return WC_CES_CP1257;
354
case 1258: return WC_CES_CP1258;
364
/*
 * Derive a wc_ces from a locale string of the form "name" or
 * "name.codeset".
 *
 * locale: the locale string; dereferenced without a NULL check in the
 *         visible lines, so callers must pass a non-NULL pointer.
 *
 * Side effect: may assign the global WcLocale.
 * Several lines (declarations, the '.' handling, switch heads) are not
 * visible in this excerpt.
 */
wc_locale_to_ces(char *locale)
370
/* The bare "C" locale maps to US-ASCII. */
if (*p == 'C' && *(p+1) == '\0')
371
return WC_CES_US_ASCII;
372
/*
 * Copy the part before any '.' into buf: at most 5 characters, lowercased,
 * skipping bytes <= 0x20.
 * NOTE(review): tolower() receives *p without the unsigned char cast used
 * in the range test -- confirm and cast if p is a plain char pointer.
 */
for (n = 0; *p && *p != '.' && n < 5; p++) {
373
if ((unsigned char)*p > 0x20)
374
buf[n++] = tolower(*p);
379
/* A codeset equal to "euc" (case-insensitive, whole string) is ambiguous,
 * so WcLocale is set from the language part; the selecting switch/tests
 * for JA_JP and KO_KR are not visible. */
if (! strcasecmp(p, "euc")) {
382
WcLocale = WC_LOCALE_JA_JP;
385
WcLocale = WC_LOCALE_KO_KR;
388
/* Chinese: zh_tw and zh_hk are distinguished; the remaining assignment
 * (ZH_CN) presumably sits in the else branch -- TODO confirm. */
if (!strcmp(buf, "zh_tw"))
389
WcLocale = WC_LOCALE_ZH_TW;
390
else if (!strcmp(buf, "zh_hk"))
391
WcLocale = WC_LOCALE_ZH_HK;
393
WcLocale = WC_LOCALE_ZH_CN;
400
/* p presumably points at the codeset after the '.' here -- TODO confirm. */
return wc_charset_to_ces(p);
403
/* NOTE(review): the copy loop above stops after 5 characters, so if buf is
 * filled only by that loop this 8-character comparison can never
 * succeed -- confirm against the buffer handling not visible here. */
if (!strcmp(buf, "japanese"))
404
return WC_CES_SHIFT_JIS;
405
/* The result for zh_tw / zh_hk is not visible in this excerpt. */
if (!strcmp(buf, "zh_tw") ||
406
!strcmp(buf, "zh_hk"))
408
/* Fallback by language: first table entry whose .lang matches the first
 * two characters of buf. */
for (n = 0; lang_ces_table[n].lang; n++) {
409
if (!strncmp(buf, lang_ces_table[n].lang, 2))
410
return lang_ces_table[n].ces;
412
/* No table entry matched. */
return WC_CES_ISO_8859_1;
416
/*
 * Return the charset name stored in WcCesInfo for `ces`.
 *
 * WC_CES_WTF is special-cased; the value returned for it is not visible in
 * this excerpt.
 * NOTE(review): no range check on WC_CES_INDEX(ces) is visible before the
 * table access (compare wc_check_ces) -- confirm callers pass valid values.
 */
wc_ces_to_charset(wc_ces ces)
418
if (ces == WC_CES_WTF)
420
return WcCesInfo[WC_CES_INDEX(ces)].name;
424
/*
 * Return a human-readable description for `ces`: the fixed string
 * "W3M Transfer Format" for WC_CES_WTF, otherwise the desc field of the
 * matching WcCesInfo entry.
 * NOTE(review): no range check on WC_CES_INDEX(ces) is visible before the
 * table access (compare wc_check_ces) -- confirm callers pass valid values.
 */
wc_ces_to_charset_desc(wc_ces ces)
426
if (ces == WC_CES_WTF)
427
return "W3M Transfer Format";
428
return WcCesInfo[WC_CES_INDEX(ces)].desc;
432
/*
 * Map certain encodings to a substitute encoding:
 *   ISO-2022-JP / -JP-2 / -JP-3  -> EUC-JP
 *   ISO-2022-KR                  -> EUC-KR
 *   ISO-2022-CN, HZ-GB-2312      -> EUC-CN
 *   US-ASCII                     -> ISO-8859-1
 * The switch head and the result for any other value are not visible in
 * this excerpt (presumably `orig` is returned unchanged -- TODO confirm).
 */
wc_guess_8bit_charset(wc_ces orig)
435
case WC_CES_ISO_2022_JP:
436
case WC_CES_ISO_2022_JP_2:
437
case WC_CES_ISO_2022_JP_3:
438
return WC_CES_EUC_JP;
439
case WC_CES_ISO_2022_KR:
440
return WC_CES_EUC_KR;
441
case WC_CES_ISO_2022_CN:
442
case WC_CES_HZ_GB_2312:
443
return WC_CES_EUC_CN;
444
case WC_CES_US_ASCII:
445
return WC_CES_ISO_8859_1;
451
/*
 * Report whether `ces` is a known encoding: non-zero when its table index
 * is at most WC_CES_END and the WcCesInfo entry at that index carries the
 * same id, zero otherwise.
 */
wc_check_ces(wc_ces ces)
453
size_t i = WC_CES_INDEX(ces);
455
/* The bound is tested first so the table is never indexed past
 * WC_CES_END. */
return (i <= WC_CES_END && WcCesInfo[i].id == ces);
459
/*
 * qsort() comparator for wc_ces_list entries: orders them by their desc
 * string, ignoring case (strcasecmp).
 * NOTE(review): the casts drop the const qualifier of a and b; the
 * pointed-to entries are only read here.
 */
wc_ces_list_cmp(const void *a, const void *b)
461
return strcasecmp(((wc_ces_list *)a)->desc, ((wc_ces_list *)b)->desc);
464
/* List built by wc_get_ces_list(); NULL until then.  Whether the function
 * reuses an already-built list is not visible in this excerpt (presumably
 * it does -- TODO confirm). */
static wc_ces_list *list = NULL;
467
/*
 * Build a list of the WcCesInfo entries that have a name, sorted by
 * description (case-insensitive, via wc_ces_list_cmp).
 *
 * Declarations, the counter increments and the return statement are not
 * visible in this excerpt.
 */
wc_get_ces_list(void)
474
/* First pass over WcCesInfo (terminated by an entry with id 0): looks at
 * entries with a non-NULL name; the counting statement is not visible. */
for (info = WcCesInfo, n = 0; info->id; info++) {
475
if (info->name != NULL)
478
/* One slot more than n is allocated, presumably for a terminating
 * entry -- TODO confirm. */
list = New_N(wc_ces_list, n + 1);
479
/* Second pass: copy id, name and desc of every named entry. */
for (info = WcCesInfo, n = 0; info->id; info++) {
480
if (info->name != NULL) {
481
list[n].id = info->id;
482
list[n].name = info->name;
483
list[n].desc = info->desc;
490
/* Only the first n elements are sorted; the extra slot stays in place. */
qsort(list, n, sizeof(wc_ces_list), wc_ces_list_cmp);