111
124
cp = charset_aliases;
114
#if !(defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
127
#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
117
129
const char *base = "charset.alias";
120
132
/* Make it possible to override the charset.alias location. This is
121
necessary for running the testsuite before "make install". */
133
necessary for running the testsuite before "make install". */
122
134
dir = getenv ("CHARSETALIASDIR");
123
135
if (dir == NULL || dir[0] == '\0')
124
dir = relocate (LIBDIR);
136
dir = relocate (LIBDIR);
126
138
/* Concatenate dir and base into freshly allocated file_name. */
128
size_t dir_len = strlen (dir);
129
size_t base_len = strlen (base);
130
int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
131
file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
132
if (file_name != NULL)
134
memcpy (file_name, dir, dir_len);
136
file_name[dir_len] = DIRECTORY_SEPARATOR;
137
memcpy (file_name + dir_len + add_slash, base, base_len + 1);
140
size_t dir_len = strlen (dir);
141
size_t base_len = strlen (base);
142
int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
143
file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
144
if (file_name != NULL)
146
memcpy (file_name, dir, dir_len);
148
file_name[dir_len] = DIRECTORY_SEPARATOR;
149
memcpy (file_name + dir_len + add_slash, base, base_len + 1);
141
if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)
142
/* Out of memory or file not found, treat it as empty. */
153
if (file_name == NULL)
154
/* Out of memory. Treat the file as empty. */
146
/* Parse the file's contents. */
147
char *res_ptr = NULL;
161
if (c == '\n' || c == ' ' || c == '\t')
165
/* Skip comment, to end of line. */
168
while (!(c == EOF || c == '\n'));
174
if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
178
old_res_ptr = res_ptr;
181
res_size = l1 + 1 + l2 + 1;
182
res_ptr = (char *) malloc (res_size + 1);
186
res_size += l1 + 1 + l2 + 1;
187
res_ptr = (char *) realloc (res_ptr, res_size + 1);
193
if (old_res_ptr != NULL)
197
strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
198
strcpy (res_ptr + res_size - (l2 + 1), buf2);
205
*(res_ptr + res_size) = '\0';
210
if (file_name != NULL)
160
/* Open the file. Reject symbolic links on platforms that support
161
O_NOFOLLOW. This is a security feature. Without it, an attacker
162
could retrieve parts of the contents (namely, the tail of the
163
first line that starts with "* ") of an arbitrary file by placing
164
a symbolic link to that file under the name "charset.alias" in
165
some writable directory and defining the environment variable
166
CHARSETALIASDIR to point to that directory. */
167
fd = open (file_name,
168
O_RDONLY | (HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0));
170
/* File not found. Treat it as empty. */
176
fp = fdopen (fd, "r");
179
/* Out of memory. Treat the file as empty. */
185
/* Parse the file's contents. */
186
char *res_ptr = NULL;
200
if (c == '\n' || c == ' ' || c == '\t')
204
/* Skip comment, to end of line. */
207
while (!(c == EOF || c == '\n'));
213
if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
217
old_res_ptr = res_ptr;
220
res_size = l1 + 1 + l2 + 1;
221
res_ptr = (char *) malloc (res_size + 1);
225
res_size += l1 + 1 + l2 + 1;
226
res_ptr = (char *) realloc (res_ptr, res_size + 1);
232
if (old_res_ptr != NULL)
236
strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
237
strcpy (res_ptr + res_size - (l2 + 1), buf2);
244
*(res_ptr + res_size) = '\0';
256
/* To avoid the trouble of installing a file that is shared by many
257
GNU packages -- many packaging systems have problems with this --,
258
simply inline the aliases here. */
259
cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
260
"ISO8859-2" "\0" "ISO-8859-2" "\0"
261
"ISO8859-4" "\0" "ISO-8859-4" "\0"
262
"ISO8859-5" "\0" "ISO-8859-5" "\0"
263
"ISO8859-7" "\0" "ISO-8859-7" "\0"
264
"ISO8859-9" "\0" "ISO-8859-9" "\0"
265
"ISO8859-13" "\0" "ISO-8859-13" "\0"
266
"ISO8859-15" "\0" "ISO-8859-15" "\0"
267
"KOI8-R" "\0" "KOI8-R" "\0"
268
"KOI8-U" "\0" "KOI8-U" "\0"
269
"CP866" "\0" "CP866" "\0"
270
"CP949" "\0" "CP949" "\0"
271
"CP1131" "\0" "CP1131" "\0"
272
"CP1251" "\0" "CP1251" "\0"
273
"eucCN" "\0" "GB2312" "\0"
274
"GB2312" "\0" "GB2312" "\0"
275
"eucJP" "\0" "EUC-JP" "\0"
276
"eucKR" "\0" "EUC-KR" "\0"
277
"Big5" "\0" "BIG5" "\0"
278
"Big5HKSCS" "\0" "BIG5-HKSCS" "\0"
279
"GBK" "\0" "GBK" "\0"
280
"GB18030" "\0" "GB18030" "\0"
281
"SJIS" "\0" "SHIFT_JIS" "\0"
282
"ARMSCII-8" "\0" "ARMSCII-8" "\0"
283
"PT154" "\0" "PT154" "\0"
284
/*"ISCII-DEV" "\0" "?" "\0"*/
285
"*" "\0" "UTF-8" "\0";
216
289
/* To avoid the troubles of an extra file charset.alias_vms in the
217
sources of many GNU packages, simply inline the aliases here. */
290
sources of many GNU packages, simply inline the aliases here. */
218
291
/* The list of encodings is taken from the OpenVMS 7.3-1 documentation
219
"Compaq C Run-Time Library Reference Manual for OpenVMS systems"
220
section 10.7 "Handling Different Character Sets". */
292
"Compaq C Run-Time Library Reference Manual for OpenVMS systems"
293
section 10.7 "Handling Different Character Sets". */
221
294
cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
222
"ISO8859-2" "\0" "ISO-8859-2" "\0"
223
"ISO8859-5" "\0" "ISO-8859-5" "\0"
224
"ISO8859-7" "\0" "ISO-8859-7" "\0"
225
"ISO8859-8" "\0" "ISO-8859-8" "\0"
226
"ISO8859-9" "\0" "ISO-8859-9" "\0"
228
"eucJP" "\0" "EUC-JP" "\0"
229
"SJIS" "\0" "SHIFT_JIS" "\0"
230
"DECKANJI" "\0" "DEC-KANJI" "\0"
231
"SDECKANJI" "\0" "EUC-JP" "\0"
233
"eucTW" "\0" "EUC-TW" "\0"
234
"DECHANYU" "\0" "DEC-HANYU" "\0"
235
"DECHANZI" "\0" "GB2312" "\0"
237
"DECKOREAN" "\0" "EUC-KR" "\0";
295
"ISO8859-2" "\0" "ISO-8859-2" "\0"
296
"ISO8859-5" "\0" "ISO-8859-5" "\0"
297
"ISO8859-7" "\0" "ISO-8859-7" "\0"
298
"ISO8859-8" "\0" "ISO-8859-8" "\0"
299
"ISO8859-9" "\0" "ISO-8859-9" "\0"
301
"eucJP" "\0" "EUC-JP" "\0"
302
"SJIS" "\0" "SHIFT_JIS" "\0"
303
"DECKANJI" "\0" "DEC-KANJI" "\0"
304
"SDECKANJI" "\0" "EUC-JP" "\0"
306
"eucTW" "\0" "EUC-TW" "\0"
307
"DECHANYU" "\0" "DEC-HANYU" "\0"
308
"DECHANZI" "\0" "GB2312" "\0"
310
"DECKOREAN" "\0" "EUC-KR" "\0";
240
313
# if defined WIN32_NATIVE || defined __CYGWIN__
241
314
/* To avoid the troubles of installing a separate file in the same
242
directory as the DLL and of retrieving the DLL's directory at
243
runtime, simply inline the aliases here. */
315
directory as the DLL and of retrieving the DLL's directory at
316
runtime, simply inline the aliases here. */
245
318
cp = "CP936" "\0" "GBK" "\0"
246
"CP1361" "\0" "JOHAB" "\0"
247
"CP20127" "\0" "ASCII" "\0"
248
"CP20866" "\0" "KOI8-R" "\0"
249
"CP20936" "\0" "GB2312" "\0"
250
"CP21866" "\0" "KOI8-RU" "\0"
251
"CP28591" "\0" "ISO-8859-1" "\0"
252
"CP28592" "\0" "ISO-8859-2" "\0"
253
"CP28593" "\0" "ISO-8859-3" "\0"
254
"CP28594" "\0" "ISO-8859-4" "\0"
255
"CP28595" "\0" "ISO-8859-5" "\0"
256
"CP28596" "\0" "ISO-8859-6" "\0"
257
"CP28597" "\0" "ISO-8859-7" "\0"
258
"CP28598" "\0" "ISO-8859-8" "\0"
259
"CP28599" "\0" "ISO-8859-9" "\0"
260
"CP28605" "\0" "ISO-8859-15" "\0"
261
"CP38598" "\0" "ISO-8859-8" "\0"
262
"CP51932" "\0" "EUC-JP" "\0"
263
"CP51936" "\0" "GB2312" "\0"
264
"CP51949" "\0" "EUC-KR" "\0"
265
"CP51950" "\0" "EUC-TW" "\0"
266
"CP54936" "\0" "GB18030" "\0"
267
"CP65001" "\0" "UTF-8" "\0";
319
"CP1361" "\0" "JOHAB" "\0"
320
"CP20127" "\0" "ASCII" "\0"
321
"CP20866" "\0" "KOI8-R" "\0"
322
"CP20936" "\0" "GB2312" "\0"
323
"CP21866" "\0" "KOI8-RU" "\0"
324
"CP28591" "\0" "ISO-8859-1" "\0"
325
"CP28592" "\0" "ISO-8859-2" "\0"
326
"CP28593" "\0" "ISO-8859-3" "\0"
327
"CP28594" "\0" "ISO-8859-4" "\0"
328
"CP28595" "\0" "ISO-8859-5" "\0"
329
"CP28596" "\0" "ISO-8859-6" "\0"
330
"CP28597" "\0" "ISO-8859-7" "\0"
331
"CP28598" "\0" "ISO-8859-8" "\0"
332
"CP28599" "\0" "ISO-8859-9" "\0"
333
"CP28605" "\0" "ISO-8859-15" "\0"
334
"CP38598" "\0" "ISO-8859-8" "\0"
335
"CP51932" "\0" "EUC-JP" "\0"
336
"CP51936" "\0" "GB2312" "\0"
337
"CP51949" "\0" "EUC-KR" "\0"
338
"CP51950" "\0" "EUC-TW" "\0"
339
"CP54936" "\0" "GB18030" "\0"
340
"CP65001" "\0" "UTF-8" "\0";
309
381
locale = getenv ("LC_ALL");
310
382
if (locale == NULL || locale[0] == '\0')
312
locale = getenv ("LC_CTYPE");
313
if (locale == NULL || locale[0] == '\0')
314
locale = getenv ("LANG");
384
locale = getenv ("LC_CTYPE");
385
if (locale == NULL || locale[0] == '\0')
386
locale = getenv ("LANG");
316
388
if (locale != NULL && locale[0] != '\0')
318
/* If the locale name contains an encoding after the dot, return
320
const char *dot = strchr (locale, '.');
324
const char *modifier;
327
/* Look for the possible @... trailer and remove it, if any. */
328
modifier = strchr (dot, '@');
329
if (modifier == NULL)
331
if (modifier - dot < sizeof (buf))
333
memcpy (buf, dot, modifier - dot);
334
buf [modifier - dot] = '\0';
340
/* Woe32 has a function returning the locale's codepage as a number. */
390
/* If the locale name contains an encoding after the dot, return
392
const char *dot = strchr (locale, '.');
396
const char *modifier;
399
/* Look for the possible @... trailer and remove it, if any. */
400
modifier = strchr (dot, '@');
401
if (modifier == NULL)
403
if (modifier - dot < sizeof (buf))
405
memcpy (buf, dot, modifier - dot);
406
buf [modifier - dot] = '\0';
412
/* Woe32 has a function returning the locale's codepage as a number:
413
GetACP(). This encoding is used by Cygwin, unless the user has set
414
the environment variable CYGWIN=codepage:oem (which very few people
416
Output directed to console windows needs to be converted (to
417
GetOEMCP() if the console is using a raster font, or to
418
GetConsoleOutputCP() if it is using a TrueType font). Cygwin does
419
this conversion transparently (see winsup/cygwin/fhandler_console.cc),
420
converting to GetConsoleOutputCP(). This leads to correct results,
421
except when SetConsoleOutputCP has been called and a raster font is
341
423
sprintf (buf, "CP%u", GetACP ());