143
gchar *convert_string (const gchar *string, const gchar *from, const gchar *to)
149
/* stolen from gnome-desktop-item.c */
151
check_locale (const char *locale)
153
GIConv cd = g_iconv_open ("UTF-8", locale);
154
if ((GIConv)-1 == cd)
160
/* stolen from gnome-desktop-item.c */
162
insert_locales (GHashTable *encodings, char *enc, ...)
167
va_start (args, enc);
169
s = va_arg (args, char *);
172
g_hash_table_insert (encodings, s, enc);
177
/* stolen from gnome-desktop-item.c */
178
/* make a standard conversion table from the desktop standard spec */
180
Charset_Insert_Locales_Init (void)
182
encodings = g_hash_table_new (g_str_hash, g_str_equal);
184
/* "C" is plain ascii */
185
insert_locales (encodings, "ASCII", "C", NULL);
187
insert_locales (encodings, "ARMSCII-8", "by", NULL);
188
insert_locales (encodings, "BIG5", "zh_TW", NULL);
189
insert_locales (encodings, "CP1251", "be", "bg", NULL);
190
if (check_locale ("EUC-CN")) {
191
insert_locales (encodings, "EUC-CN", "zh_CN", NULL);
193
insert_locales (encodings, "GB2312", "zh_CN", NULL);
195
insert_locales (encodings, "EUC-JP", "ja", NULL);
196
insert_locales (encodings, "EUC-KR", "ko", NULL);
197
/*insert_locales (encodings, "GEORGIAN-ACADEMY", NULL);*/
198
insert_locales (encodings, "GEORGIAN-PS", "ka", NULL);
199
insert_locales (encodings, "ISO-8859-1", "br", "ca", "da", "de", "en", "es", "eu", "fi", "fr", "gl", "it", "nl", "wa", "no", "pt", "pt", "sv", NULL);
200
insert_locales (encodings, "ISO-8859-2", "cs", "hr", "hu", "pl", "ro", "sk", "sl", "sq", "sr", NULL);
201
insert_locales (encodings, "ISO-8859-3", "eo", NULL);
202
insert_locales (encodings, "ISO-8859-5", "mk", "sp", NULL);
203
insert_locales (encodings, "ISO-8859-7", "el", NULL);
204
insert_locales (encodings, "ISO-8859-9", "tr", NULL);
205
insert_locales (encodings, "ISO-8859-13", "lt", "lv", "mi", NULL);
206
insert_locales (encodings, "ISO-8859-14", "ga", "cy", NULL);
207
insert_locales (encodings, "ISO-8859-15", "et", NULL);
208
insert_locales (encodings, "KOI8-R", "ru", NULL);
209
insert_locales (encodings, "KOI8-U", "uk", NULL);
210
if (check_locale ("TCVN-5712")) {
211
insert_locales (encodings, "TCVN-5712", "vi", NULL);
213
insert_locales (encodings, "TCVN", "vi", NULL);
215
insert_locales (encodings, "TIS-620", "th", NULL);
216
/*insert_locales (encodings, "VISCII", NULL);*/
220
Charset_Insert_Locales_Destroy (void)
222
g_hash_table_destroy (encodings);
225
/* stolen from gnome-desktop-item.c */
227
get_encoding_from_locale (const char *locale)
230
const char *encoding;
235
/* if locale includes encoding, use it *//*
236
encoding = strchr (locale, '.');
237
if (encoding != NULL) {
240
/* if locale includes encoding (that isn't UTF-8), use it */
241
encoding = strchr (locale, '.');
242
if (encoding != NULL && strncmp (encoding, ".UTF-8", 6)) {
246
/* first try the entire locale (at this point ll_CC) */
247
encoding = g_hash_table_lookup (encodings, locale);
248
if (encoding != NULL)
251
/* Try just the language */
252
strncpy (lang, locale, 2);
254
return g_hash_table_lookup (encodings, lang);
259
* Return the locale from LC_ALL if set, else from LC_CTYPE, else from LANG
261
* http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap08.html#tag_08_02
264
* This variable shall determine the locale category for native language,
265
* local customs, and coded character set in the absence of the LC_ALL and
266
* other LC_* ( LC_COLLATE , LC_CTYPE , LC_MESSAGES , LC_MONETARY , LC_NUMERIC ,
267
* LC_TIME ) environment variables. This can be used by applications to
268
* determine the language to use for error messages and instructions, collating
269
* sequences, date formats, and so on.
271
* This variable shall determine the values for all locale categories. The
272
* value of the LC_ALL environment variable has precedence over any of the
273
* other environment variables starting with LC_ ( LC_COLLATE , LC_CTYPE ,
274
* LC_MESSAGES , LC_MONETARY , LC_NUMERIC , LC_TIME ) and the LANG environment
277
* This variable shall determine the locale category for character collation.
278
* It determines collation information for regular expressions and sorting,
279
* including equivalence classes and multi-character collating elements, in
280
* various utilities and the strcoll() and strxfrm() functions. Additional
281
* semantics of this variable, if any, are implementation-defined.
283
* This variable shall determine the locale category for character handling
284
* functions, such as tolower(), toupper(), and isalpha(). This environment
285
* variable determines the interpretation of sequences of bytes of text data
286
* as characters (for example, single as opposed to multi-byte characters),
287
* the classification of characters (for example, alpha, digit, graph), and
288
* the behavior of character classes. Additional semantics of this variable,
289
* if any, are implementation-defined.
291
* This variable shall determine the locale category for processing affirmative
292
* and negative responses and the language and cultural conventions in which
293
* messages should be written. [XSI] [Option Start] It also affects the behavior
294
* of the catopen() function in determining the message catalog. [Option End]
295
* Additional semantics of this variable, if any, are implementation-defined.
296
* The language and cultural conventions of diagnostic and informative messages
297
* whose format is unspecified by IEEE Std 1003.1-2001 should be affected by
298
* the setting of LC_MESSAGES .
300
* This variable shall determine the locale category for monetary-related
301
* numeric formatting information. Additional semantics of this variable, if
302
* any, are implementation-defined.
304
* This variable shall determine the locale category for numeric formatting
305
* (for example, thousands separator and radix character) information in
306
* various utilities as well as the formatted I/O operations in printf() and
307
* scanf() and the string conversion functions in strtod(). Additional semantics
308
* of this variable, if any, are implementation-defined.
310
* This variable shall determine the locale category for date and time formatting
311
* information. It affects the behavior of the time functions in strftime().
312
* Additional semantics of this variable, if any, are implementation-defined.
315
* The values of locale categories shall be determined by a precedence order; the
316
* first condition met below determines the value:
318
* 1. If the LC_ALL environment variable is defined and is not null, the value
319
* of LC_ALL shall be used.
320
* 2. If the LC_* environment variable ( LC_COLLATE , LC_CTYPE , LC_MESSAGES ,
321
* LC_MONETARY , LC_NUMERIC , LC_TIME ) is defined and is not null, the value
322
* of the environment variable shall be used to initialize the category that
323
* corresponds to the environment variable.
324
* 3. If the LANG environment variable is defined and is not null, the value of
325
* the LANG environment variable shall be used.
326
* 4. If the LANG environment variable is not set or is set to the empty string,
327
* the implementation-defined default locale shall be used.
330
const gchar *get_locale (void)
332
if (g_getenv("LC_ALL"))
333
return g_getenv("LC_ALL");
335
else if (g_getenv("LC_CTYPE"))
336
return g_getenv("LC_CTYPE");
338
else if (g_getenv("LANG"))
339
return g_getenv("LANG");
349
* convert_string : (don't use with UTF-16 strings)
350
* - display_error : if TRUE, may return an escaped string and display an error
351
* message (if conversion fails).
353
gchar *convert_string (const gchar *string, const gchar *from_codeset,
354
const gchar *to_codeset, const gboolean display_error)
356
return convert_string_1(string, -1, from_codeset, to_codeset, display_error);
358
/* Length must be passed, as the string might be Unicode, in which case we can't
359
* count zeroes (see strlen call below). */
360
gchar *convert_string_1 (const gchar *string, gssize length, const gchar *from_codeset,
361
const gchar *to_codeset, const gboolean display_error)
146
364
GError *error = NULL;
151
output = g_convert(string, -1, to, from, NULL, NULL, &error);
153
//g_message("converting %s from %s to %s", string, from, to);
370
output = g_convert(string, length, to_codeset, from_codeset, NULL, &bytes_written, &error);
371
//output = g_convert_with_fallback(string, length, to_codeset, from_codeset, "?", NULL, &bytes_written, &error);
155
373
if (output == NULL)
157
375
gchar *escaped_str = g_strescape(string, NULL);
158
g_warning("convert_string(): Failed conversion from charset '%s' to '%s'. "
159
"String '%s'. Errcode %d (%s).\n",
160
from, to, escaped_str, error->code, error->message);
378
g_warning("convert_string(): Failed conversion from charset '%s' to '%s'. "
379
"String '%s'. Errcode %d (%s).\n",
380
from_codeset, to_codeset, escaped_str, error->code, error->message);
161
382
g_free(escaped_str);
162
383
g_error_free(error);
163
return g_strdup(string);
384
// Return the input string without converting it. If the string is
385
// displayed in the UI, it must be in UTF-8!
386
if ( (g_ascii_strcasecmp(to_codeset, "UTF-8"))
387
|| (g_utf8_validate(string, -1, NULL)) )
389
return g_strdup(string);
393
// Patch from Alexey Illarionov:
394
// g_convert returns null-terminated string only with one \0 at the
395
// end. It can cause some garbage at the end of a string for UTF-16.
396
// The second \0 should be set manually.
397
output = g_realloc(output, bytes_written + 2);
399
output[bytes_written] = output[bytes_written + 1] = 0;
402
//g_print("from %s => len: %d, string: '%s'\n (%x %x %x %x %x %x %x %x)\n",from_codeset,length,string,string[0],string[1],string[2],string[3],string[4],string[5],string[6],string[7]);
403
//g_print("to %s => len: %d, output: '%s'\n (%x %x %x %x %x %x %x %x)\n\n",to_codeset,bytes_written+2,output,output[0],output[1],output[2],output[3],output[4],output[5],output[6],output[7]);
240
* Conversion with ISO-8859-1 for ID3v2.3 tags (current_charset <===> ISO-8859-1)
242
char *convert_to_iso88591 (const char *string)
244
const gchar *charset;
245
g_get_charset(&charset);
247
/* No conversion needed */
248
if (strcmp(charset, "ANSI_X3.4-1968") == 0)
249
return g_strdup(string);
251
return convert_string(string, charset, "ISO-8859-1");
254
char *convert_from_iso88591 (const char *string)
256
const gchar *charset;
257
g_get_charset(&charset);
259
/* No conversion needed */
260
if (strcmp(charset, "ANSI_X3.4-1968") == 0)
261
return g_strdup(string);
263
return convert_string(string, "ISO-8859-1", charset);
269
* Conversion with "this_charset" for ID3v2.3 tags (current_charset <===> this_charset)
271
// Convert from the locale charset to 'this_charset'
272
char *convert_to_this_charset (const char *string, char *this_charset)
274
const gchar *charset;
275
g_get_charset(&charset);
277
return convert_string(string, charset, this_charset);
280
// Convert from 'this_charset' to the locale charset
281
char *convert_from_this_charset (const char *string, char *this_charset)
283
const gchar *charset;
284
g_get_charset(&charset);
286
return convert_string(string, this_charset, charset);
292
* Conversion functions using default parameters set by user in the preference window. (USER_CHARACTER_SET <===> FILE_CHARACTER_SET)
294
char *convert_from_user_to_file (const char *string)
296
char *file_charset = FILE_CHARACTER_SET;
297
char *user_charset = USER_CHARACTER_SET;
299
return convert_string(string,user_charset,file_charset);
302
char *convert_from_file_to_user (const char *string)
304
char *file_charset = FILE_CHARACTER_SET;
305
char *user_charset = USER_CHARACTER_SET;
307
return convert_string(string,file_charset,user_charset);
312
* Functions to translate filename to/from UTF-8
313
* Based around the ideas under "File Name Encodings" at
314
* http://developer.gnome.org/doc/API/2.0/glib/glib-Character-Set-Conversion.html
479
* Convert a string from the filename system encoding to UTF-8.
480
* - conversion OK : returns the UTF-8 string (new allocated)
481
* - conversion KO : tries other encodings, else returns an 'escaped' string
316
483
gchar *filename_to_display (const gchar *string)
318
GError *error = NULL;
319
gchar *temp = g_filename_to_utf8(string, -1, NULL, NULL, &error);
486
GError *error = NULL;
491
if (g_utf8_validate(string, -1, NULL))
493
// String already in UTF-8
494
ret = g_strdup(string);
497
const gchar *char_encoding;
499
// Get the encoding associated with the locale, ignoring any UTF-8 suffix (e.g. if LANG=fr_FR.UTF-8 it will return ISO-8859-1)
500
char_encoding = get_encoding_from_locale(get_locale());
503
//g_print("> char_encoding: %s\n",char_encoding);
505
ret = g_convert(string, -1, "UTF-8", char_encoding, NULL, NULL, &error);
510
// Failing that, try ISO-8859-1
512
ret = g_convert(string, -1, "UTF-8", "ISO-8859-1", NULL, NULL, &error);
517
gchar *escaped_str = g_strescape(string, NULL);
518
g_warning(_("The filename '%s' couldn't be converted into UTF-8 (%s).\n"),
519
escaped_str, error && error->message ? error->message : _("Invalid UTF-8"));
520
g_clear_error(&error);
527
ET_Win32_Path_Remove_Trailing_Slash(ret);
528
ET_Win32_Path_Replace_Slashes(ret);
535
* Convert a string from UTF-8 to the filename system encoding.
536
* - conversion OK : returns the string in filename system encoding (new allocated)
537
* - conversion KO : display error message + returns nothing!
539
gchar *filename_from_display (const gchar *string)
541
GError *error = NULL;
543
const gchar *char_encoding = NULL;
544
//const gchar *filename_encoding = NULL;
546
if (!string) return NULL;
548
// Get the system encoding from the locale (LC_ALL, LC_CTYPE or LANG) if it names one (e.g. fr_FR.UTF-8 => UTF-8)
550
char_encoding = strchr(get_locale(), '.');
553
char_encoding = char_encoding+1; // Skip the '.'
558
if (FILENAME_CHARACTER_SET_OTHER)
560
ret = g_convert(string, -1, char_encoding, "UTF-8", NULL, NULL, &error);
562
}else if (FILENAME_CHARACTER_SET_APPROXIMATE)
565
// When the string "//TRANSLIT" is appended to tocode, transliteration
566
// is activated. This means that when a character cannot be represented
567
// in the target character set, it can be approximated through one or
568
// several similarly looking characters.
569
gchar *enc = g_strconcat(char_encoding, "//TRANSLIT", NULL);
570
ret = g_convert(string, -1, enc, "UTF-8", NULL, NULL, &error);
573
}else if (FILENAME_CHARACTER_SET_DISCARD)
576
// When the string "//IGNORE" is appended to tocode, characters that
577
// cannot be represented in the target character set will be silently
579
gchar *enc = g_strconcat(char_encoding, "//IGNORE", NULL);
580
ret = g_convert(string, -1, enc, "UTF-8", NULL, NULL, &error);
587
// Get system encoding from locale in LANG if found (ex : fr_FR.UTF-8 => fr_FR => ISO-8859-1)
588
char_encoding = get_encoding_from_locale(get_locale());
591
//g_print("> char_encoding: %s\n",char_encoding);
593
ret = g_convert(string, -1, char_encoding, "UTF-8", NULL, NULL, &error);
599
// Failing that, try ISO-8859-1
601
ret = g_convert(string, -1, "ISO-8859-1", "UTF-8", NULL, NULL, &error);
606
if (g_utf8_validate(string, -1, NULL))
608
// String already in UTF-8
609
ret = g_strdup(string);
322
615
// Conversion KO!
323
616
gchar *escaped_str = g_strescape(string, NULL);
324
g_warning(_("The filename '%s' couldn't be converted to UTF-8. "
325
"(Try setting the environment variable G_FILENAME_ENCODING): %s\n"),
326
escaped_str, error->message ? error->message : _("Invalid UTF-8"));
327
//g_free(escaped_str);
617
g_warning(_("The UTF-8 string '%s' couldn't be converted into filename encoding (%s)\n"),
618
escaped_str, error && error->message ? error->message : _("Invalid UTF-8"));
328
619
g_clear_error(&error);
330
//return g_strdup(string);
331
return g_strdup(escaped_str); // Don't free escaped_str if used!
339
gchar *filename_from_display (const gchar* string)
341
gchar *temp = g_filename_from_utf8(string, -1, NULL, NULL, NULL);
345
gchar *escaped_str = g_strescape(string, NULL);
346
g_print("WARNING: Could not convert string %s into filename encoding\n", escaped_str);
350
return temp; // We need to catch errors (e.g. temp=NULL) in the real code
625
//ET_Win32_Path_Replace_Backslashes(ret);
628
return ret; // We need to catch errors (e.g. temp=NULL) in the real code