2
* arch-tag: Implementation of various string-related utility functions
4
* Copyright (C) 2002 Jorn Baayen
5
* Copyright (C) 2003 Colin Walters <walters@verbum.org>
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2, or (at your option)
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
* GNU General Public License for more details.
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24
#include <libgnome/gnome-i18n.h>
29
#include "rb-string-helpers.h"
31
/* File-level lookup table mapping a locale name (key) to the name of the
 * legacy character encoding used for it (value).  Created and filled by
 * rb_string_helpers_init (), destroyed by rb_string_helpers_shutdown (). */
static GHashTable *encodings;
33
/* stolen from gnome-desktop-item.c */
35
/* Probe whether iconv can convert from the charset named by `locale` to
 * UTF-8, by opening a converter for that pair.  Despite the parameter name,
 * the callers in this file pass charset names such as "EUC-CN".
 * NOTE(review): the return type and the remainder of the body are not
 * visible in this listing; presumably the result of the open is tested and
 * `cd` is closed again -- confirm. */
check_locale (const char *locale)
37
GIConv cd = g_iconv_open ("UTF-8", locale);
44
/* stolen from gnome-desktop-item.c */
46
/* Variadic helper: registers `enc` as the encoding for each locale name
 * passed after it.  Each variadic argument is read as a char * and inserted
 * into the table with the locale name as key and `enc` as value.  Every call
 * in this file ends the argument list with NULL.
 * Note that the `encodings` parameter shadows the file-level variable of the
 * same name.
 * NOTE(review): the loop head and its termination test are not visible in
 * this listing; presumably it stops at the NULL sentinel -- confirm. */
insert_locales (GHashTable *encodings, char *enc, ...)
53
s = va_arg (args, char *);
56
/* key = locale name, value = encoding name; the pointers are stored as-is */
g_hash_table_insert (encodings, s, enc);
61
/* stolen from gnome-desktop-item.c */
63
/* Creates the file-level `encodings` table and fills it with the
 * locale-name -> legacy-encoding pairs listed below.  Must run before
 * get_encoding_from_locale () is used, since that function looks names up
 * in this table. */
rb_string_helpers_init (void)
65
/* make a standard conversion table from the desktop standard spec */
66
/* Keys are hashed and compared as strings.  No destroy functions are given,
 * so the table does not own its keys or values; everything inserted below
 * is a string literal. */
encodings = g_hash_table_new (g_str_hash, g_str_equal);
68
/* "C" is plain ascii */
69
insert_locales (encodings, "ASCII", "C", NULL);
71
insert_locales (encodings, "ARMSCII-8", "by", NULL);
72
insert_locales (encodings, "BIG5", "zh_TW", NULL);
73
insert_locales (encodings, "CP1251", "be", "bg", NULL);
74
/* Ask check_locale () about "EUC-CN" before registering it for zh_CN.
 * NOTE(review): the else/closing-brace lines are not visible in this
 * listing; presumably "GB2312" below is the fallback branch -- confirm. */
if (check_locale ("EUC-CN")) {
75
insert_locales (encodings, "EUC-CN", "zh_CN", NULL);
77
insert_locales (encodings, "GB2312", "zh_CN", NULL);
79
insert_locales (encodings, "EUC-JP", "ja", NULL);
80
insert_locales (encodings, "UHC", "ko", NULL);
81
/*insert_locales (encodings, "GEORGIAN-ACADEMY", NULL);*/
82
insert_locales (encodings, "GEORGIAN-PS", "ka", NULL);
83
/* NOTE(review): "pt" is listed twice in the next call; the second insert
 * stores the same value again.  Possibly a different locale code was
 * intended -- confirm. */
insert_locales (encodings, "ISO-8859-1", "br", "ca", "da", "de", "en", "es", "eu", "fi", "fr", "gl", "it", "nl", "wa", "no", "pt", "pt", "sv", NULL);
84
insert_locales (encodings, "ISO-8859-2", "cs", "hr", "hu", "pl", "ro", "sk", "sl", "sq", "sr", NULL);
85
insert_locales (encodings, "ISO-8859-3", "eo", NULL);
86
insert_locales (encodings, "ISO-8859-5", "mk", "sp", NULL);
87
insert_locales (encodings, "ISO-8859-7", "el", NULL);
88
insert_locales (encodings, "ISO-8859-9", "tr", NULL);
89
insert_locales (encodings, "ISO-8859-13", "lt", "lv", "mi", NULL);
90
insert_locales (encodings, "ISO-8859-14", "ga", "cy", NULL);
91
insert_locales (encodings, "ISO-8859-15", "et", NULL);
92
insert_locales (encodings, "KOI8-R", "ru", NULL);
93
insert_locales (encodings, "KOI8-U", "uk", NULL);
94
/* Same probing pattern as for zh_CN above, here for Vietnamese.
 * NOTE(review): presumably "TCVN" is the fallback branch -- confirm against
 * the lines not shown. */
if (check_locale ("TCVN-5712")) {
95
insert_locales (encodings, "TCVN-5712", "vi", NULL);
97
insert_locales (encodings, "TCVN", "vi", NULL);
99
insert_locales (encodings, "TIS-620", "th", NULL);
100
/*insert_locales (encodings, "VISCII", NULL);*/
104
/* Releases the `encodings` table built by rb_string_helpers_init ().
 * NOTE(review): in the lines visible here the `encodings` pointer is not
 * reset afterwards, so lookups after shutdown would use a destroyed table --
 * confirm callers never do that. */
rb_string_helpers_shutdown (void)
106
g_hash_table_destroy (encodings);
109
/* stolen from gnome-desktop-item.c */
111
/* Maps a locale string to a legacy encoding name.
 * Steps visible in this listing:
 *   1. look for an explicit ".encoding" suffix that is not ".UTF-8";
 *   2. look the whole locale string up in `encodings`;
 *   3. look up only its first two characters (the language code).
 * The final lookup result is returned directly, so the function yields NULL
 * when nothing matches.
 * NOTE(review): rb_unicodify () passes getenv ("LANG"), which may be NULL;
 * strchr () on NULL is undefined, so confirm a NULL guard exists in the
 * lines not shown here. */
get_encoding_from_locale (const char *locale)
114
const char *encoding;
119
/* if locale includes encoding (that isn't UTF-8), use it */
120
encoding = strchr (locale, '.');
121
/* strncmp () is non-zero when the suffix differs from ".UTF-8" */
if (encoding != NULL && strncmp (encoding, ".UTF-8", 6)) {
125
/* first try the entire locale (at this point ll_CC) */
126
encoding = g_hash_table_lookup (encodings, locale);
127
if (encoding != NULL)
130
/* Try just the language */
131
/* NOTE(review): strncpy () with n == 2 does not write a terminator when the
 * source is two or more characters long; the declaration of `lang` and any
 * explicit termination are not visible here -- confirm. */
strncpy (lang, locale, 2);
133
return g_hash_table_lookup (encodings, lang);
137
/* Produces a UTF-8 version of `str`.
 * Strategy visible in this listing, in order:
 *   1. if `str` already validates as UTF-8, return a g_strdup () copy;
 *   2. otherwise convert from the encoding that get_encoding_from_locale ()
 *      derives from the LANG environment variable;
 *   3. otherwise convert from ISO-8859-1.
 * NOTE(review): the handling of a NULL `char_encoding`, the remaining
 * g_convert () arguments and the final return are not visible here;
 * presumably the caller owns (and must g_free) the result -- confirm. */
rb_unicodify (const char *str)
140
const char *char_encoding;
142
/* Try validating it as UTF-8 first */
143
if (g_utf8_validate (str, -1, NULL))
144
return g_strdup (str);
146
/* Failing that, try the legacy encoding associated
148
char_encoding = get_encoding_from_locale (getenv ("LANG"));
149
if (char_encoding == NULL)
152
ret = g_convert (str, -1, "UTF-8", char_encoding,
154
/* Failing that, try ISO-8859-1. */
156
ret = g_convert (str, -1, "UTF-8", "ISO-8859-1",
163
/* Case-insensitive comparison of two UTF-8 strings: both arguments are
 * case-folded and the folded copies are compared with g_utf8_collate ().
 * Despite the "strn" in the name there is no length argument; the whole
 * NUL-terminated strings are compared.  The gconstpointer parameters are
 * cast to const char * before use.
 * NOTE(review): g_utf8_casefold () returns newly allocated strings; the
 * freeing of `al`/`bl` and the return of `ret` are not visible in this
 * listing -- confirm. */
rb_utf8_strncasecmp (gconstpointer a, gconstpointer b)
165
char *al = g_utf8_casefold ((const char *) a, -1);
166
char *bl = g_utf8_casefold ((const char *) b, -1);
167
int ret = g_utf8_collate (al, bl);
174
rb_get_sort_key (const char *string)
176
char *collated, *folded;
177
folded = g_utf8_casefold (string, -1);
178
collated = g_utf8_collate_key (folded, -1);