1
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 8 -*-
3
* Copyright 2008 Red Hat, Inc,
4
* 2007 William Jon McCann <mccann@jhu.edu>
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2 of the License, or
9
* (at your option) any later version.
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
16
* You should have received a copy of the GNU General Public License
17
* along with this program; if not, write to the Free Software
18
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20
* Written by : William Jon McCann <mccann@jhu.edu>
21
* Ray Strode <rstrode@redhat.com>
37
#include <glib/gi18n.h>
38
#include <glib/gstdio.h>
40
#include "gdm-languages.h"
46
#include "locarchive.h"
48
/* Alias list that is handed to collect_locales_from_locale_file(). */
#define ALIASES_FILE DATADIR "/gdm/locale.alias"
49
/* Locale archive below the configured LIBLOCALEDIR; mapped first. */
#define ARCHIVE_FILE LIBLOCALEDIR "/locale-archive"
50
/* Fixed archive path that is tried when ARCHIVE_FILE cannot be mapped. */
#define SYSTEM_ARCHIVE_FILE "/usr/lib/locale/locale-archive"
51
/* Directory the iso-codes XML files (iso_639, iso_639_3, iso_3166) are read from. */
#define ISO_CODES_DATADIR ISO_CODES_PREFIX "/share/xml/iso-codes"
52
/* Message catalog directory bound to the iso-codes gettext domains. */
#define ISO_CODES_LOCALESDIR ISO_CODES_PREFIX "/share/locale"
54
typedef struct _GdmLocale {
63
/* Language code -> "name" attribute of the matching iso_639 / iso_639_3 XML entry. */
static GHashTable *gdm_languages_map;
64
/* Territory code -> name (common_name preferred) of the matching iso_3166 XML entry. */
static GHashTable *gdm_territories_map;
65
/* Locale id -> GdmLocale; the table owns its key copies and its values. */
static GHashTable *gdm_available_locales_map;
67
/* Forward declarations for helpers that are called before they are defined. */
static char * construct_language_name (const char *language,
68
const char *territory,
70
const char *modifier);
72
static gboolean language_name_is_valid (const char *language_name);
75
/*
 * Release the storage held by a GdmLocale.
 *
 * This function is registered as the value destructor of
 * gdm_available_locales_map.
 *
 * NOTE(review): only the g_free() calls for name, codeset and modifier are
 * visible in this excerpt; the other string fields and the struct itself are
 * presumably released on lines that are not shown - confirm against the
 * complete file.
 */
gdm_locale_free (GdmLocale *locale)
82
g_free (locale->name);
83
g_free (locale->codeset);
84
g_free (locale->modifier);
89
/*
 * Return a newly allocated, normalized copy of a codeset name.
 *
 * The copy starts as g_strdup (codeset) and is then rewritten in place while
 * walking the input: characters are stored through g_ascii_tolower(), and
 * '-' and '_' are singled out for separate handling.
 *
 * NOTE(review): the statement executed for '-' and '_' is not visible in this
 * excerpt; since a caller compares the result against "utf8", these
 * characters are presumably dropped - confirm.  The loop condition, the
 * terminating NUL write and the declarations of p and q are also not shown.
 *
 * The caller owns the returned string.
 */
normalize_codeset (const char *codeset)
91
char *normalized_codeset;
95
normalized_codeset = g_strdup (codeset);
97
/* Nothing to rewrite when there is no input string. */
if (codeset != NULL) {
98
for (p = codeset, q = normalized_codeset;
101
if (*p == '-' || *p == '_') {
105
*q = g_ascii_tolower (*p);
111
return normalized_codeset;
115
* According to http://en.wikipedia.org/wiki/Locale
116
* locale names are of the form:
117
* [language[_territory][.codeset][@modifier]]
120
/*
 * Split a locale name of the form language[_territory][.codeset][@modifier]
 * into its components with a named-group regular expression.
 *
 * Every output pointer used in this excerpt (language_codep, territory_codep,
 * codesetp, modifierp) is optional: a NULL pointer means that component is
 * not returned.  Returned components are strings obtained from
 * g_match_info_fetch_named().
 *
 * When a codeset was captured, a normalized form of it is computed and a
 * candidate locale name is rebuilt; if language_name_is_valid() accepts the
 * candidate, *codesetp is replaced by the normalized codeset.
 *
 * NOTE(review): the remaining parameters, the regex compile flags, the
 * early-exit paths after the warnings and the return statement are on lines
 * that are not visible in this excerpt.
 */
gdm_parse_language_name (const char *name,
121
char **language_codep,
122
char **territory_codep,
127
GMatchInfo *match_info;
130
gchar *normalized_codeset = NULL;
131
gchar *normalized_name = NULL;
138
/*
 * language: anything up to '_', '.', '@' or whitespace;
 * territory: upper-case letters after '_';
 * codeset: letters, digits, '-' and '_' after '.';
 * modifier: ASCII characters after '@'.
 */
re = g_regex_new ("^(?P<language>[^_.@[:space:]]+)"
139
"(_(?P<territory>[[:upper:]]+))?"
140
"(\\.(?P<codeset>[-_0-9a-zA-Z]+))?"
141
"(@(?P<modifier>[[:ascii:]]+))?$",
144
g_warning ("%s", error->message);
148
if (!g_regex_match (re, name, 0, &match_info) ||
149
g_match_info_is_partial_match (match_info)) {
150
g_warning ("locale %s isn't valid\n", name);
154
res = g_match_info_matches (match_info);
156
g_warning ("Unable to parse locale: %s", name);
162
if (language_codep != NULL) {
163
*language_codep = g_match_info_fetch_named (match_info, "language");
166
if (territory_codep != NULL) {
167
*territory_codep = g_match_info_fetch_named (match_info, "territory");
169
/*
 * An optional group that did not take part in the match is fetched as
 * an empty string; it is turned into NULL here.  The expression
 * *territory_codep[0] indexes first and dereferences second, which
 * still yields the first character of the fetched string.
 */
if (*territory_codep != NULL &&
170
*territory_codep[0] == '\0') {
171
g_free (*territory_codep);
172
*territory_codep = NULL;
176
if (codesetp != NULL) {
177
*codesetp = g_match_info_fetch_named (match_info, "codeset");
179
/* Same empty-string test as for the territory. */
if (*codesetp != NULL &&
180
*codesetp[0] == '\0') {
186
if (modifierp != NULL) {
187
*modifierp = g_match_info_fetch_named (match_info, "modifier");
189
/* Same empty-string test as for the territory. */
if (*modifierp != NULL &&
190
*modifierp[0] == '\0') {
196
/* Only try the normalized codeset when a codeset was actually captured. */
if (codesetp != NULL && *codesetp != NULL) {
197
normalized_codeset = normalize_codeset (*codesetp);
198
normalized_name = construct_language_name (language_codep ? *language_codep : NULL,
199
territory_codep ? *territory_codep : NULL,
201
modifierp ? *modifierp : NULL);
203
if (language_name_is_valid (normalized_name)) {
205
*codesetp = normalized_codeset;
207
g_free (normalized_codeset);
209
g_free (normalized_name);
213
g_match_info_free (match_info);
220
/*
 * Build a locale name string "language[_territory][.codeset][@modifier]".
 *
 * territory, codeset and modifier are optional: a NULL value leaves out both
 * the component and its separator.  The assertions require that language is
 * non-empty and that every optional component is either NULL or non-empty.
 *
 * The result comes from g_strdup_printf(), so the caller owns it.
 *
 * NOTE(review): language[0] is read without a visible NULL check, while
 * gdm_parse_language_name() can pass NULL for language when its
 * language_codep argument is NULL - confirm that this cannot happen in
 * practice.  The codeset parameter, the first printf argument and the return
 * statement are on lines that are not visible in this excerpt.
 */
construct_language_name (const char *language,
221
const char *territory,
223
const char *modifier)
227
g_assert (language[0] != 0);
228
g_assert (territory == NULL || territory[0] != 0);
229
g_assert (codeset == NULL || codeset[0] != 0);
230
g_assert (modifier == NULL || modifier[0] != 0);
232
/* Each optional component contributes its separator and text, or two empty strings. */
name = g_strdup_printf ("%s%s%s%s%s%s%s",
234
territory != NULL? "_" : "",
235
territory != NULL? territory : "",
236
codeset != NULL? "." : "",
237
codeset != NULL? codeset : "",
238
modifier != NULL? "@" : "",
239
modifier != NULL? modifier : "");
245
/*
 * Return a normalized form of a locale name.
 *
 * The name is split with gdm_parse_language_name() and put back together
 * with construct_language_name(); the intermediate component strings are
 * freed afterwards.  The returned string is newly allocated.
 *
 * NOTE(review): the value returned for an empty name, and the release of the
 * codeset and modifier strings, are on lines that are not visible in this
 * excerpt.
 */
gdm_normalize_language_name (const char *name)
247
char *normalized_name;
249
char *territory_code;
253
/* An empty name is handled separately, before any parsing. */
if (name[0] == '\0') {
257
gdm_parse_language_name (name,
260
&codeset, &modifier);
262
normalized_name = construct_language_name (language_code,
265
g_free (language_code);
266
g_free (territory_code);
270
return normalized_name;
274
/*
 * Check whether the C library accepts language_name as a locale.
 *
 * The current setting of one locale category is saved, setlocale() is called
 * with language_name for that category, and the saved setting is restored.
 * The name is valid when that setlocale() call does not return NULL.
 *
 * The category is LC_CTYPE when WITH_INCOMPLETE_LOCALES is defined.
 * NOTE(review): the LC_MESSAGES initializer presumably belongs to the
 * matching #else branch, whose directive lines are not visible here.
 *
 * Because setlocale() changes the locale of the whole program, the locale is
 * temporarily different while this function runs.
 */
language_name_is_valid (const char *language_name)
278
#ifdef WITH_INCOMPLETE_LOCALES
279
int lc_type_id = LC_CTYPE;
281
int lc_type_id = LC_MESSAGES;
284
/* Copy the current setting: the string returned by setlocale() may be overwritten by later calls. */
old_locale = g_strdup (setlocale (lc_type_id, NULL));
285
is_valid = setlocale (lc_type_id, language_name) != NULL;
286
setlocale (lc_type_id, old_locale);
293
/*
 * Report the character set that the C library uses for a locale.
 *
 * LC_CTYPE is switched to language_name, nl_langinfo (CODESET) is queried,
 * and LC_CTYPE is switched back to its saved value.
 *
 * pcodeset: optional; receives a newly allocated copy of the codeset name.
 * is_utf8:  optional; set to whether the normalized codeset equals "utf8".
 *
 * NOTE(review): normalize_codeset() returns a new allocation that is stored
 * back into the local codeset variable; its release is not visible in this
 * excerpt - confirm it is freed.  The handling of a failed setlocale() call
 * is also on lines that are not shown.
 */
language_name_get_codeset_details (const char *language_name,
300
old_locale = g_strdup (setlocale (LC_CTYPE, NULL));
302
if (setlocale (LC_CTYPE, language_name) == NULL) {
307
codeset = nl_langinfo (CODESET);
309
if (pcodeset != NULL) {
310
*pcodeset = g_strdup (codeset);
313
if (is_utf8 != NULL) {
314
codeset = normalize_codeset (codeset);
316
*is_utf8 = strcmp (codeset, "utf8") == 0;
320
/* Put the previous LC_CTYPE setting back. */
setlocale (LC_CTYPE, old_locale);
325
/*
 * Tell whether message catalogs are installed for a locale name.
 *
 * The directory GNOMELOCALEDIR/<language_name>/LC_MESSAGES is opened and its
 * entries are read; the result becomes TRUE when an entry name ends in ".mo".
 *
 * NOTE(review): the handling of a directory that cannot be opened, the check
 * for the NULL that g_dir_read_name() returns at the end of the listing, and
 * the cleanup of path and dir are on lines that are not visible here.
 */
language_name_has_translations (const char *language_name)
330
gboolean has_translations;
332
path = g_build_filename (GNOMELOCALEDIR, language_name, "LC_MESSAGES", NULL);
334
has_translations = FALSE;
335
dir = g_dir_open (path, 0, NULL);
343
name = g_dir_read_name (dir);
349
if (g_str_has_suffix (name, ".mo")) {
350
has_translations = TRUE;
353
} while (name != NULL);
357
return has_translations;
361
/*
 * Register one locale name in gdm_available_locales_map.
 *
 * Steps visible in this excerpt:
 *  - pick the name to register: language_name itself, or
 *    "<language_name>.utf8" in the utf8_only branch;
 *  - reject names that language_name_is_valid() refuses;
 *  - parse the name into a new GdmLocale and derive locale->id (built without
 *    a codeset) and locale->name (built with the codeset);
 *  - unless WITH_INCOMPLETE_LOCALES is defined, reject locales for which
 *    language_name_has_translations() finds no catalogs;
 *  - when an entry with the same id already exists and its name is longer
 *    than the new one, discard the new locale; otherwise insert it.
 *
 * NOTE(review): the second parameter (used as utf8_only), the conditions
 * selecting between the name variants, and all return statements are on
 * lines that are not visible in this excerpt.
 */
add_locale (const char *language_name,
365
GdmLocale *old_locale;
369
g_return_val_if_fail (language_name != NULL, FALSE);
371
language_name_get_codeset_details (language_name, NULL, &is_utf8);
374
name = g_strdup (language_name);
375
} else if (utf8_only) {
376
name = g_strdup_printf ("%s.utf8", language_name);
378
/* Re-check the codeset for the name with the ".utf8" suffix added. */
language_name_get_codeset_details (name, NULL, &is_utf8);
384
name = g_strdup (language_name);
387
if (!language_name_is_valid (name)) {
388
g_debug ("Ignoring '%s' as a locale, since it's invalid", name);
393
locale = g_new0 (GdmLocale, 1);
394
gdm_parse_language_name (name,
395
&locale->language_code,
396
&locale->territory_code,
402
#ifdef WITH_INCOMPLETE_LOCALES
404
if (locale->territory_code == NULL || locale->modifier) {
405
gdm_locale_free (locale);
411
/* The id leaves the codeset out; the name keeps it. */
locale->id = construct_language_name (locale->language_code, locale->territory_code,
412
NULL, locale->modifier);
413
locale->name = construct_language_name (locale->language_code, locale->territory_code,
414
locale->codeset, locale->modifier);
416
#ifndef WITH_INCOMPLETE_LOCALES
417
if (!language_name_has_translations (locale->name) &&
418
!language_name_has_translations (locale->id) &&
419
!language_name_has_translations (locale->language_code) &&
421
g_debug ("Ignoring '%s' as a locale, since it lacks translations", locale->name);
422
gdm_locale_free (locale);
429
/* NOTE(review): the condition guarding this assignment, and the release of the previous id, are not visible here. */
locale->id = g_strdup (locale->name);
432
old_locale = g_hash_table_lookup (gdm_available_locales_map, locale->id);
433
if (old_locale != NULL) {
434
/* An existing entry with a longer name wins over the new locale. */
if (strlen (old_locale->name) > strlen (locale->name)) {
435
gdm_locale_free (locale);
440
/* The table takes ownership of the key copy and of the locale. */
g_hash_table_insert (gdm_available_locales_map, g_strdup (locale->id), locale);
448
uint32_t locrec_offset;
452
/*
 * Read the locale names stored in the C library's locale archive and pass
 * each one to add_locale().
 *
 * ARCHIVE_FILE is mapped first; when that fails SYSTEM_ARCHIVE_FILE is tried.
 * The archive header is checked against the mapped length, the name hash
 * table is walked, and every used slot contributes one name.
 *
 * The return value is locales_collected, which starts as FALSE and is set to
 * TRUE after the names have been processed.
 *
 * NOTE(review):
 *  - names is allocated with head->namehash_used entries but filled in a
 *    loop bounded by head->namehash_size; no visible check keeps the number
 *    of non-empty slots within namehash_used.
 *  - the header check adds namehash_size to an offset as if it were a byte
 *    count, while the loop uses it as an entry count - confirm the units
 *    against locarchive.h.
 *  - no visible check ensures the mapping is at least as large as the
 *    header, or that name_offset lies inside the mapping.
 *  - the exit paths and the release of names are on lines not shown here.
 */
collect_locales_from_archive (void)
457
struct locarhead *head;
458
struct namehashent *namehashtab;
459
struct nameent *names;
463
gboolean locales_collected;
466
mapped = g_mapped_file_new (ARCHIVE_FILE, FALSE, &error);
467
if (mapped == NULL) {
468
/* Second attempt; its error is discarded, so the warning reports the first one. */
mapped = g_mapped_file_new (SYSTEM_ARCHIVE_FILE, FALSE, NULL);
469
if (mapped == NULL) {
470
g_warning ("Mapping failed for %s: %s", ARCHIVE_FILE, error->message);
471
g_error_free (error);
474
g_error_free (error);
477
locales_collected = FALSE;
479
addr = g_mapped_file_get_contents (mapped);
480
len = g_mapped_file_get_length (mapped);
482
head = (struct locarhead *) addr;
483
/* Each table described by the header is compared against the mapped length. */
if (head->namehash_offset + head->namehash_size > len
484
|| head->string_offset + head->string_size > len
485
|| head->locrectab_offset + head->locrectab_size > len
486
|| head->sumhash_offset + head->sumhash_size > len) {
490
namehashtab = (struct namehashent *) (addr + head->namehash_offset);
492
names = (struct nameent *) g_new0 (struct nameent, head->namehash_used);
493
/* A slot whose locrec_offset is zero is unused and is skipped. */
for (cnt = used = 0; cnt < head->namehash_size; ++cnt) {
494
if (namehashtab[cnt].locrec_offset != 0) {
495
names[used].name = addr + namehashtab[cnt].name_offset;
496
names[used++].locrec_offset = namehashtab[cnt].locrec_offset;
500
for (cnt = 0; cnt < used; ++cnt) {
501
add_locale (names[cnt].name, TRUE);
506
locales_collected = TRUE;
509
g_mapped_file_unref (mapped);
510
return locales_collected;
514
/*
 * Filter callback handed to scandir(): judges an entry by whether it is a
 * directory.
 *
 * "." and ".." are excluded from the check.  When the platform provides
 * d_type and it is neither DT_UNKNOWN nor DT_LNK, the file mode is derived
 * from d_type with DTTOIF(); the entry is also looked up with g_stat() under
 * LIBLOCALEDIR.  The result is S_ISDIR (mode).
 *
 * NOTE(review): the branch structure that chooses between the d_type and the
 * g_stat() path, the assignment of mode from the stat result, the release of
 * path and the return statement are on lines not visible in this excerpt.
 */
select_dirs (const struct dirent *dirent)
518
if (strcmp (dirent->d_name, ".") != 0 && strcmp (dirent->d_name, "..") != 0) {
521
#ifdef _DIRENT_HAVE_D_TYPE
522
if (dirent->d_type != DT_UNKNOWN && dirent->d_type != DT_LNK) {
523
mode = DTTOIF (dirent->d_type);
530
path = g_build_filename (LIBLOCALEDIR, dirent->d_name, NULL);
531
if (g_stat (path, &st) == 0) {
537
result = S_ISDIR (mode);
544
/*
 * Register the entries of LIBLOCALEDIR that select_dirs() accepts as locales.
 *
 * scandir() returns the accepted entries sorted with alphasort(); each entry
 * name is passed to add_locale() with TRUE as the second argument.
 *
 * NOTE(review): the release of the dirent array and the return value are on
 * lines that are not visible in this excerpt.
 */
collect_locales_from_directory (void)
546
struct dirent **dirents;
550
ndirents = scandir (LIBLOCALEDIR, &dirents, select_dirs, alphasort);
552
for (cnt = 0; cnt < ndirents; ++cnt) {
553
add_locale (dirents[cnt]->d_name, TRUE);
562
/*
 * Register the locales listed in an alias file.
 *
 * The file is read line by line with fgets().  A line is split on blanks,
 * tabs, CR and LF: the first token is stored in name, the second token is a
 * comma-separated list of locale names, and each list element is passed to
 * add_locale() with FALSE as the second argument.
 *
 * A NULL locale_file and a file that cannot be opened are both checked for.
 *
 * NOTE(review): strtok() keeps hidden static state and is not reentrant.
 * The loop construct, the rest of the line-skipping condition, the action
 * taken after a successful add_locale() call, fclose() and the return value
 * are on lines that are not visible in this excerpt.
 */
collect_locales_from_locale_file (const char *locale_file)
568
if (locale_file == NULL)
571
langlist = fopen (locale_file, "r");
573
if (langlist == NULL)
582
getsret = fgets (curline, sizeof (curline), langlist);
586
/* Part of the test that skips lines starting with a blank or control character. */
if (curline[0] <= ' ' ||
590
name = strtok (curline, " \t\r\n");
594
lang = strtok (NULL, " \t\r\n");
598
lang_list = g_strsplit (lang, ",", -1);
599
if (lang_list == NULL)
603
for (i = 0; lang_list[i] != NULL; i++) {
604
if (add_locale (lang_list[i], FALSE)) {
608
g_strfreev (lang_list);
615
/*
 * Fill gdm_available_locales_map from the available sources.
 *
 * The table is created on first use with string keys freed by g_free() and
 * values freed by gdm_locale_free().  The locale archive is tried first;
 * when collect_locales_from_archive() fails, a warning is emitted (only when
 * WITH_INCOMPLETE_LOCALES is not defined) and the locale directory is
 * scanned.  The alias file ALIASES_FILE is read as well.
 *
 * NOTE(review): the block boundaries are not visible in this excerpt, so
 * whether the alias file is read unconditionally cannot be told from here.
 */
collect_locales (void)
618
if (gdm_available_locales_map == NULL) {
619
gdm_available_locales_map = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, (GDestroyNotify) gdm_locale_free);
622
if (!collect_locales_from_archive ()) {
623
#ifndef WITH_INCOMPLETE_LOCALES
624
g_warning ("Could not read list of available locales from libc, "
625
"guessing possible locales from available translations, "
626
"but list may be incomplete!");
629
collect_locales_from_directory ();
631
collect_locales_from_locale_file (ALIASES_FILE);
635
/*
 * Compare code against the fallback locale names "C" and "POSIX".
 *
 * The comparison is an exact, case-sensitive strcmp() against each entry of
 * a NULL-terminated list.
 *
 * NOTE(review): the return statements are on lines that are not visible in
 * this excerpt; presumably a match yields TRUE.
 */
is_fallback_language (const char *code)
637
const char *fallback_language_names[] = { "C", "POSIX", NULL };
640
for (i = 0; fallback_language_names[i] != NULL; i++) {
641
if (strcmp (code, fallback_language_names[i]) == 0) {
650
/*
 * Look up the untranslated name for a language code.
 *
 * code must not be NULL.  Fallback codes (see is_fallback_language()) give
 * the literal "Unspecified".  Other codes are checked for a length of 2 or 3
 * and then looked up in gdm_languages_map.
 *
 * NOTE(review): the computation of len, the action taken for other lengths
 * and the final return statement are on lines not visible in this excerpt.
 */
get_language (const char *code)
655
g_assert (code != NULL);
657
if (is_fallback_language (code)) {
658
return "Unspecified";
662
if (len != 2 && len != 3) {
666
name = (const char *) g_hash_table_lookup (gdm_languages_map, code);
672
/*
 * Return a newly allocated copy of the first element of a list whose
 * elements are separated by "; " (semicolon followed by a space).
 *
 * g_strsplit() is called with a limit of 2, so only the first separator is
 * acted on; the remainder of the list stays in the second piece and is not
 * used by the visible code.
 *
 * NOTE(review): the release of items and the return statement are on lines
 * that are not visible in this excerpt.
 */
get_first_item_in_semicolon_list (const char *list)
677
/* Some entries in iso codes have multiple values, separated
678
* by semicolons. Not really sure which one to pick, so
679
* we just arbitrarily pick the first one.
681
items = g_strsplit (list, "; ", 2);
683
item = g_strdup (items[0]);
690
/*
 * Produce the display name of a language code, translated for a locale.
 *
 * The untranslated name comes from get_language().  When locale is not NULL,
 * LC_MESSAGES is switched to it for the duration of the lookup and restored
 * afterwards.  Fallback codes use the translation of "Unspecified" from this
 * program's own message domain; other names are translated through the
 * "iso_639" domain, and the first "; "-separated alternative is kept.
 *
 * The name is a newly allocated string on both visible branches.
 *
 * NOTE(review): names that came from the iso_639_3 data are also looked up
 * in the "iso_639" domain here - confirm this is intended.  The second
 * parameter (used as locale), the release of old_locale and the return
 * statement are on lines that are not visible in this excerpt.
 */
get_translated_language (const char *code,
693
const char *language;
696
language = get_language (code);
699
if (language != NULL) {
700
const char *translated_name;
703
if (locale != NULL) {
704
old_locale = g_strdup (setlocale (LC_MESSAGES, NULL));
705
setlocale (LC_MESSAGES, locale);
708
if (is_fallback_language (code)) {
709
name = g_strdup (_("Unspecified"));
711
translated_name = dgettext ("iso_639", language);
712
name = get_first_item_in_semicolon_list (translated_name);
715
if (locale != NULL) {
716
/* Put the previous LC_MESSAGES setting back. */
setlocale (LC_MESSAGES, old_locale);
725
/*
 * Look up the untranslated name for a territory code.
 *
 * code must not be NULL.  Its length is checked for 2 or 3 and the code is
 * then looked up in gdm_territories_map.
 *
 * NOTE(review): the computation of len, the action taken for other lengths
 * and the return statement are on lines not visible in this excerpt.
 */
get_territory (const char *code)
730
g_assert (code != NULL);
733
if (len != 2 && len != 3) {
737
name = (const char *) g_hash_table_lookup (gdm_territories_map, code);
743
/*
 * Produce the display name of a territory code, translated for a locale.
 *
 * The untranslated name comes from get_territory().  When locale is not
 * NULL, LC_MESSAGES is switched to it for the duration of the lookup and
 * restored afterwards.  The name is translated through the "iso_3166" domain
 * and the first "; "-separated alternative is kept as a new string.
 *
 * NOTE(review): the second parameter (used as locale), the release of
 * old_locale and the return statement are on lines that are not visible in
 * this excerpt.
 */
get_translated_territory (const char *code,
746
const char *territory;
749
territory = get_territory (code);
752
if (territory != NULL) {
753
const char *translated_territory;
756
if (locale != NULL) {
757
old_locale = g_strdup (setlocale (LC_MESSAGES, NULL));
758
setlocale (LC_MESSAGES, locale);
761
translated_territory = dgettext ("iso_3166", territory);
762
name = get_first_item_in_semicolon_list (translated_territory);
764
if (locale != NULL) {
765
/* Put the previous LC_MESSAGES setting back. */
setlocale (LC_MESSAGES, old_locale);
774
/*
 * GMarkup start-element handler for the iso_639 and iso_639_3 XML files.
 *
 * Only "iso_639_entry" and "iso_639_3_entry" elements that carry attributes
 * are examined.  From the attribute list the handler picks up
 *   iso_639_1_code   (expected length 2),
 *   iso_639_2B_code  (expected length 3),
 *   iso_639_2T_code  (expected length 3),
 *   id               (expected length 2 or 3), and
 *   name.
 * For an entry with a name, gdm_languages_map receives one mapping per code
 * that was found, each with its own copy of the key and of the name.
 *
 * NOTE(review): the remaining parameters, the initialisation of the local
 * code pointers, the action taken for unexpected lengths, the advance of the
 * attribute pointers and the guard around the first insertion are on lines
 * that are not visible in this excerpt.
 */
languages_parse_start_tag (GMarkupParseContext *ctx,
775
const char *element_name,
776
const char **attr_names,
777
const char **attr_values,
781
const char *ccode_longB;
782
const char *ccode_longT;
784
const char *ccode_id;
785
const char *lang_name;
787
if (! (g_str_equal (element_name, "iso_639_entry") || g_str_equal (element_name, "iso_639_3_entry"))
788
|| attr_names == NULL || attr_values == NULL) {
798
/* attr_names and attr_values are walked in parallel until either one ends. */
while (*attr_names && *attr_values) {
799
if (g_str_equal (*attr_names, "iso_639_1_code")) {
802
if (strlen (*attr_values) != 2) {
805
ccode = *attr_values;
807
} else if (g_str_equal (*attr_names, "iso_639_2B_code")) {
810
if (strlen (*attr_values) != 3) {
813
ccode_longB = *attr_values;
815
} else if (g_str_equal (*attr_names, "iso_639_2T_code")) {
818
if (strlen (*attr_values) != 3) {
821
ccode_longT = *attr_values;
823
} else if (g_str_equal (*attr_names, "id")) {
826
if (strlen (*attr_values) != 2 &&
827
strlen (*attr_values) != 3) {
830
ccode_id = *attr_values;
832
} else if (g_str_equal (*attr_names, "name")) {
833
lang_name = *attr_values;
840
/* An entry without a name cannot be mapped. */
if (lang_name == NULL) {
845
g_hash_table_insert (gdm_languages_map,
847
g_strdup (lang_name));
849
if (ccode_longB != NULL) {
850
g_hash_table_insert (gdm_languages_map,
851
g_strdup (ccode_longB),
852
g_strdup (lang_name));
854
if (ccode_longT != NULL) {
855
g_hash_table_insert (gdm_languages_map,
856
g_strdup (ccode_longT),
857
g_strdup (lang_name));
859
if (ccode_id != NULL) {
860
g_hash_table_insert (gdm_languages_map,
862
g_strdup (lang_name));
867
/*
 * GMarkup start-element handler for the iso_3166 XML file.
 *
 * Only "iso_3166_entry" elements that carry attributes are examined.  From
 * the attribute list the handler picks up
 *   alpha_2_code  (expected length 2),
 *   alpha_3_code  (expected length 3),
 *   numeric_code  (expected length 3),
 *   common_name, and
 *   name.
 * common_name takes precedence over name.  For an entry with a name,
 * gdm_territories_map receives one mapping per code that was found, each
 * with its own copy of the name.
 *
 * NOTE(review): the remaining parameters, the declarations of the code
 * pointers, the action taken for unexpected lengths, the advance of the
 * attribute pointers, the key arguments of the insertions and the guard
 * around the last insertion are on lines not visible in this excerpt.
 */
territories_parse_start_tag (GMarkupParseContext *ctx,
868
const char *element_name,
869
const char **attr_names,
870
const char **attr_values,
877
const char *territory_common_name;
878
const char *territory_name;
880
if (! g_str_equal (element_name, "iso_3166_entry") || attr_names == NULL || attr_values == NULL) {
887
territory_common_name = NULL;
888
territory_name = NULL;
890
/* attr_names and attr_values are walked in parallel until either one ends. */
while (*attr_names && *attr_values) {
891
if (g_str_equal (*attr_names, "alpha_2_code")) {
894
if (strlen (*attr_values) != 2) {
897
acode_2 = *attr_values;
899
} else if (g_str_equal (*attr_names, "alpha_3_code")) {
902
if (strlen (*attr_values) != 3) {
905
acode_3 = *attr_values;
907
} else if (g_str_equal (*attr_names, "numeric_code")) {
910
if (strlen (*attr_values) != 3) {
913
ncode = *attr_values;
915
} else if (g_str_equal (*attr_names, "common_name")) {
918
territory_common_name = *attr_values;
920
} else if (g_str_equal (*attr_names, "name")) {
921
territory_name = *attr_values;
928
/* Prefer the common name when the entry has one. */
if (territory_common_name != NULL) {
929
territory_name = territory_common_name;
932
/* An entry without any name cannot be mapped. */
if (territory_name == NULL) {
936
if (acode_2 != NULL) {
937
g_hash_table_insert (gdm_territories_map,
939
g_strdup (territory_name));
941
if (acode_3 != NULL) {
942
g_hash_table_insert (gdm_territories_map,
944
g_strdup (territory_name));
947
g_hash_table_insert (gdm_territories_map,
949
g_strdup (territory_name));
954
/*
 * Load one iso-codes language file into gdm_languages_map.
 *
 * variant names both the gettext domain and the XML file: the domain is
 * bound to ISO_CODES_LOCALESDIR with UTF-8 output, and the file
 * ISO_CODES_DATADIR/<variant>.xml is read and parsed with
 * languages_parse_start_tag() as the only markup callback.
 *
 * Load and parse failures are reported with g_warning() and the GError is
 * freed.
 *
 * NOTE(review): the remaining arguments of g_file_get_contents(), the
 * conditions around the parse and failure branches, and the release of
 * filename and of the file buffer are on lines not visible in this excerpt.
 */
languages_variant_init (const char *variant)
962
bindtextdomain (variant, ISO_CODES_LOCALESDIR);
963
bind_textdomain_codeset (variant, "UTF-8");
966
filename = g_strdup_printf (ISO_CODES_DATADIR "/%s.xml", variant);
967
res = g_file_get_contents (filename,
972
GMarkupParseContext *ctx;
973
/* Only the start-element callback is set; the other parser hooks stay NULL. */
GMarkupParser parser = { languages_parse_start_tag, NULL, NULL, NULL, NULL };
975
ctx = g_markup_parse_context_new (&parser, 0, NULL, NULL);
978
res = g_markup_parse_context_parse (ctx, buf, buf_len, &error);
981
g_warning ("Failed to parse '%s': %s\n",
984
g_error_free (error);
988
g_markup_parse_context_free (ctx);
991
g_warning ("Failed to load '%s': %s\n",
994
g_error_free (error);
999
/*
 * Create gdm_languages_map and fill it from the "iso_639" and "iso_639_3"
 * data sets, in that order.
 *
 * The table owns its keys and values; both are released with g_free().
 */
languages_init (void)
1001
gdm_languages_map = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
1003
languages_variant_init ("iso_639");
1004
languages_variant_init ("iso_639_3");
1008
/*
 * Create gdm_territories_map and fill it from the iso_3166 XML file.
 *
 * The "iso_3166" gettext domain is bound to ISO_CODES_LOCALESDIR with UTF-8
 * output.  The table owns its keys and values (both released with g_free()).
 * ISO_CODES_DATADIR/iso_3166.xml is read and parsed with
 * territories_parse_start_tag() as the only markup callback.
 *
 * Load and parse failures are reported with g_warning() and the GError is
 * freed.
 *
 * NOTE(review): the remaining arguments of g_file_get_contents(), the
 * conditions around the parse and failure branches, and the release of the
 * file buffer are on lines that are not visible in this excerpt.
 */
territories_init (void)
1015
bindtextdomain ("iso_3166", ISO_CODES_LOCALESDIR);
1016
bind_textdomain_codeset ("iso_3166", "UTF-8");
1018
gdm_territories_map = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
1021
res = g_file_get_contents (ISO_CODES_DATADIR "/iso_3166.xml",
1026
GMarkupParseContext *ctx;
1027
/* Only the start-element callback is set; the other parser hooks stay NULL. */
GMarkupParser parser = { territories_parse_start_tag, NULL, NULL, NULL, NULL };
1029
ctx = g_markup_parse_context_new (&parser, 0, NULL, NULL);
1032
res = g_markup_parse_context_parse (ctx, buf, buf_len, &error);
1035
g_warning ("Failed to parse '%s': %s\n",
1036
ISO_CODES_DATADIR "/iso_3166.xml",
1038
g_error_free (error);
1041
g_markup_parse_context_free (ctx);
1044
g_warning ("Failed to load '%s': %s\n",
1045
ISO_CODES_DATADIR "/iso_3166.xml",
1047
g_error_free (error);
1052
/*
 * Build a human-readable description of a locale name.
 *
 * The name is parsed into language, territory and codeset.  The translated
 * language name is appended to the result first; when a territory is present
 * and can be translated it is appended next; when the locale's character set
 * is not UTF-8 and a codeset is known, the codeset is appended too.
 * Translations are requested for the locale given by the second argument.
 *
 * The territory map is initialised on first use; the check on
 * gdm_languages_map presumably does the same for the language map.
 *
 * The returned string is the buffer of a GString released with
 * g_string_free (..., FALSE), so the caller owns it.
 *
 * NOTE(review): the second parameter (used as locale), the format strings of
 * the two g_string_append_printf() calls, the exits taken when the language
 * cannot be determined, and the value returned for an empty result are on
 * lines that are not visible in this excerpt.
 */
gdm_get_language_from_name (const char *name,
1055
GString *full_language;
1056
char *language_code;
1057
char *territory_code;
1059
char *langinfo_codeset;
1060
char *translated_language;
1061
char *translated_territory;
1062
gboolean is_utf8 = TRUE;
1064
translated_territory = NULL;
1065
translated_language = NULL;
1066
langinfo_codeset = NULL;
1068
full_language = g_string_new (NULL);
1070
if (gdm_languages_map == NULL) {
1074
if (gdm_territories_map == NULL) {
1075
territories_init ();
1078
language_code = NULL;
1079
territory_code = NULL;
1080
codeset_code = NULL;
1082
gdm_parse_language_name (name,
1088
if (language_code == NULL) {
1092
translated_language = get_translated_language (language_code, locale);
1093
if (translated_language == NULL) {
1097
full_language = g_string_append (full_language, translated_language);
1099
if (territory_code != NULL) {
1100
translated_territory = get_translated_territory (territory_code, locale);
1102
if (translated_territory != NULL) {
1103
g_string_append_printf (full_language,
1105
translated_territory);
1108
language_name_get_codeset_details (name, &langinfo_codeset, &is_utf8);
1110
/* Fall back to the codeset reported by the C library when the name did not carry one. */
if (codeset_code == NULL && langinfo_codeset != NULL) {
1111
codeset_code = g_strdup (langinfo_codeset);
1114
if (!is_utf8 && codeset_code) {
1115
g_string_append_printf (full_language,
1121
g_free (language_code);
1122
g_free (territory_code);
1123
g_free (codeset_code);
1124
g_free (langinfo_codeset);
1125
g_free (translated_language);
1126
g_free (translated_territory);
1128
/* An empty result is discarded together with its buffer. */
if (full_language->len == 0) {
1129
g_string_free (full_language, TRUE);
1133
return g_string_free (full_language, FALSE);
1137
/*
 * Return the names of all locales in gdm_available_locales_map.
 *
 * The result is a NULL-terminated array of newly duplicated strings, built
 * in a GPtrArray whose element storage is handed to the caller by
 * g_ptr_array_free (..., FALSE).  The order follows the hash table
 * iteration.
 *
 * NOTE(review): the statement executed when the map is still NULL
 * (presumably the call that collects the locales) and the declarations of
 * array and locale are on lines that are not visible in this excerpt.
 */
gdm_get_all_language_names (void)
1139
GHashTableIter iter;
1140
gpointer key, value;
1143
if (gdm_available_locales_map == NULL) {
1147
array = g_ptr_array_new ();
1148
g_hash_table_iter_init (&iter, gdm_available_locales_map);
1149
while (g_hash_table_iter_next (&iter, &key, &value)) {
1152
locale = (GdmLocale *) value;
1154
g_ptr_array_add (array, g_strdup (locale->name));
1156
/* Terminate the array so it can be used as a string vector. */
g_ptr_array_add (array, NULL);
1158
return (char **) g_ptr_array_free (array, FALSE);