2
**********************************************************************
3
* Copyright (C) 1997-2010, International Business Machines
4
* Corporation and others. All Rights Reserved.
5
**********************************************************************
9
* Modification History:
11
* Date Name Description
12
* 04/01/97 aliu Creation.
13
* 08/21/98 stephen JDK 1.2 sync
14
* 12/08/98 rtg New Locale implementation and C API
15
* 03/15/99 damiba overhaul.
16
* 04/06/99 stephen changed setDefault() to realloc and copy
17
* 06/14/99 stephen Changed calls to ures_open for new params
18
* 07/21/99 stephen Modified setDefault() to propagate to C++
19
* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20
* brought canonicalization code into line with spec
21
*****************************************************************************/
24
POSIX's locale format, from putil.c: [no spaces]
26
ll [ _CC ] [ . MM ] [ @ VV]
28
l = lang, C = ctry, M = charmap, V = variant
31
#include "unicode/utypes.h"
32
#include "unicode/ustring.h"
33
#include "unicode/uloc.h"
47
#include <stdio.h> /* for sprintf */
49
/* ### Declarations **************************************************/
51
/* Locale stuff from locid.cpp */
52
U_CFUNC void locale_set_default(const char *id);
53
U_CFUNC const char *locale_get_default(void);
55
locale_getKeywords(const char *localeID,
57
char *keywords, int32_t keywordCapacity,
58
char *values, int32_t valuesCapacity, int32_t *valLen,
62
/* ### Data tables **************************************************/
65
* Table of language codes, both 2- and 3-letter, with preference
66
* given to 2-letter codes where possible. Includes 3-letter codes
67
* that lack a 2-letter equivalent.
69
* This list must be in sorted order. This list is returned directly
70
* to the user by some API.
72
* This list must be kept in sync with LANGUAGES_3, with corresponding entries matched.
75
* This table should be terminated with a NULL entry, followed by a
76
* second list, and another NULL entry. The first list is visible to
77
* user code when this array is returned by API. The second list
78
* contains codes we support, but do not expose through user API.
82
* Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
83
* include the revisions up to 2001/7/27 *CWB*
85
* The 3 character codes are the terminology codes like RFC 3066. This
86
* is compatible with prior ICU codes
88
* "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
89
* table but now at the end of the table because 3 character codes are
90
* duplicates. This avoids bad searches going from 3 to 2 character codes.
93
* The range qaa-qtz is reserved for local use
95
/*
 * Sorted table of language codes (2-letter where one exists, otherwise
 * 3-letter). Entry i corresponds to LANGUAGES_3[i].
 *
 * Layout: primary list, NULL, list of withdrawn codes, NULL.
 */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
    "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",
    "ang", "anp", "apa",
    "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",
    "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
    "bai", "bal", "ban", "bas", "bat", "be",  "bej",
    "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",
    "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",
    "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
    "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
    "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
    "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",
    "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
    "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
    "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu",  "gv",  "gwi",
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him",
    "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
    "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
    "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
    "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
    "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",
    "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",
    "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",
    "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
    "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",
    "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
    "mdf", "mdr", "men", "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min",
    "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
    "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",
    "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",
    "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
    "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",
    "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
    "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",
    "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",
    "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",
    "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",
    "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
    "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",
    "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
    "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",  "trv",
    "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
    "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",
    "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",
    "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",
    "zu",  "zun", "zxx", "zza",
    NULL,   /* end of the list exposed through the API */
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
    NULL    /* end of the withdrawn-code list */
};
159
/*
 * Withdrawn 2-letter language codes. Entry i is superseded by
 * REPLACEMENT_LANGUAGES[i]. Terminated by two NULL entries.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
162
/*
 * Current codes for the withdrawn entries of DEPRECATED_LANGUAGES, in the
 * same order. Terminated by two NULL entries.
 */
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
167
* Table of 3-letter language codes.
169
* This is a lookup table used to convert 3-letter language codes to
170
* their 2-letter equivalent, where possible. It must be kept in sync
171
* with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
172
* same language as LANGUAGES_3[i]. The commented-out lines are
173
* copied from LANGUAGES to make eyeballing this baby easier.
175
* Where a 3-letter language code has no 2-letter equivalent, the
176
* 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
178
* This table should be terminated with a NULL entry, followed by a
179
* second list, and another NULL entry. The two lists correspond to
180
* the two lists in LANGUAGES.
182
/*
 * 3-letter language codes, index-aligned with LANGUAGES: entry i names the
 * same language as LANGUAGES[i]. Every entry is exactly three letters.
 * The commented rows repeat the LANGUAGES row they mirror.
 *
 * Layout: primary list, NULL, list for the withdrawn codes, NULL.
 */
static const char * const LANGUAGES_3[] = {
/*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
/*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
/*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
/*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
    "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
/*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
/*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
/*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
/*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
/*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
    /* "kg" (Kongo) maps to its ISO 639-2 code "kon". */
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kon", "kha", "khi",
/*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
/*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
/*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
/*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
/*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
    "mdf", "mdr", "men", "mfe", "mlg", "mga", "mah", "mri", "mic", "min",
/*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
/*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
/*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",    */
    "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
/*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
/*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
    "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
/*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
    "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
/*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
/*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",    */
    "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
/*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr", "trv",    */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
/*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
/*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
/*  "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",    */
    "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
/*  "zu",  "zun", "zxx", "zza",                                         */
    "zul", "zun", "zxx", "zza",
    NULL,   /* end of the list matching the exposed part of LANGUAGES */
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
    NULL    /* end of the withdrawn-code list */
};
304
* Table of 2-letter country codes.
306
* This list must be in sorted order. This list is returned directly
307
* to the user by some API.
309
* This list must be kept in sync with COUNTRIES_3, with corresponding entries matched.
312
* This table should be terminated with a NULL entry, followed by a
313
* second list, and another NULL entry. The first list is visible to
314
* user code when this array is returned by API. The second list
315
* contains codes we support, but do not expose through user API.
319
* ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
320
* http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
321
* new codes keeping the old ones for compatibility updated to include
322
* 1999/12/03 revisions *CWB*
324
* RO(ROM) is now RO(ROU) according to
325
* http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
327
/*
 * Sorted table of 2-letter country codes. Entry i corresponds to
 * COUNTRIES_3[i].
 *
 * Layout: primary list, NULL, list of obsolete codes, NULL.
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",
    "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
    NULL,   /* end of the list exposed through the API */
    "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   /* obsolete country codes */
    NULL    /* end of the obsolete-code list */
};
363
/*
 * Deprecated 2-letter country codes. Entry i is superseded by
 * REPLACEMENT_COUNTRIES[i]. Terminated by two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] ={
    "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
};
366
/*
 * Current codes for the entries of DEPRECATED_COUNTRIES, in the same order.
 * Terminated by two NULL entries.
 */
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
    "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL  /* replacement country codes */
};
372
* Table of 3-letter country codes.
374
* This is a lookup table used to convert 3-letter country codes to
375
* their 2-letter equivalent. It must be kept in sync with COUNTRIES.
376
* For all valid i, COUNTRIES[i] must refer to the same country as
377
* COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
378
* to make eyeballing this baby easier.
380
* This table should be terminated with a NULL entry, followed by a
381
* second list, and another NULL entry. The two lists correspond to
382
* the two lists in COUNTRIES.
384
/*
 * 3-letter country codes, index-aligned with COUNTRIES: entry i names the
 * same country as COUNTRIES[i]. The commented rows repeat the COUNTRIES row
 * they mirror.
 *
 * Layout: primary list, NULL, list for the obsolete codes, NULL.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
/*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
    NULL,   /* end of the list matching the exposed part of COUNTRIES */
/*  "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   */
    "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
    NULL    /* end of the obsolete-code list */
};
451
/*
 * One row of the locale-ID canonicalization table: maps an input ID to its
 * canonical form and, optionally, to a keyword/value pair.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;
459
* A map to canonicalize locale IDs. This handles a variety of
460
* different semantic kinds of transformations.
462
static const CanonicalizationMap CANONICALIZE_MAP[] = {
463
{ "", "en_US_POSIX", NULL, NULL }, /* .NET name */
464
{ "C", "en_US_POSIX", NULL, NULL }, /* POSIX name */
465
{ "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
466
{ "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
467
{ "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
468
{ "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
469
{ "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
470
{ "cel_GAULISH", "cel__GAULISH", NULL, NULL }, /* registered name */
471
{ "de_1901", "de__1901", NULL, NULL }, /* registered name */
472
{ "de_1906", "de__1906", NULL, NULL }, /* registered name */
473
{ "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
474
{ "de_AT_PREEURO", "de_AT", "currency", "ATS" },
475
{ "de_DE_PREEURO", "de_DE", "currency", "DEM" },
476
{ "de_LU_PREEURO", "de_LU", "currency", "LUF" },
477
{ "el_GR_PREEURO", "el_GR", "currency", "GRD" },
478
{ "en_BOONT", "en__BOONT", NULL, NULL }, /* registered name */
479
{ "en_SCOUSE", "en__SCOUSE", NULL, NULL }, /* registered name */
480
{ "en_BE_PREEURO", "en_BE", "currency", "BEF" },
481
{ "en_IE_PREEURO", "en_IE", "currency", "IEP" },
482
{ "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
483
{ "es_ES_PREEURO", "es_ES", "currency", "ESP" },
484
{ "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
485
{ "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
486
{ "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
487
{ "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
488
{ "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
489
{ "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
490
{ "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
491
{ "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
492
{ "it_IT_PREEURO", "it_IT", "currency", "ITL" },
493
{ "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
494
{ "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
495
{ "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
496
{ "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
497
{ "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
498
{ "sl_ROZAJ", "sl__ROZAJ", NULL, NULL }, /* registered name */
499
{ "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
500
{ "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
501
{ "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
502
{ "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
503
{ "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
504
{ "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
505
{ "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
506
{ "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
507
{ "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
508
{ "zh_GAN", "zh__GAN", NULL, NULL }, /* registered name */
509
{ "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
510
{ "zh_HAKKA", "zh__HAKKA", NULL, NULL }, /* registered name */
511
{ "zh_MIN", "zh__MIN", NULL, NULL }, /* registered name */
512
{ "zh_MIN_NAN", "zh__MINNAN", NULL, NULL }, /* registered name */
513
{ "zh_WUU", "zh__WUU", NULL, NULL }, /* registered name */
514
{ "zh_XIANG", "zh__XIANG", NULL, NULL }, /* registered name */
515
{ "zh_YUE", "zh__YUE", NULL, NULL }, /* registered name */
518
/*
 * One row of the variant table: maps a variant name to the keyword/value
 * pair stored alongside it.
 */
typedef struct VariantMap {
    const char *variant;     /* input ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} VariantMap;
524
static const VariantMap VARIANT_MAP[] = {
525
{ "EURO", "currency", "EUR" },
526
{ "PINYIN", "collation", "pinyin" }, /* Solaris variant */
527
{ "STROKE", "collation", "stroke" } /* Solaris variant */
530
/* ### Keywords **************************************************/
532
#define ULOC_KEYWORD_BUFFER_LEN 25
533
#define ULOC_MAX_NO_KEYWORDS 25
536
/*
 * Searches localeID for the character that introduces the keyword section.
 * The first probe looks for a literal '@' with uprv_strchr; on EBCDIC builds
 * (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) the code also probes byte values
 * taken from a zero-terminated table of alternative '@' encodings.
 *
 * NOTE(review): this view of the function is fragmentary. The return-type
 * line, the return statements, the loop that advances charToFind and the
 * closing braces/#endif are not present here, so what is returned on a match
 * or a miss cannot be confirmed from these lines.
 */
locale_getKeywordsStart(const char *localeID) {
537
const char *result = NULL;
538
if((result = uprv_strchr(localeID, '@')) != NULL) {
541
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
543
/* We do this because the @ sign is variant, and the @ sign used on one
544
EBCDIC machine won't be compiled the same way on other EBCDIC based
546
static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
547
const uint8_t *charToFind = ebcdicSigns;
549
if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
560
* @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
561
* @param keywordName incoming name to be canonicalized
562
* @param status return status (keyword too long)
563
* @return length of the keyword name
565
/*
 * Lower-cases keywordName into buf and returns its length.
 *
 * A name of ULOC_KEYWORD_BUFFER_LEN characters or more is rejected by setting
 * *status to U_INTERNAL_PROGRAM_ERROR.
 *
 * NOTE(review): the declaration of i, the statement that follows the error
 * assignment, the loop's closing brace and any NUL-termination of buf are
 * not present in this view; confirm them against the full source.
 */
static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
568
int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
570
/* ">=" leaves room for one extra byte beyond the name in buf. */
if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
571
/* keyword name too long for internal buffer */
572
*status = U_INTERNAL_PROGRAM_ERROR;
576
/* normalize the keyword name */
577
for(i = 0; i < keywordNameLen; i++) {
578
buf[i] = uprv_tolower(keywordName[i]);
582
return keywordNameLen;
586
/*
 * Members of the per-keyword record used by _getKeywords.
 * NOTE(review): only two members are visible here; the struct header, the
 * keywordLen/valueLen members that _getKeywords assigns, and the closing
 * "} KeywordStruct;" are not present in this view.
 */
/* Lower-cased keyword name, NUL-terminated by _getKeywords. */
char keyword[ULOC_KEYWORD_BUFFER_LEN];
588
/* Start of the value text; _getKeywords points this into the locale ID or at addValue. */
const char *valueStart;
592
static int32_t U_CALLCONV
593
compareKeywordStructs(const void *context, const void *left, const void *right) {
594
const char* leftString = ((const KeywordStruct *)left)->keyword;
595
const char* rightString = ((const KeywordStruct *)right)->keyword;
596
return uprv_strcmp(leftString, rightString);
600
* Both addKeyword and addValue must already be in canonical form.
601
* Either both addKeyword and addValue are NULL, or neither is NULL.
602
* If they are not NULL they must be zero terminated.
603
* If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
606
/*
 * Collects the keyword=value pairs of a locale ID into keywordList (at most
 * ULOC_MAX_NO_KEYWORDS entries), optionally appends addKeyword/addValue,
 * sorts the list by keyword name, and writes the result into the keywords
 * and values buffers. The return value comes from u_terminateChars on the
 * keywords buffer.
 *
 * Errors visible in this view:
 *   U_INTERNAL_PROGRAM_ERROR - keyword list full, or keyword name too long
 *   U_INVALID_FORMAT_ERROR   - missing '=' or ';' before '='
 *
 * NOTE(review): this view is fragmentary. The return type, several
 * parameters (prev, valuesToo, status), local declarations (i, j, n), the
 * loop headers, the else branches and all closing braces are not present,
 * so the nesting of the statements below cannot be confirmed from here.
 */
_getKeywords(const char *localeID,
608
char *keywords, int32_t keywordCapacity,
609
char *values, int32_t valuesCapacity, int32_t *valLen,
611
const char* addKeyword,
612
const char* addValue,
615
KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
617
int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
618
int32_t numKeywords = 0;
619
const char* pos = localeID;
620
const char* equalSign = NULL;
621
const char* semicolon = NULL;
623
int32_t keywordsLen = 0;
624
int32_t valuesLen = 0;
626
if(prev == '@') { /* start of keyword definition */
627
/* we will grab pairs, trim spaces, lowercase keywords, sort and return */
629
UBool duplicate = FALSE;
630
/* skip leading spaces */
634
if (!*pos) { /* handle trailing "; " */
637
/* keywordList is a fixed-size array; refuse to add past its end */
if(numKeywords == maxKeywords) {
638
*status = U_INTERNAL_PROGRAM_ERROR;
641
equalSign = uprv_strchr(pos, '=');
642
semicolon = uprv_strchr(pos, ';');
643
/* lack of '=' [foo@currency] is illegal */
644
/* ';' before '=' [foo@currency;collation=pinyin] is illegal */
645
if(!equalSign || (semicolon && semicolon<equalSign)) {
646
*status = U_INVALID_FORMAT_ERROR;
649
/* need to normalize both keyword and keyword name */
650
if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
651
/* keyword name too long for internal buffer */
652
*status = U_INTERNAL_PROGRAM_ERROR;
655
/* copy the keyword name, lower-cased, into the current list slot */
for(i = 0, n = 0; i < equalSign - pos; ++i) {
657
keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
660
keywordList[numKeywords].keyword[n] = 0;
661
keywordList[numKeywords].keywordLen = n;
662
/* now grab the value part. First we skip the '=' */
664
/* then we skip leading spaces */
665
while(*equalSign == ' ') {
668
/* valueStart points into the caller's locale ID; the value is not copied here */
keywordList[numKeywords].valueStart = equalSign;
673
/* trim trailing spaces by counting them in i */
while(*(pos - i - 1) == ' ') {
676
keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
679
i = (int32_t)uprv_strlen(equalSign);
680
/* NOTE(review): reads equalSign[-1] if i is 0 (empty remainder) -- confirm a guard exists in the missing lines */
while(equalSign[i-1] == ' ') {
683
keywordList[numKeywords].valueLen = i;
685
/* If this is a duplicate keyword, then ignore it */
686
for (j=0; j<numKeywords; ++j) {
687
if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
697
/* Handle addKeyword/addValue. */
698
if (addKeyword != NULL) {
699
UBool duplicate = FALSE;
700
U_ASSERT(addValue != NULL);
701
/* Search for duplicate; if found, do nothing. Explicit keyword
702
overrides addKeyword. */
703
for (j=0; j<numKeywords; ++j) {
704
if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
710
if (numKeywords == maxKeywords) {
711
*status = U_INTERNAL_PROGRAM_ERROR;
714
/* unbounded copy: relies on the documented precondition that addKeyword fits in KeywordStruct.keyword */
uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
715
keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
716
keywordList[numKeywords].valueStart = addValue;
717
keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
721
U_ASSERT(addValue == NULL);
724
/* now we have a list of keywords */
725
/* we need to sort it */
726
uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
728
/* Now construct the keyword part */
729
for(i = 0; i < numKeywords; i++) {
730
if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
731
uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
733
keywords[keywordsLen + keywordList[i].keywordLen] = '=';
735
keywords[keywordsLen + keywordList[i].keywordLen] = 0;
738
keywordsLen += keywordList[i].keywordLen + 1;
740
if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
741
/* bounded copy: exactly valueLen bytes of the value */
uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
743
keywordsLen += keywordList[i].valueLen;
745
if(i < numKeywords - 1) {
746
if(keywordsLen < keywordCapacity) {
747
keywords[keywordsLen] = ';';
753
if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
754
/* NOTE(review): the capacity check above uses valueLen, but uprv_strcpy copies up to the
   NUL of valueStart, which may run past valueLen (valueStart points into the locale ID);
   this can write beyond the checked space -- consider a copy bounded by valueLen */
uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
755
values[valuesLen + keywordList[i].valueLen] = 0;
757
valuesLen += keywordList[i].valueLen + 1;
761
values[valuesLen] = 0;
766
return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
773
/*
 * Wrapper that forwards its arguments to _getKeywords and returns its
 * result.
 * NOTE(review): the return-type line, the prev and valuesToo parameters
 * and the trailing arguments of the _getKeywords call are not present in
 * this view; confirm what is passed for addKeyword/addValue.
 */
locale_getKeywords(const char *localeID,
775
char *keywords, int32_t keywordCapacity,
776
char *values, int32_t valuesCapacity, int32_t *valLen,
778
UErrorCode *status) {
779
return _getKeywords(localeID, prev, keywords, keywordCapacity,
780
values, valuesCapacity, valLen, valuesToo,
784
U_CAPI int32_t U_EXPORT2
785
uloc_getKeywordValue(const char* localeID,
786
const char* keywordName,
787
char* buffer, int32_t bufferCapacity,
790
const char* nextSeparator = NULL;
791
char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
792
char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
796
if(status && U_SUCCESS(*status) && localeID) {
798
const char* startSearchHere = uprv_strchr(localeID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
799
if(startSearchHere == NULL) {
800
/* no keywords, return at once */
804
locale_canonKeywordName(keywordNameBuffer, keywordName, status);
805
if(U_FAILURE(*status)) {
809
/* find the first keyword */
810
while(startSearchHere) {
812
/* skip leading spaces (allowed?) */
813
while(*startSearchHere == ' ') {
816
nextSeparator = uprv_strchr(startSearchHere, '=');
817
/* need to normalize both keyword and keyword name */
821
if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
822
/* keyword name too long for internal buffer */
823
*status = U_INTERNAL_PROGRAM_ERROR;
826
for(i = 0; i < nextSeparator - startSearchHere; i++) {
827
localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
829
/* trim trailing spaces */
830
while(startSearchHere[i-1] == ' ') {
833
localeKeywordNameBuffer[i] = 0;
835
startSearchHere = uprv_strchr(nextSeparator, ';');
837
if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
839
while(*nextSeparator == ' ') {
842
/* we actually found the keyword. Copy the value */
843
if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
844
while(*(startSearchHere-1) == ' ') {
847
uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
848
result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
849
} else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
850
i = (int32_t)uprv_strlen(nextSeparator);
851
while(nextSeparator[i - 1] == ' ') {
854
uprv_strncpy(buffer, nextSeparator, i);
855
result = u_terminateChars(buffer, bufferCapacity, i, status);
857
/* give a bigger buffer, please */
858
*status = U_BUFFER_OVERFLOW_ERROR;
859
if(startSearchHere) {
860
result = (int32_t)(startSearchHere - nextSeparator);
862
result = (int32_t)uprv_strlen(nextSeparator);
872
U_CAPI int32_t U_EXPORT2
873
uloc_setKeywordValue(const char* keywordName,
874
const char* keywordValue,
875
char* buffer, int32_t bufferCapacity,
878
/* TODO: sorting. removal. */
879
int32_t keywordNameLen;
880
int32_t keywordValueLen;
883
int32_t foundValueLen;
884
int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
885
char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
886
char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
889
char* nextSeparator = NULL;
890
char* nextEqualsign = NULL;
891
char* startSearchHere = NULL;
892
char* keywordStart = NULL;
893
char *insertHere = NULL;
894
if(U_FAILURE(*status)) {
897
if(bufferCapacity>1) {
898
bufLen = (int32_t)uprv_strlen(buffer);
900
*status = U_ILLEGAL_ARGUMENT_ERROR;
903
if(bufferCapacity<bufLen) {
904
/* The capacity is less than the length?! Is this NULL terminated? */
905
*status = U_ILLEGAL_ARGUMENT_ERROR;
908
if(keywordValue && !*keywordValue) {
912
keywordValueLen = (int32_t)uprv_strlen(keywordValue);
916
keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
917
if(U_FAILURE(*status)) {
920
startSearchHere = (char*)locale_getKeywordsStart(buffer);
921
if(startSearchHere == NULL || (startSearchHere[1]==0)) {
922
if(!keywordValue) { /* no keywords = nothing to remove */
926
needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
927
if(startSearchHere) { /* had a single @ */
928
needLen--; /* already had the @ */
929
/* startSearchHere points at the @ */
931
startSearchHere=buffer+bufLen;
933
if(needLen >= bufferCapacity) {
934
*status = U_BUFFER_OVERFLOW_ERROR;
935
return needLen; /* no change */
937
*startSearchHere = '@';
939
uprv_strcpy(startSearchHere, keywordNameBuffer);
940
startSearchHere += keywordNameLen;
941
*startSearchHere = '=';
943
uprv_strcpy(startSearchHere, keywordValue);
944
startSearchHere+=keywordValueLen;
946
} /* end shortcut - no @ */
948
keywordStart = startSearchHere;
949
/* search for keyword */
950
while(keywordStart) {
952
/* skip leading spaces (allowed?) */
953
while(*keywordStart == ' ') {
956
nextEqualsign = uprv_strchr(keywordStart, '=');
957
/* need to normalize both keyword and keyword name */
961
if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
962
/* keyword name too long for internal buffer */
963
*status = U_INTERNAL_PROGRAM_ERROR;
966
for(i = 0; i < nextEqualsign - keywordStart; i++) {
967
localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
969
/* trim trailing spaces */
970
while(keywordStart[i-1] == ' ') {
973
localeKeywordNameBuffer[i] = 0;
975
nextSeparator = uprv_strchr(nextEqualsign, ';');
976
rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
979
while(*nextEqualsign == ' ') {
982
/* we actually found the keyword. Change the value */
985
foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
988
foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
990
if(keywordValue) { /* adding a value - not removing */
991
if(foundValueLen == keywordValueLen) {
992
uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
993
return bufLen; /* no change in size */
994
} else if(foundValueLen > keywordValueLen) {
995
int32_t delta = foundValueLen - keywordValueLen;
996
if(nextSeparator) { /* RH side */
997
uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
999
uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1003
} else { /* FVL < KVL */
1004
int32_t delta = keywordValueLen - foundValueLen;
1005
if((bufLen+delta) >= bufferCapacity) {
1006
*status = U_BUFFER_OVERFLOW_ERROR;
1007
return bufLen+delta;
1009
if(nextSeparator) { /* RH side */
1010
uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1012
uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1017
} else { /* removing a keyword */
1019
/* zero out the ';' or '@' just before startSearchhere */
1020
keywordStart[-1] = 0;
1021
return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1023
uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1024
keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1025
return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1028
} else if(rc<0){ /* end match keyword */
1029
/* could insert at this location. */
1030
insertHere = keywordStart;
1032
keywordStart = nextSeparator;
1033
} /* end loop searching */
1036
return bufLen; /* removal of non-extant keyword - no change */
1039
/* we know there is at least one keyword. */
1040
needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1041
if(needLen >= bufferCapacity) {
1042
*status = U_BUFFER_OVERFLOW_ERROR;
1043
return needLen; /* no change */
1047
uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1048
keywordStart = insertHere;
1050
keywordStart = buffer+bufLen;
1051
*keywordStart = ';';
1054
uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1055
keywordStart += keywordNameLen;
1056
*keywordStart = '=';
1058
uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1059
keywordStart+=keywordValueLen;
1061
*keywordStart = ';';
1068
/* ### ID parsing implementation **************************************************/
1070
/* TRUE if 'a' is one of the letters that may start a special prefix. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1081
/*
 * Bounded strchr: look for 'c' within the first 'len' characters of 'str'.
 * The scan also stops at a NUL character.  Returns a pointer to the match,
 * or NULL when 'c' was not found.
 */
static char* _strnchr(const char* str, int32_t len, char c) {
    U_ASSERT(str != 0 && len >= 0);
    while (len-- != 0) {
        char d = *str;
        if (d == c) {
            return (char*) str;
        } else if (d == 0) {
            break;
        }
        ++str;
    }
    return NULL;
}
1096
* Lookup 'key' in the array 'list'. The array 'list' should contain
1097
* a NULL entry, followed by more entries, and a second NULL entry.
1099
* The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1102
static int16_t _findIndex(const char* const* list, const char* key)
1104
const char* const* anchor = list;
1107
/* Make two passes through two NULL-terminated arrays at 'list' */
1108
while (pass++ < 2) {
1110
if (uprv_strcmp(key, *list) == 0) {
1111
return (int16_t)(list - anchor);
1115
++list; /* skip final NULL *CWB*/
1120
/* count the length of src while copying it to dest; return strlen(src) */
1121
static U_INLINE int32_t
1122
_copyCount(char *dest, int32_t destCapacity, const char *src) {
1129
return (int32_t)(src-anchor);
1131
if(destCapacity<=0) {
1132
return (int32_t)((src-anchor)+uprv_strlen(src));
1141
uloc_getCurrentCountryID(const char* oldID){
1142
int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1144
return REPLACEMENT_COUNTRIES[offset];
1149
uloc_getCurrentLanguageID(const char* oldID){
1150
int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1152
return REPLACEMENT_LANGUAGES[offset];
1157
* the internal functions _getLanguage(), _getCountry(), _getVariant()
1158
* avoid duplicating code to handle the earlier locale ID pieces
1159
* in the functions for the later ones by
1160
* setting the *pEnd pointer to where they stopped parsing
1162
* TODO try to use this in Locale
1165
ulocimp_getLanguage(const char *localeID,
1166
char *language, int32_t languageCapacity,
1167
const char **pEnd) {
1170
char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1172
/* if it starts with i- or x- then copy that prefix */
1173
if(_isIDPrefix(localeID)) {
1174
if(i<languageCapacity) {
1175
language[i]=(char)uprv_tolower(*localeID);
1177
if(i<languageCapacity) {
1184
/* copy the language as far as possible and count its length */
1185
while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1186
if(i<languageCapacity) {
1187
language[i]=(char)uprv_tolower(*localeID);
1190
lang[i]=(char)uprv_tolower(*localeID);
1197
/* convert 3 character code to 2 character code if possible *CWB*/
1198
offset=_findIndex(LANGUAGES_3, lang);
1200
i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1211
ulocimp_getScript(const char *localeID,
1212
char *script, int32_t scriptCapacity,
1221
/* copy the second item as far as possible and count its length */
1222
while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1226
/* If it's exactly 4 characters long, then it's a script and not a country. */
1230
*pEnd = localeID+idLen;
1232
if(idLen > scriptCapacity) {
1233
idLen = scriptCapacity;
1236
script[0]=(char)uprv_toupper(*(localeID++));
1238
for (i = 1; i < idLen; i++) {
1239
script[i]=(char)uprv_tolower(*(localeID++));
1249
ulocimp_getCountry(const char *localeID,
1250
char *country, int32_t countryCapacity,
1254
char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1257
/* copy the country as far as possible and count its length */
1258
while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1259
if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1260
cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1265
/* the country should be either length 2 or 3 */
1266
if (idLen == 2 || idLen == 3) {
1267
UBool gotCountry = FALSE;
1268
/* convert 3 character code to 2 character code if possible *CWB*/
1270
offset=_findIndex(COUNTRIES_3, cnty);
1272
idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1278
for (i = 0; i < idLen; i++) {
1279
if (i < countryCapacity) {
1280
country[i]=(char)uprv_toupper(localeID[i]);
1297
* @param needSeparator if true, then add leading '_' if any variants
1298
* are added to 'variant'
1301
_getVariantEx(const char *localeID,
1303
char *variant, int32_t variantCapacity,
1304
UBool needSeparator) {
1307
/* get one or more variant tags and separate them with '_' */
1308
if(_isIDSeparator(prev)) {
1309
/* get a variant string after a '-' or '_' */
1310
while(!_isTerminator(*localeID)) {
1311
if (needSeparator) {
1312
if (i<variantCapacity) {
1316
needSeparator = FALSE;
1318
if(i<variantCapacity) {
1319
variant[i]=(char)uprv_toupper(*localeID);
1320
if(variant[i]=='-') {
1329
/* if there is no variant tag after a '-' or '_' then look for '@' */
1333
} else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1334
++localeID; /* point after the '@' */
1338
while(!_isTerminator(*localeID)) {
1339
if (needSeparator) {
1340
if (i<variantCapacity) {
1344
needSeparator = FALSE;
1346
if(i<variantCapacity) {
1347
variant[i]=(char)uprv_toupper(*localeID);
1348
if(variant[i]=='-' || variant[i]==',') {
1361
/* Same as _getVariantEx() with needSeparator fixed to FALSE, i.e. no
   leading '_' is ever added. */
static int32_t
_getVariant(const char *localeID,
            char prev,
            char *variant, int32_t variantCapacity) {
    return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
}
1368
* Delete ALL instances of a variant from the given list of one or
1369
* more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1370
* @param variants the source string of one or more variants,
1371
* separated by '_'. This will be MODIFIED IN PLACE. Not zero
1372
* terminated; if it is, trailing zero will NOT be maintained.
1373
* @param variantsLen length of variants
1374
* @param toDelete variant to delete, without separators, e.g. "EURO"
1375
* or "PREEURO"; not zero terminated
1376
* @param toDeleteLen length of toDelete
1377
* @return number of characters deleted from variants
1380
_deleteVariant(char* variants, int32_t variantsLen,
1381
const char* toDelete, int32_t toDeleteLen)
1383
int32_t delta = 0; /* number of chars deleted */
1386
if (variantsLen < toDeleteLen) {
1389
if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1390
(variantsLen == toDeleteLen ||
1391
(flag=(variants[toDeleteLen] == '_'))))
1393
int32_t d = toDeleteLen + (flag?1:0);
1396
if (variantsLen > 0) {
1397
uprv_memmove(variants, variants+d, variantsLen);
1400
char* p = _strnchr(variants, variantsLen, '_');
1405
variantsLen -= (int32_t)(p - variants);
1411
/* Keyword enumeration */
1413
/* State of one keyword enumeration. */
typedef struct UKeywordsContext {
    char* keywords; /* owned copy of the keyword list; a run of NUL-terminated
                       names, ended by an empty name */
    char* current;  /* next name to hand out; points into 'keywords' */
} UKeywordsContext;
1418
static void U_CALLCONV
1419
uloc_kw_closeKeywords(UEnumeration *enumerator) {
1420
uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1421
uprv_free(enumerator->context);
1422
uprv_free(enumerator);
1425
static int32_t U_CALLCONV
1426
uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
1427
char *kw = ((UKeywordsContext *)en->context)->keywords;
1431
kw += uprv_strlen(kw)+1;
1436
static const char* U_CALLCONV
1437
uloc_kw_nextKeyword(UEnumeration* en,
1438
int32_t* resultLength,
1439
UErrorCode* status) {
1440
const char* result = ((UKeywordsContext *)en->context)->current;
1443
len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1444
((UKeywordsContext *)en->context)->current += len+1;
1449
*resultLength = len;
1454
static void U_CALLCONV
1455
uloc_kw_resetKeywords(UEnumeration* en,
1456
UErrorCode* status) {
1457
((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1460
static const UEnumeration gKeywordsEnum = {
1463
uloc_kw_closeKeywords,
1464
uloc_kw_countKeywords,
1466
uloc_kw_nextKeyword,
1467
uloc_kw_resetKeywords
1470
/*
 * Create an enumeration over a keyword list.
 *
 * 'keywordList' holds keywordListSize bytes; these are copied and one NUL
 * is appended, so the caller keeps ownership of its buffer.  Returns NULL
 * if *status already indicates failure, or with U_MEMORY_ALLOCATION_ERROR
 * if any allocation fails (nothing is leaked in that case).
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
{
    UKeywordsContext *myContext = NULL;
    UEnumeration *result = NULL;

    if(U_FAILURE(*status)) {
        return NULL;
    }
    result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
    /* Null pointer test */
    if (result == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
    myContext = (UKeywordsContext *)uprv_malloc(sizeof(UKeywordsContext));
    if (myContext == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(result);
        return NULL;
    }
    myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
    if (myContext->keywords == NULL) {
        /* the copy below must not run on a failed allocation */
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(myContext);
        uprv_free(result);
        return NULL;
    }
    uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
    myContext->keywords[keywordListSize] = 0;
    myContext->current = myContext->keywords;
    result->context = myContext;
    return result;
}
1500
/*
 * Create an enumeration over the keyword names of localeID (the default
 * locale when localeID is NULL).  Language, script, country and variant are
 * skipped first; the keywords are then read from the '@' section.  Returns
 * NULL when there are no keywords or when status is NULL or already
 * indicates failure.
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
                  UErrorCode* status)
{
    int32_t i=0;
    char keywords[256];
    int32_t keywordsCapacity = 256;
    if(status==NULL || U_FAILURE(*status)) {
        return 0;
    }

    if(localeID==NULL) {
        localeID=uloc_getDefault();
    }

    /* Skip the language */
    ulocimp_getLanguage(localeID, NULL, 0, &localeID);
    if(_isIDSeparator(*localeID)) {
        const char *scriptID;
        /* Skip the script if available */
        ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
        if(scriptID != localeID+1) {
            /* Found optional script */
            localeID = scriptID;
        }
        /* Skip the Country */
        if (_isIDSeparator(*localeID)) {
            ulocimp_getCountry(localeID+1, NULL, 0, &localeID);
            if(_isIDSeparator(*localeID)) {
                _getVariant(localeID+1, *localeID, NULL, 0);
            }
        }
    }

    /* keywords are located after '@' */
    if((localeID = locale_getKeywordsStart(localeID)) != NULL) {
        i=locale_getKeywords(localeID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
    }

    if(i) {
        return uloc_openKeywordList(keywords, i, status);
    } else {
        return NULL;
    }
}
1547
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options' */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* deliberately not NUL-terminated; always used together with its length */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
/* signed, because it is compared against int32_t lengths */
#define I_DEFAULT_LENGTH ((int32_t)(sizeof i_default / sizeof i_default[0]))
1557
* Canonicalize the given localeID, to level 1 or to level 2,
1558
* depending on the options. To specify level 1, pass in options=0.
1559
* To specify level 2, pass in options=_ULOC_CANONICALIZE.
1561
* This is the code underlying uloc_getName and uloc_canonicalize.
1564
_canonicalize(const char* localeID,
1566
int32_t resultCapacity,
1569
int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1570
char localeBuffer[ULOC_FULLNAME_CAPACITY];
1571
const char* origLocaleID;
1572
const char* keywordAssign = NULL;
1573
const char* separatorIndicator = NULL;
1574
const char* addKeyword = NULL;
1575
const char* addValue = NULL;
1577
char* variant = NULL; /* pointer into name, or NULL */
1579
if (U_FAILURE(*err)) {
1583
if (localeID==NULL) {
1584
localeID=uloc_getDefault();
1586
origLocaleID=localeID;
1588
/* if we are doing a full canonicalization, then put results in
1589
localeBuffer, if necessary; otherwise send them to result. */
1590
if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1591
(result == NULL || resultCapacity < sizeof(localeBuffer))) {
1592
name = localeBuffer;
1593
nameCapacity = sizeof(localeBuffer);
1596
nameCapacity = resultCapacity;
1599
/* get all pieces, one after another, and separate with '_' */
1600
len=ulocimp_getLanguage(localeID, name, nameCapacity, &localeID);
1602
if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1603
const char *d = uloc_getDefault();
1605
len = (int32_t)uprv_strlen(d);
1608
uprv_strncpy(name, d, len);
1610
} else if(_isIDSeparator(*localeID)) {
1611
const char *scriptID;
1614
if(len<nameCapacity) {
1619
scriptSize=ulocimp_getScript(localeID+1, name+len, nameCapacity-len, &scriptID);
1620
if(scriptSize > 0) {
1621
/* Found optional script */
1622
localeID = scriptID;
1625
if (_isIDSeparator(*localeID)) {
1626
/* If there is something else, then we add the _ */
1627
if(len<nameCapacity) {
1634
if (_isIDSeparator(*localeID)) {
1635
const char *cntryID;
1636
int32_t cntrySize = ulocimp_getCountry(localeID+1, name+len, nameCapacity-len, &cntryID);
1637
if (cntrySize > 0) {
1638
/* Found optional country */
1642
if(_isIDSeparator(*localeID)) {
1643
/* If there is something else, then we add the _ if we found country before.*/
1644
if (cntrySize > 0) {
1646
if(len<nameCapacity) {
1652
variantSize = _getVariant(localeID+1, *localeID, name+len, nameCapacity-len);
1653
if (variantSize > 0) {
1656
localeID += variantSize + 1; /* skip '_' and variant */
1662
/* Copy POSIX-style charset specifier, if any [mr.utf8] */
1663
if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') {
1673
if (len<nameCapacity) {
1683
/* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1684
After this, localeID either points to '@' or is NULL */
1685
if ((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1686
keywordAssign = uprv_strchr(localeID, '=');
1687
separatorIndicator = uprv_strchr(localeID, ';');
1690
/* Copy POSIX-style variant, if any [mr@FOO] */
1691
if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1692
localeID != NULL && keywordAssign == NULL) {
1698
if (len<nameCapacity) {
1706
if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1707
/* Handle @FOO variant if @ is present and not followed by = */
1708
if (localeID!=NULL && keywordAssign==NULL) {
1709
int32_t posixVariantSize;
1710
/* Add missing '_' if needed */
1711
if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1713
if(len<nameCapacity) {
1718
} while(fieldCount<2);
1720
posixVariantSize = _getVariantEx(localeID+1, '@', name+len, nameCapacity-len,
1721
(UBool)(variantSize > 0));
1722
if (posixVariantSize > 0) {
1723
if (variant == NULL) {
1726
len += posixVariantSize;
1727
variantSize += posixVariantSize;
1731
/* Handle generic variants first */
1733
for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1734
const char* variantToCompare = VARIANT_MAP[j].variant;
1735
int32_t n = (int32_t)uprv_strlen(variantToCompare);
1736
int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1738
if (variantLen > 0) {
1739
if (name[len-1] == '_') { /* delete trailing '_' */
1742
addKeyword = VARIANT_MAP[j].keyword;
1743
addValue = VARIANT_MAP[j].value;
1747
if (name[len-1] == '_') { /* delete trailing '_' */
1752
/* Look up the ID in the canonicalization map */
1753
for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1754
const char* id = CANONICALIZE_MAP[j].id;
1755
int32_t n = (int32_t)uprv_strlen(id);
1756
if (len == n && uprv_strncmp(name, id, n) == 0) {
1757
if (n == 0 && localeID != NULL) {
1758
break; /* Don't remap "" if keywords present */
1760
len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1761
if (CANONICALIZE_MAP[j].keyword) {
1762
addKeyword = CANONICALIZE_MAP[j].keyword;
1763
addValue = CANONICALIZE_MAP[j].value;
1770
if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1771
if (localeID!=NULL && keywordAssign!=NULL &&
1772
(!separatorIndicator || separatorIndicator > keywordAssign)) {
1773
if(len<nameCapacity) {
1778
len += _getKeywords(localeID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
1779
addKeyword, addValue, err);
1780
} else if (addKeyword != NULL) {
1781
U_ASSERT(addValue != NULL);
1782
/* inelegant but works -- later make _getKeywords do this? */
1783
len += _copyCount(name+len, nameCapacity-len, "@");
1784
len += _copyCount(name+len, nameCapacity-len, addKeyword);
1785
len += _copyCount(name+len, nameCapacity-len, "=");
1786
len += _copyCount(name+len, nameCapacity-len, addValue);
1790
if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1791
uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1794
return u_terminateChars(result, resultCapacity, len, err);
1797
/* ### ID parsing API **************************************************/
1799
U_CAPI int32_t U_EXPORT2
1800
uloc_getParent(const char* localeID,
1802
int32_t parentCapacity,
1805
const char *lastUnderscore;
1808
if (U_FAILURE(*err))
1811
if (localeID == NULL)
1812
localeID = uloc_getDefault();
1814
lastUnderscore=uprv_strrchr(localeID, '_');
1815
if(lastUnderscore!=NULL) {
1816
i=(int32_t)(lastUnderscore-localeID);
1821
if(i>0 && parent != localeID) {
1822
uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1824
return u_terminateChars(parent, parentCapacity, i, err);
1827
U_CAPI int32_t U_EXPORT2
1828
uloc_getLanguage(const char* localeID,
1830
int32_t languageCapacity,
1833
/* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1836
if (err==NULL || U_FAILURE(*err)) {
1840
if(localeID==NULL) {
1841
localeID=uloc_getDefault();
1844
i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1845
return u_terminateChars(language, languageCapacity, i, err);
1848
U_CAPI int32_t U_EXPORT2
1849
uloc_getScript(const char* localeID,
1851
int32_t scriptCapacity,
1856
if(err==NULL || U_FAILURE(*err)) {
1860
if(localeID==NULL) {
1861
localeID=uloc_getDefault();
1864
/* skip the language */
1865
ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1866
if(_isIDSeparator(*localeID)) {
1867
i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
1869
return u_terminateChars(script, scriptCapacity, i, err);
1872
U_CAPI int32_t U_EXPORT2
1873
uloc_getCountry(const char* localeID,
1875
int32_t countryCapacity,
1880
if(err==NULL || U_FAILURE(*err)) {
1884
if(localeID==NULL) {
1885
localeID=uloc_getDefault();
1888
/* Skip the language */
1889
ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1890
if(_isIDSeparator(*localeID)) {
1891
const char *scriptID;
1892
/* Skip the script if available */
1893
ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1894
if(scriptID != localeID+1) {
1895
/* Found optional script */
1896
localeID = scriptID;
1898
if(_isIDSeparator(*localeID)) {
1899
i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
1902
return u_terminateChars(country, countryCapacity, i, err);
1905
U_CAPI int32_t U_EXPORT2
1906
uloc_getVariant(const char* localeID,
1908
int32_t variantCapacity,
1913
if(err==NULL || U_FAILURE(*err)) {
1917
if(localeID==NULL) {
1918
localeID=uloc_getDefault();
1921
/* Skip the language */
1922
ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1923
if(_isIDSeparator(*localeID)) {
1924
const char *scriptID;
1925
/* Skip the script if available */
1926
ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1927
if(scriptID != localeID+1) {
1928
/* Found optional script */
1929
localeID = scriptID;
1931
/* Skip the Country */
1932
if (_isIDSeparator(*localeID)) {
1933
const char *cntryID;
1934
ulocimp_getCountry(localeID+1, NULL, 0, &cntryID);
1935
if (cntryID != localeID) {
1936
/* Found optional country */
1939
if(_isIDSeparator(*localeID)) {
1940
i=_getVariant(localeID+1, *localeID, variant, variantCapacity);
1945
/* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1946
/* if we do not have a variant tag yet then try a POSIX variant after '@' */
1948
if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1949
i=_getVariant(localeID+1, '@', variant, variantCapacity);
1952
return u_terminateChars(variant, variantCapacity, i, err);
1955
U_CAPI int32_t U_EXPORT2
1956
uloc_getName(const char* localeID,
1958
int32_t nameCapacity,
1961
return _canonicalize(localeID, name, nameCapacity, 0, err);
1964
U_CAPI int32_t U_EXPORT2
1965
uloc_getBaseName(const char* localeID,
1967
int32_t nameCapacity,
1970
return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1973
U_CAPI int32_t U_EXPORT2
1974
uloc_canonicalize(const char* localeID,
1976
int32_t nameCapacity,
1979
return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1982
U_CAPI const char* U_EXPORT2
1983
uloc_getISO3Language(const char* localeID)
1986
char lang[ULOC_LANG_CAPACITY];
1987
UErrorCode err = U_ZERO_ERROR;
1989
if (localeID == NULL)
1991
localeID = uloc_getDefault();
1993
uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1996
offset = _findIndex(LANGUAGES, lang);
1999
return LANGUAGES_3[offset];
2002
U_CAPI const char* U_EXPORT2
2003
uloc_getISO3Country(const char* localeID)
2006
char cntry[ULOC_LANG_CAPACITY];
2007
UErrorCode err = U_ZERO_ERROR;
2009
if (localeID == NULL)
2011
localeID = uloc_getDefault();
2013
uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2016
offset = _findIndex(COUNTRIES, cntry);
2020
return COUNTRIES_3[offset];
2023
U_CAPI uint32_t U_EXPORT2
2024
uloc_getLCID(const char* localeID)
2026
UErrorCode status = U_ZERO_ERROR;
2027
char langID[ULOC_FULLNAME_CAPACITY];
2029
uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2030
if (U_FAILURE(status)) {
2034
return uprv_convertToLCID(langID, localeID, &status);
2037
U_CAPI int32_t U_EXPORT2
2038
uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2042
const char *posix = uprv_convertToPosix(hostid, status);
2043
if (U_FAILURE(*status) || posix == NULL) {
2046
length = (int32_t)uprv_strlen(posix);
2047
if (length+1 > localeCapacity) {
2048
*status = U_BUFFER_OVERFLOW_ERROR;
2051
uprv_strcpy(locale, posix);
2056
/* ### Default locale **************************************************/
2058
U_CAPI const char* U_EXPORT2
2061
return locale_get_default();
2064
U_CAPI void U_EXPORT2
2065
uloc_setDefault(const char* newDefaultLocale,
2068
if (U_FAILURE(*err))
2070
/* the error code isn't currently used for anything by this function*/
2072
/* propagate change to C++ */
2073
locale_set_default(newDefaultLocale);
2077
* Returns a list of all language codes defined in ISO 639. This is a pointer
2078
* to an array of pointers to arrays of char. All of these pointers are owned
2079
* by ICU-- do not delete them, and do not write through them. The array is
2080
* terminated with a null pointer.
2082
U_CAPI const char* const* U_EXPORT2
2083
uloc_getISOLanguages()
2089
* Returns a list of all 2-letter country codes defined in ISO 639. This is a
2090
* pointer to an array of pointers to arrays of char. All of these pointers are
2091
* owned by ICU-- do not delete them, and do not write through them. The array is
2092
* terminated with a null pointer.
2094
U_CAPI const char* const* U_EXPORT2
2095
uloc_getISOCountries()
2101
/* this function to be moved into cstring.c later */
static char gDecimal = 0; /* decimal separator the C runtime prints; 0 = not probed yet */

/*
 * strtod() for text that always uses '.' as the decimal separator.
 *
 * The separator used by the C runtime is probed once with sprintf.  If it
 * is not '.', up to 29 characters of the input are copied, the first '.' in
 * the copy is replaced by that separator, and the copy is parsed instead.
 * If 'end' is non-NULL it receives the matching stop position inside
 * 'start'.
 */
static /* U_CAPI */
double
/* U_EXPORT2 */
_uloc_strtod(const char *start, char **end) {
    char *decimal;
    char *myEnd;
    char buf[30];
    double rv;
    if (!gDecimal) {
        char rep[5];
        /* For machines that decide to change the decimal on you,
        and try to be too smart with localization.
        This normally should be just a '.'. */
        sprintf(rep, "%+1.1f", 1.0);
        gDecimal = rep[2];
    }

    if(gDecimal == '.') {
        return uprv_strtod(start, end); /* fall through to OS */
    } else {
        uprv_strncpy(buf, start, 29);
        buf[29]=0;
        decimal = uprv_strchr(buf, '.');
        if(decimal) {
            *decimal = gDecimal;
        } else {
            return uprv_strtod(start, end); /* no decimal point */
        }
        rv = uprv_strtod(buf, &myEnd);
        if(end) {
            *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
        }
        return rv;
    }
}
2142
/* One parsed entry of an Accept-Language list. */
typedef struct {
    float q;        /* quality value of the entry */
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
    char *locale;   /* locale ID string of the entry */
} _acceptLangItem;
2146
static int32_t U_CALLCONV
2147
uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
2149
const _acceptLangItem *aa = (const _acceptLangItem*)a;
2150
const _acceptLangItem *bb = (const _acceptLangItem*)b;
2154
rc = -1; /* A > B */
2155
} else if(bb->q > aa->q) {
2162
rc = uprv_stricmp(aa->locale, bb->locale);
2165
#if defined(ULOC_DEBUG)
2166
/* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2176
mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2179
U_CAPI int32_t U_EXPORT2
2180
uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2181
const char *httpAcceptLanguage,
2182
UEnumeration* availableLocales,
2186
_acceptLangItem smallBuffer[30];
2188
char tmp[ULOC_FULLNAME_CAPACITY +1];
2190
const char *itemEnd;
2191
const char *paramEnd;
2196
int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2198
char *tempstr; /* Use for null pointer check */
2201
jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2202
if(U_FAILURE(*status)) {
2206
for(s=httpAcceptLanguage;s&&*s;) {
2207
while(isspace(*s)) /* eat space at the beginning */
2209
itemEnd=uprv_strchr(s,',');
2210
paramEnd=uprv_strchr(s,';');
2212
itemEnd = httpAcceptLanguage+l; /* end of string */
2214
if(paramEnd && paramEnd<itemEnd) {
2215
/* semicolon (;) is closer than end (,) */
2220
while(isspace(*t)) {
2226
while(isspace(*t)) {
2229
j[n].q = (float)_uloc_strtod(t,NULL);
2231
/* no semicolon - it's 1.0 */
2236
/* eat spaces prior to semi */
2237
for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2239
/* Check for null pointer from uprv_strndup */
2240
tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2241
if (tempstr == NULL) {
2242
*status = U_MEMORY_ALLOCATION_ERROR;
2245
j[n].locale = tempstr;
2246
uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2247
if(strcmp(j[n].locale,tmp)) {
2248
uprv_free(j[n].locale);
2249
j[n].locale=uprv_strdup(tmp);
2251
#if defined(ULOC_DEBUG)
2252
/*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2256
while(*s==',') { /* eat duplicate commas */
2260
if(j==smallBuffer) { /* overflowed the small buffer. */
2261
j = uprv_malloc(sizeof(j[0])*(jSize*2));
2263
uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2265
#if defined(ULOC_DEBUG)
2266
fprintf(stderr,"malloced at size %d\n", jSize);
2269
j = uprv_realloc(j, sizeof(j[0])*jSize*2);
2270
#if defined(ULOC_DEBUG)
2271
fprintf(stderr,"re-alloced at size %d\n", jSize);
2276
*status = U_MEMORY_ALLOCATION_ERROR;
2281
uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2282
if(U_FAILURE(*status)) {
2283
if(j != smallBuffer) {
2284
#if defined(ULOC_DEBUG)
2285
fprintf(stderr,"freeing j %p\n", j);
2291
strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
2292
/* Check for null pointer */
2294
uprv_free(j); /* Free to avoid memory leak */
2295
*status = U_MEMORY_ALLOCATION_ERROR;
2299
#if defined(ULOC_DEBUG)
2300
/*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2302
strs[i]=j[i].locale;
2304
res = uloc_acceptLanguage(result, resultAvailable, outResult,
2305
(const char**)strs, n, availableLocales, status);
2310
if(j != smallBuffer) {
2311
#if defined(ULOC_DEBUG)
2312
fprintf(stderr,"freeing j %p\n", j);
2320
U_CAPI int32_t U_EXPORT2
2321
uloc_acceptLanguage(char *result, int32_t resultAvailable,
2322
UAcceptResult *outResult, const char **acceptList,
2323
int32_t acceptListCount,
2324
UEnumeration* availableLocales,
2330
char tmp[ULOC_FULLNAME_CAPACITY+1];
2332
char **fallbackList;
2333
if(U_FAILURE(*status)) {
2336
fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
2337
if(fallbackList==NULL) {
2338
*status = U_MEMORY_ALLOCATION_ERROR;
2341
for(i=0;i<acceptListCount;i++) {
2342
#if defined(ULOC_DEBUG)
2343
fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2345
while((l=uenum_next(availableLocales, NULL, status))) {
2346
#if defined(ULOC_DEBUG)
2347
fprintf(stderr," %s\n", l);
2349
len = (int32_t)uprv_strlen(l);
2350
if(!uprv_strcmp(acceptList[i], l)) {
2352
*outResult = ULOC_ACCEPT_VALID;
2354
#if defined(ULOC_DEBUG)
2355
fprintf(stderr, "MATCH! %s\n", l);
2358
uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2361
uprv_free(fallbackList[j]);
2363
uprv_free(fallbackList);
2364
return u_terminateChars(result, resultAvailable, len, status);
2370
uenum_reset(availableLocales, status);
2371
/* save off parent info */
2372
if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2373
fallbackList[i] = uprv_strdup(tmp);
2379
for(maxLen--;maxLen>0;maxLen--) {
2380
for(i=0;i<acceptListCount;i++) {
2381
if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2382
#if defined(ULOC_DEBUG)
2383
fprintf(stderr,"Try: [%s]", fallbackList[i]);
2385
while((l=uenum_next(availableLocales, NULL, status))) {
2386
#if defined(ULOC_DEBUG)
2387
fprintf(stderr," %s\n", l);
2389
len = (int32_t)uprv_strlen(l);
2390
if(!uprv_strcmp(fallbackList[i], l)) {
2392
*outResult = ULOC_ACCEPT_FALLBACK;
2394
#if defined(ULOC_DEBUG)
2395
fprintf(stderr, "fallback MATCH! %s\n", l);
2398
uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2400
for(j=0;j<acceptListCount;j++) {
2401
uprv_free(fallbackList[j]);
2403
uprv_free(fallbackList);
2404
return u_terminateChars(result, resultAvailable, len, status);
2407
uenum_reset(availableLocales, status);
2409
if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2410
uprv_free(fallbackList[i]);
2411
fallbackList[i] = uprv_strdup(tmp);
2413
uprv_free(fallbackList[i]);
2419
*outResult = ULOC_ACCEPT_FAILED;
2422
for(i=0;i<acceptListCount;i++) {
2423
uprv_free(fallbackList[i]);
2425
uprv_free(fallbackList);