~ubuntu-branches/ubuntu/natty/icu/natty-updates : revision 19

1

/*

2

**********************************************************************

3

4

5

**********************************************************************

6

*

7

* File ULOC.CPP

8

*

9

* Modification History:

10

*

11

* Date Name Description

12

* 04/01/97 aliu Creation.

13

* 08/21/98 stephen JDK 1.2 sync

14

* 12/08/98 rtg New Locale implementation and C API

15

* 03/15/99 damiba overhaul.

16

* 04/06/99 stephen changed setDefault() to realloc and copy

17

* 06/14/99 stephen Changed calls to ures_open for new params

18

* 07/21/99 stephen Modified setDefault() to propagate to C++

19

* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,

20

* brought canonicalization code into line with spec

21

*****************************************************************************/

22

23

/*

24

POSIX's locale format, from putil.c: [no spaces]

25

26

ll [ _CC ] [ . MM ] [ @ VV]

27

28

l = lang, C = ctry, M = charmap, V = variant

29

*/

30

31

#include "unicode/utypes.h"

32

#include "unicode/ustring.h"

33

#include "unicode/uloc.h"

34

35

#include "putilimp.h"

36

#include "ustr_imp.h"

37

#include "ulocimp.h"

38

#include "umutex.h"

39

#include "cstring.h"

40

#include "cmemory.h"

41

#include "ucln_cmn.h"

42

#include "locmap.h"

43

#include "uarrsort.h"

44

#include "uenumimp.h"

45

#include "uassert.h"

46

47

#include <stdio.h> /* for sprintf */

48

49

/* ### Declarations **************************************************/

50

51

/* Locale stuff from locid.cpp */

52

U_CFUNC void locale_set_default(const char *id);

53

U_CFUNC const char *locale_get_default(void);

54

U_CFUNC int32_t

55

locale_getKeywords(const char *localeID,

56

char prev,

57

char *keywords, int32_t keywordCapacity,

58

char *values, int32_t valuesCapacity, int32_t *valLen,

59

UBool valuesToo,

60

UErrorCode *status);

61

62

/* ### Data tables **************************************************/

63

64

/**
 * Table of language codes, both 2- and 3-letter, with preference
 * given to 2-letter codes where possible.  Includes 3-letter codes
 * that lack a 2-letter equivalent.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with LANGUAGES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes
 *
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 * include the revisions up to 2001/7/27 *CWB*
 *
 * The 3 character codes are the terminology codes like RFC 3066.  This
 * is compatible with prior ICU codes.
 *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 * table, now at the end of the table because their 3 character codes
 * are duplicates.  This avoids bad searches going from 3 to 2 character
 * codes.
 *
 * The range qaa-qtz is reserved for local use.
 */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
    "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",
    "ang", "anp", "apa",
    "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",
    "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
    "bai", "bal", "ban", "bas", "bat", "be",  "bej",
    "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",
    "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",
    "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
    "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
    "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
    "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",
    "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
    "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
    "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu",  "gv",  "gwi",
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him",
    "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
    "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
    "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
    "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
    "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",
    "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",
    "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",
    "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
    "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",
    "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
    "mdf", "mdr", "men", "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min",
    "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
    "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",
    "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",
    "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
    "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",
    "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
    "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",
    "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",
    "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",
    "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",
    "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
    "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",
    "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
    "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",  "trv",
    "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
    "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",
    "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",
    "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",
    "zu",  "zun", "zxx", "zza",
    NULL,
    "in",  "iw",  "ji",  "jw",  "sh",   /* obsolete language codes */
    NULL
};

159

/*
 * Withdrawn 2-letter language codes.  The list is closed by two NULL
 * entries, the same double-terminator layout used by LANGUAGES.
 * Presumably index-aligned with REPLACEMENT_LANGUAGES (same length, and
 * entry i there is the modern code for entry i here) -- keep the two in sync.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};

162

/*
 * Current 2-letter codes for the entries of DEPRECATED_LANGUAGES, in the
 * same order ("in"->"id", "iw"->"he", "ji"->"yi", "jw"->"jv").  Closed by
 * two NULL entries like the table it parallels.
 */
static const char* const REPLACEMENT_LANGUAGES[]={
    /* "in", "iw", "ji", "jw" */
    "id", "he", "yi", "jv", NULL, NULL
};

165

166

/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The commented-out lines are
 * copied from LANGUAGES to make eyeballing this baby easier.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * Every entry in this table is exactly three letters long; a 2-letter
 * code here means the row was copied from LANGUAGES without being
 * translated (this is how "kg" previously ended up in the "kon" slot).
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
 */
static const char * const LANGUAGES_3[] = {
/*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
/*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
/*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
/*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
    "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
/*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
/*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
/*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
/*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
/*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kon", "kha", "khi",
/*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
/*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
/*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
/*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
/*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
    "mdf", "mdr", "men", "mfe", "mlg", "mga", "mah", "mri", "mic", "min",
/*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
/*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
/*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",    */
    "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
/*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
/*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
    "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
/*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
    "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
/*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
/*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",    */
    "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
/*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",  "trv",    */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
/*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
/*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
/*  "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",    */
    "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
/*  "zu",  "zun", "zxx", "zza",                                         */
    "zul", "zun", "zxx", "zza",
    NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
    NULL
};

302

303

/**
 * Table of 2-letter country codes.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with COUNTRIES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes:
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 * new codes keeping the old ones for compatibility updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",
    "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
    NULL,
    "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   /* obsolete country codes */
    NULL
};

362

363

/*
 * Deprecated 2-letter country codes, closed by two NULL entries.
 * Entry i is paired with REPLACEMENT_COUNTRIES[i]; the two tables must be
 * edited together.
 */
static const char* const DEPRECATED_COUNTRIES[] ={
    "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
};

366

/*
 * Current country codes for the entries of DEPRECATED_COUNTRIES, in the
 * same order.  The commented-out row repeats the deprecated codes so the
 * pairing can be checked by eye.  Closed by two NULL entries.
 */
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
    "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL  /* replacement country codes */
};

370

371

/**
 * Table of 3-letter country codes.
 *
 * This is a lookup table used to convert 3-letter country codes to
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 * For all valid i, COUNTRIES[i] must refer to the same country as
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 * to make eyeballing this baby easier.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
/*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
    NULL,
/*  "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   */
    "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
    NULL
};

450

451

/*
 * One row of the locale-ID canonicalization table: a legacy or alias ID,
 * the ID it is rewritten to, and an optional keyword/value pair that goes
 * with the rewritten ID.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;

457

458

/**

459

* A map to canonicalize locale IDs. This handles a variety of

460

* different semantic kinds of transformations.

461

*/

462

static const CanonicalizationMap CANONICALIZE_MAP[] = {

463

{ "", "en_US_POSIX", NULL, NULL }, /* .NET name */

464

{ "C", "en_US_POSIX", NULL, NULL }, /* POSIX name */

465

{ "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */

466

{ "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */

467

{ "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */

468

{ "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */

469

{ "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },

470

{ "cel_GAULISH", "cel__GAULISH", NULL, NULL }, /* registered name */

471

{ "de_1901", "de__1901", NULL, NULL }, /* registered name */

472

{ "de_1906", "de__1906", NULL, NULL }, /* registered name */

473

{ "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */

474

{ "de_AT_PREEURO", "de_AT", "currency", "ATS" },

475

{ "de_DE_PREEURO", "de_DE", "currency", "DEM" },

476

{ "de_LU_PREEURO", "de_LU", "currency", "LUF" },

477

{ "el_GR_PREEURO", "el_GR", "currency", "GRD" },

478

{ "en_BOONT", "en__BOONT", NULL, NULL }, /* registered name */

479

{ "en_SCOUSE", "en__SCOUSE", NULL, NULL }, /* registered name */

480

{ "en_BE_PREEURO", "en_BE", "currency", "BEF" },

481

{ "en_IE_PREEURO", "en_IE", "currency", "IEP" },

482

{ "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */

483

{ "es_ES_PREEURO", "es_ES", "currency", "ESP" },

484

{ "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },

485

{ "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },

486

{ "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },

487

{ "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },

488

{ "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },

489

{ "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },

490

{ "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },

491

{ "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */

492

{ "it_IT_PREEURO", "it_IT", "currency", "ITL" },

493

{ "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */

494

{ "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */

495

{ "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },

496

{ "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },

497

{ "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },

498

{ "sl_ROZAJ", "sl__ROZAJ", NULL, NULL }, /* registered name */

499

{ "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */

500

{ "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */

501

{ "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */

502

{ "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */

503

{ "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */

504

{ "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */

505

{ "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */

506

{ "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */

507

{ "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */

508

{ "zh_GAN", "zh__GAN", NULL, NULL }, /* registered name */

509

{ "zh_GUOYU", "zh", NULL, NULL }, /* registered name */

510

{ "zh_HAKKA", "zh__HAKKA", NULL, NULL }, /* registered name */

511

{ "zh_MIN", "zh__MIN", NULL, NULL }, /* registered name */

512

{ "zh_MIN_NAN", "zh__MINNAN", NULL, NULL }, /* registered name */

513

{ "zh_WUU", "zh__WUU", NULL, NULL }, /* registered name */

514

{ "zh_XIANG", "zh__XIANG", NULL, NULL }, /* registered name */

515

{ "zh_YUE", "zh__YUE", NULL, NULL }, /* registered name */

516

};

517

518

/*
 * One row of the variant-to-keyword table: a variant name and the
 * keyword/value pair associated with it.
 */
typedef struct VariantMap {
    const char *variant; /* input ID */
    const char *keyword; /* keyword, or NULL if none */
    const char *value;   /* keyword value, or NULL if kw==NULL */
} VariantMap;

523

524

static const VariantMap VARIANT_MAP[] = {

525

{ "EURO", "currency", "EUR" },

526

{ "PINYIN", "collation", "pinyin" }, /* Solaris variant */

527

{ "STROKE", "collation", "stroke" } /* Solaris variant */

528

};

529

530

/* ### Keywords **************************************************/

531

532

/* Size, in chars including the terminating NUL, of a keyword-name buffer. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keywords accepted in a single locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25

534

535

/**
 * Find where the keyword section of a locale ID begins.
 *
 * @param localeID zero-terminated locale ID; must not be NULL
 * @return pointer to the '@' that introduces the keywords, or NULL if the
 *         ID contains none.  On EBCDIC builds, if no '@' is found, the
 *         code points listed in ebcdicSigns are tried in order and a
 *         pointer to the first one found is returned.
 */
static const char *
locale_getKeywordsStart(const char *localeID) {
    const char *result = uprv_strchr(localeID, '@');

    if(result != NULL) {
        return result;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    {
        /* We do this because the @ sign is variant, and the @ sign used on one
        EBCDIC machine won't be compiled the same way on other EBCDIC based
        machines. */
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
        const uint8_t *charToFind;

        /* the list is terminated by its trailing 0x00 entry */
        for(charToFind = ebcdicSigns; *charToFind != 0; charToFind++) {
            result = uprv_strchr(localeID, *charToFind);
            if(result != NULL) {
                return result;
            }
        }
    }
#endif
    return NULL;
}

558

559

/**

560

* @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]

561

* @param keywordName incoming name to be canonicalized

562

* @param status return status (keyword too long)

563

* @return length of the keyword name

564

*/

565

static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)

566

{

567

int32_t i;

568

int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);

569

570

if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {

571

/* keyword name too long for internal buffer */

572

*status = U_INTERNAL_PROGRAM_ERROR;

573

return 0;

574

}

575

576

/* normalize the keyword name */

577

for(i = 0; i < keywordNameLen; i++) {

578

buf[i] = uprv_tolower(keywordName[i]);

579

}

580

buf[i] = 0;

581

582

return keywordNameLen;

583

}

584

585

typedef struct {

586

char keyword[ULOC_KEYWORD_BUFFER_LEN];

587

int32_t keywordLen;

588

const char *valueStart;

589

int32_t valueLen;

590

} KeywordStruct;

591

592

static int32_t U_CALLCONV

593

compareKeywordStructs(const void *context, const void *left, const void *right) {

594

const char* leftString = ((const KeywordStruct *)left)->keyword;

595

const char* rightString = ((const KeywordStruct *)right)->keyword;

596

return uprv_strcmp(leftString, rightString);

597

}

598

599

/**

600

* Both addKeyword and addValue must already be in canonical form.

601

* Either both addKeyword and addValue are NULL, or neither is NULL.

602

* If they are not NULL they must be zero terminated.

603

* If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.

604

*/

605

static int32_t

606

_getKeywords(const char *localeID,

607

char prev,

608

char *keywords, int32_t keywordCapacity,

609

char *values, int32_t valuesCapacity, int32_t *valLen,

610

UBool valuesToo,

611

const char* addKeyword,

612

const char* addValue,

613

UErrorCode *status)

614

{

615

KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];

616

617

int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;

618

int32_t numKeywords = 0;

619

const char* pos = localeID;

620

const char* equalSign = NULL;

621

const char* semicolon = NULL;

622

int32_t i = 0, j, n;

623

int32_t keywordsLen = 0;

624

int32_t valuesLen = 0;

625

626

if(prev == '@') { /* start of keyword definition */

627

/* we will grab pairs, trim spaces, lowercase keywords, sort and return */

628

do {

629

UBool duplicate = FALSE;

630

/* skip leading spaces */

631

while(*pos == ' ') {

632

pos++;

633

}

634

if (!*pos) { /* handle trailing "; " */

635

break;

636

}

637

if(numKeywords == maxKeywords) {

638

*status = U_INTERNAL_PROGRAM_ERROR;

639

return 0;

640

}

641

equalSign = uprv_strchr(pos, '=');

642

semicolon = uprv_strchr(pos, ';');

643

/* lack of '=' [foo@currency] is illegal */

644

/* ';' before '=' [foo@currency;collation=pinyin] is illegal */

645

if(!equalSign || (semicolon && semicolon<equalSign)) {

646

*status = U_INVALID_FORMAT_ERROR;

647

return 0;

648

}

649

/* need to normalize both keyword and keyword name */

650

if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {

651

/* keyword name too long for internal buffer */

652

*status = U_INTERNAL_PROGRAM_ERROR;

653

return 0;

654

}

655

for(i = 0, n = 0; i < equalSign - pos; ++i) {

656

if (pos[i] != ' ') {

657

keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);

658

}

659

}

660

keywordList[numKeywords].keyword[n] = 0;

661

keywordList[numKeywords].keywordLen = n;

662

/* now grab the value part. First we skip the '=' */

663

equalSign++;

664

/* then we leading spaces */

665

while(*equalSign == ' ') {

666

equalSign++;

667

}

668

keywordList[numKeywords].valueStart = equalSign;

669

670

pos = semicolon;

671

i = 0;

672

if(pos) {

673

while(*(pos - i - 1) == ' ') {

674

i++;

675

}

676

keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);

677

pos++;

678

} else {

679

i = (int32_t)uprv_strlen(equalSign);

680

while(equalSign[i-1] == ' ') {

681

i--;

682

}

683

keywordList[numKeywords].valueLen = i;

684

}

685

/* If this is a duplicate keyword, then ignore it */

686

for (j=0; j<numKeywords; ++j) {

687

if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {

688

duplicate = TRUE;

689

break;

690

}

691

}

692

if (!duplicate) {

693

++numKeywords;

694

}

695

} while(pos);

696

697

/* Handle addKeyword/addValue. */

698

if (addKeyword != NULL) {

699

UBool duplicate = FALSE;

700

U_ASSERT(addValue != NULL);

701

/* Search for duplicate; if found, do nothing. Explicit keyword

702

overrides addKeyword. */

703

for (j=0; j<numKeywords; ++j) {

704

if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {

705

duplicate = TRUE;

706

break;

707

}

708

}

709

if (!duplicate) {

710

if (numKeywords == maxKeywords) {

711

*status = U_INTERNAL_PROGRAM_ERROR;

712

return 0;

713

}

714

uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);

715

keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);

716

keywordList[numKeywords].valueStart = addValue;

717

keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);

718

++numKeywords;

719

}

720

} else {

721

U_ASSERT(addValue == NULL);

722

}

723

724

/* now we have a list of keywords */

725

/* we need to sort it */

726

uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);

727

728

/* Now construct the keyword part */

729

for(i = 0; i < numKeywords; i++) {

730

if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {

731

uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);

732

if(valuesToo) {

733

keywords[keywordsLen + keywordList[i].keywordLen] = '=';

734

} else {

735

keywords[keywordsLen + keywordList[i].keywordLen] = 0;

736

}

737

}

738

keywordsLen += keywordList[i].keywordLen + 1;

739

if(valuesToo) {

740

if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {

741

uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);

742

}

743

keywordsLen += keywordList[i].valueLen;

744

745

if(i < numKeywords - 1) {

746

if(keywordsLen < keywordCapacity) {

747

keywords[keywordsLen] = ';';

748

}

749

keywordsLen++;

750

}

751

}

752

if(values) {

753

if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {

754

uprv_strcpy(values+valuesLen, keywordList[i].valueStart);

755

values[valuesLen + keywordList[i].valueLen] = 0;

756

}

757

valuesLen += keywordList[i].valueLen + 1;

758

}

759

}

760

if(values) {

761

values[valuesLen] = 0;

762

if(valLen) {

763

*valLen = valuesLen;

764

}

765

}

766

return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);

767

} else {

768

return 0;

769

}

770

}

771

772

U_CFUNC int32_t

773

locale_getKeywords(const char *localeID,

774

char prev,

775

char *keywords, int32_t keywordCapacity,

776

char *values, int32_t valuesCapacity, int32_t *valLen,

777

UBool valuesToo,

778

UErrorCode *status) {

779

return _getKeywords(localeID, prev, keywords, keywordCapacity,

780

values, valuesCapacity, valLen, valuesToo,

781

NULL, NULL, status);

782

}

783

784

U_CAPI int32_t U_EXPORT2

785

uloc_getKeywordValue(const char* localeID,

786

const char* keywordName,

787

char* buffer, int32_t bufferCapacity,

788

UErrorCode* status)

789

{

790

const char* nextSeparator = NULL;

791

char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

792

char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

793

int32_t i = 0;

794

int32_t result = 0;

795

796

if(status && U_SUCCESS(*status) && localeID) {

797

798

const char* startSearchHere = uprv_strchr(localeID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */

799

if(startSearchHere == NULL) {

800

/* no keywords, return at once */

801

return 0;

802

}

803

804

locale_canonKeywordName(keywordNameBuffer, keywordName, status);

805

if(U_FAILURE(*status)) {

806

return 0;

807

}

808

809

/* find the first keyword */

810

while(startSearchHere) {

811

startSearchHere++;

812

/* skip leading spaces (allowed?) */

813

while(*startSearchHere == ' ') {

814

startSearchHere++;

815

}

816

nextSeparator = uprv_strchr(startSearchHere, '=');

817

/* need to normalize both keyword and keyword name */

818

if(!nextSeparator) {

819

break;

820

}

821

if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {

822

/* keyword name too long for internal buffer */

823

*status = U_INTERNAL_PROGRAM_ERROR;

824

return 0;

825

}

826

for(i = 0; i < nextSeparator - startSearchHere; i++) {

827

localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);

828

}

829

/* trim trailing spaces */

830

while(startSearchHere[i-1] == ' ') {

831

i--;

832

}

833

localeKeywordNameBuffer[i] = 0;

834

835

startSearchHere = uprv_strchr(nextSeparator, ';');

836

837

if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {

838

nextSeparator++;

839

while(*nextSeparator == ' ') {

840

nextSeparator++;

841

}

842

/* we actually found the keyword. Copy the value */

843

if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {

844

while(*(startSearchHere-1) == ' ') {

845

startSearchHere--;

846

}

847

uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);

848

result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);

849

} else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */

850

i = (int32_t)uprv_strlen(nextSeparator);

851

while(nextSeparator[i - 1] == ' ') {

852

i--;

853

}

854

uprv_strncpy(buffer, nextSeparator, i);

855

result = u_terminateChars(buffer, bufferCapacity, i, status);

856

} else {

857

/* give a bigger buffer, please */

858

*status = U_BUFFER_OVERFLOW_ERROR;

859

if(startSearchHere) {

860

result = (int32_t)(startSearchHere - nextSeparator);

861

} else {

862

result = (int32_t)uprv_strlen(nextSeparator);

863

}

864

}

865

return result;

866

}

867

}

868

}

869

return 0;

870

}

871

872

U_CAPI int32_t U_EXPORT2

873

uloc_setKeywordValue(const char* keywordName,

874

const char* keywordValue,

875

char* buffer, int32_t bufferCapacity,

876

UErrorCode* status)

877

{

878

/* TODO: sorting. removal. */

879

int32_t keywordNameLen;

880

int32_t keywordValueLen;

881

int32_t bufLen;

882

int32_t needLen = 0;

883

int32_t foundValueLen;

884

int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */

885

char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

886

char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

887

int32_t i = 0;

888

int32_t rc;

889

char* nextSeparator = NULL;

890

char* nextEqualsign = NULL;

891

char* startSearchHere = NULL;

892

char* keywordStart = NULL;

893

char *insertHere = NULL;

894

if(U_FAILURE(*status)) {

895

return -1;

896

}

897

if(bufferCapacity>1) {

898

bufLen = (int32_t)uprv_strlen(buffer);

899

} else {

900

*status = U_ILLEGAL_ARGUMENT_ERROR;

901

return 0;

902

}

903

if(bufferCapacity<bufLen) {

904

/* The capacity is less than the length?! Is this NULL terminated? */

905

*status = U_ILLEGAL_ARGUMENT_ERROR;

906

return 0;

907

}

908

if(keywordValue && !*keywordValue) {

909

keywordValue = NULL;

910

}

911

if(keywordValue) {

912

keywordValueLen = (int32_t)uprv_strlen(keywordValue);

913

} else {

914

keywordValueLen = 0;

915

}

916

keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);

917

if(U_FAILURE(*status)) {

918

return 0;

919

}

920

startSearchHere = (char*)locale_getKeywordsStart(buffer);

921

if(startSearchHere == NULL || (startSearchHere[1]==0)) {

922

if(!keywordValue) { /* no keywords = nothing to remove */

923

return bufLen;

924

}

925

926

needLen = bufLen+1+keywordNameLen+1+keywordValueLen;

927

if(startSearchHere) { /* had a single @ */

928

needLen--; /* already had the @ */

929

/* startSearchHere points at the @ */

930

} else {

931

startSearchHere=buffer+bufLen;

932

}

933

if(needLen >= bufferCapacity) {

934

*status = U_BUFFER_OVERFLOW_ERROR;

935

return needLen; /* no change */

936

}

937

*startSearchHere = '@';

938

startSearchHere++;

939

uprv_strcpy(startSearchHere, keywordNameBuffer);

940

startSearchHere += keywordNameLen;

941

*startSearchHere = '=';

942

startSearchHere++;

943

uprv_strcpy(startSearchHere, keywordValue);

944

startSearchHere+=keywordValueLen;

945

return needLen;

946

} /* end shortcut - no @ */

947

948

keywordStart = startSearchHere;

949

/* search for keyword */

950

while(keywordStart) {

951

keywordStart++;

952

/* skip leading spaces (allowed?) */

953

while(*keywordStart == ' ') {

954

keywordStart++;

955

}

956

nextEqualsign = uprv_strchr(keywordStart, '=');

957

/* need to normalize both keyword and keyword name */

958

if(!nextEqualsign) {

959

break;

960

}

961

if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {

962

/* keyword name too long for internal buffer */

963

*status = U_INTERNAL_PROGRAM_ERROR;

964

return 0;

965

}

966

for(i = 0; i < nextEqualsign - keywordStart; i++) {

967

localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);

968

}

969

/* trim trailing spaces */

970

while(keywordStart[i-1] == ' ') {

971

i--;

972

}

973

localeKeywordNameBuffer[i] = 0;

974

975

nextSeparator = uprv_strchr(nextEqualsign, ';');

976

rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);

977

if(rc == 0) {

978

nextEqualsign++;

979

while(*nextEqualsign == ' ') {

980

nextEqualsign++;

981

}

982

/* we actually found the keyword. Change the value */

983

if (nextSeparator) {

984

keywordAtEnd = 0;

985

foundValueLen = (int32_t)(nextSeparator - nextEqualsign);

986

} else {

987

keywordAtEnd = 1;

988

foundValueLen = (int32_t)uprv_strlen(nextEqualsign);

989

}

990

if(keywordValue) { /* adding a value - not removing */

991

if(foundValueLen == keywordValueLen) {

992

uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);

993

return bufLen; /* no change in size */

994

} else if(foundValueLen > keywordValueLen) {

995

int32_t delta = foundValueLen - keywordValueLen;

996

if(nextSeparator) { /* RH side */

997

uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));

998

}

999

uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);

1000

bufLen -= delta;

1001

buffer[bufLen]=0;

1002

return bufLen;

1003

} else { /* FVL < KVL */

1004

int32_t delta = keywordValueLen - foundValueLen;

1005

if((bufLen+delta) >= bufferCapacity) {

1006

*status = U_BUFFER_OVERFLOW_ERROR;

1007

return bufLen+delta;

1008

}

1009

if(nextSeparator) { /* RH side */

1010

uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));

1011

}

1012

uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);

1013

bufLen += delta;

1014

buffer[bufLen]=0;

1015

return bufLen;

1016

}

1017

} else { /* removing a keyword */

1018

if(keywordAtEnd) {

1019

/* zero out the ';' or '@' just before startSearchhere */

1020

keywordStart[-1] = 0;

1021

return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */

1022

} else {

1023

uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));

1024

keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;

1025

return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));

1026

}

1027

}

1028

} else if(rc<0){ /* end match keyword */

1029

/* could insert at this location. */

1030

insertHere = keywordStart;

1031

}

1032

keywordStart = nextSeparator;

1033

} /* end loop searching */

1034

1035

if(!keywordValue) {

1036

return bufLen; /* removal of non-extant keyword - no change */

1037

}

1038

1039

/* we know there is at least one keyword. */

1040

needLen = bufLen+1+keywordNameLen+1+keywordValueLen;

1041

if(needLen >= bufferCapacity) {

1042

*status = U_BUFFER_OVERFLOW_ERROR;

1043

return needLen; /* no change */

1044

}

1045

1046

if(insertHere) {

1047

uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));

1048

keywordStart = insertHere;

1049

} else {

1050

keywordStart = buffer+bufLen;

1051

*keywordStart = ';';

1052

keywordStart++;

1053

}

1054

uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);

1055

keywordStart += keywordNameLen;

1056

*keywordStart = '=';

1057

keywordStart++;

1058

uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */

1059

keywordStart+=keywordValueLen;

1060

if(insertHere) {

1061

*keywordStart = ';';

1062

keywordStart++;

1063

}

1064

buffer[needLen]=0;

1065

return needLen;

1066

}

1067

1068

/* ### ID parsing implementation **************************************************/

/* TRUE if a is one of the letters that may begin a special prefix.
   Arguments are parenthesized so that expression arguments expand safely;
   the argument is still evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))

/* Finds the first occurrence of c within the first len characters of str.
   The scan also stops at a NUL. Returns a pointer to the match, or NULL
   when c is not found first. */
static char* _strnchr(const char* str, int32_t len, char c) {
    int32_t idx;

    U_ASSERT(str != 0 && len >= 0);
    for (idx = 0; idx != len; ++idx) {
        const char cur = str[idx];
        if (cur == c) {
            return (char*)(str + idx);
        }
        if (cur == 0) {
            return NULL;
        }
    }
    return NULL;
}

/**
 * Lookup 'key' in the array 'list'. The array 'list' should contain
 * a NULL entry, followed by more entries, and a second NULL entry.
 *
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
 * COUNTRIES_3.
 *
 * @return the position of the matching entry, counted from the start of
 *         'list' (the first NULL entry occupies a position too), or -1
 *         when neither section contains 'key'
 */
static int16_t _findIndex(const char* const* list, const char* key)
{
    const char* const* cursor = list;
    int32_t section;

    /* Two NULL-terminated sections sit back to back at 'list'. */
    for (section = 0; section < 2; ++section) {
        for (; *cursor != NULL; ++cursor) {
            if (uprv_strcmp(key, *cursor) == 0) {
                return (int16_t)(cursor - list);
            }
        }
        ++cursor; /* step over the NULL that ended this section */
    }
    return -1;
}

/* count the length of src while copying it to dest; return strlen(src) */

1121

static U_INLINE int32_t

1122

_copyCount(char *dest, int32_t destCapacity, const char *src) {

1123

const char *anchor;

1124

char c;

1125

1126

anchor=src;

1127

for(;;) {

1128

if((c=*src)==0) {

1129

return (int32_t)(src-anchor);

1130

}

1131

if(destCapacity<=0) {

1132

return (int32_t)((src-anchor)+uprv_strlen(src));

1133

}

1134

++src;

1135

*dest++=c;

1136

--destCapacity;

1137

}

1138

}

1139

1140

U_CFUNC const char*

1141

uloc_getCurrentCountryID(const char* oldID){

1142

int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);

1143

if (offset >= 0) {

1144

return REPLACEMENT_COUNTRIES[offset];

1145

}

1146

return oldID;

1147

}

1148

U_CFUNC const char*

1149

uloc_getCurrentLanguageID(const char* oldID){

1150

int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);

1151

if (offset >= 0) {

1152

return REPLACEMENT_LANGUAGES[offset];

1153

}

1154

return oldID;

1155

}

1156

/*

1157

* the internal functions _getLanguage(), _getCountry(), _getVariant()

1158

* avoid duplicating code to handle the earlier locale ID pieces

1159

* in the functions for the later ones by

1160

* setting the *pEnd pointer to where they stopped parsing

1161

*

1162

* TODO try to use this in Locale

1163

*/

1164

U_CFUNC int32_t

1165

ulocimp_getLanguage(const char *localeID,

1166

char *language, int32_t languageCapacity,

1167

const char **pEnd) {

1168

int32_t i=0;

1169

int32_t offset;

1170

char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */

1171

1172

/* if it starts with i- or x- then copy that prefix */

1173

if(_isIDPrefix(localeID)) {

1174

if(i<languageCapacity) {

1175

language[i]=(char)uprv_tolower(*localeID);

1176

}

1177

if(i<languageCapacity) {

1178

language[i+1]='-';

1179

}

1180

i+=2;

1181

localeID+=2;

1182

}

1183

1184

/* copy the language as far as possible and count its length */

1185

while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {

1186

if(i<languageCapacity) {

1187

language[i]=(char)uprv_tolower(*localeID);

1188

}

1189

if(i<3) {

1190

lang[i]=(char)uprv_tolower(*localeID);

1191

}

1192

i++;

1193

localeID++;

1194

}

1195

1196

if(i==3) {

1197

/* convert 3 character code to 2 character code if possible *CWB*/

1198

offset=_findIndex(LANGUAGES_3, lang);

1199

if(offset>=0) {

1200

i=_copyCount(language, languageCapacity, LANGUAGES[offset]);

1201

}

1202

}

1203

1204

if(pEnd!=NULL) {

1205

*pEnd=localeID;

1206

}

1207

return i;

1208

}

1209

1210

U_CFUNC int32_t

1211

ulocimp_getScript(const char *localeID,

1212

char *script, int32_t scriptCapacity,

1213

const char **pEnd)

1214

{

1215

int32_t idLen = 0;

1216

1217

if (pEnd != NULL) {

1218

*pEnd = localeID;

1219

}

1220

1221

/* copy the second item as far as possible and count its length */

1222

while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {

1223

idLen++;

1224

}

1225

1226

/* If it's exactly 4 characters long, then it's a script and not a country. */

1227

if (idLen == 4) {

1228

int32_t i;

1229

if (pEnd != NULL) {

1230

*pEnd = localeID+idLen;

1231

}

1232

if(idLen > scriptCapacity) {

1233

idLen = scriptCapacity;

1234

}

1235

if (idLen >= 1) {

1236

script[0]=(char)uprv_toupper(*(localeID++));

1237

}

1238

for (i = 1; i < idLen; i++) {

1239

script[i]=(char)uprv_tolower(*(localeID++));

1240

}

1241

}

1242

else {

1243

idLen = 0;

1244

}

1245

return idLen;

1246

}

1247

1248

U_CFUNC int32_t

1249

ulocimp_getCountry(const char *localeID,

1250

char *country, int32_t countryCapacity,

1251

const char **pEnd)

1252

{

1253

int32_t idLen=0;

1254

char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };

1255

int32_t offset;

1256

1257

/* copy the country as far as possible and count its length */

1258

while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {

1259

if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/

1260

cnty[idLen]=(char)uprv_toupper(localeID[idLen]);

1261

}

1262

idLen++;

1263

}

1264

1265

/* the country should be either length 2 or 3 */

1266

if (idLen == 2 || idLen == 3) {

1267

UBool gotCountry = FALSE;

1268

/* convert 3 character code to 2 character code if possible *CWB*/

1269

if(idLen==3) {

1270

offset=_findIndex(COUNTRIES_3, cnty);

1271

if(offset>=0) {

1272

idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);

1273

gotCountry = TRUE;

1274

}

1275

}

1276

if (!gotCountry) {

1277

int32_t i = 0;

1278

for (i = 0; i < idLen; i++) {

1279

if (i < countryCapacity) {

1280

country[i]=(char)uprv_toupper(localeID[i]);

1281

}

1282

}

1283

}

1284

localeID+=idLen;

1285

} else {

1286

idLen = 0;

1287

}

1288

1289

if(pEnd!=NULL) {

1290

*pEnd=localeID;

1291

}

1292

1293

return idLen;

1294

}

1295

1296

/**

1297

* @param needSeparator if true, then add leading '_' if any variants

1298

* are added to 'variant'

1299

*/

1300

static int32_t

1301

_getVariantEx(const char *localeID,

1302

char prev,

1303

char *variant, int32_t variantCapacity,

1304

UBool needSeparator) {

1305

int32_t i=0;

1306

1307

/* get one or more variant tags and separate them with '_' */

1308

if(_isIDSeparator(prev)) {

1309

/* get a variant string after a '-' or '_' */

1310

while(!_isTerminator(*localeID)) {

1311

if (needSeparator) {

1312

if (i<variantCapacity) {

1313

variant[i] = '_';

1314

}

1315

++i;

1316

needSeparator = FALSE;

1317

}

1318

if(i<variantCapacity) {

1319

variant[i]=(char)uprv_toupper(*localeID);

1320

if(variant[i]=='-') {

1321

variant[i]='_';

1322

}

1323

}

1324

i++;

1325

localeID++;

1326

}

1327

}

1328

1329

/* if there is no variant tag after a '-' or '_' then look for '@' */

1330

if(i==0) {

1331

if(prev=='@') {

1332

/* keep localeID */

1333

} else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {

1334

++localeID; /* point after the '@' */

1335

} else {

1336

return 0;

1337

}

1338

while(!_isTerminator(*localeID)) {

1339

if (needSeparator) {

1340

if (i<variantCapacity) {

1341

variant[i] = '_';

1342

}

1343

++i;

1344

needSeparator = FALSE;

1345

}

1346

if(i<variantCapacity) {

1347

variant[i]=(char)uprv_toupper(*localeID);

1348

if(variant[i]=='-' || variant[i]==',') {

1349

variant[i]='_';

1350

}

1351

}

1352

i++;

1353

localeID++;

1354

}

1355

}

1356

1357

return i;

1358

}

1359

1360

static int32_t

1361

_getVariant(const char *localeID,

1362

char prev,

1363

char *variant, int32_t variantCapacity) {

1364

return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);

1365

}

1366

1367

/**

1368

* Delete ALL instances of a variant from the given list of one or

1369

* more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".

1370

* @param variants the source string of one or more variants,

1371

* separated by '_'. This will be MODIFIED IN PLACE. Not zero

1372

* terminated; if it is, trailing zero will NOT be maintained.

1373

* @param variantsLen length of variants

1374

* @param toDelete variant to delete, without separators, e.g. "EURO"

1375

* or "PREEURO"; not zero terminated

1376

* @param toDeleteLen length of toDelete

1377

* @return number of characters deleted from variants

1378

*/

1379

static int32_t

1380

_deleteVariant(char* variants, int32_t variantsLen,

1381

const char* toDelete, int32_t toDeleteLen)

1382

{

1383

int32_t delta = 0; /* number of chars deleted */

1384

for (;;) {

1385

UBool flag = FALSE;

1386

if (variantsLen < toDeleteLen) {

1387

return delta;

1388

}

1389

if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&

1390

(variantsLen == toDeleteLen ||

1391

(flag=(variants[toDeleteLen] == '_'))))

1392

{

1393

int32_t d = toDeleteLen + (flag?1:0);

1394

variantsLen -= d;

1395

delta += d;

1396

if (variantsLen > 0) {

1397

uprv_memmove(variants, variants+d, variantsLen);

1398

}

1399

} else {

1400

char* p = _strnchr(variants, variantsLen, '_');

1401

if (p == NULL) {

1402

return delta;

1403

}

1404

++p;

1405

variantsLen -= (int32_t)(p - variants);

1406

variants = p;

1407

}

1408

}

1409

}

1410

1411

/* Keyword enumeration */

/* State behind a keyword UEnumeration. */
typedef struct UKeywordsContext {
    /* allocated list of NUL-separated keywords, ended by an empty string */
    char* keywords;
    /* the keyword the next call to uloc_kw_nextKeyword() will return */
    char* current;
} UKeywordsContext;

static void U_CALLCONV

1419

uloc_kw_closeKeywords(UEnumeration *enumerator) {

1420

uprv_free(((UKeywordsContext *)enumerator->context)->keywords);

1421

uprv_free(enumerator->context);

1422

uprv_free(enumerator);

1423

}

1424

1425

static int32_t U_CALLCONV

1426

uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {

1427

char *kw = ((UKeywordsContext *)en->context)->keywords;

1428

int32_t result = 0;

1429

while(*kw) {

1430

result++;

1431

kw += uprv_strlen(kw)+1;

1432

}

1433

return result;

1434

}

1435

1436

static const char* U_CALLCONV

1437

uloc_kw_nextKeyword(UEnumeration* en,

1438

int32_t* resultLength,

1439

UErrorCode* status) {

1440

const char* result = ((UKeywordsContext *)en->context)->current;

1441

int32_t len = 0;

1442

if(*result) {

1443

len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);

1444

((UKeywordsContext *)en->context)->current += len+1;

1445

} else {

1446

result = NULL;

1447

}

1448

if (resultLength) {

1449

*resultLength = len;

1450

}

1451

return result;

1452

}

1453

1454

static void U_CALLCONV

1455

uloc_kw_resetKeywords(UEnumeration* en,

1456

UErrorCode* status) {

1457

((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;

1458

}

1459

1460

static const UEnumeration gKeywordsEnum = {

1461

NULL,

1462

NULL,

1463

uloc_kw_closeKeywords,

1464

uloc_kw_countKeywords,

1465

uenum_unextDefault,

1466

uloc_kw_nextKeyword,

1467

uloc_kw_resetKeywords

1468

};

1469

1470

/**
 * Creates an enumeration over a list of NUL-separated keywords. The list is
 * copied, with one extra NUL appended, so the caller keeps ownership of
 * keywordList.
 *
 * @param keywordList     the keywords, each followed by a NUL
 * @param keywordListSize number of bytes of keywordList to copy
 * @param status          set to U_MEMORY_ALLOCATION_ERROR when an allocation
 *                        fails
 * @return the new enumeration, or NULL when *status is or becomes a failure
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
{
    UKeywordsContext *myContext = NULL;
    UEnumeration *result = NULL;

    if(U_FAILURE(*status)) {
        return NULL;
    }
    result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
    /* Null pointer test */
    if (result == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
    myContext = (UKeywordsContext *)uprv_malloc(sizeof(UKeywordsContext));
    if (myContext == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(result);
        return NULL;
    }
    myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
    if (myContext->keywords == NULL) {
        /* release what was allocated so far instead of copying into NULL */
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(myContext);
        uprv_free(result);
        return NULL;
    }
    uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
    /* extra NUL: the empty string that marks the end of the list */
    myContext->keywords[keywordListSize] = 0;
    myContext->current = myContext->keywords;
    result->context = myContext;
    return result;
}

/**
 * Opens an enumeration over the keywords of a locale ID.
 *
 * @param localeID locale ID to examine; NULL selects uloc_getDefault()
 * @param status   error code; nothing is done when it is NULL or already a
 *                 failure
 * @return an enumeration created by uloc_openKeywordList(), or NULL when
 *         the locale ID yields no keywords or an error occurred
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
                  UErrorCode* status)
{
    char keywords[256];
    int32_t keywordsCapacity = 256;
    int32_t keywordsLength = 0;
    const char *keywordsStart;

    if (status == NULL || U_FAILURE(*status)) {
        return NULL;
    }
    if (localeID == NULL) {
        localeID = uloc_getDefault();
    }

    /* Step over language, script, country and variant; only the end
       positions reported by the parsers are of interest here. */
    ulocimp_getLanguage(localeID, NULL, 0, &localeID);
    if (_isIDSeparator(*localeID)) {
        const char *afterScript;

        ulocimp_getScript(localeID+1, NULL, 0, &afterScript);
        if (afterScript != localeID+1) {
            /* an optional script was present */
            localeID = afterScript;
        }
        if (_isIDSeparator(*localeID)) {
            ulocimp_getCountry(localeID+1, NULL, 0, &localeID);
            if (_isIDSeparator(*localeID)) {
                _getVariant(localeID+1, *localeID, NULL, 0);
            }
        }
    }

    /* keywords are located after '@' */
    keywordsStart = locale_getKeywordsStart(localeID);
    if (keywordsStart != NULL) {
        keywordsLength = locale_getKeywords(keywordsStart+1, '@', keywords, keywordsCapacity,
                                            NULL, 0, NULL, FALSE, status);
    }

    if (keywordsLength == 0) {
        return NULL;
    }
    return uloc_openKeywordList(keywords, keywordsLength, status);
}

/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE 0x1

/* TRUE if any bit of mask is set in options. Both arguments are
   parenthesized so that expression arguments expand safely. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The text "i-default"; deliberately a char array without a terminating
   NUL, so I_DEFAULT_LENGTH is its number of characters. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])

/**

1557

* Canonicalize the given localeID, to level 1 or to level 2,

1558

* depending on the options. To specify level 1, pass in options=0.

1559

* To specify level 2, pass in options=_ULOC_CANONICALIZE.

1560

*

1561

* This is the code underlying uloc_getName and uloc_canonicalize.

1562

*/

1563

static int32_t

1564

_canonicalize(const char* localeID,

1565

char* result,

1566

int32_t resultCapacity,

1567

uint32_t options,

1568

UErrorCode* err) {

1569

int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;

1570

char localeBuffer[ULOC_FULLNAME_CAPACITY];

1571

const char* origLocaleID;

1572

const char* keywordAssign = NULL;

1573

const char* separatorIndicator = NULL;

1574

const char* addKeyword = NULL;

1575

const char* addValue = NULL;

1576

char* name;

1577

char* variant = NULL; /* pointer into name, or NULL */

1578

1579

if (U_FAILURE(*err)) {

1580

return 0;

1581

}

1582

1583

if (localeID==NULL) {

1584

localeID=uloc_getDefault();

1585

}

1586

origLocaleID=localeID;

1587

1588

/* if we are doing a full canonicalization, then put results in

1589

localeBuffer, if necessary; otherwise send them to result. */

1590

if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/

1591

(result == NULL || resultCapacity < sizeof(localeBuffer))) {

1592

name = localeBuffer;

1593

nameCapacity = sizeof(localeBuffer);

1594

} else {

1595

name = result;

1596

nameCapacity = resultCapacity;

1597

}

1598

1599

/* get all pieces, one after another, and separate with '_' */

1600

len=ulocimp_getLanguage(localeID, name, nameCapacity, &localeID);

1601

1602

if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {

1603

const char *d = uloc_getDefault();

1604

1605

len = (int32_t)uprv_strlen(d);

1606

1607

if (name != NULL) {

1608

uprv_strncpy(name, d, len);

1609

}

1610

} else if(_isIDSeparator(*localeID)) {

1611

const char *scriptID;

1612

1613

++fieldCount;

1614

if(len<nameCapacity) {

1615

name[len]='_';

1616

}

1617

++len;

1618

1619

scriptSize=ulocimp_getScript(localeID+1, name+len, nameCapacity-len, &scriptID);

1620

if(scriptSize > 0) {

1621

/* Found optional script */

1622

localeID = scriptID;

1623

++fieldCount;

1624

len+=scriptSize;

1625

if (_isIDSeparator(*localeID)) {

1626

/* If there is something else, then we add the _ */

1627

if(len<nameCapacity) {

1628

name[len]='_';

1629

}

1630

++len;

1631

}

1632

}

1633

1634

if (_isIDSeparator(*localeID)) {

1635

const char *cntryID;

1636

int32_t cntrySize = ulocimp_getCountry(localeID+1, name+len, nameCapacity-len, &cntryID);

1637

if (cntrySize > 0) {

1638

/* Found optional country */

1639

localeID = cntryID;

1640

len+=cntrySize;

1641

}

1642

if(_isIDSeparator(*localeID)) {

1643

/* If there is something else, then we add the _ if we found country before.*/

1644

if (cntrySize > 0) {

1645

++fieldCount;

1646

if(len<nameCapacity) {

1647

name[len]='_';

1648

}

1649

++len;

1650

}

1651

1652

variantSize = _getVariant(localeID+1, *localeID, name+len, nameCapacity-len);

1653

if (variantSize > 0) {

1654

variant = name+len;

1655

len += variantSize;

1656

localeID += variantSize + 1; /* skip '_' and variant */

1657

}

1658

}

1659

}

1660

}

1661

1662

/* Copy POSIX-style charset specifier, if any [mr.utf8] */

1663

if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') {

1664

UBool done = FALSE;

1665

do {

1666

char c = *localeID;

1667

switch (c) {

1668

case 0:

1669

case '@':

1670

done = TRUE;

1671

break;

1672

default:

1673

if (len<nameCapacity) {

1674

name[len] = c;

1675

}

1676

++len;

1677

++localeID;

1678

break;

1679

}

1680

} while (!done);

1681

}

1682

1683

/* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'

1684

After this, localeID either points to '@' or is NULL */

1685

if ((localeID=locale_getKeywordsStart(localeID))!=NULL) {

1686

keywordAssign = uprv_strchr(localeID, '=');

1687

separatorIndicator = uprv_strchr(localeID, ';');

1688

}

1689

1690

/* Copy POSIX-style variant, if any [mr@FOO] */

1691

if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&

1692

localeID != NULL && keywordAssign == NULL) {

1693

for (;;) {

1694

char c = *localeID;

1695

if (c == 0) {

1696

break;

1697

}

1698

if (len<nameCapacity) {

1699

name[len] = c;

1700

}

1701

++len;

1702

++localeID;

1703

}

1704

}

1705

1706

if (OPTION_SET(options, _ULOC_CANONICALIZE)) {

1707

/* Handle @FOO variant if @ is present and not followed by = */

1708

if (localeID!=NULL && keywordAssign==NULL) {

1709

int32_t posixVariantSize;

1710

/* Add missing '_' if needed */

1711

if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {

1712

do {

1713

if(len<nameCapacity) {

1714

name[len]='_';

1715

}

1716

++len;

1717

++fieldCount;

1718

} while(fieldCount<2);

1719

}

1720

posixVariantSize = _getVariantEx(localeID+1, '@', name+len, nameCapacity-len,

1721

(UBool)(variantSize > 0));

1722

if (posixVariantSize > 0) {

1723

if (variant == NULL) {

1724

variant = name+len;

1725

}

1726

len += posixVariantSize;

1727

variantSize += posixVariantSize;

1728

}

1729

}

1730

1731

/* Handle generic variants first */

1732

if (variant) {

1733

for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {

1734

const char* variantToCompare = VARIANT_MAP[j].variant;

1735

int32_t n = (int32_t)uprv_strlen(variantToCompare);

1736

int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);

1737

len -= variantLen;

1738

if (variantLen > 0) {

1739

if (name[len-1] == '_') { /* delete trailing '_' */

1740

--len;

1741

}

1742

addKeyword = VARIANT_MAP[j].keyword;

1743

addValue = VARIANT_MAP[j].value;

1744

break;

1745

}

1746

}

1747

if (name[len-1] == '_') { /* delete trailing '_' */

1748

--len;

1749

}

1750

}

1751

1752

/* Look up the ID in the canonicalization map */

1753

for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {

1754

const char* id = CANONICALIZE_MAP[j].id;

1755

int32_t n = (int32_t)uprv_strlen(id);

1756

if (len == n && uprv_strncmp(name, id, n) == 0) {

1757

if (n == 0 && localeID != NULL) {

1758

break; /* Don't remap "" if keywords present */

1759

}

1760

len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);

1761

if (CANONICALIZE_MAP[j].keyword) {

1762

addKeyword = CANONICALIZE_MAP[j].keyword;

1763

addValue = CANONICALIZE_MAP[j].value;

1764

}

1765

break;

1766

}

1767

}

1768

}

1769

1770

if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {

1771

if (localeID!=NULL && keywordAssign!=NULL &&

1772

(!separatorIndicator || separatorIndicator > keywordAssign)) {

1773

if(len<nameCapacity) {

1774

name[len]='@';

1775

}

1776

++len;

1777

++fieldCount;

1778

len += _getKeywords(localeID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,

1779

addKeyword, addValue, err);

1780

} else if (addKeyword != NULL) {

1781

U_ASSERT(addValue != NULL);

1782

/* inelegant but works -- later make _getKeywords do this? */

1783

len += _copyCount(name+len, nameCapacity-len, "@");

1784

len += _copyCount(name+len, nameCapacity-len, addKeyword);

1785

len += _copyCount(name+len, nameCapacity-len, "=");

1786

len += _copyCount(name+len, nameCapacity-len, addValue);

1787

}

1788

}

1789

1790

if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {

1791

uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);

1792

}

1793

1794

return u_terminateChars(result, resultCapacity, len, err);

1795

}

1796

1797

/* ### ID parsing API **************************************************/

1798

1799

U_CAPI int32_t U_EXPORT2

1800

uloc_getParent(const char* localeID,

1801

char* parent,

1802

int32_t parentCapacity,

1803

UErrorCode* err)

1804

{

1805

const char *lastUnderscore;

1806

int32_t i;

1807

1808

if (U_FAILURE(*err))

1809

return 0;

1810

1811

if (localeID == NULL)

1812

localeID = uloc_getDefault();

1813

1814

lastUnderscore=uprv_strrchr(localeID, '_');

1815

if(lastUnderscore!=NULL) {

1816

i=(int32_t)(lastUnderscore-localeID);

1817

} else {

1818

i=0;

1819

}

1820

1821

if(i>0 && parent != localeID) {

1822

uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));

1823

}

1824

return u_terminateChars(parent, parentCapacity, i, err);

1825

}

1826

1827

U_CAPI int32_t U_EXPORT2

1828

uloc_getLanguage(const char* localeID,

1829

char* language,

1830

int32_t languageCapacity,

1831

UErrorCode* err)

1832

{

1833

/* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/

1834

int32_t i=0;

1835

1836

if (err==NULL || U_FAILURE(*err)) {

1837

return 0;

1838

}

1839

1840

if(localeID==NULL) {

1841

localeID=uloc_getDefault();

1842

}

1843

1844

i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);

1845

return u_terminateChars(language, languageCapacity, i, err);

1846

}

1847

1848

U_CAPI int32_t U_EXPORT2

1849

uloc_getScript(const char* localeID,

1850

char* script,

1851

int32_t scriptCapacity,

1852

UErrorCode* err)

1853

{

1854

int32_t i=0;

1855

1856

if(err==NULL || U_FAILURE(*err)) {

1857

return 0;

1858

}

1859

1860

if(localeID==NULL) {

1861

localeID=uloc_getDefault();

1862

}

1863

1864

/* skip the language */

1865

ulocimp_getLanguage(localeID, NULL, 0, &localeID);

1866

if(_isIDSeparator(*localeID)) {

1867

i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);

1868

}

1869

return u_terminateChars(script, scriptCapacity, i, err);

1870

}

1871

1872

U_CAPI int32_t U_EXPORT2

1873

uloc_getCountry(const char* localeID,

1874

char* country,

1875

int32_t countryCapacity,

1876

UErrorCode* err)

1877

{

1878

int32_t i=0;

1879

1880

if(err==NULL || U_FAILURE(*err)) {

1881

return 0;

1882

}

1883

1884

if(localeID==NULL) {

1885

localeID=uloc_getDefault();

1886

}

1887

1888

/* Skip the language */

1889

ulocimp_getLanguage(localeID, NULL, 0, &localeID);

1890

if(_isIDSeparator(*localeID)) {

1891

const char *scriptID;

1892

/* Skip the script if available */

1893

ulocimp_getScript(localeID+1, NULL, 0, &scriptID);

1894

if(scriptID != localeID+1) {

1895

/* Found optional script */

1896

localeID = scriptID;

1897

}

1898

if(_isIDSeparator(*localeID)) {

1899

i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);

1900

}

1901

}

1902

return u_terminateChars(country, countryCapacity, i, err);

1903

}

1904

1905

U_CAPI int32_t U_EXPORT2

1906

uloc_getVariant(const char* localeID,

1907

char* variant,

1908

int32_t variantCapacity,

1909

UErrorCode* err)

1910

{

1911

int32_t i=0;

1912

1913

if(err==NULL || U_FAILURE(*err)) {

1914

return 0;

1915

}

1916

1917

if(localeID==NULL) {

1918

localeID=uloc_getDefault();

1919

}

1920

1921

/* Skip the language */

1922

ulocimp_getLanguage(localeID, NULL, 0, &localeID);

1923

if(_isIDSeparator(*localeID)) {

1924

const char *scriptID;

1925

/* Skip the script if available */

1926

ulocimp_getScript(localeID+1, NULL, 0, &scriptID);

1927

if(scriptID != localeID+1) {

1928

/* Found optional script */

1929

localeID = scriptID;

1930

}

1931

/* Skip the Country */

1932

if (_isIDSeparator(*localeID)) {

1933

const char *cntryID;

1934

ulocimp_getCountry(localeID+1, NULL, 0, &cntryID);

1935

if (cntryID != localeID) {

1936

/* Found optional country */

1937

localeID = cntryID;

1938

}

1939

if(_isIDSeparator(*localeID)) {

1940

i=_getVariant(localeID+1, *localeID, variant, variantCapacity);

1941

}

1942

}

1943

}

1944

1945

/* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */

1946

/* if we do not have a variant tag yet then try a POSIX variant after '@' */

1947

/*

1948

if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {

1949

i=_getVariant(localeID+1, '@', variant, variantCapacity);

1950

}

1951

*/

1952

return u_terminateChars(variant, variantCapacity, i, err);

1953

}

1954

1955

U_CAPI int32_t U_EXPORT2

1956

uloc_getName(const char* localeID,

1957

char* name,

1958

int32_t nameCapacity,

1959

UErrorCode* err)

1960

{

1961

return _canonicalize(localeID, name, nameCapacity, 0, err);

1962

}

1963

1964

U_CAPI int32_t U_EXPORT2

1965

uloc_getBaseName(const char* localeID,

1966

char* name,

1967

int32_t nameCapacity,

1968

UErrorCode* err)

1969

{

1970

return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);

1971

}

1972

1973

U_CAPI int32_t U_EXPORT2

1974

uloc_canonicalize(const char* localeID,

1975

char* name,

1976

int32_t nameCapacity,

1977

UErrorCode* err)

1978

{

1979

return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);

1980

}

1981

1982

U_CAPI const char* U_EXPORT2

1983

uloc_getISO3Language(const char* localeID)

1984

{

1985

int16_t offset;

1986

char lang[ULOC_LANG_CAPACITY];

1987

UErrorCode err = U_ZERO_ERROR;

1988

1989

if (localeID == NULL)

1990

{

1991

localeID = uloc_getDefault();

1992

}

1993

uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);

1994

if (U_FAILURE(err))

1995

return "";

1996

offset = _findIndex(LANGUAGES, lang);

1997

if (offset < 0)

1998

return "";

1999

return LANGUAGES_3[offset];

2000

}

2001

2002

U_CAPI const char* U_EXPORT2

2003

uloc_getISO3Country(const char* localeID)

2004

{

2005

int16_t offset;

2006

char cntry[ULOC_LANG_CAPACITY];

2007

UErrorCode err = U_ZERO_ERROR;

2008

2009

if (localeID == NULL)

2010

{

2011

localeID = uloc_getDefault();

2012

}

2013

uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);

2014

if (U_FAILURE(err))

2015

return "";

2016

offset = _findIndex(COUNTRIES, cntry);

2017

if (offset < 0)

2018

return "";

2019

2020

return COUNTRIES_3[offset];

2021

}

2022

2023

U_CAPI uint32_t U_EXPORT2

2024

uloc_getLCID(const char* localeID)

2025

{

2026

UErrorCode status = U_ZERO_ERROR;

2027

char langID[ULOC_FULLNAME_CAPACITY];

2028

2029

uloc_getLanguage(localeID, langID, sizeof(langID), &status);

2030

if (U_FAILURE(status)) {

2031

return 0;

2032

}

2033

2034

return uprv_convertToLCID(langID, localeID, &status);

2035

}

2036

2037

U_CAPI int32_t U_EXPORT2

2038

uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,

2039

UErrorCode *status)

2040

{

2041

int32_t length;

2042

const char *posix = uprv_convertToPosix(hostid, status);

2043

if (U_FAILURE(*status) || posix == NULL) {

2044

return 0;

2045

}

2046

length = (int32_t)uprv_strlen(posix);

2047

if (length+1 > localeCapacity) {

2048

*status = U_BUFFER_OVERFLOW_ERROR;

2049

}

2050

else {

2051

uprv_strcpy(locale, posix);

2052

}

2053

return length;

2054

}

2055

2056

/* ### Default locale **************************************************/

2057

2058

U_CAPI const char* U_EXPORT2

2059

uloc_getDefault()

2060

{

2061

return locale_get_default();

2062

}

2063

2064

U_CAPI void U_EXPORT2

2065

uloc_setDefault(const char* newDefaultLocale,

2066

UErrorCode* err)

2067

{

2068

if (U_FAILURE(*err))

2069

return;

2070

/* the error code isn't currently used for anything by this function*/

2071

2072

/* propagate change to C++ */

2073

locale_set_default(newDefaultLocale);

2074

}

2075

2076

/**

2077

* Returns a list of all language codes defined in ISO 639. This is a pointer

2078

* to an array of pointers to arrays of char. All of these pointers are owned

2079

* by ICU-- do not delete them, and do not write through them. The array is

2080

* terminated with a null pointer.

2081

*/

2082

U_CAPI const char* const* U_EXPORT2

2083

uloc_getISOLanguages()

2084

{

2085

return LANGUAGES;

2086

}

2087

2088

/**

2089

* Returns a list of all 2-letter country codes defined in ISO 639. This is a

2090

* pointer to an array of pointers to arrays of char. All of these pointers are

2091

* owned by ICU-- do not delete them, and do not write through them. The array is

2092

* terminated with a null pointer.

2093

*/

2094

U_CAPI const char* const* U_EXPORT2

2095

uloc_getISOCountries()

2096

{

2097

return COUNTRIES;

2098

}

2099

2100

2101

/* this function to be moved into cstring.c later */

/* Decimal separator the C runtime currently formats with; 0 = not probed yet. */
static char gDecimal = 0;

/*
 * strtod variant that always accepts '.' as the decimal separator in its
 * input, even when the C runtime has been switched to another separator.
 *
 * start  text to parse
 * end    if not NULL, receives a pointer into start just past the parsed
 *        text (const is cast away to follow the uprv_strtod API)
 *
 * When the runtime separator is not '.', at most the first 29 characters of
 * start are examined.
 */
static double
_uloc_strtod(const char *start, char **end) {
    char localCopy[30];
    char *dot;
    char *parsedEnd;
    double value;

    if (gDecimal == 0) {
        /* For machines that decide to change the decimal on you,
           and try to be too smart with localization.
           This normally should be just a '.'. */
        char sample[5];
        sprintf(sample, "%+1.1f", 1.0);
        gDecimal = sample[2];   /* "+1.0": index 2 is the separator */
    }

    if (gDecimal == '.') {
        /* runtime already agrees with the input format: fall through to OS */
        return uprv_strtod(start, end);
    }

    /* work on a bounded private copy so the separator can be rewritten */
    uprv_strncpy(localCopy, start, 29);
    localCopy[29] = 0;

    dot = uprv_strchr(localCopy, '.');
    if (dot == NULL) {
        /* no decimal point, nothing to translate */
        return uprv_strtod(start, end);
    }
    *dot = gDecimal;

    value = uprv_strtod(localCopy, &parsedEnd);
    if (end != NULL) {
        /* translate the offset inside the copy back into the caller's string */
        *end = (char*)(start + (parsedEnd - localCopy));
    }
    return value;
}

2139

2140

typedef struct {

2141

float q;

2142

int32_t dummy; /* to avoid uninitialized memory copy from qsort */

2143

char *locale;

2144

} _acceptLangItem;

2145

2146

static int32_t U_CALLCONV

2147

uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)

2148

{

2149

const _acceptLangItem *aa = (const _acceptLangItem*)a;

2150

const _acceptLangItem *bb = (const _acceptLangItem*)b;

2151

2152

int32_t rc = 0;

2153

if(bb->q < aa->q) {

2154

rc = -1; /* A > B */

2155

} else if(bb->q > aa->q) {

2156

rc = 1; /* A < B */

2157

} else {

2158

rc = 0; /* A = B */

2159

}

2160

2161

if(rc==0) {

2162

rc = uprv_stricmp(aa->locale, bb->locale);

2163

}

2164

2165

#if defined(ULOC_DEBUG)

2166

/* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",

2167

aa->locale, aa->q,

2168

bb->locale, bb->q,

2169

rc);*/

2170

#endif

2171

2172

return rc;

2173

}

2174

2175

/*

2176

mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53

2177

*/

2178

2179

U_CAPI int32_t U_EXPORT2

2180

uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,

2181

const char *httpAcceptLanguage,

2182

UEnumeration* availableLocales,

2183

UErrorCode *status)

2184

{

2185

_acceptLangItem *j;

2186

_acceptLangItem smallBuffer[30];

2187

char **strs;

2188

char tmp[ULOC_FULLNAME_CAPACITY +1];

2189

int32_t n = 0;

2190

const char *itemEnd;

2191

const char *paramEnd;

2192

const char *s;

2193

const char *t;

2194

int32_t res;

2195

int32_t i;

2196

int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);

2197

int32_t jSize;

2198

char *tempstr; /* Use for null pointer check */

2199

2200

j = smallBuffer;

2201

jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);

2202

if(U_FAILURE(*status)) {

2203

return -1;

2204

}

2205

2206

for(s=httpAcceptLanguage;s&&*s;) {

2207

while(isspace(*s)) /* eat space at the beginning */

2208

s++;

2209

itemEnd=uprv_strchr(s,',');

2210

paramEnd=uprv_strchr(s,';');

2211

if(!itemEnd) {

2212

itemEnd = httpAcceptLanguage+l; /* end of string */

2213

}

2214

if(paramEnd && paramEnd<itemEnd) {

2215

/* semicolon (;) is closer than end (,) */

2216

t = paramEnd+1;

2217

if(*t=='q') {

2218

t++;

2219

}

2220

while(isspace(*t)) {

2221

t++;

2222

}

2223

if(*t=='=') {

2224

t++;

2225

}

2226

while(isspace(*t)) {

2227

t++;

2228

}

2229

j[n].q = (float)_uloc_strtod(t,NULL);

2230

} else {

2231

/* no semicolon - it's 1.0 */

2232

j[n].q = 1.0f;

2233

paramEnd = itemEnd;

2234

}

2235

j[n].dummy=0;

2236

/* eat spaces prior to semi */

2237

for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)

2238

;

2239

/* Check for null pointer from uprv_strndup */

2240

tempstr = uprv_strndup(s,(int32_t)((t+1)-s));

2241

if (tempstr == NULL) {

2242

*status = U_MEMORY_ALLOCATION_ERROR;

2243

return -1;

2244

}

2245

j[n].locale = tempstr;

2246

uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);

2247

if(strcmp(j[n].locale,tmp)) {

2248

uprv_free(j[n].locale);

2249

j[n].locale=uprv_strdup(tmp);

2250

}

2251

#if defined(ULOC_DEBUG)

2252

/*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/

2253

#endif

2254

n++;

2255

s = itemEnd;

2256

while(*s==',') { /* eat duplicate commas */

2257

s++;

2258

}

2259

if(n>=jSize) {

2260

if(j==smallBuffer) { /* overflowed the small buffer. */

2261

j = uprv_malloc(sizeof(j[0])*(jSize*2));

2262

if(j!=NULL) {

2263

uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);

2264

}

2265

#if defined(ULOC_DEBUG)

2266

fprintf(stderr,"malloced at size %d\n", jSize);

2267

#endif

2268

} else {

2269

j = uprv_realloc(j, sizeof(j[0])*jSize*2);

2270

#if defined(ULOC_DEBUG)

2271

fprintf(stderr,"re-alloced at size %d\n", jSize);

2272

#endif

2273

}

2274

jSize *= 2;

2275

if(j==NULL) {

2276

*status = U_MEMORY_ALLOCATION_ERROR;

2277

return -1;

2278

}

2279

}

2280

}

2281

uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);

2282

if(U_FAILURE(*status)) {

2283

if(j != smallBuffer) {

2284

#if defined(ULOC_DEBUG)

2285

fprintf(stderr,"freeing j %p\n", j);

2286

#endif

2287

uprv_free(j);

2288

}

2289

return -1;

2290

}

2291

strs = uprv_malloc((size_t)(sizeof(strs[0])*n));

2292

/* Check for null pointer */

2293

if (strs == NULL) {

2294

uprv_free(j); /* Free to avoid memory leak */

2295

*status = U_MEMORY_ALLOCATION_ERROR;

2296

return -1;

2297

}

2298

for(i=0;i<n;i++) {

2299

#if defined(ULOC_DEBUG)

2300

/*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/

2301

#endif

2302

strs[i]=j[i].locale;

2303

}

2304

res = uloc_acceptLanguage(result, resultAvailable, outResult,

2305

(const char**)strs, n, availableLocales, status);

2306

for(i=0;i<n;i++) {

2307

uprv_free(strs[i]);

2308

}

2309

uprv_free(strs);

2310

if(j != smallBuffer) {

2311

#if defined(ULOC_DEBUG)

2312

fprintf(stderr,"freeing j %p\n", j);

2313

#endif

2314

uprv_free(j);

2315

}

2316

return res;

2317

}

2318

2319

2320

U_CAPI int32_t U_EXPORT2

2321

uloc_acceptLanguage(char *result, int32_t resultAvailable,

2322

UAcceptResult *outResult, const char **acceptList,

2323

int32_t acceptListCount,

2324

UEnumeration* availableLocales,

2325

UErrorCode *status)

2326

{

2327

int32_t i,j;

2328

int32_t len;

2329

int32_t maxLen=0;

2330

char tmp[ULOC_FULLNAME_CAPACITY+1];

2331

const char *l;

2332

char **fallbackList;

2333

if(U_FAILURE(*status)) {

2334

return -1;

2335

}

2336

fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));

2337

if(fallbackList==NULL) {

2338

*status = U_MEMORY_ALLOCATION_ERROR;

2339

return -1;

2340

}

2341

for(i=0;i<acceptListCount;i++) {

2342

#if defined(ULOC_DEBUG)

2343

fprintf(stderr,"%02d: %s\n", i, acceptList[i]);

2344

#endif

2345

while((l=uenum_next(availableLocales, NULL, status))) {

2346

#if defined(ULOC_DEBUG)

2347

fprintf(stderr," %s\n", l);

2348

#endif

2349

len = (int32_t)uprv_strlen(l);

2350

if(!uprv_strcmp(acceptList[i], l)) {

2351

if(outResult) {

2352

*outResult = ULOC_ACCEPT_VALID;

2353

}

2354

#if defined(ULOC_DEBUG)

2355

fprintf(stderr, "MATCH! %s\n", l);

2356

#endif

2357

if(len>0) {

2358

uprv_strncpy(result, l, uprv_min(len, resultAvailable));

2359

}

2360

for(j=0;j<i;j++) {

2361

uprv_free(fallbackList[j]);

2362

}

2363

uprv_free(fallbackList);

2364

return u_terminateChars(result, resultAvailable, len, status);

2365

}

2366

if(len>maxLen) {

2367

maxLen = len;

2368

}

2369

}

2370

uenum_reset(availableLocales, status);

2371

/* save off parent info */

2372

if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {

2373

fallbackList[i] = uprv_strdup(tmp);

2374

} else {

2375

fallbackList[i]=0;

2376

}

2377

}

2378

2379

for(maxLen--;maxLen>0;maxLen--) {

2380

for(i=0;i<acceptListCount;i++) {

2381

if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {

2382

#if defined(ULOC_DEBUG)

2383

fprintf(stderr,"Try: [%s]", fallbackList[i]);

2384

#endif

2385

while((l=uenum_next(availableLocales, NULL, status))) {

2386

#if defined(ULOC_DEBUG)

2387

fprintf(stderr," %s\n", l);

2388

#endif

2389

len = (int32_t)uprv_strlen(l);

2390

if(!uprv_strcmp(fallbackList[i], l)) {

2391

if(outResult) {

2392

*outResult = ULOC_ACCEPT_FALLBACK;

2393

}

2394

#if defined(ULOC_DEBUG)

2395

fprintf(stderr, "fallback MATCH! %s\n", l);

2396

#endif

2397

if(len>0) {

2398

uprv_strncpy(result, l, uprv_min(len, resultAvailable));

2399

}

2400

for(j=0;j<acceptListCount;j++) {

2401

uprv_free(fallbackList[j]);

2402

}

2403

uprv_free(fallbackList);

2404

return u_terminateChars(result, resultAvailable, len, status);

2405

}

2406

}

2407

uenum_reset(availableLocales, status);

2408

2409

if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {

2410

uprv_free(fallbackList[i]);

2411

fallbackList[i] = uprv_strdup(tmp);

2412

} else {

2413

uprv_free(fallbackList[i]);

2414

fallbackList[i]=0;

2415

}

2416

}

2417

}

2418

if(outResult) {

2419

*outResult = ULOC_ACCEPT_FAILED;

2420

}

2421

}

2422

for(i=0;i<acceptListCount;i++) {

2423

uprv_free(fallbackList[i]);

2424

}

2425

uprv_free(fallbackList);

2426

return -1;

2427

}

2428

2429

/*eof*/