1
// ***************************************************************************
3
// * Copyright (C) 1997-2002, International Business Machines
4
// * Corporation and others. All Rights Reserved.
6
// ***************************************************************************
7
// Date Name Description
8
// 11/17/99 aliu Added support for transliterators.
9
// 01/15/2002 grhoten Synchronized the language and country codes with ISO standards
15
// each variant name that occurs in locales should be listed with %% and a display string
16
"%%PREEURO" { "Pre-Euro" } // PREEURO variant display name
17
"%%EURO" { "Euro" } // EURO variant display name
18
"%%B" { "Bokm\u00e5l" } // Norwegian variant display name
19
"%%NY" { "Nynorsk" } // Norwegian variant display name
20
"%%AL" { "\u00C5land" } // Aland variant display name
24
// variants for collation (traditional may also be used for traditional chinese)
25
"%%PHONEBOOK" { "Phonebook Order" }
26
"%%PINYIN" { "Pinyin Order" }
27
"%%TRADITIONAL" { "Traditional" }
28
"%%STROKE" { "Stroke Order" }
29
"%%DIRECT" { "Direct Order" }
31
// this is a special tag that makes genrb include UCARules.txt for collation
32
"%%UCARULES" { "UCARules.txt"} // UCARules
34
// Here are empty collation rules just for the consistency. UCA is used instead... 01/16/2001 by weiv
39
// Formats for the display name of a locale, for a list of
40
// items, and for composing two items in a list into one item.
41
// The list patterns are used in the variant name and in the
44
// This is the language-neutral form of this resource.
46
// LocaleNamePatterns {
48
// "{0,choice,0#|1#{1}|2#{1} ({2})}", // Display name
49
// "{0,choice,0#|1#{1}|2#{1},{2}|3#{1},{2},{3}}", // List
50
// "{0},{1}" // List composition
54
//------------------------------------------------------------
55
// BEGIN Transliterator support
56
//------------------------------------------------------------
58
// See also icu/source/data/translit/index.txt
60
TransliteratorNamePattern {
61
// Format for the display name of a Transliterator.
62
// This is the language-neutral form of this resource.
63
"{0,choice,0#|1#{1}|2#{1}-{2}}" // Display name
66
// Transliterator display names
67
// This is the English form of this resource.
68
"%Translit%Hex" { "Hex Escape" }
69
"%Translit%UnicodeName" { "Unicode Name" }
70
"%Translit%UnicodeChar" { "Unicode Character" }
72
//------------------------------------------------------------
73
// END Transliterator support
74
//------------------------------------------------------------
76
//------------------------------------------------------------
77
// BEGIN BreakIterator support
78
//------------------------------------------------------------
81
// ignore non-spacing marks and enclosing marks (since we never
82
// put a break before ignore characters, this keeps combining
83
// accents with the base characters they modify)
84
"$ignore=[[:Mn:][:Me:]];"
86
// other category definitions
87
"choseong=[\u1100-\u115f];"
88
"jungseong=[\u1160-\u11a7];"
89
"jongseong=[\u11a8-\u11ff];"
90
"surr-hi=[\ud800-\udbff];"
91
"surr-lo=[\udc00-\udfff];"
93
// break after every character, except as follows:
96
// keep CRLF sequences together
99
// keep surrogate pairs together
100
"{surr-hi}{surr-lo};"
102
// keep Hangul syllables spelled out using conjoining jamo together
103
"{choseong}*{jungseong}*{jongseong}*;"
105
// various additions for Hindi support
107
"danda=[\u0964\u0965];"
109
"devVowelSign=[\u093e-\u094c\u0962\u0963];"
110
"devConsonant=[\u0915-\u0939];"
111
"devNuktaConsonant=[\u0958-\u095f];"
112
"devCharEnd=[\u0902\u0903\u0951-\u0954];"
115
"devCAMN=({devConsonant}{nukta}?);"
116
"devConsonant1=({devNuktaConsonant}|{devCAMN});"
117
"devConjunct=(({devConsonant1}{virama}{zwj}?)?{devConsonant1});"
119
"{devConjunct}{devVowelSign}?{devCharEnd}?;"
123
// default rules for finding word boundaries
125
// ignore non-spacing marks, enclosing marks, and format characters,
126
// all of which should not influence the algorithm
127
"$ignore=[[:Mn:][:Me:][:Cf:]];"
129
// Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
130
// other letters, and digits
131
"danda=[\u0964\u0965];"
132
"kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
133
"kata=[\u3099-\u309c\u30a1-\u30fe];"
134
"hira=[\u3041-\u309e\u30fc];"
135
"let=[[[:L:][:Mc:]]-[{kanji}{kata}{hira}]];"
138
// punctuation that can occur in the middle of a word: currently
139
// dashes, apostrophes, quotation marks, and periods
140
"mid-word=[[:Pd:]\u00ad\u2027\\\"\\\'\\.];"
142
// punctuation that can occur in the middle of a number: currently
143
// apostrophes, quotation marks, periods, commas, and the Arabic
145
"mid-num=[\\\"\\\'\\,\u066b\\.];"
147
// punctuation that can occur at the beginning of a number: currently
148
// the period, the number sign, and all currency symbols except the cents sign
149
"pre-num=[[[:Sc:]-[\u00a2]]\\#\\.];"
151
// punctuation that can occur at the end of a number: currently
152
// the percent, per-thousand, per-ten-thousand, and Arabic percent
153
// signs, the cents sign, and the ampersand
154
"post-num=[\\%\\&\u00a2\u066a\u2030\u2031];"
156
// line separators: currently LF, FF, PS, and LS
157
"ls=[\n\u000c\u2028\u2029];"
159
// whitespace: all space separators and the tab character
162
// a word is a sequence of letters that may contain internal
163
// punctuation, as long as it begins and ends with a letter and
164
// never contains two punctuation marks in a row
165
"word=({let}+({mid-word}{let}+)*{danda}?);"
167
// a number is a sequence of digits that may contain internal
168
// punctuation, as long as it begins and ends with a digit and
169
// never contains two punctuation marks in a row.
170
"number=({dgt}+({mid-num}{dgt}+)*);"
172
// break after every character, with the following exceptions
173
// (this will cause punctuation marks that aren't considered
174
// part of words or numbers to be treated as words unto themselves)
177
// keep together any sequence of contiguous words and numbers
178
// (including just one of either), plus an optional trailing
179
// number-suffix character
180
"{word}?({number}{word})*({number}{post-num}?)?;"
182
// keep together any sequence of contiguous words and numbers
183
// that starts with a number-prefix character and a number,
184
// and may end with a number-suffix character
185
"{pre-num}({number}{word})*({number}{post-num}?)?;"
187
// keep together runs of whitespace (optionally with a single trailing
188
// line separator or CRLF sequence)
191
// keep together runs of Katakana
194
// keep together runs of Hiragana
197
// keep together runs of Kanji
201
// default rules for determining legal line-breaking positions
203
// ignore non-spacing marks, enclosing marks, and format characters
204
"$ignore=[[:Mn:][:Me:][:Cf:]];"
206
// Hindi phrase separators
207
"danda=[\u0964\u0965];"
209
// characters that always cause a break: ETX, tab, LF, FF, LS, and PS
210
"break=[\u0003\t\n\f\u2028\u2029];"
212
// characters that always prevent a break: the non-breaking space
213
// and similar characters
214
"nbsp=[\u00a0\u2007\u2011\ufeff];"
216
// whitespace: space separators and control characters, except for
217
// CR and the other characters mentioned above
218
"space=[[[:Zs:][:Cc:]]-[{nbsp}{break}\r]];"
220
// dashes: dash punctuation and the discretionary hyphen, except for
221
// non-breaking hyphens
222
"dash=[[[:Pd:]\u00ad]-[{nbsp}]];"
224
// characters that stick to a word if they precede it: currency symbols
225
// (except the cents sign) and starting punctuation
226
"pre-word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"
228
// characters that stick to a word if they follow it: ending punctuation,
229
// other punctuation that usually occurs at the end of a sentence,
230
// small Kana characters, some CJK diacritics, etc.
231
"post-word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
232
"\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
233
"\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
234
"\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
237
// Kanji: actually includes both Kanji and Kana, except for small Kana and
239
"kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[{post-word}{$ignore}]];"
242
"digit=[[:Nd:][:No:]];"
244
// punctuation that can occur in the middle of a number: periods and commas
247
// everything not mentioned above, plus the quote marks (which are both
248
// <pre-word>, <post-word>, and <char>)
249
"char=[^{break}{space}{dash}{kanji}{nbsp}{$ignore}{pre-word}{post-word}{mid-num}{danda}\r\\\"\\\'];"
251
// a "number" is a run of prefix characters and dashes, followed by one or
252
// more digits with isolated number-punctuation characters interspersed
253
"number=([{pre-word}{dash}]*{digit}+({mid-num}{digit}+)*);"
255
// the basic core of a word can be either a "number" as defined above, a single
256
// "Kanji" character, or a run of any number of not-explicitly-mentioned
257
// characters (this includes Latin letters)
258
"word-core=([{pre-word}{char}]*|{kanji}|{number});"
260
// a word may end with an optional suffix that can be either a run of one or
261
// more dashes or a run of word-suffix characters, followed by an optional
263
"word-suffix=(({dash}+|{post-word}*){space}*);"
265
// a word, thus, is an optional run of word-prefix characters, followed by
266
// a word core and a word suffix (the syntax of <word-core> and <word-suffix>
267
// actually allows either of them to match the empty string, putting a break
268
// between things like ")(" or "aaa(aaa")
269
"word=({pre-word}*{word-core}{word-suffix});"
271
// finally, the rule that does the work: Keep together any run of words that
272
// are joined by runs of one or more non-breaking spaces. Also keep a trailing
273
// line-break character or CRLF combination with the word. (line separators
274
// "win" over nbsp's)
275
"{word}({nbsp}+{word})*\r?{break}?;"
278
// default rules for finding sentence boundaries
280
// ignore non-spacing marks, enclosing marks, and format characters
281
"$ignore=[[:Mn:][:Me:][:Cf:]];"
286
// uppercase Latin letters
289
// whitespace (line separators are treated as whitespace)
290
"space=[\t\r\f\n\u2028[:Zs:]];"
292
// punctuation which may occur at the beginning of a sentence: "starting
293
// punctuation" and quotation marks
294
"start=[[:Ps:]\\\"\\\'];"
296
// punctuation which may occur at the end of a sentence: "ending punctuation"
297
// and quotation marks
298
"end=[[:Pe:]\\\"\\\'];"
303
// characters that unambiguously signal the end of a sentence
304
"term=[\\!\\?\u3002\uff01\uff1f];"
306
// periods, which MAY signal the end of a sentence
307
"period=[\\.\uff0e];"
309
// characters that may occur at the beginning of a sentence: basically anything
310
// not mentioned above (lowercase letters and digits are specifically excluded)
311
"sent-start=[^{lc}{ucLatin}{space}{start}{end}{digit}{term}{period}\u2029{$ignore}];"
313
// Hindi phrase separator
314
"danda=[\u0964\u0965];"
316
// always break sentences after paragraph separators
319
// always break after a danda, if it's followed by whitespace
320
".*?{danda}{space}*;"
322
// if you see a period, skip over additional periods and ending punctuation
323
// and if the next character is a paragraph separator, break after the
324
// paragraph separator
325
".*?{period}[{period}{end}]*{space}*\u2029;"
327
// if you see a period, skip over additional periods and ending punctuation,
328
// followed by optional whitespace, followed by optional starting punctuation,
329
// and if the next character is something that can start a sentence
330
// (basically, a capital letter), then put the sentence break between the
331
// whitespace and the opening punctuation
332
".*?{period}[{period}{end}]*{space}*/({start}*{sent-start}|{start}+{ucLatin});"
334
// same as above, except that there's a sentence break before a Latin capital
335
// letter only if there's at least one space after the period
336
".*?{period}[{period}{end}]*{space}+/{ucLatin};"
338
// if you see a sentence-terminating character, skip over any additional
339
// terminators, periods, or ending punctuation, followed by any whitespace,
340
// followed by a SINGLE optional paragraph separator, and put the break there
341
".*?{term}[{term}{period}{end}]*{space}*\u2029?;"
343
// The following rules are here to aid in backwards iteration. The automatically
344
// generated backwards state table will rewind to the beginning of the
345
// paragraph all the time (or all the way to the beginning of the document
346
// if the document doesn't use the Unicode PS character) because the only
347
// unambiguous character pairs are those involving paragraph separators.
348
// These specify a few more unambiguous breaking situations.
350
// if you see a sentence-starting character, followed by starting punctuation
351
// (remember, we're iterating backwards), followed by an optional run of
352
// whitespace, followed by an optional run of ending punctuation, followed
353
// by a period, this is a safe place to turn around
354
"![{sent-start}{ucLatin}]{start}*{space}+{end}*{period};"
356
// if you see a letter or a digit, followed by an optional run of
357
// starting punctuation, followed by an optional run of whitespace,
358
// followed by an optional run of ending punctuation, followed by
359
// a sentence terminator, this is a safe place to turn around
360
"![{sent-start}{lc}{digit}]{start}*{space}*{end}*{term};"
363
//------------------------------------------------------------
364
// END BreakIterator support
365
//------------------------------------------------------------
371
// The official list of ISO-3166 country codes is located at:
372
// http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html
374
//AA {} // Not in ISO-3166 standard
376
AE { "United Arab Emirates" }
378
AG { "Antigua and Barbuda" }
382
AN { "Netherlands Antilles" }
386
AS { "American Samoa" }
392
BA { "Bosnia and Herzegovina" }
396
BF { "Burkina Faso" }
407
BV { "Bouvet Island" }
413
CC { "Cocos Islands"}
414
CD { "Democratic Republic of the Congo" }
415
CF { "Central African Republic" }
418
CI { "C\u00F4te d'Ivoire" }
419
CK { "Cook Islands" }
427
CX { "Christmas Island" }
429
CZ { "Czech Republic" }
435
DO { "Dominican Republic" }
441
EH { "Western Sahara" }
448
FK { "Falkland Islands" }
450
FO { "Faroe Islands" }
454
GB { "United Kingdom" }
457
GF { "French Guiana" }
464
GQ { "Equatorial Guinea" }
466
GS { "South Georgia and South Sandwich Islands" }
469
GW { "Guinea-Bissau" }
472
HK { "Hong Kong S.A.R., China" }
473
HM { "Heard Island and McDonald Islands" }
483
IO { "British Indian Ocean Territory" }
498
KN { "Saint Kitts and Nevis" }
502
KY { "Cayman Islands" }
508
LI { "Liechtenstein" }
521
MH { "Marshall Islands"}
527
MP { "Northern Mariana Islands"}
540
NC { "New Caledonia" }
542
NF { "Norfolk Island" }
556
PF { "French Polynesia" }
557
PG { "Papua New Guinea" }
561
PM { "Saint Pierre and Miquelon" }
570
RE { "R\u00E9union" }
575
SA { "Saudi Arabia" }
576
SB { "Solomon Islands" }
581
SH { "Saint Helena" }
583
SJ { "Svalbard and Jan Mayen" }
585
SL { "Sierra Leone" }
591
ST { "Sao Tome and Principe" }
596
TC { "Turks and Caicos Islands" }
598
TF { "French Southern Territories" }
603
TM { "Turkmenistan" }
608
TT { "Trinidad and Tobago" }
615
UM { "United States Minor Outlying Islands" }
616
US { "United States" }
621
VC { "Saint Vincent and the Grenadines" }
623
VG { "British Virgin Islands" }
624
VI { "U.S. Virgin Islands" }
628
WF { "Wallis and Futuna" }
635
ZA { "South Africa" }
645
DateTimeElements:intvector {
654
"EEEE, MMMM d, yyyy",
682
// If there is a two letter abbreviation (ISO-639-1) for a three letter
683
// abbreviation (ISO-639-2), the two letter abbreviation must be used.
684
// Source: http://lcweb.loc.gov/standards/iso639-2/englangn.html
686
root{"Root"} // The only exception to ISO-639
695
afa { "Afro-Asiatic (Other)" }
700
alg { "Algonquian Languages" }
702
ang { "English, Old (ca.450-1100)" }
703
apa { "Apache Languages"}
708
art { "Artificial (Other)" }
711
ath { "Athapaskan Languages" }
712
aus { "Australian Languages" }
719
bai { "Bamileke Languages" }
724
bat { "Baltic (Other)" }
747
cai { "Central American Indian (Other)" }
749
cau { "Caucasian (Other)" }
752
cel { "Celtic (Other)" }
758
chn { "Chinook Jargon" }
763
cmc { "Chamic Languages" }
766
cpe { "Creoles and Pidgins, English-based (Other)" }
767
cpf { "Creoles and Pidgins, French-based (Other)" }
769
crp { "Creoles and Pidgins (Other)" }
771
cu { "Church Slavic" }
772
cus { "Cushitic (Other)" }
785
dra { "Dravidian (Other)" }
787
dum { "Dutch, Middle (ca. 1050-1350)" }
792
egy { "Egyptian (Ancient)" }
794
el { "Greek" } // Modern (1453-)
797
enm { "English, Middle (1100-1500)" } // three-letter ISO-639-2 code
808
fiu { "Finno-Ugrian" } // three-letter ISO-639-2 code
813
frm { "French, Middle (ca.1400-1600)" }
814
fro { "French, Old (842-ca.1400)" }
822
gd { "Scots Gaelic" }
823
gem { "Germanic (Other)" }
826
gla { "Gaelic (Scots)" }
827
gmh { "German, Middle High (ca.1050-1500)" }
829
goh { "German, Old High (ca.750-1050)" }
834
grc { "Greek, Ancient (to 1453)" }
860
//in { "Indonesian" } // Use id instead
861
inc { "Indic (Other)" }
862
ine { "Indo-European" }
864
iro { "Iroquoian Languages" } // three-letter ISO-639-2 code
868
iw { "Hebrew" } // Use he instead
870
//ji { "Yiddish" } // Use yi instead
871
jpr { "Judeo-Persian" }
872
jrb { "Judeo-Arabic" }
875
kaa { "Kara-Kalpak" }
909
lb { "Letzeburgesch" }
919
lub { "Luba-Katanga" }
925
lv { "Latvian (Lettish)" }
931
map { "Austronesian" }
936
mga { "Irish, Middle (900-1200)" }
940
min { "Minangkabau" }
941
mis { "Miscellaneous Languages" }
943
mkh { "Mon-Khmer (Other)" } // three-letter ISO-639-2 code
948
mno { "Manobo Languages" }
955
mul { "Multiple Languages" }
956
mun { "Munda Languages" }
963
nai { "North American Indian (Other)" } // three-letter ISO-639-2 code
964
nb { "Norwegian Bokm\u00e5l" }
965
nd { "Ndebele, North" }
966
nds { "Low German; Low Saxon" }
971
nic { "Niger-Kordofanian" } // three-letter ISO-639-2 code
974
nn { "Norwegian Nynorsk" }
977
nr { "Ndebele, South" }
978
nso { "Sotho, Northern" }
979
nub { "Nubian Languages" }
981
ny { "Chichewa; Nyanja" }
985
oc { "Proven\u00E7al; Occitan (post 1500)"}
987
om { "Oromo (Afan)" }
991
ota { "Turkish (Ottoman Empire)" }
992
oto { "Otomian Languages" }
994
paa { "Papuan (Other)" } // three-letter ISO-639-2 code
1000
peo { "Persian, Old (ca.600-400 B.C.)" } // three-letter ISO-639-2 code
1001
phi { "Philippine (Other)" }
1002
phn { "Phoenician" }
1006
pra { "Prakrit Languages" }
1007
pro { "Proven\u00E7al, Old (to 1500)" }
1008
ps { "Pashto (Pushto)" }
1013
rar { "Rarotongan" }
1014
rm { "Rhaeto-Romance" }
1017
roa { "Romance (Other)" }
1020
rw { "Kinyarwanda" }
1024
sai { "South American Indian (Other)" }
1032
se { "Northern Sami" }
1036
sga { "Irish, Old (to 900)" }
1037
sgn { "Sign Languages" }
1038
sh { "Serbo-Croatian" }
1042
sio { "Siouan Languages" }
1043
sit { "Sino-Tibetan (Other)" }
1056
ssa { "Nilo-Saharan" } // three-letter ISO-639-2 code
1057
st { "Sotho, Southern" }
1066
tai { "Tai (Other)" }
1081
tog { "Tonga (Nyasa)" }
1082
to { "Tongan (Tonga Islands)" }
1089
tut { "Altaic (Other)" }
1098
und { "Undetermined" }
1106
wak { "Wakashan Languages" }
1110
wen { "Sorbian Languages" }
1117
ypk { "Yupik Languages" }
1126
LocaleID:int { 0x0000 }
1127
// LocaleString { "en" }
1128
MonthAbbreviations {
1170
"#,##0.###;-#,##0.###",
1171
"\u00A4 #,##0.00;-\u00A4 #,##0.00",
1175
// ShortCountry { "" }
1176
// ShortLanguage { "eng" }
1177
localPatternChars { "GyMdkHmsSEDFwWahKzYe" }
1181
"Pacific Standard Time",
1183
"Pacific Daylight Time",
1189
"Mountain Standard Time",
1191
"Mountain Daylight Time",
1197
"Mountain Standard Time",
1199
"Mountain Standard Time",
1205
"Central Standard Time",
1207
"Central Daylight Time",
1213
"Eastern Standard Time",
1215
"Eastern Daylight Time",
1221
"Eastern Standard Time",
1223
"Eastern Standard Time",
1229
"Atlantic Standard Time",
1231
"Atlantic Standard Time",
1237
"Hawaii Standard Time",
1239
"Hawaii Standard Time",
1245
"Alaska Standard Time",
1247
"Alaska Daylight Time",
1254
LocaleScript:array {
1258
//------------------------------------------------------------
1259
// Rule Based Number Format Support
1260
//------------------------------------------------------------
1263
* Default used to be English (US) rules, but now default just formats
1264
* like DecimalFormat. The former default rules are now the _en rules.
1283
CANS { "Unified Canadian Aboriginal Symbols" }
1286
DEVA { "Devanagari" }
1298
ITAL { "Old_italic" }
1304
MLYM { "Malayalam" }
1305
MONG { "Mongolian" }
1310
QAAI { "Inherited" }