1
// ***************************************************************************
3
// * Copyright (C) 1997-2002, International Business Machines
4
// * Corporation and others. All Rights Reserved.
6
// ***************************************************************************
16
Sequence { "[normalization on]" }
19
AL { "\u0391\u03bb\u03b2\u03b1\u03bd\u03af\u03b1" }
20
AS { "\u0391\u03c3\u03af\u03b1 (\u0391\u03b3\u03b3\u03bb\u03b9\u03ba\u03ac)" }
21
AT { "\u0391\u03c5\u03c3\u03c4\u03c1\u03af\u03b1" }
22
AU { "\u0391\u03c5\u03c3\u03c4\u03c1\u03b1\u03bb\u03af\u03b1 (\u0391\u03b3\u03b3\u03bb\u03b9\u03ba\u03ac)" }
23
BA { "\u0392\u03bf\u03c3\u03bd\u03af\u03b1" }
24
BE { "\u0392\u03ad\u03bb\u03b3\u03b9\u03bf" }
25
BG { "\u0392\u03bf\u03c5\u03bb\u03b3\u03b1\u03c1\u03af\u03b1" }
26
BR { "\u0392\u03c1\u03b1\u03b6\u03b9\u03bb\u03af\u03b1" }
27
CA { "\u039a\u03b1\u03bd\u03b1\u03b4\u03ac\u03c2" }
28
CH { "\u0395\u03bb\u03b2\u03b5\u03c4\u03af\u03b1" }
29
CN { "\u039a\u03af\u03bd\u03b1 (\u039b.\u0394.\u039a.)" }
30
CZ { "\u03a4\u03c3\u03b5\u03c7\u03af\u03b1" }
31
DE { "\u0393\u03b5\u03c1\u03bc\u03b1\u03bd\u03af\u03b1" }
32
DK { "\u0394\u03b1\u03bd\u03af\u03b1" }
33
EE { "\u0395\u03c3\u03b8\u03bf\u03bd\u03af\u03b1" }
34
ES { "\u0399\u03c3\u03c0\u03b1\u03bd\u03af\u03b1" }
35
FI { "\u03a6\u03b9\u03bd\u03bb\u03b1\u03bd\u03b4\u03af\u03b1" }
36
FR { "\u0393\u03b1\u03bb\u03bb\u03af\u03b1" }
37
GB { "\u0397\u03bd\u03c9\u03bc\u03ad\u03bd\u03bf \u0392\u03b1\u03c3\u03af\u03bb\u03b5\u03b9\u03bf" }
38
GR { "\u0395\u03bb\u03bb\u03ac\u03b4\u03b1" }
39
HR { "\u039a\u03c1\u03bf\u03b1\u03c4\u03af\u03b1" }
40
HU { "\u039f\u03c5\u03b3\u03b3\u03b1\u03c1\u03af\u03b1" }
41
IE { "\u0399\u03c1\u03bb\u03b1\u03bd\u03b4\u03af\u03b1" }
42
IL { "\u0399\u03c3\u03c1\u03b1\u03ae\u03bb" }
43
IS { "\u0399\u03c3\u03bb\u03b1\u03bd\u03b4\u03af\u03b1" }
44
IT { "\u0399\u03c4\u03b1\u03bb\u03af\u03b1" }
45
JP { "\u0399\u03b1\u03c0\u03c9\u03bd\u03af\u03b1" }
46
KR { "\u039a\u03bf\u03c1\u03ad\u03b1" }
47
LA { "\u039b\u03b1\u03c4\u03b9\u03bd\u03b9\u03ba\u03ae \u0391\u03bc\u03b5\u03c1\u03b9\u03ba\u03ae" }
48
LT { "\u039b\u03b9\u03b8\u03bf\u03c5\u03b1\u03bd\u03af\u03b1" }
49
LV { "\u039b\u03b5\u03c4\u03bf\u03bd\u03af\u03b1" }
50
MK { "\u03a0\u0393\u0394 \u039c\u03b1\u03ba\u03b5\u03b4\u03bf\u03bd\u03af\u03b1\u03c2" }
51
NL { "\u039f\u03bb\u03bb\u03b1\u03bd\u03b4\u03af\u03b1" }
52
NO { "\u039d\u03bf\u03c1\u03b2\u03b7\u03b3\u03af\u03b1" }
53
NZ { "\u039d\u03ad\u03b1 \u0396\u03b7\u03bb\u03b1\u03bd\u03b4\u03af\u03b1" }
54
PL { "\u03a0\u03bf\u03bb\u03c9\u03bd\u03af\u03b1" }
55
PT { "\u03a0\u03bf\u03c1\u03c4\u03bf\u03b3\u03b1\u03bb\u03af\u03b1" }
56
RO { "\u03a1\u03bf\u03c5\u03bc\u03b1\u03bd\u03af\u03b1" }
57
RU { "\u03a1\u03c9\u03c3\u03af\u03b1" }
58
SE { "\u03a3\u03bf\u03c5\u03b7\u03b4\u03af\u03b1" }
59
SI { "\u03a3\u03bb\u03bf\u03b2\u03b5\u03bd\u03af\u03b1" }
60
SK { "\u03a3\u03bb\u03bf\u03b2\u03b1\u03ba\u03af\u03b1" }
61
SP { "\u03a3\u03b5\u03c1\u03b2\u03af\u03b1" }
62
TH { "\u03a4\u03b1\u03ca\u03bb\u03ac\u03bd\u03b4\u03b7" }
63
TR { "\u03a4\u03bf\u03c5\u03c1\u03ba\u03af\u03b1" }
64
TW { "\u03a4\u03b1\u03ca\u03b2\u03ac\u03bd (\u0394.\u039a.)" }
65
US { "\u0397\u03bd\u03c9\u03bc\u03ad\u03bd\u03b5\u03c2 \u03a0\u03bf\u03bb\u03b9\u03c4\u03b5\u03af\u03b5\u03c2 \u0391\u03bc\u03b5\u03c1\u03b9\u03ba\u03ae\u03c2" }
66
ZA { "\u039d\u03cc\u03c4\u03b9\u03bf\u03c2 \u0391\u03c6\u03c1\u03b9\u03ba\u03ae" }
68
DateTimeElements:intvector {
93
"\u039A\u03C5\u03C1\u03B9\u03B1\u03BA\u03AE",
94
"\u0394\u03B5\u03C5\u03C4\u03AD\u03C1\u03B1",
95
"\u03A4\u03C1\u03AF\u03C4\u03B7",
96
"\u03A4\u03B5\u03C4\u03AC\u03C1\u03C4\u03B7",
97
"\u03A0\u03AD\u03BC\u03C0\u03C4\u03B7",
98
"\u03A0\u03B1\u03C1\u03B1\u03C3\u03BA\u03B5\u03C5\u03AE",
99
"\u03A3\u03AC\u03B2\u03B2\u03B1\u03C4\u03BF",
101
/* Language display names should be all lower case, but most of the entries below are capitalized */
103
ar { "\u0391\u03c1\u03b1\u03b2\u03b9\u03ba\u03ac" }
104
bg { "\u0392\u03bf\u03c5\u03bb\u03b3\u03b1\u03c1\u03b9\u03ba\u03ac" }
105
ca { "\u039a\u03b1\u03c4\u03b1\u03bb\u03b1\u03bd\u03b9\u03ba\u03ac" }
106
cs { "\u03a4\u03c3\u03ad\u03c7\u03b9\u03ba\u03b1" }
107
da { "\u0394\u03b1\u03bd\u03ad\u03b6\u03b9\u03ba\u03b1" }
108
de { "\u0393\u03b5\u03c1\u03bc\u03b1\u03bd\u03b9\u03ba\u03ac" }
109
el { "\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac" }
110
en { "\u0391\u03b3\u03b3\u03bb\u03b9\u03ba\u03ac" }
111
es { "\u0399\u03c3\u03c0\u03b1\u03bd\u03b9\u03ba\u03ac" }
112
fi { "\u03a6\u03b9\u03bd\u03bb\u03b1\u03bd\u03b4\u03b9\u03ba\u03ac" }
113
fr { "\u0393\u03b1\u03bb\u03bb\u03b9\u03ba\u03ac" }
114
he { "\u0395\u03b2\u03c1\u03b1\u03ca\u03ba\u03ac" }
115
hr { "\u039a\u03c1\u03bf\u03b1\u03c4\u03b9\u03ba\u03ac" }
116
hu { "\u039f\u03c5\u03b3\u03b3\u03c1\u03b9\u03ba\u03ac" }
117
it { "\u0399\u03c4\u03b1\u03bb\u03b9\u03ba\u03ac" }
118
mk { "\u03a3\u03bb\u03b1\u03b2\u03bf\u03bc\u03b1\u03ba\u03b5\u03b4\u03bf\u03bd\u03b9\u03ba\u03ac" }
119
nl { "\u039f\u03bb\u03bb\u03b1\u03bd\u03b4\u03b9\u03ba\u03ac" }
120
no { "\u039d\u03bf\u03c1\u03b2\u03b7\u03b3\u03b9\u03ba\u03ac" }
121
pl { "\u03a0\u03bf\u03bb\u03c9\u03bd\u03b9\u03ba\u03ac" }
122
pt { "\u03a0\u03bf\u03c1\u03c4\u03bf\u03b3\u03b1\u03bb\u03b9\u03ba\u03ac" }
123
ro { "\u03a1\u03bf\u03c5\u03bc\u03b1\u03bd\u03b9\u03ba\u03ac" }
124
ru { "\u03a1\u03c9\u03c3\u03b9\u03ba\u03ac" }
125
sk { "\u03a3\u03bb\u03bf\u03b2\u03b1\u03ba\u03b9\u03ba\u03ac" }
126
sl { "\u03a3\u03bb\u03bf\u03b2\u03b5\u03bd\u03b9\u03ba\u03ac" }
127
sq { "\u0391\u03bb\u03b2\u03b1\u03bd\u03b9\u03ba\u03ac" }
128
sr { "\u03a3\u03b5\u03c1\u03b2\u03b9\u03ba\u03ac" }
129
sv { "\u03a3\u03bf\u03c5\u03b7\u03b4\u03b9\u03ba\u03ac" }
131
LocaleID:int { 0x08 }
132
// LocaleString { "el" }
134
"\u0399\u03B1\u03BD",
135
"\u03A6\u03B5\u03B2",
136
"\u039C\u03B1\u03C1",
137
"\u0391\u03C0\u03C1",
138
"\u039C\u03B1\u03CA",
139
"\u0399\u03BF\u03C5\u03BD",
140
"\u0399\u03BF\u03C5\u03BB",
141
"\u0391\u03C5\u03B3",
142
"\u03A3\u03B5\u03C0",
143
"\u039F\u03BA\u03C4",
144
"\u039D\u03BF\u03B5",
145
"\u0394\u03B5\u03BA",
148
"\u0399\u03B1\u03BD\u03BF\u03C5\u03AC\u03C1\u03B9\u03BF\u03C2",
149
"\u03A6\u03B5\u03B2\u03C1\u03BF\u03C5\u03AC\u03C1\u03B9\u03BF\u03C2",
150
"\u039C\u03AC\u03C1\u03C4\u03B9\u03BF\u03C2",
151
"\u0391\u03C0\u03C1\u03AF\u03BB\u03B9\u03BF\u03C2",
152
"\u039C\u03AC\u03CA\u03BF\u03C2",
153
"\u0399\u03BF\u03CD\u03BD\u03B9\u03BF\u03C2",
154
"\u0399\u03BF\u03CD\u03BB\u03B9\u03BF\u03C2",
155
"\u0391\u03CD\u03B3\u03BF\u03C5\u03C3\u03C4\u03BF\u03C2",
156
"\u03A3\u03B5\u03C0\u03C4\u03AD\u03BC\u03B2\u03C1\u03B9\u03BF\u03C2",
157
"\u039F\u03BA\u03C4\u03CE\u03B2\u03C1\u03B9\u03BF\u03C2",
158
"\u039D\u03BF\u03AD\u03BC\u03B2\u03C1\u03B9\u03BF\u03C2",
159
"\u0394\u03B5\u03BA\u03AD\u03BC\u03B2\u03C1\u03B9\u03BF\u03C2",
174
// ShortLanguage { "ell" }
175
localPatternChars { "GanjkHmsSEDFwWxhKzAe" }
186
"Grek" // ISO 15924 Name
188
ExemplarCharacters{"[\u03ac-\u03ce]"}
190
//------------------------------------------------------------
191
// Rule Based Number Format Support
192
//------------------------------------------------------------
195
* omit rules for Greek until we have complete and valid data.
197
// * Spellout rules for Greek. Again in Greek we have to supply the words
198
// * for the multiples of 100 because they can't be derived algorithmically.
199
// * Also, the tens digit changes form when followed by a ones digit: an
200
// * accent mark disappears from the tens digit and moves to the ones digit.
201
// * Therefore, instead of using the [] notation, we actually have to use
202
// * two separate rules for each multiple of 10 to show the two forms of
205
// TODO: information on how to format negatives and decimals is still needed.
206
// TODO: the word for zero is also missing and needs to be supplied.
209
"zero (incomplete data); \u03ad\u03bd\u03b1; \u03b4\u03cd\u03bf; \u03c4\u03c1\u03af\u03b1; "
210
"\u03c4\u03ad\u03c3\u03c3\u03b5\u03c1\u03b1; \u03c0\u03ad\u03bd\u03c4\u03b5; "
211
"\u03ad\u03be\u03b9; \u03b5\u03c0\u03c4\u03ac; \u03bf\u03ba\u03c4\u03ce; "
212
"\u03b5\u03bd\u03bd\u03ad\u03b1;\n"
213
"10: \u03b4\u03ad\u03ba\u03b1; "
214
"\u03ad\u03bd\u03b4\u03b5\u03ba\u03b1; \u03b4\u03ce\u03b4\u03b5\u03ba\u03b1; "
215
"\u03b4\u03b5\u03ba\u03b1>>;\n"
216
"20: \u03b5\u03af\u03ba\u03bf\u03c3\u03b9; \u03b5\u03b9\u03ba\u03bf\u03c3\u03b9>>;\n"
217
"30: \u03c4\u03c1\u03b9\u03ac\u03bd\u03c4\u03b1; \u03c4\u03c1\u03b9\u03b1\u03bd\u03c4\u03b1>>;\n"
218
"40: \u03c3\u03b1\u03c1\u03ac\u03bd\u03c4\u03b1; \u03c3\u03b1\u03c1\u03b1\u03bd\u03c4\u03b1>>;\n"
219
"50: \u03c0\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; \u03c0\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
220
"60: \u03b5\u03be\u03ae\u03bd\u03c4\u03b1; \u03b5\u03be\u03b7\u03bd\u03c4\u03b1>>;\n"
221
"70: \u03b5\u03b2\u03b4\u03bf\u03bc\u03ae\u03bd\u03c4\u03b1; "
222
"\u03b5\u03b2\u03b4\u03bf\u03bc\u03b7\u03bd\u03c4\u03b1>>;\n"
223
"80: \u03bf\u03b3\u03b4\u03cc\u03bd\u03c4\u03b1; \u03bf\u03b3\u03b4\u03bf\u03bd\u03c4\u03b1>>;\n"
224
"90: \u03b5\u03bd\u03bd\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; "
225
"\u03b5\u03bd\u03bd\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
226
"100: \u03b5\u03ba\u03b1\u03c4\u03cc[\u03bd >>];\n"
227
"200: \u03b4\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
228
"300: \u03c4\u03c1\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
229
"400: \u03c4\u03b5\u03c4\u03c1\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
230
"500: \u03c0\u03b5\u03bd\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
231
"600: \u03b5\u03be\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
232
"700: \u03b5\u03c0\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
233
"800: \u03bf\u03ba\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
234
"900: \u03b5\u03bd\u03bd\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
235
"1000: \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
236
"2000: << \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
237
"1,000,000: << \u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
238
"1,000,000,000: << \u03b4\u03b9\u03c3\u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
239
"1,000,000,000,000: =#,##0="
246
// Rules are predicated on running NFD first, and NFC afterwards
247
// MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
248
// WARNING: need to add accents to both filters ###
249
":: [\u0301\u0304\u0306\u0308;\u00B5\u00B7\u00C0\u00C2\u00C8\u00CA\u00CC\u00CE\u00D2\u00D4\u00D9\u00DB\u00E0\u00E2\u00E8\u00EA\u00EC\u00EE\u00F2\u00F4\u00F9\u00FB\u0108-\u0109\u011C-\u011D\u0124-\u0125\u0134-\u0135\u015C-\u015D\u0174-\u0177\u01DB-\u01DC\u01F8-\u01F9\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03F0-\u03F5\u0400\u040D\u0450\u045D\u1E14-\u1E15\u1E50-\u1E51\u1E80-\u1E81\u1E90-\u1E91\u1EA4-\u1EAD\u1EB0-\u1EB1\u1EBE-\u1EC7\u1ED0-\u1ED9\u1EDC-\u1EDD\u1EEA-\u1EEB\u1EF2-\u1EF3\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCD\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDD\u1FDF-\u1FED\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126] ;"
251
// ":: [\\u0000-\\u007F \\u00B7 [:Greek:] [:nonspacing mark:]] ;"
258
"$lower = [[:latin:][:greek:] & [:Ll:]] ; "
259
"$upper = [[:latin:][:greek:] & [:Lu:]] ; "
265
"$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ; "
266
"$ucgvowel = [\u0391\u0395\u0397\u0399\u039f\u03a5\u03a9] ; "
267
"$gvowel = [$lcgvowel $ucgvowel] ; "
268
"$lcgvowelC = [$lcgvowel $accent] ; "
270
"$evowel = [aeiouyAEIOUY];"
271
"$vowel = [ $evowel $gvowel] ; "
273
"$beforeLower = $accent * $lower ; "
275
"$gammaLike = [\u0393\u039a\u039e\u03a7\u03b3\u03ba\u03be\u03c7\u03f0] ; "
276
"$egammaLike = [GKXCgkxc] ; "
277
"$smooth = \u0313 ; "
279
"$iotasub = \u0345 ; "
281
"$softener = [\u03b2\u0392\u03b3\u0393\u03b4\u0394\u03b6\u0396\u03bb\u039b\u03bc\u039c\u03bd\u039d\u03c1\u03a1$gvowel] ;"
287
"$afterLetter = [:L:] [\\'[:M:]]* ;"
288
"$beforeLetter = [\\'[:M:]]* [:L:] ;"
296
// Fix any ancient characters that creep in
306
// need to have these up here so the rules don't mask
308
"\u03b7 <> i $under ;"
309
"\u0397 <> I $under ;"
311
"\u03a8 } $beforeLower <> Ps ; "
315
"\u03c9 <> o $under ;"
316
"\u03a9 <> O $under;"
318
// at beginning or end of word, convert mp to b
320
"[^[:L:][:M:]] { \u03bc\u03c0 > b ; "
321
"\u03bc\u03c0 } [^[:L:][:M:]] > b ; "
322
"[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "
323
"[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
325
"\u03bc\u03c0 < b ; "
326
"\u039c\u03c0 < B } $beforeLower ; "
327
"\u039c\u03a0 < B ; "
329
// handle diphthongs ending with upsilon
331
"$vowel { \u03c5 } $softener <> v $under ; "
332
"$vowel { \u03c5 } <> f $under; "
334
"$vowel { \u03a5 } $softener <> V $under ; "
335
"$vowel { \u03a5 <> U $under ; "
346
"\u03b3 } $gammaLike <> n } $egammaLike ; "
348
"\u0393 } $gammaLike <> N } $egammaLike ; "
361
"\u0398 } $beforeLower <> Th ; "
376
"\u03bd } $gammaLike > n\\' ; "
378
"\u039d } $gammaLike <> N\\' ; "
393
"[Pp] { } \u03c2 > \\' ; "
394
"[Pp] { } \u03c3 > \\' ;"
396
// Caron means exception
398
// before a letter, initial
399
"\u03c2 } $beforeLetter <> s $under } $beforeLetter;"
400
"\u03c3 } $beforeLetter <> s } $beforeLetter;"
402
// otherwise, after a letter = final
403
"$afterLetter { \u03c3 <> $afterLetter { s $under;"
404
"$afterLetter { \u03c2 <> $afterLetter { s ;"
406
// otherwise (isolated) = initial
407
"\u03c2 <> s $under;"
410
"[Pp] { \u03a3 <> \\'S ; "
420
"\u03a7 } $beforeLower <> Ch ; "
423
// Completeness for ASCII
425
// "$ignore = [[:Mark:]''] * ;"
445
// Completeness for Greek
447
"\u03d0 > | \u03b2 ;"
448
"\u03d1 > | \u03b8 ;"
449
"\u03d2 > | \u03a5 ;"
450
"\u03d5 > | \u03c6 ;"
451
"\u03d6 > | \u03c0 ;"
453
"\u03f0 > | \u03ba ;"
454
"\u03f1 > | \u03c1 ;"
455
"\u03f2 > | \u03c3 ;"
457
"\u03f4 > | \u0398 ;"
458
"\u03f5 > | \u03b5 ;"
459
"\u00B5 > | \u03BC ; "
461
// delete any trailing ' marks used for roundtripping
463
" < [\u03a0\u03c0] { \\' } [Ss] ;"
464
" < [\u039d\u03bd] { \\' } $egammaLike ;"
468
// MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
469
":: ( [\u0301\u0304\u0306\u0308':?A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u0331\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B] ) ;"
471
// ":: ([\\u0000-\\u007F [:Latin:] [:nonspacing mark:]]) ;"