1
package org.apache.lucene.analysis;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import org.apache.lucene.analysis.MockTokenizer;
21
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
22
import java.io.StringReader;
23
import java.util.List;
24
import java.util.ArrayList;
25
import java.util.Iterator;
27
public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase {
29
// testLatin1Accents() is a copy of TestLatin1AccentFilter.testU().
30
public void testLatin1Accents() throws Exception {
31
TokenStream stream = new MockTokenizer(new StringReader
32
("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ"
33
+" Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij"
34
+" ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl"), MockTokenizer.WHITESPACE, false);
35
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
37
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
39
assertTermEquals("Des", filter, termAtt);
40
assertTermEquals("mot", filter, termAtt);
41
assertTermEquals("cles", filter, termAtt);
42
assertTermEquals("A", filter, termAtt);
43
assertTermEquals("LA", filter, termAtt);
44
assertTermEquals("CHAINE", filter, termAtt);
45
assertTermEquals("A", filter, termAtt);
46
assertTermEquals("A", filter, termAtt);
47
assertTermEquals("A", filter, termAtt);
48
assertTermEquals("A", filter, termAtt);
49
assertTermEquals("A", filter, termAtt);
50
assertTermEquals("A", filter, termAtt);
51
assertTermEquals("AE", filter, termAtt);
52
assertTermEquals("C", filter, termAtt);
53
assertTermEquals("E", filter, termAtt);
54
assertTermEquals("E", filter, termAtt);
55
assertTermEquals("E", filter, termAtt);
56
assertTermEquals("E", filter, termAtt);
57
assertTermEquals("I", filter, termAtt);
58
assertTermEquals("I", filter, termAtt);
59
assertTermEquals("I", filter, termAtt);
60
assertTermEquals("I", filter, termAtt);
61
assertTermEquals("IJ", filter, termAtt);
62
assertTermEquals("D", filter, termAtt);
63
assertTermEquals("N", filter, termAtt);
64
assertTermEquals("O", filter, termAtt);
65
assertTermEquals("O", filter, termAtt);
66
assertTermEquals("O", filter, termAtt);
67
assertTermEquals("O", filter, termAtt);
68
assertTermEquals("O", filter, termAtt);
69
assertTermEquals("O", filter, termAtt);
70
assertTermEquals("OE", filter, termAtt);
71
assertTermEquals("TH", filter, termAtt);
72
assertTermEquals("U", filter, termAtt);
73
assertTermEquals("U", filter, termAtt);
74
assertTermEquals("U", filter, termAtt);
75
assertTermEquals("U", filter, termAtt);
76
assertTermEquals("Y", filter, termAtt);
77
assertTermEquals("Y", filter, termAtt);
78
assertTermEquals("a", filter, termAtt);
79
assertTermEquals("a", filter, termAtt);
80
assertTermEquals("a", filter, termAtt);
81
assertTermEquals("a", filter, termAtt);
82
assertTermEquals("a", filter, termAtt);
83
assertTermEquals("a", filter, termAtt);
84
assertTermEquals("ae", filter, termAtt);
85
assertTermEquals("c", filter, termAtt);
86
assertTermEquals("e", filter, termAtt);
87
assertTermEquals("e", filter, termAtt);
88
assertTermEquals("e", filter, termAtt);
89
assertTermEquals("e", filter, termAtt);
90
assertTermEquals("i", filter, termAtt);
91
assertTermEquals("i", filter, termAtt);
92
assertTermEquals("i", filter, termAtt);
93
assertTermEquals("i", filter, termAtt);
94
assertTermEquals("ij", filter, termAtt);
95
assertTermEquals("d", filter, termAtt);
96
assertTermEquals("n", filter, termAtt);
97
assertTermEquals("o", filter, termAtt);
98
assertTermEquals("o", filter, termAtt);
99
assertTermEquals("o", filter, termAtt);
100
assertTermEquals("o", filter, termAtt);
101
assertTermEquals("o", filter, termAtt);
102
assertTermEquals("o", filter, termAtt);
103
assertTermEquals("oe", filter, termAtt);
104
assertTermEquals("ss", filter, termAtt);
105
assertTermEquals("th", filter, termAtt);
106
assertTermEquals("u", filter, termAtt);
107
assertTermEquals("u", filter, termAtt);
108
assertTermEquals("u", filter, termAtt);
109
assertTermEquals("u", filter, termAtt);
110
assertTermEquals("y", filter, termAtt);
111
assertTermEquals("y", filter, termAtt);
112
assertTermEquals("fi", filter, termAtt);
113
assertTermEquals("fl", filter, termAtt);
114
assertFalse(filter.incrementToken());
118
// The following Perl script generated the foldings[] array automatically
119
// from ASCIIFoldingFilter.java:
121
// ============== begin get.test.cases.pl ==============
126
// my $file = "ASCIIFoldingFilter.java";
127
// my $output = "testcases.txt";
131
// open IN, "<:utf8", $file or die "Error opening input file '$file': $!";
132
// open OUT, ">:utf8", $output or die "Error opening output file '$output': $!";
134
// while (my $line = <IN>) {
136
// # case '\u0133': // <char> <maybe URL> [ description ]
137
// if ($line =~ /case\s+'\\u(....)':.*\[([^\]]+)\]/) {
140
// $codes{$code} = $desc;
142
// # output[outputPos++] = 'A';
143
// elsif ($line =~ /output\[outputPos\+\+\] = '(.+)';/) {
144
// my $output_char = $1;
145
// $folded .= $output_char;
147
// elsif ($line =~ /break;/ && length($folded) > 0) {
149
// for my $code (sort { hex($a) <=> hex($b) } keys %codes) {
150
// my $desc = $codes{$code};
152
// print OUT '+ ' if (not $first);
154
// print OUT '"', chr(hex($code)), qq!" // U+$code: $desc\n!;
156
// print OUT qq! ,"$folded", // Folded result\n\n!;
163
// ============== end get.test.cases.pl ==============
165
public void testAllFoldings() throws Exception {
166
// Alternating strings of:
167
// 1. All non-ASCII characters to be folded, concatenated together as a
169
// 2. The string of ASCII characters to which each of the above
170
// characters should be folded.
171
String[] foldings = {
172
"À" // U+00C0: LATIN CAPITAL LETTER A WITH GRAVE
173
+ "Á" // U+00C1: LATIN CAPITAL LETTER A WITH ACUTE
174
+ "Â" // U+00C2: LATIN CAPITAL LETTER A WITH CIRCUMFLEX
175
+ "Ã" // U+00C3: LATIN CAPITAL LETTER A WITH TILDE
176
+ "Ä" // U+00C4: LATIN CAPITAL LETTER A WITH DIAERESIS
177
+ "Å" // U+00C5: LATIN CAPITAL LETTER A WITH RING ABOVE
178
+ "Ā" // U+0100: LATIN CAPITAL LETTER A WITH MACRON
179
+ "Ă" // U+0102: LATIN CAPITAL LETTER A WITH BREVE
180
+ "Ą" // U+0104: LATIN CAPITAL LETTER A WITH OGONEK
181
+ "Ə" // U+018F: LATIN CAPITAL LETTER SCHWA
182
+ "Ǎ" // U+01CD: LATIN CAPITAL LETTER A WITH CARON
183
+ "Ǟ" // U+01DE: LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
184
+ "Ǡ" // U+01E0: LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
185
+ "Ǻ" // U+01FA: LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
186
+ "Ȁ" // U+0200: LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
187
+ "Ȃ" // U+0202: LATIN CAPITAL LETTER A WITH INVERTED BREVE
188
+ "Ȧ" // U+0226: LATIN CAPITAL LETTER A WITH DOT ABOVE
189
+ "Ⱥ" // U+023A: LATIN CAPITAL LETTER A WITH STROKE
190
+ "ᴀ" // U+1D00: LATIN LETTER SMALL CAPITAL A
191
+ "Ḁ" // U+1E00: LATIN CAPITAL LETTER A WITH RING BELOW
192
+ "Ạ" // U+1EA0: LATIN CAPITAL LETTER A WITH DOT BELOW
193
+ "Ả" // U+1EA2: LATIN CAPITAL LETTER A WITH HOOK ABOVE
194
+ "Ấ" // U+1EA4: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
195
+ "Ầ" // U+1EA6: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
196
+ "Ẩ" // U+1EA8: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
197
+ "Ẫ" // U+1EAA: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
198
+ "Ậ" // U+1EAC: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
199
+ "Ắ" // U+1EAE: LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
200
+ "Ằ" // U+1EB0: LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
201
+ "Ẳ" // U+1EB2: LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
202
+ "Ẵ" // U+1EB4: LATIN CAPITAL LETTER A WITH BREVE AND TILDE
203
+ "Ặ" // U+1EB6: LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
204
+ "Ⓐ" // U+24B6: CIRCLED LATIN CAPITAL LETTER A
205
+ "A" // U+FF21: FULLWIDTH LATIN CAPITAL LETTER A
206
,"A", // Folded result
208
"à" // U+00E0: LATIN SMALL LETTER A WITH GRAVE
209
+ "á" // U+00E1: LATIN SMALL LETTER A WITH ACUTE
210
+ "â" // U+00E2: LATIN SMALL LETTER A WITH CIRCUMFLEX
211
+ "ã" // U+00E3: LATIN SMALL LETTER A WITH TILDE
212
+ "ä" // U+00E4: LATIN SMALL LETTER A WITH DIAERESIS
213
+ "å" // U+00E5: LATIN SMALL LETTER A WITH RING ABOVE
214
+ "ā" // U+0101: LATIN SMALL LETTER A WITH MACRON
215
+ "ă" // U+0103: LATIN SMALL LETTER A WITH BREVE
216
+ "ą" // U+0105: LATIN SMALL LETTER A WITH OGONEK
217
+ "ǎ" // U+01CE: LATIN SMALL LETTER A WITH CARON
218
+ "ǟ" // U+01DF: LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
219
+ "ǡ" // U+01E1: LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
220
+ "ǻ" // U+01FB: LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
221
+ "ȁ" // U+0201: LATIN SMALL LETTER A WITH DOUBLE GRAVE
222
+ "ȃ" // U+0203: LATIN SMALL LETTER A WITH INVERTED BREVE
223
+ "ȧ" // U+0227: LATIN SMALL LETTER A WITH DOT ABOVE
224
+ "ɐ" // U+0250: LATIN SMALL LETTER TURNED A
225
+ "ə" // U+0259: LATIN SMALL LETTER SCHWA
226
+ "ɚ" // U+025A: LATIN SMALL LETTER SCHWA WITH HOOK
227
+ "ᶏ" // U+1D8F: LATIN SMALL LETTER A WITH RETROFLEX HOOK
228
+ "ḁ" // U+1E01: LATIN SMALL LETTER A WITH RING BELOW
229
+ "ᶕ" // U+1D95: LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK
230
+ "ẚ" // U+1E9A: LATIN SMALL LETTER A WITH RIGHT HALF RING
231
+ "ạ" // U+1EA1: LATIN SMALL LETTER A WITH DOT BELOW
232
+ "ả" // U+1EA3: LATIN SMALL LETTER A WITH HOOK ABOVE
233
+ "ấ" // U+1EA5: LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
234
+ "ầ" // U+1EA7: LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
235
+ "ẩ" // U+1EA9: LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
236
+ "ẫ" // U+1EAB: LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
237
+ "ậ" // U+1EAD: LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
238
+ "ắ" // U+1EAF: LATIN SMALL LETTER A WITH BREVE AND ACUTE
239
+ "ằ" // U+1EB1: LATIN SMALL LETTER A WITH BREVE AND GRAVE
240
+ "ẳ" // U+1EB3: LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
241
+ "ẵ" // U+1EB5: LATIN SMALL LETTER A WITH BREVE AND TILDE
242
+ "ặ" // U+1EB7: LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
243
+ "ₐ" // U+2090: LATIN SUBSCRIPT SMALL LETTER A
244
+ "ₔ" // U+2094: LATIN SUBSCRIPT SMALL LETTER SCHWA
245
+ "ⓐ" // U+24D0: CIRCLED LATIN SMALL LETTER A
246
+ "ⱥ" // U+2C65: LATIN SMALL LETTER A WITH STROKE
247
+ "Ɐ" // U+2C6F: LATIN CAPITAL LETTER TURNED A
248
+ "a" // U+FF41: FULLWIDTH LATIN SMALL LETTER A
249
,"a", // Folded result
251
"Ꜳ" // U+A732: LATIN CAPITAL LETTER AA
252
,"AA", // Folded result
254
"Æ" // U+00C6: LATIN CAPITAL LETTER AE
255
+ "Ǣ" // U+01E2: LATIN CAPITAL LETTER AE WITH MACRON
256
+ "Ǽ" // U+01FC: LATIN CAPITAL LETTER AE WITH ACUTE
257
+ "ᴁ" // U+1D01: LATIN LETTER SMALL CAPITAL AE
258
,"AE", // Folded result
260
"Ꜵ" // U+A734: LATIN CAPITAL LETTER AO
261
,"AO", // Folded result
263
"Ꜷ" // U+A736: LATIN CAPITAL LETTER AU
264
,"AU", // Folded result
266
"Ꜹ" // U+A738: LATIN CAPITAL LETTER AV
267
+ "Ꜻ" // U+A73A: LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
268
,"AV", // Folded result
270
"Ꜽ" // U+A73C: LATIN CAPITAL LETTER AY
271
,"AY", // Folded result
273
"⒜" // U+249C: PARENTHESIZED LATIN SMALL LETTER A
274
,"(a)", // Folded result
276
"ꜳ" // U+A733: LATIN SMALL LETTER AA
277
,"aa", // Folded result
279
"æ" // U+00E6: LATIN SMALL LETTER AE
280
+ "ǣ" // U+01E3: LATIN SMALL LETTER AE WITH MACRON
281
+ "ǽ" // U+01FD: LATIN SMALL LETTER AE WITH ACUTE
282
+ "ᴂ" // U+1D02: LATIN SMALL LETTER TURNED AE
283
,"ae", // Folded result
285
"ꜵ" // U+A735: LATIN SMALL LETTER AO
286
,"ao", // Folded result
288
"ꜷ" // U+A737: LATIN SMALL LETTER AU
289
,"au", // Folded result
291
"ꜹ" // U+A739: LATIN SMALL LETTER AV
292
+ "ꜻ" // U+A73B: LATIN SMALL LETTER AV WITH HORIZONTAL BAR
293
,"av", // Folded result
295
"ꜽ" // U+A73D: LATIN SMALL LETTER AY
296
,"ay", // Folded result
298
"Ɓ" // U+0181: LATIN CAPITAL LETTER B WITH HOOK
299
+ "Ƃ" // U+0182: LATIN CAPITAL LETTER B WITH TOPBAR
300
+ "Ƀ" // U+0243: LATIN CAPITAL LETTER B WITH STROKE
301
+ "ʙ" // U+0299: LATIN LETTER SMALL CAPITAL B
302
+ "ᴃ" // U+1D03: LATIN LETTER SMALL CAPITAL BARRED B
303
+ "Ḃ" // U+1E02: LATIN CAPITAL LETTER B WITH DOT ABOVE
304
+ "Ḅ" // U+1E04: LATIN CAPITAL LETTER B WITH DOT BELOW
305
+ "Ḇ" // U+1E06: LATIN CAPITAL LETTER B WITH LINE BELOW
306
+ "Ⓑ" // U+24B7: CIRCLED LATIN CAPITAL LETTER B
307
+ "B" // U+FF22: FULLWIDTH LATIN CAPITAL LETTER B
308
,"B", // Folded result
310
"ƀ" // U+0180: LATIN SMALL LETTER B WITH STROKE
311
+ "ƃ" // U+0183: LATIN SMALL LETTER B WITH TOPBAR
312
+ "ɓ" // U+0253: LATIN SMALL LETTER B WITH HOOK
313
+ "ᵬ" // U+1D6C: LATIN SMALL LETTER B WITH MIDDLE TILDE
314
+ "ᶀ" // U+1D80: LATIN SMALL LETTER B WITH PALATAL HOOK
315
+ "ḃ" // U+1E03: LATIN SMALL LETTER B WITH DOT ABOVE
316
+ "ḅ" // U+1E05: LATIN SMALL LETTER B WITH DOT BELOW
317
+ "ḇ" // U+1E07: LATIN SMALL LETTER B WITH LINE BELOW
318
+ "ⓑ" // U+24D1: CIRCLED LATIN SMALL LETTER B
319
+ "b" // U+FF42: FULLWIDTH LATIN SMALL LETTER B
320
,"b", // Folded result
322
"⒝" // U+249D: PARENTHESIZED LATIN SMALL LETTER B
323
,"(b)", // Folded result
325
"Ç" // U+00C7: LATIN CAPITAL LETTER C WITH CEDILLA
326
+ "Ć" // U+0106: LATIN CAPITAL LETTER C WITH ACUTE
327
+ "Ĉ" // U+0108: LATIN CAPITAL LETTER C WITH CIRCUMFLEX
328
+ "Ċ" // U+010A: LATIN CAPITAL LETTER C WITH DOT ABOVE
329
+ "Č" // U+010C: LATIN CAPITAL LETTER C WITH CARON
330
+ "Ƈ" // U+0187: LATIN CAPITAL LETTER C WITH HOOK
331
+ "Ȼ" // U+023B: LATIN CAPITAL LETTER C WITH STROKE
332
+ "ʗ" // U+0297: LATIN LETTER STRETCHED C
333
+ "ᴄ" // U+1D04: LATIN LETTER SMALL CAPITAL C
334
+ "Ḉ" // U+1E08: LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
335
+ "Ⓒ" // U+24B8: CIRCLED LATIN CAPITAL LETTER C
336
+ "C" // U+FF23: FULLWIDTH LATIN CAPITAL LETTER C
337
,"C", // Folded result
339
"ç" // U+00E7: LATIN SMALL LETTER C WITH CEDILLA
340
+ "ć" // U+0107: LATIN SMALL LETTER C WITH ACUTE
341
+ "ĉ" // U+0109: LATIN SMALL LETTER C WITH CIRCUMFLEX
342
+ "ċ" // U+010B: LATIN SMALL LETTER C WITH DOT ABOVE
343
+ "č" // U+010D: LATIN SMALL LETTER C WITH CARON
344
+ "ƈ" // U+0188: LATIN SMALL LETTER C WITH HOOK
345
+ "ȼ" // U+023C: LATIN SMALL LETTER C WITH STROKE
346
+ "ɕ" // U+0255: LATIN SMALL LETTER C WITH CURL
347
+ "ḉ" // U+1E09: LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
348
+ "ↄ" // U+2184: LATIN SMALL LETTER REVERSED C
349
+ "ⓒ" // U+24D2: CIRCLED LATIN SMALL LETTER C
350
+ "Ꜿ" // U+A73E: LATIN CAPITAL LETTER REVERSED C WITH DOT
351
+ "ꜿ" // U+A73F: LATIN SMALL LETTER REVERSED C WITH DOT
352
+ "c" // U+FF43: FULLWIDTH LATIN SMALL LETTER C
353
,"c", // Folded result
355
"⒞" // U+249E: PARENTHESIZED LATIN SMALL LETTER C
356
,"(c)", // Folded result
358
"Ð" // U+00D0: LATIN CAPITAL LETTER ETH
359
+ "Ď" // U+010E: LATIN CAPITAL LETTER D WITH CARON
360
+ "Đ" // U+0110: LATIN CAPITAL LETTER D WITH STROKE
361
+ "Ɖ" // U+0189: LATIN CAPITAL LETTER AFRICAN D
362
+ "Ɗ" // U+018A: LATIN CAPITAL LETTER D WITH HOOK
363
+ "Ƌ" // U+018B: LATIN CAPITAL LETTER D WITH TOPBAR
364
+ "ᴅ" // U+1D05: LATIN LETTER SMALL CAPITAL D
365
+ "ᴆ" // U+1D06: LATIN LETTER SMALL CAPITAL ETH
366
+ "Ḋ" // U+1E0A: LATIN CAPITAL LETTER D WITH DOT ABOVE
367
+ "Ḍ" // U+1E0C: LATIN CAPITAL LETTER D WITH DOT BELOW
368
+ "Ḏ" // U+1E0E: LATIN CAPITAL LETTER D WITH LINE BELOW
369
+ "Ḑ" // U+1E10: LATIN CAPITAL LETTER D WITH CEDILLA
370
+ "Ḓ" // U+1E12: LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
371
+ "Ⓓ" // U+24B9: CIRCLED LATIN CAPITAL LETTER D
372
+ "Ꝺ" // U+A779: LATIN CAPITAL LETTER INSULAR D
373
+ "D" // U+FF24: FULLWIDTH LATIN CAPITAL LETTER D
374
,"D", // Folded result
376
"ð" // U+00F0: LATIN SMALL LETTER ETH
377
+ "ď" // U+010F: LATIN SMALL LETTER D WITH CARON
378
+ "đ" // U+0111: LATIN SMALL LETTER D WITH STROKE
379
+ "ƌ" // U+018C: LATIN SMALL LETTER D WITH TOPBAR
380
+ "ȡ" // U+0221: LATIN SMALL LETTER D WITH CURL
381
+ "ɖ" // U+0256: LATIN SMALL LETTER D WITH TAIL
382
+ "ɗ" // U+0257: LATIN SMALL LETTER D WITH HOOK
383
+ "ᵭ" // U+1D6D: LATIN SMALL LETTER D WITH MIDDLE TILDE
384
+ "ᶁ" // U+1D81: LATIN SMALL LETTER D WITH PALATAL HOOK
385
+ "ᶑ" // U+1D91: LATIN SMALL LETTER D WITH HOOK AND TAIL
386
+ "ḋ" // U+1E0B: LATIN SMALL LETTER D WITH DOT ABOVE
387
+ "ḍ" // U+1E0D: LATIN SMALL LETTER D WITH DOT BELOW
388
+ "ḏ" // U+1E0F: LATIN SMALL LETTER D WITH LINE BELOW
389
+ "ḑ" // U+1E11: LATIN SMALL LETTER D WITH CEDILLA
390
+ "ḓ" // U+1E13: LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW
391
+ "ⓓ" // U+24D3: CIRCLED LATIN SMALL LETTER D
392
+ "ꝺ" // U+A77A: LATIN SMALL LETTER INSULAR D
393
+ "d" // U+FF44: FULLWIDTH LATIN SMALL LETTER D
394
,"d", // Folded result
396
"DŽ" // U+01C4: LATIN CAPITAL LETTER DZ WITH CARON
397
+ "DZ" // U+01F1: LATIN CAPITAL LETTER DZ
398
,"DZ", // Folded result
400
"Dž" // U+01C5: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
401
+ "Dz" // U+01F2: LATIN CAPITAL LETTER D WITH SMALL LETTER Z
402
,"Dz", // Folded result
404
"⒟" // U+249F: PARENTHESIZED LATIN SMALL LETTER D
405
,"(d)", // Folded result
407
"ȸ" // U+0238: LATIN SMALL LETTER DB DIGRAPH
408
,"db", // Folded result
410
"dž" // U+01C6: LATIN SMALL LETTER DZ WITH CARON
411
+ "dz" // U+01F3: LATIN SMALL LETTER DZ
412
+ "ʣ" // U+02A3: LATIN SMALL LETTER DZ DIGRAPH
413
+ "ʥ" // U+02A5: LATIN SMALL LETTER DZ DIGRAPH WITH CURL
414
,"dz", // Folded result
416
"È" // U+00C8: LATIN CAPITAL LETTER E WITH GRAVE
417
+ "É" // U+00C9: LATIN CAPITAL LETTER E WITH ACUTE
418
+ "Ê" // U+00CA: LATIN CAPITAL LETTER E WITH CIRCUMFLEX
419
+ "Ë" // U+00CB: LATIN CAPITAL LETTER E WITH DIAERESIS
420
+ "Ē" // U+0112: LATIN CAPITAL LETTER E WITH MACRON
421
+ "Ĕ" // U+0114: LATIN CAPITAL LETTER E WITH BREVE
422
+ "Ė" // U+0116: LATIN CAPITAL LETTER E WITH DOT ABOVE
423
+ "Ę" // U+0118: LATIN CAPITAL LETTER E WITH OGONEK
424
+ "Ě" // U+011A: LATIN CAPITAL LETTER E WITH CARON
425
+ "Ǝ" // U+018E: LATIN CAPITAL LETTER REVERSED E
426
+ "Ɛ" // U+0190: LATIN CAPITAL LETTER OPEN E
427
+ "Ȅ" // U+0204: LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
428
+ "Ȇ" // U+0206: LATIN CAPITAL LETTER E WITH INVERTED BREVE
429
+ "Ȩ" // U+0228: LATIN CAPITAL LETTER E WITH CEDILLA
430
+ "Ɇ" // U+0246: LATIN CAPITAL LETTER E WITH STROKE
431
+ "ᴇ" // U+1D07: LATIN LETTER SMALL CAPITAL E
432
+ "Ḕ" // U+1E14: LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
433
+ "Ḗ" // U+1E16: LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
434
+ "Ḙ" // U+1E18: LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
435
+ "Ḛ" // U+1E1A: LATIN CAPITAL LETTER E WITH TILDE BELOW
436
+ "Ḝ" // U+1E1C: LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
437
+ "Ẹ" // U+1EB8: LATIN CAPITAL LETTER E WITH DOT BELOW
438
+ "Ẻ" // U+1EBA: LATIN CAPITAL LETTER E WITH HOOK ABOVE
439
+ "Ẽ" // U+1EBC: LATIN CAPITAL LETTER E WITH TILDE
440
+ "Ế" // U+1EBE: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
441
+ "Ề" // U+1EC0: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
442
+ "Ể" // U+1EC2: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
443
+ "Ễ" // U+1EC4: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
444
+ "Ệ" // U+1EC6: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
445
+ "Ⓔ" // U+24BA: CIRCLED LATIN CAPITAL LETTER E
446
+ "ⱻ" // U+2C7B: LATIN LETTER SMALL CAPITAL TURNED E
447
+ "E" // U+FF25: FULLWIDTH LATIN CAPITAL LETTER E
448
,"E", // Folded result
450
"è" // U+00E8: LATIN SMALL LETTER E WITH GRAVE
451
+ "é" // U+00E9: LATIN SMALL LETTER E WITH ACUTE
452
+ "ê" // U+00EA: LATIN SMALL LETTER E WITH CIRCUMFLEX
453
+ "ë" // U+00EB: LATIN SMALL LETTER E WITH DIAERESIS
454
+ "ē" // U+0113: LATIN SMALL LETTER E WITH MACRON
455
+ "ĕ" // U+0115: LATIN SMALL LETTER E WITH BREVE
456
+ "ė" // U+0117: LATIN SMALL LETTER E WITH DOT ABOVE
457
+ "ę" // U+0119: LATIN SMALL LETTER E WITH OGONEK
458
+ "ě" // U+011B: LATIN SMALL LETTER E WITH CARON
459
+ "ǝ" // U+01DD: LATIN SMALL LETTER TURNED E
460
+ "ȅ" // U+0205: LATIN SMALL LETTER E WITH DOUBLE GRAVE
461
+ "ȇ" // U+0207: LATIN SMALL LETTER E WITH INVERTED BREVE
462
+ "ȩ" // U+0229: LATIN SMALL LETTER E WITH CEDILLA
463
+ "ɇ" // U+0247: LATIN SMALL LETTER E WITH STROKE
464
+ "ɘ" // U+0258: LATIN SMALL LETTER REVERSED E
465
+ "ɛ" // U+025B: LATIN SMALL LETTER OPEN E
466
+ "ɜ" // U+025C: LATIN SMALL LETTER REVERSED OPEN E
467
+ "ɝ" // U+025D: LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
468
+ "ɞ" // U+025E: LATIN SMALL LETTER CLOSED REVERSED OPEN E
469
+ "ʚ" // U+029A: LATIN SMALL LETTER CLOSED OPEN E
470
+ "ᴈ" // U+1D08: LATIN SMALL LETTER TURNED OPEN E
471
+ "ᶒ" // U+1D92: LATIN SMALL LETTER E WITH RETROFLEX HOOK
472
+ "ᶓ" // U+1D93: LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK
473
+ "ᶔ" // U+1D94: LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK
474
+ "ḕ" // U+1E15: LATIN SMALL LETTER E WITH MACRON AND GRAVE
475
+ "ḗ" // U+1E17: LATIN SMALL LETTER E WITH MACRON AND ACUTE
476
+ "ḙ" // U+1E19: LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW
477
+ "ḛ" // U+1E1B: LATIN SMALL LETTER E WITH TILDE BELOW
478
+ "ḝ" // U+1E1D: LATIN SMALL LETTER E WITH CEDILLA AND BREVE
479
+ "ẹ" // U+1EB9: LATIN SMALL LETTER E WITH DOT BELOW
480
+ "ẻ" // U+1EBB: LATIN SMALL LETTER E WITH HOOK ABOVE
481
+ "ẽ" // U+1EBD: LATIN SMALL LETTER E WITH TILDE
482
+ "ế" // U+1EBF: LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
483
+ "ề" // U+1EC1: LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
484
+ "ể" // U+1EC3: LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
485
+ "ễ" // U+1EC5: LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
486
+ "ệ" // U+1EC7: LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
487
+ "ₑ" // U+2091: LATIN SUBSCRIPT SMALL LETTER E
488
+ "ⓔ" // U+24D4: CIRCLED LATIN SMALL LETTER E
489
+ "ⱸ" // U+2C78: LATIN SMALL LETTER E WITH NOTCH
490
+ "e" // U+FF45: FULLWIDTH LATIN SMALL LETTER E
491
,"e", // Folded result
493
"⒠" // U+24A0: PARENTHESIZED LATIN SMALL LETTER E
494
,"(e)", // Folded result
496
"Ƒ" // U+0191: LATIN CAPITAL LETTER F WITH HOOK
497
+ "Ḟ" // U+1E1E: LATIN CAPITAL LETTER F WITH DOT ABOVE
498
+ "Ⓕ" // U+24BB: CIRCLED LATIN CAPITAL LETTER F
499
+ "ꜰ" // U+A730: LATIN LETTER SMALL CAPITAL F
500
+ "Ꝼ" // U+A77B: LATIN CAPITAL LETTER INSULAR F
501
+ "ꟻ" // U+A7FB: LATIN EPIGRAPHIC LETTER REVERSED F
502
+ "F" // U+FF26: FULLWIDTH LATIN CAPITAL LETTER F
503
,"F", // Folded result
505
"ƒ" // U+0192: LATIN SMALL LETTER F WITH HOOK
506
+ "ᵮ" // U+1D6E: LATIN SMALL LETTER F WITH MIDDLE TILDE
507
+ "ᶂ" // U+1D82: LATIN SMALL LETTER F WITH PALATAL HOOK
508
+ "ḟ" // U+1E1F: LATIN SMALL LETTER F WITH DOT ABOVE
509
+ "ẛ" // U+1E9B: LATIN SMALL LETTER LONG S WITH DOT ABOVE
510
+ "ⓕ" // U+24D5: CIRCLED LATIN SMALL LETTER F
511
+ "ꝼ" // U+A77C: LATIN SMALL LETTER INSULAR F
512
+ "f" // U+FF46: FULLWIDTH LATIN SMALL LETTER F
513
,"f", // Folded result
515
"⒡" // U+24A1: PARENTHESIZED LATIN SMALL LETTER F
516
,"(f)", // Folded result
518
"ff" // U+FB00: LATIN SMALL LIGATURE FF
519
,"ff", // Folded result
521
"ffi" // U+FB03: LATIN SMALL LIGATURE FFI
522
,"ffi", // Folded result
524
"ffl" // U+FB04: LATIN SMALL LIGATURE FFL
525
,"ffl", // Folded result
527
"fi" // U+FB01: LATIN SMALL LIGATURE FI
528
,"fi", // Folded result
530
"fl" // U+FB02: LATIN SMALL LIGATURE FL
531
,"fl", // Folded result
533
"Ĝ" // U+011C: LATIN CAPITAL LETTER G WITH CIRCUMFLEX
534
+ "Ğ" // U+011E: LATIN CAPITAL LETTER G WITH BREVE
535
+ "Ġ" // U+0120: LATIN CAPITAL LETTER G WITH DOT ABOVE
536
+ "Ģ" // U+0122: LATIN CAPITAL LETTER G WITH CEDILLA
537
+ "Ɠ" // U+0193: LATIN CAPITAL LETTER G WITH HOOK
538
+ "Ǥ" // U+01E4: LATIN CAPITAL LETTER G WITH STROKE
539
+ "ǥ" // U+01E5: LATIN SMALL LETTER G WITH STROKE
540
+ "Ǧ" // U+01E6: LATIN CAPITAL LETTER G WITH CARON
541
+ "ǧ" // U+01E7: LATIN SMALL LETTER G WITH CARON
542
+ "Ǵ" // U+01F4: LATIN CAPITAL LETTER G WITH ACUTE
543
+ "ɢ" // U+0262: LATIN LETTER SMALL CAPITAL G
544
+ "ʛ" // U+029B: LATIN LETTER SMALL CAPITAL G WITH HOOK
545
+ "Ḡ" // U+1E20: LATIN CAPITAL LETTER G WITH MACRON
546
+ "Ⓖ" // U+24BC: CIRCLED LATIN CAPITAL LETTER G
547
+ "Ᵹ" // U+A77D: LATIN CAPITAL LETTER INSULAR G
548
+ "Ꝿ" // U+A77E: LATIN CAPITAL LETTER TURNED INSULAR G
549
+ "G" // U+FF27: FULLWIDTH LATIN CAPITAL LETTER G
550
,"G", // Folded result
552
"ĝ" // U+011D: LATIN SMALL LETTER G WITH CIRCUMFLEX
553
+ "ğ" // U+011F: LATIN SMALL LETTER G WITH BREVE
554
+ "ġ" // U+0121: LATIN SMALL LETTER G WITH DOT ABOVE
555
+ "ģ" // U+0123: LATIN SMALL LETTER G WITH CEDILLA
556
+ "ǵ" // U+01F5: LATIN SMALL LETTER G WITH ACUTE
557
+ "ɠ" // U+0260: LATIN SMALL LETTER G WITH HOOK
558
+ "ɡ" // U+0261: LATIN SMALL LETTER SCRIPT G
559
+ "ᵷ" // U+1D77: LATIN SMALL LETTER TURNED G
560
+ "ᵹ" // U+1D79: LATIN SMALL LETTER INSULAR G
561
+ "ᶃ" // U+1D83: LATIN SMALL LETTER G WITH PALATAL HOOK
562
+ "ḡ" // U+1E21: LATIN SMALL LETTER G WITH MACRON
563
+ "ⓖ" // U+24D6: CIRCLED LATIN SMALL LETTER G
564
+ "ꝿ" // U+A77F: LATIN SMALL LETTER TURNED INSULAR G
565
+ "g" // U+FF47: FULLWIDTH LATIN SMALL LETTER G
566
,"g", // Folded result
568
"⒢" // U+24A2: PARENTHESIZED LATIN SMALL LETTER G
569
,"(g)", // Folded result
571
"Ĥ" // U+0124: LATIN CAPITAL LETTER H WITH CIRCUMFLEX
572
+ "Ħ" // U+0126: LATIN CAPITAL LETTER H WITH STROKE
573
+ "Ȟ" // U+021E: LATIN CAPITAL LETTER H WITH CARON
574
+ "ʜ" // U+029C: LATIN LETTER SMALL CAPITAL H
575
+ "Ḣ" // U+1E22: LATIN CAPITAL LETTER H WITH DOT ABOVE
576
+ "Ḥ" // U+1E24: LATIN CAPITAL LETTER H WITH DOT BELOW
577
+ "Ḧ" // U+1E26: LATIN CAPITAL LETTER H WITH DIAERESIS
578
+ "Ḩ" // U+1E28: LATIN CAPITAL LETTER H WITH CEDILLA
579
+ "Ḫ" // U+1E2A: LATIN CAPITAL LETTER H WITH BREVE BELOW
580
+ "Ⓗ" // U+24BD: CIRCLED LATIN CAPITAL LETTER H
581
+ "Ⱨ" // U+2C67: LATIN CAPITAL LETTER H WITH DESCENDER
582
+ "Ⱶ" // U+2C75: LATIN CAPITAL LETTER HALF H
583
+ "H" // U+FF28: FULLWIDTH LATIN CAPITAL LETTER H
584
,"H", // Folded result
586
"ĥ" // U+0125: LATIN SMALL LETTER H WITH CIRCUMFLEX
587
+ "ħ" // U+0127: LATIN SMALL LETTER H WITH STROKE
588
+ "ȟ" // U+021F: LATIN SMALL LETTER H WITH CARON
589
+ "ɥ" // U+0265: LATIN SMALL LETTER TURNED H
590
+ "ɦ" // U+0266: LATIN SMALL LETTER H WITH HOOK
591
+ "ʮ" // U+02AE: LATIN SMALL LETTER TURNED H WITH FISHHOOK
592
+ "ʯ" // U+02AF: LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
593
+ "ḣ" // U+1E23: LATIN SMALL LETTER H WITH DOT ABOVE
594
+ "ḥ" // U+1E25: LATIN SMALL LETTER H WITH DOT BELOW
595
+ "ḧ" // U+1E27: LATIN SMALL LETTER H WITH DIAERESIS
596
+ "ḩ" // U+1E29: LATIN SMALL LETTER H WITH CEDILLA
597
+ "ḫ" // U+1E2B: LATIN SMALL LETTER H WITH BREVE BELOW
598
+ "ẖ" // U+1E96: LATIN SMALL LETTER H WITH LINE BELOW
599
+ "ⓗ" // U+24D7: CIRCLED LATIN SMALL LETTER H
600
+ "ⱨ" // U+2C68: LATIN SMALL LETTER H WITH DESCENDER
601
+ "ⱶ" // U+2C76: LATIN SMALL LETTER HALF H
602
+ "h" // U+FF48: FULLWIDTH LATIN SMALL LETTER H
603
,"h", // Folded result
605
"Ƕ" // U+01F6: LATIN CAPITAL LETTER HWAIR
606
,"HV", // Folded result
608
"⒣" // U+24A3: PARENTHESIZED LATIN SMALL LETTER H
609
,"(h)", // Folded result
611
"ƕ" // U+0195: LATIN SMALL LETTER HV
612
,"hv", // Folded result
614
"Ì" // U+00CC: LATIN CAPITAL LETTER I WITH GRAVE
615
+ "Í" // U+00CD: LATIN CAPITAL LETTER I WITH ACUTE
616
+ "Î" // U+00CE: LATIN CAPITAL LETTER I WITH CIRCUMFLEX
617
+ "Ï" // U+00CF: LATIN CAPITAL LETTER I WITH DIAERESIS
618
+ "Ĩ" // U+0128: LATIN CAPITAL LETTER I WITH TILDE
619
+ "Ī" // U+012A: LATIN CAPITAL LETTER I WITH MACRON
620
+ "Ĭ" // U+012C: LATIN CAPITAL LETTER I WITH BREVE
621
+ "Į" // U+012E: LATIN CAPITAL LETTER I WITH OGONEK
622
+ "İ" // U+0130: LATIN CAPITAL LETTER I WITH DOT ABOVE
623
+ "Ɩ" // U+0196: LATIN CAPITAL LETTER IOTA
624
+ "Ɨ" // U+0197: LATIN CAPITAL LETTER I WITH STROKE
625
+ "Ǐ" // U+01CF: LATIN CAPITAL LETTER I WITH CARON
626
+ "Ȉ" // U+0208: LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
627
+ "Ȋ" // U+020A: LATIN CAPITAL LETTER I WITH INVERTED BREVE
628
+ "ɪ" // U+026A: LATIN LETTER SMALL CAPITAL I
629
+ "ᵻ" // U+1D7B: LATIN SMALL CAPITAL LETTER I WITH STROKE
630
+ "Ḭ" // U+1E2C: LATIN CAPITAL LETTER I WITH TILDE BELOW
631
+ "Ḯ" // U+1E2E: LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
632
+ "Ỉ" // U+1EC8: LATIN CAPITAL LETTER I WITH HOOK ABOVE
633
+ "Ị" // U+1ECA: LATIN CAPITAL LETTER I WITH DOT BELOW
634
+ "Ⓘ" // U+24BE: CIRCLED LATIN CAPITAL LETTER I
635
+ "ꟾ" // U+A7FE: LATIN EPIGRAPHIC LETTER I LONGA
636
+ "I" // U+FF29: FULLWIDTH LATIN CAPITAL LETTER I
637
,"I", // Folded result
639
"ì" // U+00EC: LATIN SMALL LETTER I WITH GRAVE
640
+ "í" // U+00ED: LATIN SMALL LETTER I WITH ACUTE
641
+ "î" // U+00EE: LATIN SMALL LETTER I WITH CIRCUMFLEX
642
+ "ï" // U+00EF: LATIN SMALL LETTER I WITH DIAERESIS
643
+ "ĩ" // U+0129: LATIN SMALL LETTER I WITH TILDE
644
+ "ī" // U+012B: LATIN SMALL LETTER I WITH MACRON
645
+ "ĭ" // U+012D: LATIN SMALL LETTER I WITH BREVE
646
+ "į" // U+012F: LATIN SMALL LETTER I WITH OGONEK
647
+ "ı" // U+0131: LATIN SMALL LETTER DOTLESS I
648
+ "ǐ" // U+01D0: LATIN SMALL LETTER I WITH CARON
649
+ "ȉ" // U+0209: LATIN SMALL LETTER I WITH DOUBLE GRAVE
650
+ "ȋ" // U+020B: LATIN SMALL LETTER I WITH INVERTED BREVE
651
+ "ɨ" // U+0268: LATIN SMALL LETTER I WITH STROKE
652
+ "ᴉ" // U+1D09: LATIN SMALL LETTER TURNED I
653
+ "ᵢ" // U+1D62: LATIN SUBSCRIPT SMALL LETTER I
654
+ "ᵼ" // U+1D7C: LATIN SMALL LETTER IOTA WITH STROKE
655
+ "ᶖ" // U+1D96: LATIN SMALL LETTER I WITH RETROFLEX HOOK
656
+ "ḭ" // U+1E2D: LATIN SMALL LETTER I WITH TILDE BELOW
657
+ "ḯ" // U+1E2F: LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
658
+ "ỉ" // U+1EC9: LATIN SMALL LETTER I WITH HOOK ABOVE
659
+ "ị" // U+1ECB: LATIN SMALL LETTER I WITH DOT BELOW
660
+ "ⁱ" // U+2071: SUPERSCRIPT LATIN SMALL LETTER I
661
+ "ⓘ" // U+24D8: CIRCLED LATIN SMALL LETTER I
662
+ "i" // U+FF49: FULLWIDTH LATIN SMALL LETTER I
663
,"i", // Folded result
665
"IJ" // U+0132: LATIN CAPITAL LIGATURE IJ
666
,"IJ", // Folded result
668
"⒤" // U+24A4: PARENTHESIZED LATIN SMALL LETTER I
669
,"(i)", // Folded result
671
"ij" // U+0133: LATIN SMALL LIGATURE IJ
672
,"ij", // Folded result
674
"Ĵ" // U+0134: LATIN CAPITAL LETTER J WITH CIRCUMFLEX
675
+ "Ɉ" // U+0248: LATIN CAPITAL LETTER J WITH STROKE
676
+ "ᴊ" // U+1D0A: LATIN LETTER SMALL CAPITAL J
677
+ "Ⓙ" // U+24BF: CIRCLED LATIN CAPITAL LETTER J
678
+ "J" // U+FF2A: FULLWIDTH LATIN CAPITAL LETTER J
679
,"J", // Folded result
681
"ĵ" // U+0135: LATIN SMALL LETTER J WITH CIRCUMFLEX
682
+ "ǰ" // U+01F0: LATIN SMALL LETTER J WITH CARON
683
+ "ȷ" // U+0237: LATIN SMALL LETTER DOTLESS J
684
+ "ɉ" // U+0249: LATIN SMALL LETTER J WITH STROKE
685
+ "ɟ" // U+025F: LATIN SMALL LETTER DOTLESS J WITH STROKE
686
+ "ʄ" // U+0284: LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
687
+ "ʝ" // U+029D: LATIN SMALL LETTER J WITH CROSSED-TAIL
688
+ "ⓙ" // U+24D9: CIRCLED LATIN SMALL LETTER J
689
+ "ⱼ" // U+2C7C: LATIN SUBSCRIPT SMALL LETTER J
690
+ "j" // U+FF4A: FULLWIDTH LATIN SMALL LETTER J
691
,"j", // Folded result
693
"⒥" // U+24A5: PARENTHESIZED LATIN SMALL LETTER J
694
,"(j)", // Folded result
696
"Ķ" // U+0136: LATIN CAPITAL LETTER K WITH CEDILLA
697
+ "Ƙ" // U+0198: LATIN CAPITAL LETTER K WITH HOOK
698
+ "Ǩ" // U+01E8: LATIN CAPITAL LETTER K WITH CARON
699
+ "ᴋ" // U+1D0B: LATIN LETTER SMALL CAPITAL K
700
+ "Ḱ" // U+1E30: LATIN CAPITAL LETTER K WITH ACUTE
701
+ "Ḳ" // U+1E32: LATIN CAPITAL LETTER K WITH DOT BELOW
702
+ "Ḵ" // U+1E34: LATIN CAPITAL LETTER K WITH LINE BELOW
703
+ "Ⓚ" // U+24C0: CIRCLED LATIN CAPITAL LETTER K
704
+ "Ⱪ" // U+2C69: LATIN CAPITAL LETTER K WITH DESCENDER
705
+ "Ꝁ" // U+A740: LATIN CAPITAL LETTER K WITH STROKE
706
+ "Ꝃ" // U+A742: LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
707
+ "Ꝅ" // U+A744: LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
708
+ "K" // U+FF2B: FULLWIDTH LATIN CAPITAL LETTER K
709
,"K", // Folded result
711
"ķ" // U+0137: LATIN SMALL LETTER K WITH CEDILLA
712
+ "ƙ" // U+0199: LATIN SMALL LETTER K WITH HOOK
713
+ "ǩ" // U+01E9: LATIN SMALL LETTER K WITH CARON
714
+ "ʞ" // U+029E: LATIN SMALL LETTER TURNED K
715
+ "ᶄ" // U+1D84: LATIN SMALL LETTER K WITH PALATAL HOOK
716
+ "ḱ" // U+1E31: LATIN SMALL LETTER K WITH ACUTE
717
+ "ḳ" // U+1E33: LATIN SMALL LETTER K WITH DOT BELOW
718
+ "ḵ" // U+1E35: LATIN SMALL LETTER K WITH LINE BELOW
719
+ "ⓚ" // U+24DA: CIRCLED LATIN SMALL LETTER K
720
+ "ⱪ" // U+2C6A: LATIN SMALL LETTER K WITH DESCENDER
721
+ "ꝁ" // U+A741: LATIN SMALL LETTER K WITH STROKE
722
+ "ꝃ" // U+A743: LATIN SMALL LETTER K WITH DIAGONAL STROKE
723
+ "ꝅ" // U+A745: LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE
724
+ "k" // U+FF4B: FULLWIDTH LATIN SMALL LETTER K
725
,"k", // Folded result
727
"⒦" // U+24A6: PARENTHESIZED LATIN SMALL LETTER K
728
,"(k)", // Folded result
730
"Ĺ" // U+0139: LATIN CAPITAL LETTER L WITH ACUTE
731
+ "Ļ" // U+013B: LATIN CAPITAL LETTER L WITH CEDILLA
732
+ "Ľ" // U+013D: LATIN CAPITAL LETTER L WITH CARON
733
+ "Ŀ" // U+013F: LATIN CAPITAL LETTER L WITH MIDDLE DOT
734
+ "Ł" // U+0141: LATIN CAPITAL LETTER L WITH STROKE
735
+ "Ƚ" // U+023D: LATIN CAPITAL LETTER L WITH BAR
736
+ "ʟ" // U+029F: LATIN LETTER SMALL CAPITAL L
737
+ "ᴌ" // U+1D0C: LATIN LETTER SMALL CAPITAL L WITH STROKE
738
+ "Ḷ" // U+1E36: LATIN CAPITAL LETTER L WITH DOT BELOW
739
+ "Ḹ" // U+1E38: LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
740
+ "Ḻ" // U+1E3A: LATIN CAPITAL LETTER L WITH LINE BELOW
741
+ "Ḽ" // U+1E3C: LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
742
+ "Ⓛ" // U+24C1: CIRCLED LATIN CAPITAL LETTER L
743
+ "Ⱡ" // U+2C60: LATIN CAPITAL LETTER L WITH DOUBLE BAR
744
+ "Ɫ" // U+2C62: LATIN CAPITAL LETTER L WITH MIDDLE TILDE
745
+ "Ꝇ" // U+A746: LATIN CAPITAL LETTER BROKEN L
746
+ "Ꝉ" // U+A748: LATIN CAPITAL LETTER L WITH HIGH STROKE
747
+ "Ꞁ" // U+A780: LATIN CAPITAL LETTER TURNED L
748
+ "L" // U+FF2C: FULLWIDTH LATIN CAPITAL LETTER L
749
,"L", // Folded result
751
"ĺ" // U+013A: LATIN SMALL LETTER L WITH ACUTE
752
+ "ļ" // U+013C: LATIN SMALL LETTER L WITH CEDILLA
753
+ "ľ" // U+013E: LATIN SMALL LETTER L WITH CARON
754
+ "ŀ" // U+0140: LATIN SMALL LETTER L WITH MIDDLE DOT
755
+ "ł" // U+0142: LATIN SMALL LETTER L WITH STROKE
756
+ "ƚ" // U+019A: LATIN SMALL LETTER L WITH BAR
757
+ "ȴ" // U+0234: LATIN SMALL LETTER L WITH CURL
758
+ "ɫ" // U+026B: LATIN SMALL LETTER L WITH MIDDLE TILDE
759
+ "ɬ" // U+026C: LATIN SMALL LETTER L WITH BELT
760
+ "ɭ" // U+026D: LATIN SMALL LETTER L WITH RETROFLEX HOOK
761
+ "ᶅ" // U+1D85: LATIN SMALL LETTER L WITH PALATAL HOOK
762
+ "ḷ" // U+1E37: LATIN SMALL LETTER L WITH DOT BELOW
763
+ "ḹ" // U+1E39: LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
764
+ "ḻ" // U+1E3B: LATIN SMALL LETTER L WITH LINE BELOW
765
+ "ḽ" // U+1E3D: LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW
766
+ "ⓛ" // U+24DB: CIRCLED LATIN SMALL LETTER L
767
+ "ⱡ" // U+2C61: LATIN SMALL LETTER L WITH DOUBLE BAR
768
+ "ꝇ" // U+A747: LATIN SMALL LETTER BROKEN L
769
+ "ꝉ" // U+A749: LATIN SMALL LETTER L WITH HIGH STROKE
770
+ "ꞁ" // U+A781: LATIN SMALL LETTER TURNED L
771
+ "l" // U+FF4C: FULLWIDTH LATIN SMALL LETTER L
772
,"l", // Folded result
774
"LJ" // U+01C7: LATIN CAPITAL LETTER LJ
775
,"LJ", // Folded result
777
"Ỻ" // U+1EFA: LATIN CAPITAL LETTER MIDDLE-WELSH LL
778
,"LL", // Folded result
780
"Lj" // U+01C8: LATIN CAPITAL LETTER L WITH SMALL LETTER J
781
,"Lj", // Folded result
783
"⒧" // U+24A7: PARENTHESIZED LATIN SMALL LETTER L
784
,"(l)", // Folded result
786
"lj" // U+01C9: LATIN SMALL LETTER LJ
787
,"lj", // Folded result
789
"ỻ" // U+1EFB: LATIN SMALL LETTER MIDDLE-WELSH LL
790
,"ll", // Folded result
792
"ʪ" // U+02AA: LATIN SMALL LETTER LS DIGRAPH
793
,"ls", // Folded result
795
"ʫ" // U+02AB: LATIN SMALL LETTER LZ DIGRAPH
796
,"lz", // Folded result
798
"Ɯ" // U+019C: LATIN CAPITAL LETTER TURNED M
799
+ "ᴍ" // U+1D0D: LATIN LETTER SMALL CAPITAL M
800
+ "Ḿ" // U+1E3E: LATIN CAPITAL LETTER M WITH ACUTE
801
+ "Ṁ" // U+1E40: LATIN CAPITAL LETTER M WITH DOT ABOVE
802
+ "Ṃ" // U+1E42: LATIN CAPITAL LETTER M WITH DOT BELOW
803
+ "Ⓜ" // U+24C2: CIRCLED LATIN CAPITAL LETTER M
804
+ "Ɱ" // U+2C6E: LATIN CAPITAL LETTER M WITH HOOK
805
+ "ꟽ" // U+A7FD: LATIN EPIGRAPHIC LETTER INVERTED M
806
+ "ꟿ" // U+A7FF: LATIN EPIGRAPHIC LETTER ARCHAIC M
807
+ "M" // U+FF2D: FULLWIDTH LATIN CAPITAL LETTER M
808
,"M", // Folded result
810
"ɯ" // U+026F: LATIN SMALL LETTER TURNED M
811
+ "ɰ" // U+0270: LATIN SMALL LETTER TURNED M WITH LONG LEG
812
+ "ɱ" // U+0271: LATIN SMALL LETTER M WITH HOOK
813
+ "ᵯ" // U+1D6F: LATIN SMALL LETTER M WITH MIDDLE TILDE
814
+ "ᶆ" // U+1D86: LATIN SMALL LETTER M WITH PALATAL HOOK
815
+ "ḿ" // U+1E3F: LATIN SMALL LETTER M WITH ACUTE
816
+ "ṁ" // U+1E41: LATIN SMALL LETTER M WITH DOT ABOVE
817
+ "ṃ" // U+1E43: LATIN SMALL LETTER M WITH DOT BELOW
818
+ "ⓜ" // U+24DC: CIRCLED LATIN SMALL LETTER M
819
+ "m" // U+FF4D: FULLWIDTH LATIN SMALL LETTER M
820
,"m", // Folded result
822
"⒨" // U+24A8: PARENTHESIZED LATIN SMALL LETTER M
823
,"(m)", // Folded result
825
"Ñ" // U+00D1: LATIN CAPITAL LETTER N WITH TILDE
826
+ "Ń" // U+0143: LATIN CAPITAL LETTER N WITH ACUTE
827
+ "Ņ" // U+0145: LATIN CAPITAL LETTER N WITH CEDILLA
828
+ "Ň" // U+0147: LATIN CAPITAL LETTER N WITH CARON
829
+ "Ŋ" // U+014A: LATIN CAPITAL LETTER ENG
830
+ "Ɲ" // U+019D: LATIN CAPITAL LETTER N WITH LEFT HOOK
831
+ "Ǹ" // U+01F8: LATIN CAPITAL LETTER N WITH GRAVE
832
+ "Ƞ" // U+0220: LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
833
+ "ɴ" // U+0274: LATIN LETTER SMALL CAPITAL N
834
+ "ᴎ" // U+1D0E: LATIN LETTER SMALL CAPITAL REVERSED N
835
+ "Ṅ" // U+1E44: LATIN CAPITAL LETTER N WITH DOT ABOVE
836
+ "Ṇ" // U+1E46: LATIN CAPITAL LETTER N WITH DOT BELOW
837
+ "Ṉ" // U+1E48: LATIN CAPITAL LETTER N WITH LINE BELOW
838
+ "Ṋ" // U+1E4A: LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
839
+ "Ⓝ" // U+24C3: CIRCLED LATIN CAPITAL LETTER N
840
+ "N" // U+FF2E: FULLWIDTH LATIN CAPITAL LETTER N
841
,"N", // Folded result
843
"ñ" // U+00F1: LATIN SMALL LETTER N WITH TILDE
844
+ "ń" // U+0144: LATIN SMALL LETTER N WITH ACUTE
845
+ "ņ" // U+0146: LATIN SMALL LETTER N WITH CEDILLA
846
+ "ň" // U+0148: LATIN SMALL LETTER N WITH CARON
847
+ "ʼn" // U+0149: LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
848
+ "ŋ" // U+014B: LATIN SMALL LETTER ENG
849
+ "ƞ" // U+019E: LATIN SMALL LETTER N WITH LONG RIGHT LEG
850
+ "ǹ" // U+01F9: LATIN SMALL LETTER N WITH GRAVE
851
+ "ȵ" // U+0235: LATIN SMALL LETTER N WITH CURL
852
+ "ɲ" // U+0272: LATIN SMALL LETTER N WITH LEFT HOOK
853
+ "ɳ" // U+0273: LATIN SMALL LETTER N WITH RETROFLEX HOOK
854
+ "ᵰ" // U+1D70: LATIN SMALL LETTER N WITH MIDDLE TILDE
855
+ "ᶇ" // U+1D87: LATIN SMALL LETTER N WITH PALATAL HOOK
856
+ "ṅ" // U+1E45: LATIN SMALL LETTER N WITH DOT ABOVE
857
+ "ṇ" // U+1E47: LATIN SMALL LETTER N WITH DOT BELOW
858
+ "ṉ" // U+1E49: LATIN SMALL LETTER N WITH LINE BELOW
859
+ "ṋ" // U+1E4B: LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW
860
+ "ⁿ" // U+207F: SUPERSCRIPT LATIN SMALL LETTER N
861
+ "ⓝ" // U+24DD: CIRCLED LATIN SMALL LETTER N
862
+ "n" // U+FF4E: FULLWIDTH LATIN SMALL LETTER N
863
,"n", // Folded result
865
"NJ" // U+01CA: LATIN CAPITAL LETTER NJ
866
,"NJ", // Folded result
868
"Nj" // U+01CB: LATIN CAPITAL LETTER N WITH SMALL LETTER J
869
,"Nj", // Folded result
871
"⒩" // U+24A9: PARENTHESIZED LATIN SMALL LETTER N
872
,"(n)", // Folded result
874
"nj" // U+01CC: LATIN SMALL LETTER NJ
875
,"nj", // Folded result
877
"Ò" // U+00D2: LATIN CAPITAL LETTER O WITH GRAVE
878
+ "Ó" // U+00D3: LATIN CAPITAL LETTER O WITH ACUTE
879
+ "Ô" // U+00D4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX
880
+ "Õ" // U+00D5: LATIN CAPITAL LETTER O WITH TILDE
881
+ "Ö" // U+00D6: LATIN CAPITAL LETTER O WITH DIAERESIS
882
+ "Ø" // U+00D8: LATIN CAPITAL LETTER O WITH STROKE
883
+ "Ō" // U+014C: LATIN CAPITAL LETTER O WITH MACRON
884
+ "Ŏ" // U+014E: LATIN CAPITAL LETTER O WITH BREVE
885
+ "Ő" // U+0150: LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
886
+ "Ɔ" // U+0186: LATIN CAPITAL LETTER OPEN O
887
+ "Ɵ" // U+019F: LATIN CAPITAL LETTER O WITH MIDDLE TILDE
888
+ "Ơ" // U+01A0: LATIN CAPITAL LETTER O WITH HORN
889
+ "Ǒ" // U+01D1: LATIN CAPITAL LETTER O WITH CARON
890
+ "Ǫ" // U+01EA: LATIN CAPITAL LETTER O WITH OGONEK
891
+ "Ǭ" // U+01EC: LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
892
+ "Ǿ" // U+01FE: LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
893
+ "Ȍ" // U+020C: LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
894
+ "Ȏ" // U+020E: LATIN CAPITAL LETTER O WITH INVERTED BREVE
895
+ "Ȫ" // U+022A: LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
896
+ "Ȭ" // U+022C: LATIN CAPITAL LETTER O WITH TILDE AND MACRON
897
+ "Ȯ" // U+022E: LATIN CAPITAL LETTER O WITH DOT ABOVE
898
+ "Ȱ" // U+0230: LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
899
+ "ᴏ" // U+1D0F: LATIN LETTER SMALL CAPITAL O
900
+ "ᴐ" // U+1D10: LATIN LETTER SMALL CAPITAL OPEN O
901
+ "Ṍ" // U+1E4C: LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
902
+ "Ṏ" // U+1E4E: LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
903
+ "Ṑ" // U+1E50: LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
904
+ "Ṓ" // U+1E52: LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
905
+ "Ọ" // U+1ECC: LATIN CAPITAL LETTER O WITH DOT BELOW
906
+ "Ỏ" // U+1ECE: LATIN CAPITAL LETTER O WITH HOOK ABOVE
907
+ "Ố" // U+1ED0: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
908
+ "Ồ" // U+1ED2: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
909
+ "Ổ" // U+1ED4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
910
+ "Ỗ" // U+1ED6: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
911
+ "Ộ" // U+1ED8: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
912
+ "Ớ" // U+1EDA: LATIN CAPITAL LETTER O WITH HORN AND ACUTE
913
+ "Ờ" // U+1EDC: LATIN CAPITAL LETTER O WITH HORN AND GRAVE
914
+ "Ở" // U+1EDE: LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
915
+ "Ỡ" // U+1EE0: LATIN CAPITAL LETTER O WITH HORN AND TILDE
916
+ "Ợ" // U+1EE2: LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
917
+ "Ⓞ" // U+24C4: CIRCLED LATIN CAPITAL LETTER O
918
+ "Ꝋ" // U+A74A: LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
919
+ "Ꝍ" // U+A74C: LATIN CAPITAL LETTER O WITH LOOP
920
+ "O" // U+FF2F: FULLWIDTH LATIN CAPITAL LETTER O
921
,"O", // Folded result
923
"ò" // U+00F2: LATIN SMALL LETTER O WITH GRAVE
924
+ "ó" // U+00F3: LATIN SMALL LETTER O WITH ACUTE
925
+ "ô" // U+00F4: LATIN SMALL LETTER O WITH CIRCUMFLEX
926
+ "õ" // U+00F5: LATIN SMALL LETTER O WITH TILDE
927
+ "ö" // U+00F6: LATIN SMALL LETTER O WITH DIAERESIS
928
+ "ø" // U+00F8: LATIN SMALL LETTER O WITH STROKE
929
+ "ō" // U+014D: LATIN SMALL LETTER O WITH MACRON
930
+ "ŏ" // U+014F: LATIN SMALL LETTER O WITH BREVE
931
+ "ő" // U+0151: LATIN SMALL LETTER O WITH DOUBLE ACUTE
932
+ "ơ" // U+01A1: LATIN SMALL LETTER O WITH HORN
933
+ "ǒ" // U+01D2: LATIN SMALL LETTER O WITH CARON
934
+ "ǫ" // U+01EB: LATIN SMALL LETTER O WITH OGONEK
935
+ "ǭ" // U+01ED: LATIN SMALL LETTER O WITH OGONEK AND MACRON
936
+ "ǿ" // U+01FF: LATIN SMALL LETTER O WITH STROKE AND ACUTE
937
+ "ȍ" // U+020D: LATIN SMALL LETTER O WITH DOUBLE GRAVE
938
+ "ȏ" // U+020F: LATIN SMALL LETTER O WITH INVERTED BREVE
939
+ "ȫ" // U+022B: LATIN SMALL LETTER O WITH DIAERESIS AND MACRON
940
+ "ȭ" // U+022D: LATIN SMALL LETTER O WITH TILDE AND MACRON
941
+ "ȯ" // U+022F: LATIN SMALL LETTER O WITH DOT ABOVE
942
+ "ȱ" // U+0231: LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON
943
+ "ɔ" // U+0254: LATIN SMALL LETTER OPEN O
944
+ "ɵ" // U+0275: LATIN SMALL LETTER BARRED O
945
+ "ᴖ" // U+1D16: LATIN SMALL LETTER TOP HALF O
946
+ "ᴗ" // U+1D17: LATIN SMALL LETTER BOTTOM HALF O
947
+ "ᶗ" // U+1D97: LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK
948
+ "ṍ" // U+1E4D: LATIN SMALL LETTER O WITH TILDE AND ACUTE
949
+ "ṏ" // U+1E4F: LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
950
+ "ṑ" // U+1E51: LATIN SMALL LETTER O WITH MACRON AND GRAVE
951
+ "ṓ" // U+1E53: LATIN SMALL LETTER O WITH MACRON AND ACUTE
952
+ "ọ" // U+1ECD: LATIN SMALL LETTER O WITH DOT BELOW
953
+ "ỏ" // U+1ECF: LATIN SMALL LETTER O WITH HOOK ABOVE
954
+ "ố" // U+1ED1: LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
955
+ "ồ" // U+1ED3: LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
956
+ "ổ" // U+1ED5: LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
957
+ "ỗ" // U+1ED7: LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
958
+ "ộ" // U+1ED9: LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
959
+ "ớ" // U+1EDB: LATIN SMALL LETTER O WITH HORN AND ACUTE
960
+ "ờ" // U+1EDD: LATIN SMALL LETTER O WITH HORN AND GRAVE
961
+ "ở" // U+1EDF: LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE
962
+ "ỡ" // U+1EE1: LATIN SMALL LETTER O WITH HORN AND TILDE
963
+ "ợ" // U+1EE3: LATIN SMALL LETTER O WITH HORN AND DOT BELOW
964
+ "ₒ" // U+2092: LATIN SUBSCRIPT SMALL LETTER O
965
+ "ⓞ" // U+24DE: CIRCLED LATIN SMALL LETTER O
966
+ "ⱺ" // U+2C7A: LATIN SMALL LETTER O WITH LOW RING INSIDE
967
+ "ꝋ" // U+A74B: LATIN SMALL LETTER O WITH LONG STROKE OVERLAY
968
+ "ꝍ" // U+A74D: LATIN SMALL LETTER O WITH LOOP
969
+ "o" // U+FF4F: FULLWIDTH LATIN SMALL LETTER O
970
,"o", // Folded result
972
"Œ" // U+0152: LATIN CAPITAL LIGATURE OE
973
+ "ɶ" // U+0276: LATIN LETTER SMALL CAPITAL OE
974
,"OE", // Folded result
976
"Ꝏ" // U+A74E: LATIN CAPITAL LETTER OO
977
,"OO", // Folded result
979
"Ȣ" // U+0222: LATIN CAPITAL LETTER OU
980
+ "ᴕ" // U+1D15: LATIN LETTER SMALL CAPITAL OU
981
,"OU", // Folded result
983
"⒪" // U+24AA: PARENTHESIZED LATIN SMALL LETTER O
984
,"(o)", // Folded result
986
"œ" // U+0153: LATIN SMALL LIGATURE OE
987
+ "ᴔ" // U+1D14: LATIN SMALL LETTER TURNED OE
988
,"oe", // Folded result
990
"ꝏ" // U+A74F: LATIN SMALL LETTER OO
991
,"oo", // Folded result
993
"ȣ" // U+0223: LATIN SMALL LETTER OU
994
,"ou", // Folded result
996
"Ƥ" // U+01A4: LATIN CAPITAL LETTER P WITH HOOK
997
+ "ᴘ" // U+1D18: LATIN LETTER SMALL CAPITAL P
998
+ "Ṕ" // U+1E54: LATIN CAPITAL LETTER P WITH ACUTE
999
+ "Ṗ" // U+1E56: LATIN CAPITAL LETTER P WITH DOT ABOVE
1000
+ "Ⓟ" // U+24C5: CIRCLED LATIN CAPITAL LETTER P
1001
+ "Ᵽ" // U+2C63: LATIN CAPITAL LETTER P WITH STROKE
1002
+ "Ꝑ" // U+A750: LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
1003
+ "Ꝓ" // U+A752: LATIN CAPITAL LETTER P WITH FLOURISH
1004
+ "Ꝕ" // U+A754: LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
1005
+ "P" // U+FF30: FULLWIDTH LATIN CAPITAL LETTER P
1006
,"P", // Folded result
1008
"ƥ" // U+01A5: LATIN SMALL LETTER P WITH HOOK
1009
+ "ᵱ" // U+1D71: LATIN SMALL LETTER P WITH MIDDLE TILDE
1010
+ "ᵽ" // U+1D7D: LATIN SMALL LETTER P WITH STROKE
1011
+ "ᶈ" // U+1D88: LATIN SMALL LETTER P WITH PALATAL HOOK
1012
+ "ṕ" // U+1E55: LATIN SMALL LETTER P WITH ACUTE
1013
+ "ṗ" // U+1E57: LATIN SMALL LETTER P WITH DOT ABOVE
1014
+ "ⓟ" // U+24DF: CIRCLED LATIN SMALL LETTER P
1015
+ "ꝑ" // U+A751: LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER
1016
+ "ꝓ" // U+A753: LATIN SMALL LETTER P WITH FLOURISH
1017
+ "ꝕ" // U+A755: LATIN SMALL LETTER P WITH SQUIRREL TAIL
1018
+ "ꟼ" // U+A7FC: LATIN EPIGRAPHIC LETTER REVERSED P
1019
+ "p" // U+FF50: FULLWIDTH LATIN SMALL LETTER P
1020
,"p", // Folded result
1022
"⒫" // U+24AB: PARENTHESIZED LATIN SMALL LETTER P
1023
,"(p)", // Folded result
1025
"Ɋ" // U+024A: LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
1026
+ "Ⓠ" // U+24C6: CIRCLED LATIN CAPITAL LETTER Q
1027
+ "Ꝗ" // U+A756: LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
1028
+ "Ꝙ" // U+A758: LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
1029
+ "Q" // U+FF31: FULLWIDTH LATIN CAPITAL LETTER Q
1030
,"Q", // Folded result
1032
"ĸ" // U+0138: LATIN SMALL LETTER KRA
1033
+ "ɋ" // U+024B: LATIN SMALL LETTER Q WITH HOOK TAIL
1034
+ "ʠ" // U+02A0: LATIN SMALL LETTER Q WITH HOOK
1035
+ "ⓠ" // U+24E0: CIRCLED LATIN SMALL LETTER Q
1036
+ "ꝗ" // U+A757: LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER
1037
+ "ꝙ" // U+A759: LATIN SMALL LETTER Q WITH DIAGONAL STROKE
1038
+ "q" // U+FF51: FULLWIDTH LATIN SMALL LETTER Q
1039
,"q", // Folded result
1041
"⒬" // U+24AC: PARENTHESIZED LATIN SMALL LETTER Q
1042
,"(q)", // Folded result
1044
"ȹ" // U+0239: LATIN SMALL LETTER QP DIGRAPH
1045
,"qp", // Folded result
1047
"Ŕ" // U+0154: LATIN CAPITAL LETTER R WITH ACUTE
1048
+ "Ŗ" // U+0156: LATIN CAPITAL LETTER R WITH CEDILLA
1049
+ "Ř" // U+0158: LATIN CAPITAL LETTER R WITH CARON
1050
+ "Ȑ" // U+0210: LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
1051
+ "Ȓ" // U+0212: LATIN CAPITAL LETTER R WITH INVERTED BREVE
1052
+ "Ɍ" // U+024C: LATIN CAPITAL LETTER R WITH STROKE
1053
+ "ʀ" // U+0280: LATIN LETTER SMALL CAPITAL R
1054
+ "ʁ" // U+0281: LATIN LETTER SMALL CAPITAL INVERTED R
1055
+ "ᴙ" // U+1D19: LATIN LETTER SMALL CAPITAL REVERSED R
1056
+ "ᴚ" // U+1D1A: LATIN LETTER SMALL CAPITAL TURNED R
1057
+ "Ṙ" // U+1E58: LATIN CAPITAL LETTER R WITH DOT ABOVE
1058
+ "Ṛ" // U+1E5A: LATIN CAPITAL LETTER R WITH DOT BELOW
1059
+ "Ṝ" // U+1E5C: LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
1060
+ "Ṟ" // U+1E5E: LATIN CAPITAL LETTER R WITH LINE BELOW
1061
+ "Ⓡ" // U+24C7: CIRCLED LATIN CAPITAL LETTER R
1062
+ "Ɽ" // U+2C64: LATIN CAPITAL LETTER R WITH TAIL
1063
+ "Ꝛ" // U+A75A: LATIN CAPITAL LETTER R ROTUNDA
1064
+ "Ꞃ" // U+A782: LATIN CAPITAL LETTER INSULAR R
1065
+ "R" // U+FF32: FULLWIDTH LATIN CAPITAL LETTER R
1066
,"R", // Folded result
1068
"ŕ" // U+0155: LATIN SMALL LETTER R WITH ACUTE
1069
+ "ŗ" // U+0157: LATIN SMALL LETTER R WITH CEDILLA
1070
+ "ř" // U+0159: LATIN SMALL LETTER R WITH CARON
1071
+ "ȑ" // U+0211: LATIN SMALL LETTER R WITH DOUBLE GRAVE
1072
+ "ȓ" // U+0213: LATIN SMALL LETTER R WITH INVERTED BREVE
1073
+ "ɍ" // U+024D: LATIN SMALL LETTER R WITH STROKE
1074
+ "ɼ" // U+027C: LATIN SMALL LETTER R WITH LONG LEG
1075
+ "ɽ" // U+027D: LATIN SMALL LETTER R WITH TAIL
1076
+ "ɾ" // U+027E: LATIN SMALL LETTER R WITH FISHHOOK
1077
+ "ɿ" // U+027F: LATIN SMALL LETTER REVERSED R WITH FISHHOOK
1078
+ "ᵣ" // U+1D63: LATIN SUBSCRIPT SMALL LETTER R
1079
+ "ᵲ" // U+1D72: LATIN SMALL LETTER R WITH MIDDLE TILDE
1080
+ "ᵳ" // U+1D73: LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE
1081
+ "ᶉ" // U+1D89: LATIN SMALL LETTER R WITH PALATAL HOOK
1082
+ "ṙ" // U+1E59: LATIN SMALL LETTER R WITH DOT ABOVE
1083
+ "ṛ" // U+1E5B: LATIN SMALL LETTER R WITH DOT BELOW
1084
+ "ṝ" // U+1E5D: LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
1085
+ "ṟ" // U+1E5F: LATIN SMALL LETTER R WITH LINE BELOW
1086
+ "ⓡ" // U+24E1: CIRCLED LATIN SMALL LETTER R
1087
+ "ꝛ" // U+A75B: LATIN SMALL LETTER R ROTUNDA
1088
+ "ꞃ" // U+A783: LATIN SMALL LETTER INSULAR R
1089
+ "r" // U+FF52: FULLWIDTH LATIN SMALL LETTER R
1090
,"r", // Folded result
1092
"⒭" // U+24AD: PARENTHESIZED LATIN SMALL LETTER R
1093
,"(r)", // Folded result
1095
"Ś" // U+015A: LATIN CAPITAL LETTER S WITH ACUTE
1096
+ "Ŝ" // U+015C: LATIN CAPITAL LETTER S WITH CIRCUMFLEX
1097
+ "Ş" // U+015E: LATIN CAPITAL LETTER S WITH CEDILLA
1098
+ "Š" // U+0160: LATIN CAPITAL LETTER S WITH CARON
1099
+ "Ș" // U+0218: LATIN CAPITAL LETTER S WITH COMMA BELOW
1100
+ "Ṡ" // U+1E60: LATIN CAPITAL LETTER S WITH DOT ABOVE
1101
+ "Ṣ" // U+1E62: LATIN CAPITAL LETTER S WITH DOT BELOW
1102
+ "Ṥ" // U+1E64: LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
1103
+ "Ṧ" // U+1E66: LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
1104
+ "Ṩ" // U+1E68: LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
1105
+ "Ⓢ" // U+24C8: CIRCLED LATIN CAPITAL LETTER S
1106
+ "ꜱ" // U+A731: LATIN LETTER SMALL CAPITAL S
1107
+ "ꞅ" // U+A785: LATIN SMALL LETTER INSULAR S
1108
+ "S" // U+FF33: FULLWIDTH LATIN CAPITAL LETTER S
1109
,"S", // Folded result
1111
"ś" // U+015B: LATIN SMALL LETTER S WITH ACUTE
1112
+ "ŝ" // U+015D: LATIN SMALL LETTER S WITH CIRCUMFLEX
1113
+ "ş" // U+015F: LATIN SMALL LETTER S WITH CEDILLA
1114
+ "š" // U+0161: LATIN SMALL LETTER S WITH CARON
1115
+ "ſ" // U+017F: LATIN SMALL LETTER LONG S
1116
+ "ș" // U+0219: LATIN SMALL LETTER S WITH COMMA BELOW
1117
+ "ȿ" // U+023F: LATIN SMALL LETTER S WITH SWASH TAIL
1118
+ "ʂ" // U+0282: LATIN SMALL LETTER S WITH HOOK
1119
+ "ᵴ" // U+1D74: LATIN SMALL LETTER S WITH MIDDLE TILDE
1120
+ "ᶊ" // U+1D8A: LATIN SMALL LETTER S WITH PALATAL HOOK
1121
+ "ṡ" // U+1E61: LATIN SMALL LETTER S WITH DOT ABOVE
1122
+ "ṣ" // U+1E63: LATIN SMALL LETTER S WITH DOT BELOW
1123
+ "ṥ" // U+1E65: LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
1124
+ "ṧ" // U+1E67: LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
1125
+ "ṩ" // U+1E69: LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
1126
+ "ẜ" // U+1E9C: LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE
1127
+ "ẝ" // U+1E9D: LATIN SMALL LETTER LONG S WITH HIGH STROKE
1128
+ "ⓢ" // U+24E2: CIRCLED LATIN SMALL LETTER S
1129
+ "Ꞅ" // U+A784: LATIN CAPITAL LETTER INSULAR S
1130
+ "s" // U+FF53: FULLWIDTH LATIN SMALL LETTER S
1131
,"s", // Folded result
1133
"ẞ" // U+1E9E: LATIN CAPITAL LETTER SHARP S
1134
,"SS", // Folded result
1136
"⒮" // U+24AE: PARENTHESIZED LATIN SMALL LETTER S
1137
,"(s)", // Folded result
1139
"ß" // U+00DF: LATIN SMALL LETTER SHARP S
1140
,"ss", // Folded result
1142
"st" // U+FB06: LATIN SMALL LIGATURE ST
1143
,"st", // Folded result
1145
"Ţ" // U+0162: LATIN CAPITAL LETTER T WITH CEDILLA
1146
+ "Ť" // U+0164: LATIN CAPITAL LETTER T WITH CARON
1147
+ "Ŧ" // U+0166: LATIN CAPITAL LETTER T WITH STROKE
1148
+ "Ƭ" // U+01AC: LATIN CAPITAL LETTER T WITH HOOK
1149
+ "Ʈ" // U+01AE: LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
1150
+ "Ț" // U+021A: LATIN CAPITAL LETTER T WITH COMMA BELOW
1151
+ "Ⱦ" // U+023E: LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
1152
+ "ᴛ" // U+1D1B: LATIN LETTER SMALL CAPITAL T
1153
+ "Ṫ" // U+1E6A: LATIN CAPITAL LETTER T WITH DOT ABOVE
1154
+ "Ṭ" // U+1E6C: LATIN CAPITAL LETTER T WITH DOT BELOW
1155
+ "Ṯ" // U+1E6E: LATIN CAPITAL LETTER T WITH LINE BELOW
1156
+ "Ṱ" // U+1E70: LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
1157
+ "Ⓣ" // U+24C9: CIRCLED LATIN CAPITAL LETTER T
1158
+ "Ꞇ" // U+A786: LATIN CAPITAL LETTER INSULAR T
1159
+ "T" // U+FF34: FULLWIDTH LATIN CAPITAL LETTER T
1160
,"T", // Folded result
1162
"ţ" // U+0163: LATIN SMALL LETTER T WITH CEDILLA
1163
+ "ť" // U+0165: LATIN SMALL LETTER T WITH CARON
1164
+ "ŧ" // U+0167: LATIN SMALL LETTER T WITH STROKE
1165
+ "ƫ" // U+01AB: LATIN SMALL LETTER T WITH PALATAL HOOK
1166
+ "ƭ" // U+01AD: LATIN SMALL LETTER T WITH HOOK
1167
+ "ț" // U+021B: LATIN SMALL LETTER T WITH COMMA BELOW
1168
+ "ȶ" // U+0236: LATIN SMALL LETTER T WITH CURL
1169
+ "ʇ" // U+0287: LATIN SMALL LETTER TURNED T
1170
+ "ʈ" // U+0288: LATIN SMALL LETTER T WITH RETROFLEX HOOK
1171
+ "ᵵ" // U+1D75: LATIN SMALL LETTER T WITH MIDDLE TILDE
1172
+ "ṫ" // U+1E6B: LATIN SMALL LETTER T WITH DOT ABOVE
1173
+ "ṭ" // U+1E6D: LATIN SMALL LETTER T WITH DOT BELOW
1174
+ "ṯ" // U+1E6F: LATIN SMALL LETTER T WITH LINE BELOW
1175
+ "ṱ" // U+1E71: LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW
1176
+ "ẗ" // U+1E97: LATIN SMALL LETTER T WITH DIAERESIS
1177
+ "ⓣ" // U+24E3: CIRCLED LATIN SMALL LETTER T
1178
+ "ⱦ" // U+2C66: LATIN SMALL LETTER T WITH DIAGONAL STROKE
1179
+ "t" // U+FF54: FULLWIDTH LATIN SMALL LETTER T
1180
,"t", // Folded result
1182
"Þ" // U+00DE: LATIN CAPITAL LETTER THORN
1183
+ "Ꝧ" // U+A766: LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
1184
,"TH", // Folded result
1186
"Ꜩ" // U+A728: LATIN CAPITAL LETTER TZ
1187
,"TZ", // Folded result
1189
"⒯" // U+24AF: PARENTHESIZED LATIN SMALL LETTER T
1190
,"(t)", // Folded result
1192
"ʨ" // U+02A8: LATIN SMALL LETTER TC DIGRAPH WITH CURL
1193
,"tc", // Folded result
1195
"þ" // U+00FE: LATIN SMALL LETTER THORN
1196
+ "ᵺ" // U+1D7A: LATIN SMALL LETTER TH WITH STRIKETHROUGH
1197
+ "ꝧ" // U+A767: LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER
1198
,"th", // Folded result
1200
"ʦ" // U+02A6: LATIN SMALL LETTER TS DIGRAPH
1201
,"ts", // Folded result
1203
"ꜩ" // U+A729: LATIN SMALL LETTER TZ
1204
,"tz", // Folded result
1206
"Ù" // U+00D9: LATIN CAPITAL LETTER U WITH GRAVE
1207
+ "Ú" // U+00DA: LATIN CAPITAL LETTER U WITH ACUTE
1208
+ "Û" // U+00DB: LATIN CAPITAL LETTER U WITH CIRCUMFLEX
1209
+ "Ü" // U+00DC: LATIN CAPITAL LETTER U WITH DIAERESIS
1210
+ "Ũ" // U+0168: LATIN CAPITAL LETTER U WITH TILDE
1211
+ "Ū" // U+016A: LATIN CAPITAL LETTER U WITH MACRON
1212
+ "Ŭ" // U+016C: LATIN CAPITAL LETTER U WITH BREVE
1213
+ "Ů" // U+016E: LATIN CAPITAL LETTER U WITH RING ABOVE
1214
+ "Ű" // U+0170: LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
1215
+ "Ų" // U+0172: LATIN CAPITAL LETTER U WITH OGONEK
1216
+ "Ư" // U+01AF: LATIN CAPITAL LETTER U WITH HORN
1217
+ "Ǔ" // U+01D3: LATIN CAPITAL LETTER U WITH CARON
1218
+ "Ǖ" // U+01D5: LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
1219
+ "Ǘ" // U+01D7: LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
1220
+ "Ǚ" // U+01D9: LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
1221
+ "Ǜ" // U+01DB: LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
1222
+ "Ȕ" // U+0214: LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
1223
+ "Ȗ" // U+0216: LATIN CAPITAL LETTER U WITH INVERTED BREVE
1224
+ "Ʉ" // U+0244: LATIN CAPITAL LETTER U BAR
1225
+ "ᴜ" // U+1D1C: LATIN LETTER SMALL CAPITAL U
1226
+ "ᵾ" // U+1D7E: LATIN SMALL CAPITAL LETTER U WITH STROKE
1227
+ "Ṳ" // U+1E72: LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
1228
+ "Ṵ" // U+1E74: LATIN CAPITAL LETTER U WITH TILDE BELOW
1229
+ "Ṷ" // U+1E76: LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
1230
+ "Ṹ" // U+1E78: LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
1231
+ "Ṻ" // U+1E7A: LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
1232
+ "Ụ" // U+1EE4: LATIN CAPITAL LETTER U WITH DOT BELOW
1233
+ "Ủ" // U+1EE6: LATIN CAPITAL LETTER U WITH HOOK ABOVE
1234
+ "Ứ" // U+1EE8: LATIN CAPITAL LETTER U WITH HORN AND ACUTE
1235
+ "Ừ" // U+1EEA: LATIN CAPITAL LETTER U WITH HORN AND GRAVE
1236
+ "Ử" // U+1EEC: LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
1237
+ "Ữ" // U+1EEE: LATIN CAPITAL LETTER U WITH HORN AND TILDE
1238
+ "Ự" // U+1EF0: LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
1239
+ "Ⓤ" // U+24CA: CIRCLED LATIN CAPITAL LETTER U
1240
+ "U" // U+FF35: FULLWIDTH LATIN CAPITAL LETTER U
1241
,"U", // Folded result
1243
"ù" // U+00F9: LATIN SMALL LETTER U WITH GRAVE
1244
+ "ú" // U+00FA: LATIN SMALL LETTER U WITH ACUTE
1245
+ "û" // U+00FB: LATIN SMALL LETTER U WITH CIRCUMFLEX
1246
+ "ü" // U+00FC: LATIN SMALL LETTER U WITH DIAERESIS
1247
+ "ũ" // U+0169: LATIN SMALL LETTER U WITH TILDE
1248
+ "ū" // U+016B: LATIN SMALL LETTER U WITH MACRON
1249
+ "ŭ" // U+016D: LATIN SMALL LETTER U WITH BREVE
1250
+ "ů" // U+016F: LATIN SMALL LETTER U WITH RING ABOVE
1251
+ "ű" // U+0171: LATIN SMALL LETTER U WITH DOUBLE ACUTE
1252
+ "ų" // U+0173: LATIN SMALL LETTER U WITH OGONEK
1253
+ "ư" // U+01B0: LATIN SMALL LETTER U WITH HORN
1254
+ "ǔ" // U+01D4: LATIN SMALL LETTER U WITH CARON
1255
+ "ǖ" // U+01D6: LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
1256
+ "ǘ" // U+01D8: LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
1257
+ "ǚ" // U+01DA: LATIN SMALL LETTER U WITH DIAERESIS AND CARON
1258
+ "ǜ" // U+01DC: LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
1259
+ "ȕ" // U+0215: LATIN SMALL LETTER U WITH DOUBLE GRAVE
1260
+ "ȗ" // U+0217: LATIN SMALL LETTER U WITH INVERTED BREVE
1261
+ "ʉ" // U+0289: LATIN SMALL LETTER U BAR
1262
+ "ᵤ" // U+1D64: LATIN SUBSCRIPT SMALL LETTER U
1263
+ "ᶙ" // U+1D99: LATIN SMALL LETTER U WITH RETROFLEX HOOK
1264
+ "ṳ" // U+1E73: LATIN SMALL LETTER U WITH DIAERESIS BELOW
1265
+ "ṵ" // U+1E75: LATIN SMALL LETTER U WITH TILDE BELOW
1266
+ "ṷ" // U+1E77: LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW
1267
+ "ṹ" // U+1E79: LATIN SMALL LETTER U WITH TILDE AND ACUTE
1268
+ "ṻ" // U+1E7B: LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
1269
+ "ụ" // U+1EE5: LATIN SMALL LETTER U WITH DOT BELOW
1270
+ "ủ" // U+1EE7: LATIN SMALL LETTER U WITH HOOK ABOVE
1271
+ "ứ" // U+1EE9: LATIN SMALL LETTER U WITH HORN AND ACUTE
1272
+ "ừ" // U+1EEB: LATIN SMALL LETTER U WITH HORN AND GRAVE
1273
+ "ử" // U+1EED: LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE
1274
+ "ữ" // U+1EEF: LATIN SMALL LETTER U WITH HORN AND TILDE
1275
+ "ự" // U+1EF1: LATIN SMALL LETTER U WITH HORN AND DOT BELOW
1276
+ "ⓤ" // U+24E4: CIRCLED LATIN SMALL LETTER U
1277
+ "u" // U+FF55: FULLWIDTH LATIN SMALL LETTER U
1278
,"u", // Folded result
1280
"⒰" // U+24B0: PARENTHESIZED LATIN SMALL LETTER U
1281
,"(u)", // Folded result
1283
"ᵫ" // U+1D6B: LATIN SMALL LETTER UE
1284
,"ue", // Folded result
1286
"Ʋ" // U+01B2: LATIN CAPITAL LETTER V WITH HOOK
1287
+ "Ʌ" // U+0245: LATIN CAPITAL LETTER TURNED V
1288
+ "ᴠ" // U+1D20: LATIN LETTER SMALL CAPITAL V
1289
+ "Ṽ" // U+1E7C: LATIN CAPITAL LETTER V WITH TILDE
1290
+ "Ṿ" // U+1E7E: LATIN CAPITAL LETTER V WITH DOT BELOW
1291
+ "Ỽ" // U+1EFC: LATIN CAPITAL LETTER MIDDLE-WELSH V
1292
+ "Ⓥ" // U+24CB: CIRCLED LATIN CAPITAL LETTER V
1293
+ "Ꝟ" // U+A75E: LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
1294
+ "Ꝩ" // U+A768: LATIN CAPITAL LETTER VEND
1295
+ "V" // U+FF36: FULLWIDTH LATIN CAPITAL LETTER V
1296
,"V", // Folded result
1298
"ʋ" // U+028B: LATIN SMALL LETTER V WITH HOOK
1299
+ "ʌ" // U+028C: LATIN SMALL LETTER TURNED V
1300
+ "ᵥ" // U+1D65: LATIN SUBSCRIPT SMALL LETTER V
1301
+ "ᶌ" // U+1D8C: LATIN SMALL LETTER V WITH PALATAL HOOK
1302
+ "ṽ" // U+1E7D: LATIN SMALL LETTER V WITH TILDE
1303
+ "ṿ" // U+1E7F: LATIN SMALL LETTER V WITH DOT BELOW
1304
+ "ⓥ" // U+24E5: CIRCLED LATIN SMALL LETTER V
1305
+ "ⱱ" // U+2C71: LATIN SMALL LETTER V WITH RIGHT HOOK
1306
+ "ⱴ" // U+2C74: LATIN SMALL LETTER V WITH CURL
1307
+ "ꝟ" // U+A75F: LATIN SMALL LETTER V WITH DIAGONAL STROKE
1308
+ "v" // U+FF56: FULLWIDTH LATIN SMALL LETTER V
1309
,"v", // Folded result
1311
"Ꝡ" // U+A760: LATIN CAPITAL LETTER VY
1312
,"VY", // Folded result
1314
"⒱" // U+24B1: PARENTHESIZED LATIN SMALL LETTER V
1315
,"(v)", // Folded result
1317
"ꝡ" // U+A761: LATIN SMALL LETTER VY
1318
,"vy", // Folded result
1320
"Ŵ" // U+0174: LATIN CAPITAL LETTER W WITH CIRCUMFLEX
1321
+ "Ƿ" // U+01F7: LATIN CAPITAL LETTER WYNN
1322
+ "ᴡ" // U+1D21: LATIN LETTER SMALL CAPITAL W
1323
+ "Ẁ" // U+1E80: LATIN CAPITAL LETTER W WITH GRAVE
1324
+ "Ẃ" // U+1E82: LATIN CAPITAL LETTER W WITH ACUTE
1325
+ "Ẅ" // U+1E84: LATIN CAPITAL LETTER W WITH DIAERESIS
1326
+ "Ẇ" // U+1E86: LATIN CAPITAL LETTER W WITH DOT ABOVE
1327
+ "Ẉ" // U+1E88: LATIN CAPITAL LETTER W WITH DOT BELOW
1328
+ "Ⓦ" // U+24CC: CIRCLED LATIN CAPITAL LETTER W
1329
+ "Ⱳ" // U+2C72: LATIN CAPITAL LETTER W WITH HOOK
1330
+ "W" // U+FF37: FULLWIDTH LATIN CAPITAL LETTER W
1331
,"W", // Folded result
1333
"ŵ" // U+0175: LATIN SMALL LETTER W WITH CIRCUMFLEX
1334
+ "ƿ" // U+01BF: LATIN LETTER WYNN
1335
+ "ʍ" // U+028D: LATIN SMALL LETTER TURNED W
1336
+ "ẁ" // U+1E81: LATIN SMALL LETTER W WITH GRAVE
1337
+ "ẃ" // U+1E83: LATIN SMALL LETTER W WITH ACUTE
1338
+ "ẅ" // U+1E85: LATIN SMALL LETTER W WITH DIAERESIS
1339
+ "ẇ" // U+1E87: LATIN SMALL LETTER W WITH DOT ABOVE
1340
+ "ẉ" // U+1E89: LATIN SMALL LETTER W WITH DOT BELOW
1341
+ "ẘ" // U+1E98: LATIN SMALL LETTER W WITH RING ABOVE
1342
+ "ⓦ" // U+24E6: CIRCLED LATIN SMALL LETTER W
1343
+ "ⱳ" // U+2C73: LATIN SMALL LETTER W WITH HOOK
1344
+ "w" // U+FF57: FULLWIDTH LATIN SMALL LETTER W
1345
,"w", // Folded result
1347
"⒲" // U+24B2: PARENTHESIZED LATIN SMALL LETTER W
1348
,"(w)", // Folded result
1350
"Ẋ" // U+1E8A: LATIN CAPITAL LETTER X WITH DOT ABOVE
1351
+ "Ẍ" // U+1E8C: LATIN CAPITAL LETTER X WITH DIAERESIS
1352
+ "Ⓧ" // U+24CD: CIRCLED LATIN CAPITAL LETTER X
1353
+ "X" // U+FF38: FULLWIDTH LATIN CAPITAL LETTER X
1354
,"X", // Folded result
1356
"ᶍ" // U+1D8D: LATIN SMALL LETTER X WITH PALATAL HOOK
1357
+ "ẋ" // U+1E8B: LATIN SMALL LETTER X WITH DOT ABOVE
1358
+ "ẍ" // U+1E8D: LATIN SMALL LETTER X WITH DIAERESIS
1359
+ "ₓ" // U+2093: LATIN SUBSCRIPT SMALL LETTER X
1360
+ "ⓧ" // U+24E7: CIRCLED LATIN SMALL LETTER X
1361
+ "x" // U+FF58: FULLWIDTH LATIN SMALL LETTER X
1362
,"x", // Folded result
1364
"⒳" // U+24B3: PARENTHESIZED LATIN SMALL LETTER X
1365
,"(x)", // Folded result
1367
"Ý" // U+00DD: LATIN CAPITAL LETTER Y WITH ACUTE
1368
+ "Ŷ" // U+0176: LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
1369
+ "Ÿ" // U+0178: LATIN CAPITAL LETTER Y WITH DIAERESIS
1370
+ "Ƴ" // U+01B3: LATIN CAPITAL LETTER Y WITH HOOK
1371
+ "Ȳ" // U+0232: LATIN CAPITAL LETTER Y WITH MACRON
1372
+ "Ɏ" // U+024E: LATIN CAPITAL LETTER Y WITH STROKE
1373
+ "ʏ" // U+028F: LATIN LETTER SMALL CAPITAL Y
1374
+ "Ẏ" // U+1E8E: LATIN CAPITAL LETTER Y WITH DOT ABOVE
1375
+ "Ỳ" // U+1EF2: LATIN CAPITAL LETTER Y WITH GRAVE
1376
+ "Ỵ" // U+1EF4: LATIN CAPITAL LETTER Y WITH DOT BELOW
1377
+ "Ỷ" // U+1EF6: LATIN CAPITAL LETTER Y WITH HOOK ABOVE
1378
+ "Ỹ" // U+1EF8: LATIN CAPITAL LETTER Y WITH TILDE
1379
+ "Ỿ" // U+1EFE: LATIN CAPITAL LETTER Y WITH LOOP
1380
+ "Ⓨ" // U+24CE: CIRCLED LATIN CAPITAL LETTER Y
1381
+ "Y" // U+FF39: FULLWIDTH LATIN CAPITAL LETTER Y
1382
,"Y", // Folded result
1384
"ý" // U+00FD: LATIN SMALL LETTER Y WITH ACUTE
1385
+ "ÿ" // U+00FF: LATIN SMALL LETTER Y WITH DIAERESIS
1386
+ "ŷ" // U+0177: LATIN SMALL LETTER Y WITH CIRCUMFLEX
1387
+ "ƴ" // U+01B4: LATIN SMALL LETTER Y WITH HOOK
1388
+ "ȳ" // U+0233: LATIN SMALL LETTER Y WITH MACRON
1389
+ "ɏ" // U+024F: LATIN SMALL LETTER Y WITH STROKE
1390
+ "ʎ" // U+028E: LATIN SMALL LETTER TURNED Y
1391
+ "ẏ" // U+1E8F: LATIN SMALL LETTER Y WITH DOT ABOVE
1392
+ "ẙ" // U+1E99: LATIN SMALL LETTER Y WITH RING ABOVE
1393
+ "ỳ" // U+1EF3: LATIN SMALL LETTER Y WITH GRAVE
1394
+ "ỵ" // U+1EF5: LATIN SMALL LETTER Y WITH DOT BELOW
1395
+ "ỷ" // U+1EF7: LATIN SMALL LETTER Y WITH HOOK ABOVE
1396
+ "ỹ" // U+1EF9: LATIN SMALL LETTER Y WITH TILDE
1397
+ "ỿ" // U+1EFF: LATIN SMALL LETTER Y WITH LOOP
1398
+ "ⓨ" // U+24E8: CIRCLED LATIN SMALL LETTER Y
1399
+ "y" // U+FF59: FULLWIDTH LATIN SMALL LETTER Y
1400
,"y", // Folded result
1402
"⒴" // U+24B4: PARENTHESIZED LATIN SMALL LETTER Y
1403
,"(y)", // Folded result
1405
"Ź" // U+0179: LATIN CAPITAL LETTER Z WITH ACUTE
1406
+ "Ż" // U+017B: LATIN CAPITAL LETTER Z WITH DOT ABOVE
1407
+ "Ž" // U+017D: LATIN CAPITAL LETTER Z WITH CARON
1408
+ "Ƶ" // U+01B5: LATIN CAPITAL LETTER Z WITH STROKE
1409
+ "Ȝ" // U+021C: LATIN CAPITAL LETTER YOGH
1410
+ "Ȥ" // U+0224: LATIN CAPITAL LETTER Z WITH HOOK
1411
+ "ᴢ" // U+1D22: LATIN LETTER SMALL CAPITAL Z
1412
+ "Ẑ" // U+1E90: LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
1413
+ "Ẓ" // U+1E92: LATIN CAPITAL LETTER Z WITH DOT BELOW
1414
+ "Ẕ" // U+1E94: LATIN CAPITAL LETTER Z WITH LINE BELOW
1415
+ "Ⓩ" // U+24CF: CIRCLED LATIN CAPITAL LETTER Z
1416
+ "Ⱬ" // U+2C6B: LATIN CAPITAL LETTER Z WITH DESCENDER
1417
+ "Ꝣ" // U+A762: LATIN CAPITAL LETTER VISIGOTHIC Z
1418
+ "Z" // U+FF3A: FULLWIDTH LATIN CAPITAL LETTER Z
1419
,"Z", // Folded result
1421
"ź" // U+017A: LATIN SMALL LETTER Z WITH ACUTE
1422
+ "ż" // U+017C: LATIN SMALL LETTER Z WITH DOT ABOVE
1423
+ "ž" // U+017E: LATIN SMALL LETTER Z WITH CARON
1424
+ "ƶ" // U+01B6: LATIN SMALL LETTER Z WITH STROKE
1425
+ "ȝ" // U+021D: LATIN SMALL LETTER YOGH
1426
+ "ȥ" // U+0225: LATIN SMALL LETTER Z WITH HOOK
1427
+ "ɀ" // U+0240: LATIN SMALL LETTER Z WITH SWASH TAIL
1428
+ "ʐ" // U+0290: LATIN SMALL LETTER Z WITH RETROFLEX HOOK
1429
+ "ʑ" // U+0291: LATIN SMALL LETTER Z WITH CURL
1430
+ "ᵶ" // U+1D76: LATIN SMALL LETTER Z WITH MIDDLE TILDE
1431
+ "ᶎ" // U+1D8E: LATIN SMALL LETTER Z WITH PALATAL HOOK
1432
+ "ẑ" // U+1E91: LATIN SMALL LETTER Z WITH CIRCUMFLEX
1433
+ "ẓ" // U+1E93: LATIN SMALL LETTER Z WITH DOT BELOW
1434
+ "ẕ" // U+1E95: LATIN SMALL LETTER Z WITH LINE BELOW
1435
+ "ⓩ" // U+24E9: CIRCLED LATIN SMALL LETTER Z
1436
+ "ⱬ" // U+2C6C: LATIN SMALL LETTER Z WITH DESCENDER
1437
+ "ꝣ" // U+A763: LATIN SMALL LETTER VISIGOTHIC Z
1438
+ "z" // U+FF5A: FULLWIDTH LATIN SMALL LETTER Z
1439
,"z", // Folded result
1441
"⒵" // U+24B5: PARENTHESIZED LATIN SMALL LETTER Z
1442
,"(z)", // Folded result
1444
"⁰" // U+2070: SUPERSCRIPT ZERO
1445
+ "₀" // U+2080: SUBSCRIPT ZERO
1446
+ "⓪" // U+24EA: CIRCLED DIGIT ZERO
1447
+ "⓿" // U+24FF: NEGATIVE CIRCLED DIGIT ZERO
1448
+ "0" // U+FF10: FULLWIDTH DIGIT ZERO
1449
,"0", // Folded result
1451
"¹" // U+00B9: SUPERSCRIPT ONE
1452
+ "₁" // U+2081: SUBSCRIPT ONE
1453
+ "①" // U+2460: CIRCLED DIGIT ONE
1454
+ "⓵" // U+24F5: DOUBLE CIRCLED DIGIT ONE
1455
+ "❶" // U+2776: DINGBAT NEGATIVE CIRCLED DIGIT ONE
1456
+ "➀" // U+2780: DINGBAT CIRCLED SANS-SERIF DIGIT ONE
1457
+ "➊" // U+278A: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE
1458
+ "1" // U+FF11: FULLWIDTH DIGIT ONE
1459
,"1", // Folded result
1461
"⒈" // U+2488: DIGIT ONE FULL STOP
1462
,"1.", // Folded result
1464
"⑴" // U+2474: PARENTHESIZED DIGIT ONE
1465
,"(1)", // Folded result
1467
"²" // U+00B2: SUPERSCRIPT TWO
1468
+ "₂" // U+2082: SUBSCRIPT TWO
1469
+ "②" // U+2461: CIRCLED DIGIT TWO
1470
+ "⓶" // U+24F6: DOUBLE CIRCLED DIGIT TWO
1471
+ "❷" // U+2777: DINGBAT NEGATIVE CIRCLED DIGIT TWO
1472
+ "➁" // U+2781: DINGBAT CIRCLED SANS-SERIF DIGIT TWO
1473
+ "➋" // U+278B: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO
1474
+ "2" // U+FF12: FULLWIDTH DIGIT TWO
1475
,"2", // Folded result
1477
"⒉" // U+2489: DIGIT TWO FULL STOP
1478
,"2.", // Folded result
1480
"⑵" // U+2475: PARENTHESIZED DIGIT TWO
1481
,"(2)", // Folded result
1483
"³" // U+00B3: SUPERSCRIPT THREE
1484
+ "₃" // U+2083: SUBSCRIPT THREE
1485
+ "③" // U+2462: CIRCLED DIGIT THREE
1486
+ "⓷" // U+24F7: DOUBLE CIRCLED DIGIT THREE
1487
+ "❸" // U+2778: DINGBAT NEGATIVE CIRCLED DIGIT THREE
1488
+ "➂" // U+2782: DINGBAT CIRCLED SANS-SERIF DIGIT THREE
1489
+ "➌" // U+278C: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE
1490
+ "3" // U+FF13: FULLWIDTH DIGIT THREE
1491
,"3", // Folded result
1493
"⒊" // U+248A: DIGIT THREE FULL STOP
1494
,"3.", // Folded result
1496
"⑶" // U+2476: PARENTHESIZED DIGIT THREE
1497
,"(3)", // Folded result
1499
"⁴" // U+2074: SUPERSCRIPT FOUR
1500
+ "₄" // U+2084: SUBSCRIPT FOUR
1501
+ "④" // U+2463: CIRCLED DIGIT FOUR
1502
+ "⓸" // U+24F8: DOUBLE CIRCLED DIGIT FOUR
1503
+ "❹" // U+2779: DINGBAT NEGATIVE CIRCLED DIGIT FOUR
1504
+ "➃" // U+2783: DINGBAT CIRCLED SANS-SERIF DIGIT FOUR
1505
+ "➍" // U+278D: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR
1506
+ "4" // U+FF14: FULLWIDTH DIGIT FOUR
1507
,"4", // Folded result
1509
"⒋" // U+248B: DIGIT FOUR FULL STOP
1510
,"4.", // Folded result
1512
"⑷" // U+2477: PARENTHESIZED DIGIT FOUR
1513
,"(4)", // Folded result
1515
"⁵" // U+2075: SUPERSCRIPT FIVE
1516
+ "₅" // U+2085: SUBSCRIPT FIVE
1517
+ "⑤" // U+2464: CIRCLED DIGIT FIVE
1518
+ "⓹" // U+24F9: DOUBLE CIRCLED DIGIT FIVE
1519
+ "❺" // U+277A: DINGBAT NEGATIVE CIRCLED DIGIT FIVE
1520
+ "➄" // U+2784: DINGBAT CIRCLED SANS-SERIF DIGIT FIVE
1521
+ "➎" // U+278E: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE
1522
+ "5" // U+FF15: FULLWIDTH DIGIT FIVE
1523
,"5", // Folded result
1525
"⒌" // U+248C: DIGIT FIVE FULL STOP
1526
,"5.", // Folded result
1528
"⑸" // U+2478: PARENTHESIZED DIGIT FIVE
1529
,"(5)", // Folded result
1531
"⁶" // U+2076: SUPERSCRIPT SIX
1532
+ "₆" // U+2086: SUBSCRIPT SIX
1533
+ "⑥" // U+2465: CIRCLED DIGIT SIX
1534
+ "⓺" // U+24FA: DOUBLE CIRCLED DIGIT SIX
1535
+ "❻" // U+277B: DINGBAT NEGATIVE CIRCLED DIGIT SIX
1536
+ "➅" // U+2785: DINGBAT CIRCLED SANS-SERIF DIGIT SIX
1537
+ "➏" // U+278F: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX
1538
+ "6" // U+FF16: FULLWIDTH DIGIT SIX
1539
,"6", // Folded result
1541
"⒍" // U+248D: DIGIT SIX FULL STOP
1542
,"6.", // Folded result
1544
"⑹" // U+2479: PARENTHESIZED DIGIT SIX
1545
,"(6)", // Folded result
1547
"⁷" // U+2077: SUPERSCRIPT SEVEN
1548
+ "₇" // U+2087: SUBSCRIPT SEVEN
1549
+ "⑦" // U+2466: CIRCLED DIGIT SEVEN
1550
+ "⓻" // U+24FB: DOUBLE CIRCLED DIGIT SEVEN
1551
+ "❼" // U+277C: DINGBAT NEGATIVE CIRCLED DIGIT SEVEN
1552
+ "➆" // U+2786: DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN
1553
+ "➐" // U+2790: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN
1554
+ "7" // U+FF17: FULLWIDTH DIGIT SEVEN
1555
,"7", // Folded result
1557
"⒎" // U+248E: DIGIT SEVEN FULL STOP
1558
,"7.", // Folded result
1560
"⑺" // U+247A: PARENTHESIZED DIGIT SEVEN
1561
,"(7)", // Folded result
1563
"⁸" // U+2078: SUPERSCRIPT EIGHT
1564
+ "₈" // U+2088: SUBSCRIPT EIGHT
1565
+ "⑧" // U+2467: CIRCLED DIGIT EIGHT
1566
+ "⓼" // U+24FC: DOUBLE CIRCLED DIGIT EIGHT
1567
+ "❽" // U+277D: DINGBAT NEGATIVE CIRCLED DIGIT EIGHT
1568
+ "➇" // U+2787: DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT
1569
+ "➑" // U+2791: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT
1570
+ "8" // U+FF18: FULLWIDTH DIGIT EIGHT
1571
,"8", // Folded result
1573
"⒏" // U+248F: DIGIT EIGHT FULL STOP
1574
,"8.", // Folded result
1576
"⑻" // U+247B: PARENTHESIZED DIGIT EIGHT
1577
,"(8)", // Folded result
1579
"⁹" // U+2079: SUPERSCRIPT NINE
1580
+ "₉" // U+2089: SUBSCRIPT NINE
1581
+ "⑨" // U+2468: CIRCLED DIGIT NINE
1582
+ "⓽" // U+24FD: DOUBLE CIRCLED DIGIT NINE
1583
+ "❾" // U+277E: DINGBAT NEGATIVE CIRCLED DIGIT NINE
1584
+ "➈" // U+2788: DINGBAT CIRCLED SANS-SERIF DIGIT NINE
1585
+ "➒" // U+2792: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE
1586
+ "9" // U+FF19: FULLWIDTH DIGIT NINE
1587
,"9", // Folded result
1589
"⒐" // U+2490: DIGIT NINE FULL STOP
1590
,"9.", // Folded result
1592
"⑼" // U+247C: PARENTHESIZED DIGIT NINE
1593
,"(9)", // Folded result
1595
"⑩" // U+2469: CIRCLED NUMBER TEN
1596
+ "⓾" // U+24FE: DOUBLE CIRCLED NUMBER TEN
1597
+ "❿" // U+277F: DINGBAT NEGATIVE CIRCLED NUMBER TEN
1598
+ "➉" // U+2789: DINGBAT CIRCLED SANS-SERIF NUMBER TEN
1599
+ "➓" // U+2793: DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
1600
,"10", // Folded result
1602
"⒑" // U+2491: NUMBER TEN FULL STOP
1603
,"10.", // Folded result
1605
"⑽" // U+247D: PARENTHESIZED NUMBER TEN
1606
,"(10)", // Folded result
1608
"⑪" // U+246A: CIRCLED NUMBER ELEVEN
1609
+ "⓫" // U+24EB: NEGATIVE CIRCLED NUMBER ELEVEN
1610
,"11", // Folded result
1612
"⒒" // U+2492: NUMBER ELEVEN FULL STOP
1613
,"11.", // Folded result
1615
"⑾" // U+247E: PARENTHESIZED NUMBER ELEVEN
1616
,"(11)", // Folded result
1618
"⑫" // U+246B: CIRCLED NUMBER TWELVE
1619
+ "⓬" // U+24EC: NEGATIVE CIRCLED NUMBER TWELVE
1620
,"12", // Folded result
1622
"⒓" // U+2493: NUMBER TWELVE FULL STOP
1623
,"12.", // Folded result
1625
"⑿" // U+247F: PARENTHESIZED NUMBER TWELVE
1626
,"(12)", // Folded result
1628
"⑬" // U+246C: CIRCLED NUMBER THIRTEEN
1629
+ "⓭" // U+24ED: NEGATIVE CIRCLED NUMBER THIRTEEN
1630
,"13", // Folded result
1632
"⒔" // U+2494: NUMBER THIRTEEN FULL STOP
1633
,"13.", // Folded result
1635
"⒀" // U+2480: PARENTHESIZED NUMBER THIRTEEN
1636
,"(13)", // Folded result
1638
"⑭" // U+246D: CIRCLED NUMBER FOURTEEN
1639
+ "⓮" // U+24EE: NEGATIVE CIRCLED NUMBER FOURTEEN
1640
,"14", // Folded result
1642
"⒕" // U+2495: NUMBER FOURTEEN FULL STOP
1643
,"14.", // Folded result
1645
"⒁" // U+2481: PARENTHESIZED NUMBER FOURTEEN
1646
,"(14)", // Folded result
1648
"⑮" // U+246E: CIRCLED NUMBER FIFTEEN
1649
+ "⓯" // U+24EF: NEGATIVE CIRCLED NUMBER FIFTEEN
1650
,"15", // Folded result
1652
"⒖" // U+2496: NUMBER FIFTEEN FULL STOP
1653
,"15.", // Folded result
1655
"⒂" // U+2482: PARENTHESIZED NUMBER FIFTEEN
1656
,"(15)", // Folded result
1658
"⑯" // U+246F: CIRCLED NUMBER SIXTEEN
1659
+ "⓰" // U+24F0: NEGATIVE CIRCLED NUMBER SIXTEEN
1660
,"16", // Folded result
1662
"⒗" // U+2497: NUMBER SIXTEEN FULL STOP
1663
,"16.", // Folded result
1665
"⒃" // U+2483: PARENTHESIZED NUMBER SIXTEEN
1666
,"(16)", // Folded result
1668
"⑰" // U+2470: CIRCLED NUMBER SEVENTEEN
1669
+ "⓱" // U+24F1: NEGATIVE CIRCLED NUMBER SEVENTEEN
1670
,"17", // Folded result
1672
"⒘" // U+2498: NUMBER SEVENTEEN FULL STOP
1673
,"17.", // Folded result
1675
"⒄" // U+2484: PARENTHESIZED NUMBER SEVENTEEN
1676
,"(17)", // Folded result
1678
"⑱" // U+2471: CIRCLED NUMBER EIGHTEEN
1679
+ "⓲" // U+24F2: NEGATIVE CIRCLED NUMBER EIGHTEEN
1680
,"18", // Folded result
1682
"⒙" // U+2499: NUMBER EIGHTEEN FULL STOP
1683
,"18.", // Folded result
1685
"⒅" // U+2485: PARENTHESIZED NUMBER EIGHTEEN
1686
,"(18)", // Folded result
1688
"⑲" // U+2472: CIRCLED NUMBER NINETEEN
1689
+ "⓳" // U+24F3: NEGATIVE CIRCLED NUMBER NINETEEN
1690
,"19", // Folded result
1692
"⒚" // U+249A: NUMBER NINETEEN FULL STOP
1693
,"19.", // Folded result
1695
"⒆" // U+2486: PARENTHESIZED NUMBER NINETEEN
1696
,"(19)", // Folded result
1698
"⑳" // U+2473: CIRCLED NUMBER TWENTY
1699
+ "⓴" // U+24F4: NEGATIVE CIRCLED NUMBER TWENTY
1700
,"20", // Folded result
1702
"⒛" // U+249B: NUMBER TWENTY FULL STOP
1703
,"20.", // Folded result
1705
"⒇" // U+2487: PARENTHESIZED NUMBER TWENTY
1706
,"(20)", // Folded result
1708
"«" // U+00AB: LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
1709
+ "»" // U+00BB: RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
1710
+ "“" // U+201C: LEFT DOUBLE QUOTATION MARK
1711
+ "”" // U+201D: RIGHT DOUBLE QUOTATION MARK
1712
+ "„" // U+201E: DOUBLE LOW-9 QUOTATION MARK
1713
+ "″" // U+2033: DOUBLE PRIME
1714
+ "‶" // U+2036: REVERSED DOUBLE PRIME
1715
+ "❝" // U+275D: HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT
1716
+ "❞" // U+275E: HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
1717
+ "❮" // U+276E: HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
1718
+ "❯" // U+276F: HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
1719
+ """ // U+FF02: FULLWIDTH QUOTATION MARK
1720
,"\"", // Folded result
1722
"‘" // U+2018: LEFT SINGLE QUOTATION MARK
1723
+ "’" // U+2019: RIGHT SINGLE QUOTATION MARK
1724
+ "‚" // U+201A: SINGLE LOW-9 QUOTATION MARK
1725
+ "‛" // U+201B: SINGLE HIGH-REVERSED-9 QUOTATION MARK
1726
+ "′" // U+2032: PRIME
1727
+ "‵" // U+2035: REVERSED PRIME
1728
+ "‹" // U+2039: SINGLE LEFT-POINTING ANGLE QUOTATION MARK
1729
+ "›" // U+203A: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
1730
+ "❛" // U+275B: HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT
1731
+ "❜" // U+275C: HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT
1732
+ "'" // U+FF07: FULLWIDTH APOSTROPHE
1733
,"'", // Folded result
1735
"‐" // U+2010: HYPHEN
1736
+ "‑" // U+2011: NON-BREAKING HYPHEN
1737
+ "‒" // U+2012: FIGURE DASH
1738
+ "–" // U+2013: EN DASH
1739
+ "—" // U+2014: EM DASH
1740
+ "⁻" // U+207B: SUPERSCRIPT MINUS
1741
+ "₋" // U+208B: SUBSCRIPT MINUS
1742
+ "-" // U+FF0D: FULLWIDTH HYPHEN-MINUS
1743
,"-", // Folded result
1745
"⁅" // U+2045: LEFT SQUARE BRACKET WITH QUILL
1746
+ "❲" // U+2772: LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
1747
+ "[" // U+FF3B: FULLWIDTH LEFT SQUARE BRACKET
1748
,"[", // Folded result
1750
"⁆" // U+2046: RIGHT SQUARE BRACKET WITH QUILL
1751
+ "❳" // U+2773: LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
1752
+ "]" // U+FF3D: FULLWIDTH RIGHT SQUARE BRACKET
1753
,"]", // Folded result
1755
"⁽" // U+207D: SUPERSCRIPT LEFT PARENTHESIS
1756
+ "₍" // U+208D: SUBSCRIPT LEFT PARENTHESIS
1757
+ "❨" // U+2768: MEDIUM LEFT PARENTHESIS ORNAMENT
1758
+ "❪" // U+276A: MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
1759
+ "(" // U+FF08: FULLWIDTH LEFT PARENTHESIS
1760
,"(", // Folded result
1762
"⸨" // U+2E28: LEFT DOUBLE PARENTHESIS
1763
,"((", // Folded result
1765
"⁾" // U+207E: SUPERSCRIPT RIGHT PARENTHESIS
1766
+ "₎" // U+208E: SUBSCRIPT RIGHT PARENTHESIS
1767
+ "❩" // U+2769: MEDIUM RIGHT PARENTHESIS ORNAMENT
1768
+ "❫" // U+276B: MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
1769
+ ")" // U+FF09: FULLWIDTH RIGHT PARENTHESIS
1770
,")", // Folded result
1772
"⸩" // U+2E29: RIGHT DOUBLE PARENTHESIS
1773
,"))", // Folded result
1775
"❬" // U+276C: MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
1776
+ "❰" // U+2770: HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
1777
+ "<" // U+FF1C: FULLWIDTH LESS-THAN SIGN
1778
,"<", // Folded result
1780
"❭" // U+276D: MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
1781
+ "❱" // U+2771: HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
1782
+ ">" // U+FF1E: FULLWIDTH GREATER-THAN SIGN
1783
,">", // Folded result
1785
"❴" // U+2774: MEDIUM LEFT CURLY BRACKET ORNAMENT
1786
+ "{" // U+FF5B: FULLWIDTH LEFT CURLY BRACKET
1787
,"{", // Folded result
1789
"❵" // U+2775: MEDIUM RIGHT CURLY BRACKET ORNAMENT
1790
+ "}" // U+FF5D: FULLWIDTH RIGHT CURLY BRACKET
1791
,"}", // Folded result
1793
"⁺" // U+207A: SUPERSCRIPT PLUS SIGN
1794
+ "₊" // U+208A: SUBSCRIPT PLUS SIGN
1795
+ "+" // U+FF0B: FULLWIDTH PLUS SIGN
1796
,"+", // Folded result
1798
"⁼" // U+207C: SUPERSCRIPT EQUALS SIGN
1799
+ "₌" // U+208C: SUBSCRIPT EQUALS SIGN
1800
+ "=" // U+FF1D: FULLWIDTH EQUALS SIGN
1801
,"=", // Folded result
1803
"!" // U+FF01: FULLWIDTH EXCLAMATION MARK
1804
,"!", // Folded result
1806
"‼" // U+203C: DOUBLE EXCLAMATION MARK
1807
,"!!", // Folded result
1809
"⁉" // U+2049: EXCLAMATION QUESTION MARK
1810
,"!?", // Folded result
1812
"#" // U+FF03: FULLWIDTH NUMBER SIGN
1813
,"#", // Folded result
1815
"$" // U+FF04: FULLWIDTH DOLLAR SIGN
1816
,"$", // Folded result
1818
"⁒" // U+2052: COMMERCIAL MINUS SIGN
1819
+ "%" // U+FF05: FULLWIDTH PERCENT SIGN
1820
,"%", // Folded result
1822
"&" // U+FF06: FULLWIDTH AMPERSAND
1823
,"&", // Folded result
1825
"⁎" // U+204E: LOW ASTERISK
1826
+ "*" // U+FF0A: FULLWIDTH ASTERISK
1827
,"*", // Folded result
1829
"," // U+FF0C: FULLWIDTH COMMA
1830
,",", // Folded result
1832
"." // U+FF0E: FULLWIDTH FULL STOP
1833
,".", // Folded result
1835
"⁄" // U+2044: FRACTION SLASH
1836
+ "/" // U+FF0F: FULLWIDTH SOLIDUS
1837
,"/", // Folded result
1839
":" // U+FF1A: FULLWIDTH COLON
1840
,":", // Folded result
1842
"⁏" // U+204F: REVERSED SEMICOLON
1843
+ ";" // U+FF1B: FULLWIDTH SEMICOLON
1844
,";", // Folded result
1846
"?" // U+FF1F: FULLWIDTH QUESTION MARK
1847
,"?", // Folded result
1849
"⁇" // U+2047: DOUBLE QUESTION MARK
1850
,"??", // Folded result
1852
"⁈" // U+2048: QUESTION EXCLAMATION MARK
1853
,"?!", // Folded result
1855
"@" // U+FF20: FULLWIDTH COMMERCIAL AT
1856
,"@", // Folded result
1858
"\" // U+FF3C: FULLWIDTH REVERSE SOLIDUS
1859
,"\\", // Folded result
1861
"‸" // U+2038: CARET
1862
+ "^" // U+FF3E: FULLWIDTH CIRCUMFLEX ACCENT
1863
,"^", // Folded result
1865
"_" // U+FF3F: FULLWIDTH LOW LINE
1866
,"_", // Folded result
1868
"⁓" // U+2053: SWUNG DASH
1869
+ "~" // U+FF5E: FULLWIDTH TILDE
1870
,"~", // Folded result
1873
// Construct input text and expected output tokens
1874
List<String> expectedOutputTokens = new ArrayList<String>();
1875
StringBuilder inputText = new StringBuilder();
1876
for (int n = 0 ; n < foldings.length ; n += 2) {
1878
inputText.append(' '); // Space between tokens
1880
inputText.append(foldings[n]);
1882
// Construct the expected output token: the ASCII string to fold to,
1883
// duplicated as many times as the number of characters in the input text.
1884
StringBuilder expected = new StringBuilder();
1885
int numChars = foldings[n].length();
1886
for (int m = 0 ; m < numChars; ++m) {
1887
expected.append(foldings[n + 1]);
1889
expectedOutputTokens.add(expected.toString());
1892
TokenStream stream = new MockTokenizer(new StringReader(inputText.toString()), MockTokenizer.WHITESPACE, false);
1893
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
1894
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
1895
Iterator<String> expectedIter = expectedOutputTokens.iterator();
1897
while (expectedIter.hasNext()) {
1898
assertTermEquals(expectedIter.next(), filter, termAtt);
1900
assertFalse(filter.incrementToken());
1903
/**
 * Advances {@code stream} by one token and asserts that {@code termAtt}
 * then renders as {@code expected}.
 *
 * @param expected the term text the next token is required to have
 * @param stream   the token stream to advance by exactly one token
 * @param termAtt  the attribute whose {@code toString()} is compared with
 *                 {@code expected}; the caller visible in this file obtains it
 *                 from the same stream via
 *                 {@code getAttribute(CharTermAttribute.class)}
 * @throws Exception declared by the signature; anything thrown while
 *                   advancing the stream propagates to the calling test
 */
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt) throws Exception {
1904
// The stream must still have a token to produce; a false return fails the assertion.
assertTrue(stream.incrementToken());
1905
// Compare only the term text; no other token attribute is inspected here.
assertEquals(expected, termAtt.toString());