6
//apply NFKD, then NFC; then apply NFD only to codepoints outside the Latin-1 range
8
":: [^\u0000-\u00FF] NFD ();"
10
//remove non-spacing marks
13
//change some non-Latin-1 codepoints to similar characters in Latin-1 range
14
" [\u2000-\u200A\u3000] > ' ' ;"
15
" [\u01C3\u2762]> '!' ;"
17
" [\u203D\u2048] > '?!' ;"
18
" [\u02BA\u030B\u030E\u2033\u3003\u201C-\u201F] > '\"' ;"
19
" [\u066A\u2030\u2031] > '%' ;"
20
" [\u02B9\u02BC\u02C8\u0301\u2032\u2018-\u201B] > '' ;"
21
" [\u066D\u2217\u2731] > '*' ;"
22
" [\u060C\u201A\u3001] > ',' ;"
23
" [\u2010-\u2013\u2212] > '-' ;"
24
" [\u2023\u06D4\u3002\u0589] > '.' ;"
25
" [\u0338\u2044\u2215] > '/' ;"
28
" [\u2039\u2329\u3008] > '<' ;"
30
" [\u203A\u232A\u3009] > '>' ;"
31
" [\u037E\u061F] > '?' ;"
34
" [\u2102\u212D] > C ;"
35
" [\u2107\u2130] > E ;"
36
" [\u2131\u2132] > F ;"
37
" [\u210B\u210C\u210D] > H ;"
38
" [\u2110\u2111\u2160] > I ;"
45
" [\u211B\u211C\u211D] > R ;"
46
" [\u2124\u2128] > Z ;"
48
" [\u02C4\u02C6\u0302\u2303] > '^' ;"
49
" [\u02CD\u0331\u0332\u2017] > '_' ;"
50
" [\u02CB\u0300\u2035] > '`' ;"
51
" [\u212E\u212F] > e ;"
52
" [\u0261\u210A] > g ;"
53
" [\u04BB\u210E] > h ;"
58
" [\u01C0\u2223\u2758] > '|' ;"
59
" [\u02DC\u0303\u223C\uFF5E] > '~' ;"
63
" [\u20A0-\u20AF] > \u00A4 ;"
66
" [\u226A\u300A] > \u00AB ;"
69
" [\u02C9\u0304\u0305] > \u00AF ;"
70
" [\u02DA\u030A\u2070\u2218] > \u00B0 ;"
72
" [\u02B9\u02CA\u0301\u2032] > \u00B4 ;"
73
" [\u204B\u2761] > \u00B6 ;"
74
" [\u2022\u2024\u2027\u2219\u22C5\u30FB] > \u00B7 ;"
76
" [\u226B\u300B] > \u00BB ;"
84
//replace every remaining non-Latin-1 codepoint with a space
85
"[^\u0000-\u00FF] > ' ';"