5
http://www.unicode.org/Public/5.2.0/ucd/Blocks.txt
7
The blocks() function returns a list of Block named tuples: (start, end, name).
8
The block() function returns the Block named tuple containing the given charcode,
13
from __future__ import unicode_literals
17
__all__ = ['blocks', 'block']
20
Block = collections.namedtuple('Block', 'start end name')
24
"""Returns a sorted list of Block tuples: (start, end, name)."""
30
for line in block_data.splitlines():
31
if line and not line.startswith('#'):
33
range_, text = line.split(';', 1)
34
start, end = range_.split('..', 1)
35
_blocks.append(Block(int(start, 16), int(end, 16), text.strip()))
43
"""Returns the Block for the charcode, if any."""
45
lo, hi = 0, len(blocks_)
48
if blocks_[mid].start > charcode:
50
elif blocks_[mid].end < charcode:
56
# literal Blocks-5.2.0.txt
59
# Date: 2009-05-19, 16:21:00 PDT [KW]
61
# Unicode Character Database
62
# Copyright (c) 1991-2009 Unicode, Inc.
63
# For terms of use, see http://www.unicode.org/terms_of_use.html
64
# For documentation, see http://www.unicode.org/reports/tr44/
66
# Note: The casing of block names is not normative.
67
# For example, "Basic Latin" and "BASIC LATIN" are equivalent.
70
# Start Code..End Code; Block Name
72
# ================================================
74
# Note: When comparing block names, casing, whitespace, hyphens,
75
# and underbars are ignored.
76
# For example, "Latin Extended-A" and "latin extended a" are equivalent.
77
# For more information on the comparison of property values,
78
# see UAX #44: http://www.unicode.org/reports/tr44/
80
# All code points not explicitly listed for Block
81
# have the value No_Block.
85
# @missing: 0000..10FFFF; No_Block
87
0000..007F; Basic Latin
88
0080..00FF; Latin-1 Supplement
89
0100..017F; Latin Extended-A
90
0180..024F; Latin Extended-B
91
0250..02AF; IPA Extensions
92
02B0..02FF; Spacing Modifier Letters
93
0300..036F; Combining Diacritical Marks
94
0370..03FF; Greek and Coptic
96
0500..052F; Cyrillic Supplement
101
0750..077F; Arabic Supplement
104
0800..083F; Samaritan
105
0900..097F; Devanagari
113
0D00..0D7F; Malayalam
120
1100..11FF; Hangul Jamo
122
1380..139F; Ethiopic Supplement
124
1400..167F; Unified Canadian Aboriginal Syllabics
132
1800..18AF; Mongolian
133
18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
136
1980..19DF; New Tai Lue
137
19E0..19FF; Khmer Symbols
141
1B80..1BBF; Sundanese
144
1CD0..1CFF; Vedic Extensions
145
1D00..1D7F; Phonetic Extensions
146
1D80..1DBF; Phonetic Extensions Supplement
147
1DC0..1DFF; Combining Diacritical Marks Supplement
148
1E00..1EFF; Latin Extended Additional
149
1F00..1FFF; Greek Extended
150
2000..206F; General Punctuation
151
2070..209F; Superscripts and Subscripts
152
20A0..20CF; Currency Symbols
153
20D0..20FF; Combining Diacritical Marks for Symbols
154
2100..214F; Letterlike Symbols
155
2150..218F; Number Forms
157
2200..22FF; Mathematical Operators
158
2300..23FF; Miscellaneous Technical
159
2400..243F; Control Pictures
160
2440..245F; Optical Character Recognition
161
2460..24FF; Enclosed Alphanumerics
162
2500..257F; Box Drawing
163
2580..259F; Block Elements
164
25A0..25FF; Geometric Shapes
165
2600..26FF; Miscellaneous Symbols
167
27C0..27EF; Miscellaneous Mathematical Symbols-A
168
27F0..27FF; Supplemental Arrows-A
169
2800..28FF; Braille Patterns
170
2900..297F; Supplemental Arrows-B
171
2980..29FF; Miscellaneous Mathematical Symbols-B
172
2A00..2AFF; Supplemental Mathematical Operators
173
2B00..2BFF; Miscellaneous Symbols and Arrows
174
2C00..2C5F; Glagolitic
175
2C60..2C7F; Latin Extended-C
177
2D00..2D2F; Georgian Supplement
179
2D80..2DDF; Ethiopic Extended
180
2DE0..2DFF; Cyrillic Extended-A
181
2E00..2E7F; Supplemental Punctuation
182
2E80..2EFF; CJK Radicals Supplement
183
2F00..2FDF; Kangxi Radicals
184
2FF0..2FFF; Ideographic Description Characters
185
3000..303F; CJK Symbols and Punctuation
189
3130..318F; Hangul Compatibility Jamo
191
31A0..31BF; Bopomofo Extended
192
31C0..31EF; CJK Strokes
193
31F0..31FF; Katakana Phonetic Extensions
194
3200..32FF; Enclosed CJK Letters and Months
195
3300..33FF; CJK Compatibility
196
3400..4DBF; CJK Unified Ideographs Extension A
197
4DC0..4DFF; Yijing Hexagram Symbols
198
4E00..9FFF; CJK Unified Ideographs
199
A000..A48F; Yi Syllables
200
A490..A4CF; Yi Radicals
203
A640..A69F; Cyrillic Extended-B
205
A700..A71F; Modifier Tone Letters
206
A720..A7FF; Latin Extended-D
207
A800..A82F; Syloti Nagri
208
A830..A83F; Common Indic Number Forms
210
A880..A8DF; Saurashtra
211
A8E0..A8FF; Devanagari Extended
214
A960..A97F; Hangul Jamo Extended-A
217
AA60..AA7F; Myanmar Extended-A
219
ABC0..ABFF; Meetei Mayek
220
AC00..D7AF; Hangul Syllables
221
D7B0..D7FF; Hangul Jamo Extended-B
222
D800..DB7F; High Surrogates
223
DB80..DBFF; High Private Use Surrogates
224
DC00..DFFF; Low Surrogates
225
E000..F8FF; Private Use Area
226
F900..FAFF; CJK Compatibility Ideographs
227
FB00..FB4F; Alphabetic Presentation Forms
228
FB50..FDFF; Arabic Presentation Forms-A
229
FE00..FE0F; Variation Selectors
230
FE10..FE1F; Vertical Forms
231
FE20..FE2F; Combining Half Marks
232
FE30..FE4F; CJK Compatibility Forms
233
FE50..FE6F; Small Form Variants
234
FE70..FEFF; Arabic Presentation Forms-B
235
FF00..FFEF; Halfwidth and Fullwidth Forms
237
10000..1007F; Linear B Syllabary
238
10080..100FF; Linear B Ideograms
239
10100..1013F; Aegean Numbers
240
10140..1018F; Ancient Greek Numbers
241
10190..101CF; Ancient Symbols
242
101D0..101FF; Phaistos Disc
245
10300..1032F; Old Italic
247
10380..1039F; Ugaritic
248
103A0..103DF; Old Persian
249
10400..1044F; Deseret
250
10450..1047F; Shavian
251
10480..104AF; Osmanya
252
10800..1083F; Cypriot Syllabary
253
10840..1085F; Imperial Aramaic
254
10900..1091F; Phoenician
256
10A00..10A5F; Kharoshthi
257
10A60..10A7F; Old South Arabian
258
10B00..10B3F; Avestan
259
10B40..10B5F; Inscriptional Parthian
260
10B60..10B7F; Inscriptional Pahlavi
261
10C00..10C4F; Old Turkic
262
10E60..10E7F; Rumi Numeral Symbols
264
12000..123FF; Cuneiform
265
12400..1247F; Cuneiform Numbers and Punctuation
266
13000..1342F; Egyptian Hieroglyphs
267
1D000..1D0FF; Byzantine Musical Symbols
268
1D100..1D1FF; Musical Symbols
269
1D200..1D24F; Ancient Greek Musical Notation
270
1D300..1D35F; Tai Xuan Jing Symbols
271
1D360..1D37F; Counting Rod Numerals
272
1D400..1D7FF; Mathematical Alphanumeric Symbols
273
1F000..1F02F; Mahjong Tiles
274
1F030..1F09F; Domino Tiles
275
1F100..1F1FF; Enclosed Alphanumeric Supplement
276
1F200..1F2FF; Enclosed Ideographic Supplement
277
20000..2A6DF; CJK Unified Ideographs Extension B
278
2A700..2B73F; CJK Unified Ideographs Extension C
279
2F800..2FA1F; CJK Compatibility Ideographs Supplement
281
E0100..E01EF; Variation Selectors Supplement
282
F0000..FFFFF; Supplementary Private Use Area-A
283
100000..10FFFF; Supplementary Private Use Area-B