~ubuntu-branches/ubuntu/vivid/frescobaldi/vivid

« back to all changes in this revision

Viewing changes to frescobaldi_app/unicode_blocks.py

  • Committer: Package Import Robot
  • Author(s): Ryan Kavanagh
  • Date: 2012-01-03 16:20:11 UTC
  • mfrom: (1.4.1)
  • Revision ID: package-import@ubuntu.com-20120103162011-tsjkwl4sntwmprea
Tags: 2.0.0-1
* New upstream release 
* Drop the following unneeded patches:
  + 01_checkmodules_no_python-kde4_build-dep.diff
  + 02_no_pyc.diff
  + 04_no_binary_lilypond_upgrades.diff
* Needs new dependency python-poppler-qt4
* Update debian/watch for new download path
* Update copyright file with new holders and years

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#! python
 
2
 
 
3
"""
 
4
Unicode Block data,
 
5
http://www.unicode.org/Public/5.2.0/ucd/Blocks.txt
 
6
 
 
7
The blocks() function returns a list of Block named tuples: (start, end, name).
 
8
The block() function returns the Block named tuple containing the given charcode,
 
9
if any.
 
10
 
 
11
"""
 
12
 
 
13
from __future__ import unicode_literals
 
14
 
 
15
import collections
 
16
 
 
17
__all__ = ['blocks', 'block']
 
18
 
 
19
 
 
20
# Record describing one Unicode block: first code point, last code point
# (inclusive, as block() treats it) and the block's name.
Block = collections.namedtuple('Block', 'start end name')
 
21
 
 
22
 
 
23
def blocks():
    """Return the sorted list of Block tuples: (start, end, name).

    The list is parsed from the module-level ``block_data`` text on the
    first call and cached in the module global ``_blocks``; later calls
    return that same list object.

    Empty lines and lines starting with '#' are ignored.  Any other line
    that does not have the form ``START..END; Name`` (with hexadecimal
    START and END) is skipped rather than treated as an error.
    """
    global _blocks
    try:
        return _blocks
    except NameError:
        pass  # not built yet: fall through and parse the data

    # Build into a local list and publish it only once it is complete and
    # sorted, so an interrupted or concurrent first call can never leave a
    # partial or unsorted list cached in the global.
    parsed = []
    for line in block_data.splitlines():
        if not line or line.startswith('#'):
            continue
        try:
            range_, text = line.split(';', 1)
            start, end = range_.split('..', 1)
            entry = Block(int(start, 16), int(end, 16), text.strip())
        except ValueError:
            # Malformed line (missing separator or non-hex number):
            # deliberately skipped, parsing is best-effort.
            continue
        parsed.append(entry)
    parsed.sort()
    _blocks = parsed
    return _blocks
 
40
 
 
41
 
 
42
def block(charcode):
    """Return the Block whose range contains charcode, or None if there is none.

    Performs a binary search over the sorted list returned by blocks(),
    comparing charcode against each candidate's start and end (inclusive).
    """
    table = blocks()
    low = 0
    high = len(table)
    while low < high:
        middle = (low + high) // 2
        candidate = table[middle]
        if candidate.start > charcode:
            # charcode lies before this block: search the lower half
            high = middle
            continue
        if candidate.end < charcode:
            # charcode lies after this block: search the upper half
            low = middle + 1
            continue
        return candidate
    # No block covers this code point.
    return None
 
54
 
 
55
 
 
56
# Literal copy of Blocks-5.2.0.txt from the Unicode Character Database;
# parsed on first use by blocks().
 
57
block_data = """\
 
58
# Blocks-5.2.0.txt
 
59
# Date: 2009-05-19, 16:21:00 PDT [KW]
 
60
#
 
61
# Unicode Character Database
 
62
# Copyright (c) 1991-2009 Unicode, Inc.
 
63
# For terms of use, see http://www.unicode.org/terms_of_use.html
 
64
# For documentation, see http://www.unicode.org/reports/tr44/
 
65
#
 
66
# Note:   The casing of block names is not normative.
 
67
#         For example, "Basic Latin" and "BASIC LATIN" are equivalent.
 
68
#
 
69
# Format:
 
70
# Start Code..End Code; Block Name
 
71
 
 
72
# ================================================
 
73
 
 
74
# Note:   When comparing block names, casing, whitespace, hyphens,
 
75
#         and underbars are ignored.
 
76
#         For example, "Latin Extended-A" and "latin extended a" are equivalent.
 
77
#         For more information on the comparison of property values, 
 
78
#            see UAX #44: http://www.unicode.org/reports/tr44/
 
79
#
 
80
#  All code points not explicitly listed for Block
 
81
#  have the value No_Block.
 
82
 
 
83
# Property:     Block
 
84
#
 
85
# @missing: 0000..10FFFF; No_Block
 
86
 
 
87
0000..007F; Basic Latin
 
88
0080..00FF; Latin-1 Supplement
 
89
0100..017F; Latin Extended-A
 
90
0180..024F; Latin Extended-B
 
91
0250..02AF; IPA Extensions
 
92
02B0..02FF; Spacing Modifier Letters
 
93
0300..036F; Combining Diacritical Marks
 
94
0370..03FF; Greek and Coptic
 
95
0400..04FF; Cyrillic
 
96
0500..052F; Cyrillic Supplement
 
97
0530..058F; Armenian
 
98
0590..05FF; Hebrew
 
99
0600..06FF; Arabic
 
100
0700..074F; Syriac
 
101
0750..077F; Arabic Supplement
 
102
0780..07BF; Thaana
 
103
07C0..07FF; NKo
 
104
0800..083F; Samaritan
 
105
0900..097F; Devanagari
 
106
0980..09FF; Bengali
 
107
0A00..0A7F; Gurmukhi
 
108
0A80..0AFF; Gujarati
 
109
0B00..0B7F; Oriya
 
110
0B80..0BFF; Tamil
 
111
0C00..0C7F; Telugu
 
112
0C80..0CFF; Kannada
 
113
0D00..0D7F; Malayalam
 
114
0D80..0DFF; Sinhala
 
115
0E00..0E7F; Thai
 
116
0E80..0EFF; Lao
 
117
0F00..0FFF; Tibetan
 
118
1000..109F; Myanmar
 
119
10A0..10FF; Georgian
 
120
1100..11FF; Hangul Jamo
 
121
1200..137F; Ethiopic
 
122
1380..139F; Ethiopic Supplement
 
123
13A0..13FF; Cherokee
 
124
1400..167F; Unified Canadian Aboriginal Syllabics
 
125
1680..169F; Ogham
 
126
16A0..16FF; Runic
 
127
1700..171F; Tagalog
 
128
1720..173F; Hanunoo
 
129
1740..175F; Buhid
 
130
1760..177F; Tagbanwa
 
131
1780..17FF; Khmer
 
132
1800..18AF; Mongolian
 
133
18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
 
134
1900..194F; Limbu
 
135
1950..197F; Tai Le
 
136
1980..19DF; New Tai Lue
 
137
19E0..19FF; Khmer Symbols
 
138
1A00..1A1F; Buginese
 
139
1A20..1AAF; Tai Tham
 
140
1B00..1B7F; Balinese
 
141
1B80..1BBF; Sundanese
 
142
1C00..1C4F; Lepcha
 
143
1C50..1C7F; Ol Chiki
 
144
1CD0..1CFF; Vedic Extensions
 
145
1D00..1D7F; Phonetic Extensions
 
146
1D80..1DBF; Phonetic Extensions Supplement
 
147
1DC0..1DFF; Combining Diacritical Marks Supplement
 
148
1E00..1EFF; Latin Extended Additional
 
149
1F00..1FFF; Greek Extended
 
150
2000..206F; General Punctuation
 
151
2070..209F; Superscripts and Subscripts
 
152
20A0..20CF; Currency Symbols
 
153
20D0..20FF; Combining Diacritical Marks for Symbols
 
154
2100..214F; Letterlike Symbols
 
155
2150..218F; Number Forms
 
156
2190..21FF; Arrows
 
157
2200..22FF; Mathematical Operators
 
158
2300..23FF; Miscellaneous Technical
 
159
2400..243F; Control Pictures
 
160
2440..245F; Optical Character Recognition
 
161
2460..24FF; Enclosed Alphanumerics
 
162
2500..257F; Box Drawing
 
163
2580..259F; Block Elements
 
164
25A0..25FF; Geometric Shapes
 
165
2600..26FF; Miscellaneous Symbols
 
166
2700..27BF; Dingbats
 
167
27C0..27EF; Miscellaneous Mathematical Symbols-A
 
168
27F0..27FF; Supplemental Arrows-A
 
169
2800..28FF; Braille Patterns
 
170
2900..297F; Supplemental Arrows-B
 
171
2980..29FF; Miscellaneous Mathematical Symbols-B
 
172
2A00..2AFF; Supplemental Mathematical Operators
 
173
2B00..2BFF; Miscellaneous Symbols and Arrows
 
174
2C00..2C5F; Glagolitic
 
175
2C60..2C7F; Latin Extended-C
 
176
2C80..2CFF; Coptic
 
177
2D00..2D2F; Georgian Supplement
 
178
2D30..2D7F; Tifinagh
 
179
2D80..2DDF; Ethiopic Extended
 
180
2DE0..2DFF; Cyrillic Extended-A
 
181
2E00..2E7F; Supplemental Punctuation
 
182
2E80..2EFF; CJK Radicals Supplement
 
183
2F00..2FDF; Kangxi Radicals
 
184
2FF0..2FFF; Ideographic Description Characters
 
185
3000..303F; CJK Symbols and Punctuation
 
186
3040..309F; Hiragana
 
187
30A0..30FF; Katakana
 
188
3100..312F; Bopomofo
 
189
3130..318F; Hangul Compatibility Jamo
 
190
3190..319F; Kanbun
 
191
31A0..31BF; Bopomofo Extended
 
192
31C0..31EF; CJK Strokes
 
193
31F0..31FF; Katakana Phonetic Extensions
 
194
3200..32FF; Enclosed CJK Letters and Months
 
195
3300..33FF; CJK Compatibility
 
196
3400..4DBF; CJK Unified Ideographs Extension A
 
197
4DC0..4DFF; Yijing Hexagram Symbols
 
198
4E00..9FFF; CJK Unified Ideographs
 
199
A000..A48F; Yi Syllables
 
200
A490..A4CF; Yi Radicals
 
201
A4D0..A4FF; Lisu
 
202
A500..A63F; Vai
 
203
A640..A69F; Cyrillic Extended-B
 
204
A6A0..A6FF; Bamum
 
205
A700..A71F; Modifier Tone Letters
 
206
A720..A7FF; Latin Extended-D
 
207
A800..A82F; Syloti Nagri
 
208
A830..A83F; Common Indic Number Forms
 
209
A840..A87F; Phags-pa
 
210
A880..A8DF; Saurashtra
 
211
A8E0..A8FF; Devanagari Extended
 
212
A900..A92F; Kayah Li
 
213
A930..A95F; Rejang
 
214
A960..A97F; Hangul Jamo Extended-A
 
215
A980..A9DF; Javanese
 
216
AA00..AA5F; Cham
 
217
AA60..AA7F; Myanmar Extended-A
 
218
AA80..AADF; Tai Viet
 
219
ABC0..ABFF; Meetei Mayek
 
220
AC00..D7AF; Hangul Syllables
 
221
D7B0..D7FF; Hangul Jamo Extended-B
 
222
D800..DB7F; High Surrogates
 
223
DB80..DBFF; High Private Use Surrogates
 
224
DC00..DFFF; Low Surrogates
 
225
E000..F8FF; Private Use Area
 
226
F900..FAFF; CJK Compatibility Ideographs
 
227
FB00..FB4F; Alphabetic Presentation Forms
 
228
FB50..FDFF; Arabic Presentation Forms-A
 
229
FE00..FE0F; Variation Selectors
 
230
FE10..FE1F; Vertical Forms
 
231
FE20..FE2F; Combining Half Marks
 
232
FE30..FE4F; CJK Compatibility Forms
 
233
FE50..FE6F; Small Form Variants
 
234
FE70..FEFF; Arabic Presentation Forms-B
 
235
FF00..FFEF; Halfwidth and Fullwidth Forms
 
236
FFF0..FFFF; Specials
 
237
10000..1007F; Linear B Syllabary
 
238
10080..100FF; Linear B Ideograms
 
239
10100..1013F; Aegean Numbers
 
240
10140..1018F; Ancient Greek Numbers
 
241
10190..101CF; Ancient Symbols
 
242
101D0..101FF; Phaistos Disc
 
243
10280..1029F; Lycian
 
244
102A0..102DF; Carian
 
245
10300..1032F; Old Italic
 
246
10330..1034F; Gothic
 
247
10380..1039F; Ugaritic
 
248
103A0..103DF; Old Persian
 
249
10400..1044F; Deseret
 
250
10450..1047F; Shavian
 
251
10480..104AF; Osmanya
 
252
10800..1083F; Cypriot Syllabary
 
253
10840..1085F; Imperial Aramaic
 
254
10900..1091F; Phoenician
 
255
10920..1093F; Lydian
 
256
10A00..10A5F; Kharoshthi
 
257
10A60..10A7F; Old South Arabian
 
258
10B00..10B3F; Avestan
 
259
10B40..10B5F; Inscriptional Parthian
 
260
10B60..10B7F; Inscriptional Pahlavi
 
261
10C00..10C4F; Old Turkic
 
262
10E60..10E7F; Rumi Numeral Symbols
 
263
11080..110CF; Kaithi
 
264
12000..123FF; Cuneiform
 
265
12400..1247F; Cuneiform Numbers and Punctuation
 
266
13000..1342F; Egyptian Hieroglyphs
 
267
1D000..1D0FF; Byzantine Musical Symbols
 
268
1D100..1D1FF; Musical Symbols
 
269
1D200..1D24F; Ancient Greek Musical Notation
 
270
1D300..1D35F; Tai Xuan Jing Symbols
 
271
1D360..1D37F; Counting Rod Numerals
 
272
1D400..1D7FF; Mathematical Alphanumeric Symbols
 
273
1F000..1F02F; Mahjong Tiles
 
274
1F030..1F09F; Domino Tiles
 
275
1F100..1F1FF; Enclosed Alphanumeric Supplement
 
276
1F200..1F2FF; Enclosed Ideographic Supplement
 
277
20000..2A6DF; CJK Unified Ideographs Extension B
 
278
2A700..2B73F; CJK Unified Ideographs Extension C
 
279
2F800..2FA1F; CJK Compatibility Ideographs Supplement
 
280
E0000..E007F; Tags
 
281
E0100..E01EF; Variation Selectors Supplement
 
282
F0000..FFFFF; Supplementary Private Use Area-A
 
283
100000..10FFFF; Supplementary Private Use Area-B
 
284
 
 
285
# EOF
 
286
"""