~ubuntu-branches/ubuntu/karmic/pypy/karmic

« back to all changes in this revision

Viewing changes to lib-python/2.4.1/test/test_multibytecodec_support.py

  • Committer: Bazaar Package Importer
  • Author(s): Alexandre Fayolle
  • Date: 2007-04-13 09:33:09 UTC
  • Revision ID: james.westby@ubuntu.com-20070413093309-yoojh4jcoocu2krz
Tags: upstream-1.0.0
Import upstream version 1.0.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/env python
 
2
#
 
3
# test_multibytecodec_support.py
 
4
#   Common Unittest Routines for CJK codecs
 
5
#
 
6
# $CJKCodecs: test_multibytecodec_support.py,v 1.6 2004/06/19 06:09:55 perky Exp $
 
7
 
 
8
import sys, codecs, os.path
 
9
import unittest
 
10
from test import test_support
 
11
from StringIO import StringIO
 
12
 
 
13
# Module-wide switch read by the helpers in this file: a true value makes
# load_teststring() read its samples from the ``sampletexts`` directory and
# makes test_streamwriter() exercise read/readline/readlines; a false value
# selects the ``test.cjkencodings_test`` data and the ``read``-only path.
__cjkcodecs__ = 0 # define this as 0 for python
 
14
 
 
15
class TestBase:
    """Shared test routines for one multibyte codec.

    The class has no base class of its own but calls ``assertEqual`` and
    ``assertRaises`` on ``self``, so a concrete test has to supply them
    (presumably by also deriving from ``unittest.TestCase`` -- confirm
    against the test modules that use this class).  Concrete tests
    configure the class attributes below.
    """

    encoding        = ''   # codec name
    codec           = None # codec tuple (with 4 elements)
    tstring         = ''   # string to test StreamReader

    codectests      = None # must set. codec test tuple
    roundtriptest   = 1    # set if roundtrip is possible with unicode
    has_iso10646    = 0    # set if this encoding contains whole iso10646 map
    xmlcharnametest = None # string to test xmlcharrefreplace

    def setUp(self):
        """Look up the codec (unless one was preset) and unpack its parts."""
        if self.codec is None:
            self.codec = codecs.lookup(self.encoding)
        self.encode, self.decode, self.reader, self.writer = self.codec

    def test_chunkcoding(self):
        """Decode the native sample line by line and compare with UTF-8.

        ``tstring`` is iterated as a sequence of texts; the lines of those
        texts are walked in lockstep and unpacked as (native, utf8) pairs.
        When ``roundtriptest`` is set the decoded line is also re-encoded
        and compared with the native line.
        """
        for native, utf8 in zip(*[StringIO(f).readlines()
                                  for f in self.tstring]):
            u = self.decode(native)[0]
            self.assertEqual(u, utf8.decode('utf-8'))
            if self.roundtriptest:
                self.assertEqual(native, self.encode(u)[0])

    def test_errorhandle(self):
        """Run every (source, scheme, expected) triple of ``codectests``.

        Byte-string sources are decoded, anything else is encoded.  A
        false ``expected`` value means the call must raise UnicodeError.
        """
        for source, scheme, expected in self.codectests:
            if isinstance(source, str):
                func = self.decode
            else:
                func = self.encode
            if expected:
                result = func(source, scheme)[0]
                self.assertEqual(result, expected)
            else:
                self.assertRaises(UnicodeError, func, source, scheme)

    if sys.hexversion >= 0x02030000:
        def test_xmlcharrefreplace(self):
            """Check the built-in ``xmlcharrefreplace`` error handler.

            Skipped for encodings flagged with ``has_iso10646``.
            """
            if self.has_iso10646:
                return

            s = u"\u0b13\u0b23\u0b60 nd eggs"
            # The expected value is plain ASCII: each of the three leading
            # characters is spelled as a decimal character reference.
            self.assertEqual(
                self.encode(s, "xmlcharrefreplace")[0],
                "&#2835;&#2851;&#2912; nd eggs"
            )

        def test_customreplace(self):
            """Check that a custom registered error handler is honoured.

            The handler replaces unencodable characters with a named
            entity taken from ``htmlentitydefs`` when one exists and with
            a decimal character reference otherwise.  Skipped for
            encodings flagged with ``has_iso10646``.
            """
            if self.has_iso10646:
                return

            import htmlentitydefs

            # Map each unicode character to its (decoded) entity name.
            # entitydefs values are either a single latin-1 character or
            # a "&#NNN;" reference, hence the two branches.
            names = {}
            for (key, value) in htmlentitydefs.entitydefs.items():
                if len(value) == 1:
                    names[value.decode('latin-1')] = self.decode(key)[0]
                else:
                    names[unichr(int(value[2:-1]))] = self.decode(key)[0]

            def xmlcharnamereplace(exc):
                if not isinstance(exc, UnicodeEncodeError):
                    raise TypeError("don't know how to handle %r" % exc)
                l = []
                for c in exc.object[exc.start:exc.end]:
                    try:
                        l.append(u"&%s;" % names[c])
                    except KeyError:
                        l.append(u"&#%d;" % ord(c))
                return (u"".join(l), exc.end)

            codecs.register_error(
                "test.xmlcharnamereplace", xmlcharnamereplace)

            if self.xmlcharnametest:
                sin, sout = self.xmlcharnametest
            else:
                sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
                # ASCII-only expectation: named entities where
                # htmlentitydefs knows the character, a decimal reference
                # for U+1234.
                sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
            self.assertEqual(self.encode(sin,
                                        "test.xmlcharnamereplace")[0], sout)

    def test_streamreader(self):
        """Read ``tstring[0]`` through the codec's reader in many ways.

        Every combination of read function and size hint must yield data
        that, written through a UTF-8 writer, equals ``tstring[1]``.
        """
        UTF8Writer = codecs.getwriter('utf-8')
        for name in ["read", "readline", "readlines"]:
            for sizehint in [None, -1] + range(1, 33) + \
                            [64, 128, 256, 512, 1024]:
                istream = self.reader(StringIO(self.tstring[0]))
                ostream = UTF8Writer(StringIO())
                func = getattr(istream, name)
                while 1:
                    data = func(sizehint)
                    if not data:
                        break
                    if name == "readlines":
                        ostream.writelines(data)
                    else:
                        ostream.write(data)

                self.assertEqual(ostream.getvalue(), self.tstring[1])

    def test_streamwriter(self):
        """Write ``tstring[1]`` through the codec's writer in many ways.

        The UTF-8 sample is read with each allowed read function and size
        hint, pushed through the codec's writer, and the result must
        equal ``tstring[0]``.
        """
        if __cjkcodecs__:
            readfuncs = ('read', 'readline', 'readlines')
        else:
            # standard utf8 codec has broken readline and readlines.
            readfuncs = ('read',)
        UTF8Reader = codecs.getreader('utf-8')
        for name in readfuncs:
            for sizehint in [None] + range(1, 33) + \
                            [64, 128, 256, 512, 1024]:
                istream = UTF8Reader(StringIO(self.tstring[1]))
                ostream = self.writer(StringIO())
                func = getattr(istream, name)
                while 1:
                    # A size hint of None means "call without argument".
                    if sizehint is not None:
                        data = func(sizehint)
                    else:
                        data = func()

                    if not data:
                        break
                    if name == "readlines":
                        ostream.writelines(data)
                    else:
                        ostream.write(data)

                self.assertEqual(ostream.getvalue(), self.tstring[0])
 
142
 
 
143
if len(u'\U00012345') == 2: # ucs2 build
    # On this build a character beyond U+FFFF occupies two code units
    # (a surrogate pair), so unichr() and ord() are wrapped to convert
    # between such pairs and their code point.
    _unichr = unichr
    _ord = ord

    def unichr(v):
        """Return the unicode string for code point v (a pair if >= 0x10000)."""
        if v < 0x10000:
            return _unichr(v)
        offset = v - 0x10000
        high = _unichr(0xd800 + (offset >> 10))
        low = _unichr(0xdc00 + (offset & 0x3ff))
        return high + low

    def ord(c):
        """Return the code point of c, combining a two-unit string."""
        if len(c) != 2:
            return _ord(c)
        high, low = c
        return 0x10000 + ((_ord(high) - 0xd800) << 10) + (_ord(low) - 0xdc00)
 
158
 
 
159
class TestBase_Mapping(unittest.TestCase):
    """Check a codec against a character mapping file.

    Subclasses are expected to provide ``encoding``, ``mapfilename`` and
    ``mapfileurl`` (all three are read but not defined here).  The
    optional lists below tune the checks:

    pass_enctest -- (encoded, unicode) pairs whose encode check is skipped
    pass_dectest -- (encoded, unicode) pairs whose decode check is skipped
    supmaps      -- extra (encoded, unicode) pairs to test
    """

    pass_enctest = []
    pass_dectest = []
    supmaps = []

    def __init__(self, *args, **kw):
        """Initialise the test case; skip when the mapping file is absent."""
        unittest.TestCase.__init__(self, *args, **kw)
        if not os.path.exists(self.mapfilename):
            raise test_support.TestSkipped('%s not found, download from %s' %
                    (self.mapfilename, self.mapfileurl))

    def test_mapping_file(self):
        """Test every usable "<charset value> <unicode value>" line.

        Text after '#' is ignored and lines that do not consist of
        exactly two fields are skipped.  The unicode field may join
        several code points with '+'.
        """
        # NOTE(review): eval() is applied to text read from the mapping
        # file; that is only acceptable while the file is a trusted test
        # fixture.
        unichrs = lambda s: u''.join(map(unichr, map(eval, s.split('+'))))
        urt_wa = {}

        mapfile = open(self.mapfilename)
        try:
            for line in mapfile:
                data = line.split('#')[0].strip().split()
                if len(data) != 2:
                    continue

                # Turn the numeric charset value into its big-endian
                # byte string; values in 0x80..0xFF are not tested.
                csetval = eval(data[0])
                if csetval <= 0x7F:
                    csetch = chr(csetval & 0xff)
                elif csetval >= 0x1000000:
                    csetch = chr(csetval >> 24) + \
                             chr((csetval >> 16) & 0xff) + \
                             chr((csetval >> 8) & 0xff) + chr(csetval & 0xff)
                elif csetval >= 0x10000:
                    csetch = chr(csetval >> 16) + \
                             chr((csetval >> 8) & 0xff) + chr(csetval & 0xff)
                elif csetval >= 0x100:
                    csetch = chr(csetval >> 8) + chr(csetval & 0xff)
                else:
                    continue

                unich = unichrs(data[1])
                # Compare strings rather than calling ord(): unich may
                # hold more than one code point.  Only the first mapping
                # seen for a given unicode string is tested.
                if unich == u'\ufffd' or unich in urt_wa:
                    continue
                urt_wa[unich] = csetch

                self._testpoint(csetch, unich)
        finally:
            mapfile.close()

    def test_mapping_supplemental(self):
        """Test the additional pairs listed in ``supmaps``."""
        for mapping in self.supmaps:
            self._testpoint(*mapping)

    def _testpoint(self, csetch, unich):
        """Assert that csetch and unich convert into each other.

        Either direction is skipped when the pair is listed in
        ``pass_enctest`` or ``pass_dectest`` respectively.
        """
        if (csetch, unich) not in self.pass_enctest:
            self.assertEqual(unich.encode(self.encoding), csetch)
        if (csetch, unich) not in self.pass_dectest:
            self.assertEqual(unicode(csetch, self.encoding), unich)
 
211
 
 
212
def load_teststring(encoding):
    """Return the sample text for encoding.

    With ``__cjkcodecs__`` set, the result is a tuple holding the
    contents of ``sampletexts/<encoding>.txt`` and
    ``sampletexts/<encoding>.utf8``, in that order.  Otherwise the entry
    for encoding is taken from ``test.cjkencodings_test.teststring``.
    """
    if __cjkcodecs__:
        basename = os.path.join('sampletexts', encoding)
        texts = []
        for suffix in ('.txt', '.utf8'):
            # Close each file explicitly instead of relying on the
            # garbage collector to do it.
            sample = open(basename + suffix)
            try:
                texts.append(sample.read())
            finally:
                sample.close()
        return tuple(texts)
    else:
        from test import cjkencodings_test
        return cjkencodings_test.teststring[encoding]
 
220
 
 
221
def register_skip_expected(*cases):
    """Locate each case's mapping file and set the module's skip flag.

    For every case the file named by ``case.mapfilename`` is searched in
    the current and then the parent directory; when found,
    ``case.mapfilename`` is replaced by the located path.  As soon as one
    case has no file, ``skip_expected`` is set to True on that case's
    module and processing stops.  If every file is found,
    ``skip_expected`` is set to False on the module of the last case.

    At least one case must be passed.
    """
    for case in cases: # len(cases) must be 1 at least.
        located = None
        for directory in (os.path.curdir, os.path.pardir):
            candidate = os.path.join(directory, case.mapfilename)
            if os.path.exists(candidate):
                located = candidate
                break
        if located is None:
            sys.modules[case.__module__].skip_expected = True
            return
        case.mapfilename = located
    sys.modules[case.__module__].skip_expected = False