1
# -*- coding: iso-8859-1 -*-
2
""" Test script for the Unicode implementation.
4
Written by Marc-Andre Lemburg (mal@lemburg.com).
6
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
14
from test import support, string_tests
16
# Error handling (bad decoder return)
17
def search_function(encoding):
    """Codec search function that serves two deliberately broken codecs.

    Returns a 4-tuple ``(encode, decode, None, None)`` for the names
    "test.unicode1" and "test.unicode2", and None for any other name.
    The callables return malformed results on purpose (see the inline
    comments) so that the error handling for bad codec return values
    can be exercised.
    """
    def decode1(input, errors="strict"):
        return 42 # not a tuple

    def encode1(input, errors="strict"):
        return 42 # not a tuple

    def encode2(input, errors="strict"):
        return (42, 42) # no unicode

    def decode2(input, errors="strict"):
        return (42, 42) # no unicode

    if encoding == "test.unicode1":
        return (encode1, decode1, None, None)
    if encoding == "test.unicode2":
        return (encode2, decode2, None, None)
    # Any other encoding name is not handled here.
    return None
32
codecs.register(search_function)
35
string_tests.CommonTest,
36
string_tests.MixinStrUnicodeUserStringTest,
37
string_tests.MixinStrUnicodeTest,
42
self.warning_filters = warnings.filters[:]
45
warnings.filters = self.warning_filters
47
def checkequalnofix(self, result, object, methodname, *args):
48
method = getattr(object, methodname)
49
realresult = method(*args)
50
self.assertEqual(realresult, result)
51
self.assert_(type(realresult) is type(result))
53
# if the original is returned make sure that
54
# this doesn't happen with subclasses
55
if realresult is object:
58
return 'usub(%r)' % str.__repr__(self)
60
method = getattr(object, methodname)
61
realresult = method(*args)
62
self.assertEqual(realresult, result)
63
self.assert_(object is not realresult)
65
def test_literals(self):
66
self.assertEqual('\xff', '\u00ff')
67
self.assertEqual('\uffff', '\U0000ffff')
68
self.assertRaises(SyntaxError, eval, '\'\\Ufffffffe\'')
69
self.assertRaises(SyntaxError, eval, '\'\\Uffffffff\'')
70
self.assertRaises(SyntaxError, eval, '\'\\U%08x\'' % 0x110000)
71
# raw strings should not have unicode escapes
72
self.assertNotEquals(r"\u0020", " ")
75
if not sys.platform.startswith('java'):
76
# Test basic sanity of repr()
77
self.assertEqual(ascii('abc'), "'abc'")
78
self.assertEqual(ascii('ab\\c'), "'ab\\\\c'")
79
self.assertEqual(ascii('ab\\'), "'ab\\\\'")
80
self.assertEqual(ascii('\\c'), "'\\\\c'")
81
self.assertEqual(ascii('\\'), "'\\\\'")
82
self.assertEqual(ascii('\n'), "'\\n'")
83
self.assertEqual(ascii('\r'), "'\\r'")
84
self.assertEqual(ascii('\t'), "'\\t'")
85
self.assertEqual(ascii('\b'), "'\\x08'")
86
self.assertEqual(ascii("'\""), """'\\'"'""")
87
self.assertEqual(ascii("'\""), """'\\'"'""")
88
self.assertEqual(ascii("'"), '''"'"''')
89
self.assertEqual(ascii('"'), """'"'""")
91
"'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
92
"\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
93
"\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
94
"JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
95
"\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
96
"\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
97
"\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
98
"\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
99
"\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
100
"\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
101
"\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
102
"\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
103
"\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
105
testrepr = ascii(''.join(map(chr, range(256))))
106
self.assertEqual(testrepr, latin1repr)
107
# Test ascii works on wide unicode escapes without overflow.
108
self.assertEqual(ascii("\U00010000" * 39 + "\uffff" * 4096),
109
ascii("\U00010000" * 39 + "\uffff" * 4096))
114
self.assertRaises(TypeError, ascii, WrongRepr())
117
if not sys.platform.startswith('java'):
118
# Test basic sanity of repr()
119
self.assertEqual(repr('abc'), "'abc'")
120
self.assertEqual(repr('ab\\c'), "'ab\\\\c'")
121
self.assertEqual(repr('ab\\'), "'ab\\\\'")
122
self.assertEqual(repr('\\c'), "'\\\\c'")
123
self.assertEqual(repr('\\'), "'\\\\'")
124
self.assertEqual(repr('\n'), "'\\n'")
125
self.assertEqual(repr('\r'), "'\\r'")
126
self.assertEqual(repr('\t'), "'\\t'")
127
self.assertEqual(repr('\b'), "'\\x08'")
128
self.assertEqual(repr("'\""), """'\\'"'""")
129
self.assertEqual(repr("'\""), """'\\'"'""")
130
self.assertEqual(repr("'"), '''"'"''')
131
self.assertEqual(repr('"'), """'"'""")
133
"'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
134
"\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
135
"\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
136
"JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
137
"\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
138
"\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
139
"\\x9c\\x9d\\x9e\\x9f\\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9"
140
"\xaa\xab\xac\\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
141
"\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5"
142
"\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
143
"\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1"
144
"\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
145
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd"
147
testrepr = repr(''.join(map(chr, range(256))))
148
self.assertEqual(testrepr, latin1repr)
149
# Test repr works on wide unicode escapes without overflow.
150
self.assertEqual(repr("\U00010000" * 39 + "\uffff" * 4096),
151
repr("\U00010000" * 39 + "\uffff" * 4096))
156
self.assertRaises(TypeError, repr, WrongRepr())
158
def test_iterators(self):
159
# Make sure unicode objects have an __iter__ method
160
it = "\u1111\u2222\u3333".__iter__()
161
self.assertEqual(next(it), "\u1111")
162
self.assertEqual(next(it), "\u2222")
163
self.assertEqual(next(it), "\u3333")
164
self.assertRaises(StopIteration, next, it)
166
def test_count(self):
    # Run the shared count() checks, then verify str.count through
    # checkequalnofix with and without explicit start/end arguments.
    string_tests.CommonTest.test_count(self)
    self.checkequalnofix(3, 'aaa', 'count', 'a')
    self.checkequalnofix(0, 'aaa', 'count', 'b')
    # negative start / end values
    self.checkequalnofix(1, 'aaa', 'count', 'a', -1)
    self.checkequalnofix(3, 'aaa', 'count', 'a', -10)
    self.checkequalnofix(2, 'aaa', 'count', 'a', 0, -1)
    self.checkequalnofix(0, 'aaa', 'count', 'a', 0, -10)
180
self.checkequalnofix(0, 'abcdefghiabc', 'find', 'abc')
181
self.checkequalnofix(9, 'abcdefghiabc', 'find', 'abc', 1)
182
self.checkequalnofix(-1, 'abcdefghiabc', 'find', 'def', 4)
184
self.assertRaises(TypeError, 'hello'.find)
185
self.assertRaises(TypeError, 'hello'.find, 42)
187
def test_rfind(self):
    # Run the shared rfind() checks, then verify str.rfind through
    # checkequalnofix, including the empty-needle case.
    string_tests.CommonTest.test_rfind(self)
    self.checkequalnofix(9, 'abcdefghiabc', 'rfind', 'abc')
    self.checkequalnofix(12, 'abcdefghiabc', 'rfind', '')
194
def test_index(self):
    # Run the shared index() checks, then verify str.index directly.
    string_tests.CommonTest.test_index(self)
    self.checkequalnofix(0, 'abcdefghiabc', 'index', '')
    self.checkequalnofix(3, 'abcdefghiabc', 'index', 'def')
    self.checkequalnofix(0, 'abcdefghiabc', 'index', 'abc')
    self.checkequalnofix(9, 'abcdefghiabc', 'index', 'abc', 1)
    # A substring that cannot be found in the searched range is a ValueError.
    self.assertRaises(ValueError, 'abcdefghiabc'.index, 'hib')
    self.assertRaises(ValueError, 'abcdefghiab'.index, 'abc', 1)
    self.assertRaises(ValueError, 'abcdefghi'.index, 'ghi', 8)
    self.assertRaises(ValueError, 'abcdefghi'.index, 'ghi', -1)
205
def test_rindex(self):
    # Run the shared rindex() checks, then verify str.rindex directly.
    string_tests.CommonTest.test_rindex(self)
    self.checkequalnofix(12, 'abcdefghiabc', 'rindex', '')
    self.checkequalnofix(3, 'abcdefghiabc', 'rindex', 'def')
    self.checkequalnofix(9, 'abcdefghiabc', 'rindex', 'abc')
    self.checkequalnofix(0, 'abcdefghiabc', 'rindex', 'abc', 0, -1)

    # A substring that cannot be found in the searched range is a ValueError.
    self.assertRaises(ValueError, 'abcdefghiabc'.rindex, 'hib')
    self.assertRaises(ValueError, 'defghiabc'.rindex, 'def', 1)
    self.assertRaises(ValueError, 'defghiabc'.rindex, 'abc', 0, -1)
    self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, 8)
    self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, -1)
218
def test_maketrans_translate(self):
219
# these work with plain translate()
220
self.checkequalnofix('bbbc', 'abababc', 'translate',
222
self.checkequalnofix('iiic', 'abababc', 'translate',
223
{ord('a'): None, ord('b'): ord('i')})
224
self.checkequalnofix('iiix', 'abababc', 'translate',
225
{ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'})
226
self.checkequalnofix('c', 'abababc', 'translate',
227
{ord('a'): None, ord('b'): ''})
228
self.checkequalnofix('xyyx', 'xzx', 'translate',
230
# this needs maketrans()
231
self.checkequalnofix('abababc', 'abababc', 'translate',
233
tbl = self.type2test.maketrans({'a': None, 'b': '<i>'})
234
self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', tbl)
235
# test alternative way of calling maketrans()
236
tbl = self.type2test.maketrans('abc', 'xyz', 'd')
237
self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)
239
self.assertRaises(TypeError, self.type2test.maketrans)
240
self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
241
self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
242
self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2)
243
self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2)
244
self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2})
245
self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2})
247
self.assertRaises(TypeError, 'hello'.translate)
248
self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
250
def test_split(self):
    # Run the shared split() checks, then verify splitting on
    # multi-character separators through checkequalnofix.
    string_tests.CommonTest.test_split(self)
    self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
    # A separator at the very end yields a trailing empty string.
    self.checkequalnofix(['endcase ', ''], 'endcase test', 'split', 'test')
259
string_tests.MixinStrUnicodeUserStringTest.test_join(self)
262
def __init__(self, sval): self.sval = sval
263
def __str__(self): return self.sval
266
self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
267
self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
268
self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
269
self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
270
self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
271
self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
272
self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
273
self.checkraises(TypeError, ' ', 'join', ['1', '2', MyWrapper('foo')])
274
self.checkraises(TypeError, ' ', 'join', ['1', '2', '3', bytes()])
275
self.checkraises(TypeError, ' ', 'join', [1, 2, 3])
276
self.checkraises(TypeError, ' ', 'join', ['1', '2', 3])
278
def test_replace(self):
    # Run the shared replace() checks first.
    string_tests.CommonTest.test_replace(self)

    # With a count of 1 only the first occurrence is replaced.
    self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
    # A non-string replacement argument must raise TypeError.
    self.assertRaises(TypeError, 'replace'.replace, "r", 42)
285
def test_bytes_comparison(self):
286
warnings.simplefilter('ignore', BytesWarning)
287
self.assertEqual('abc' == b'abc', False)
288
self.assertEqual('abc' != b'abc', True)
289
self.assertEqual('abc' == bytearray(b'abc'), False)
290
self.assertEqual('abc' != bytearray(b'abc'), True)
292
def test_comparison(self):
294
self.assertEqual('abc', 'abc')
295
self.assertEqual('abc', 'abc')
296
self.assertEqual('abc', 'abc')
297
self.assert_('abcd' > 'abc')
298
self.assert_('abcd' > 'abc')
299
self.assert_('abcd' > 'abc')
300
self.assert_('abc' < 'abcd')
301
self.assert_('abc' < 'abcd')
302
self.assert_('abc' < 'abcd')
305
# Move these tests to a Unicode collation module test...
306
# Testing UTF-16 code point order comparisons...
308
# No surrogates, no fixup required.
309
self.assert_('\u0061' < '\u20ac')
310
# Non surrogate below surrogate value, no fixup required
311
self.assert_('\u0061' < '\ud800\udc02')
313
# Non surrogate above surrogate value, fixup required
314
def test_lecmp(s, s2):
354
# Surrogates on both sides, no fixup required
355
self.assert_('\ud800\udc02' < '\ud84d\udc56')
357
def test_islower(self):
    # Run the shared islower() checks, then one non-ASCII character.
    string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
    self.checkequalnofix(False, '\u1FFc', 'islower')
361
def test_isupper(self):
    # Run the shared isupper() checks, then one non-ASCII character.
    string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
    # The extra check is skipped on platforms whose name starts with 'java'.
    if not sys.platform.startswith('java'):
        self.checkequalnofix(False, '\u1FFc', 'isupper')
366
def test_istitle(self):
    # Run the shared istitle() checks, then non-ASCII titlecase input.
    # NOTE(review): this used to delegate to test_title; the other is*()
    # tests in this file delegate to the same-named shared test, so it
    # now calls test_istitle -- confirm the mixin defines it.
    string_tests.MixinStrUnicodeUserStringTest.test_istitle(self)
    self.checkequalnofix(True, '\u1FFc', 'istitle')
    self.checkequalnofix(True, 'Greek \u1FFcitlecases ...', 'istitle')
371
def test_isspace(self):
    # Run the shared isspace() checks, then some non-ASCII characters.
    string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
    self.checkequalnofix(True, '\u2000', 'isspace')
    self.checkequalnofix(True, '\u200a', 'isspace')
    self.checkequalnofix(False, '\u2014', 'isspace')
377
def test_isalpha(self):
    # Run the shared isalpha() checks, then one non-ASCII character.
    string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
    self.checkequalnofix(True, '\u1FFc', 'isalpha')
381
def test_isdecimal(self):
    # Expected str.isdecimal() results for empty, ASCII and non-ASCII input.
    self.checkequalnofix(False, '', 'isdecimal')
    self.checkequalnofix(False, 'a', 'isdecimal')
    self.checkequalnofix(True, '0', 'isdecimal')
    self.checkequalnofix(False, '\u2460', 'isdecimal') # CIRCLED DIGIT ONE
    self.checkequalnofix(False, '\xbc', 'isdecimal') # VULGAR FRACTION ONE QUARTER
    self.checkequalnofix(True, '\u0660', 'isdecimal') # ARABIC-INDIC DIGIT ZERO
    self.checkequalnofix(True, '0123456789', 'isdecimal')
    self.checkequalnofix(False, '0123456789a', 'isdecimal')

    # isdecimal() accepts no arguments.
    self.checkraises(TypeError, 'abc', 'isdecimal', 42)
393
def test_isdigit(self):
    # Run the shared isdigit() checks, then some non-ASCII characters.
    string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
    self.checkequalnofix(True, '\u2460', 'isdigit')
    self.checkequalnofix(False, '\xbc', 'isdigit')
    self.checkequalnofix(True, '\u0660', 'isdigit')
399
def test_isnumeric(self):
    # Expected str.isnumeric() results for empty, ASCII and non-ASCII input.
    self.checkequalnofix(False, '', 'isnumeric')
    self.checkequalnofix(False, 'a', 'isnumeric')
    self.checkequalnofix(True, '0', 'isnumeric')
    self.checkequalnofix(True, '\u2460', 'isnumeric')
    self.checkequalnofix(True, '\xbc', 'isnumeric')
    self.checkequalnofix(True, '\u0660', 'isnumeric')
    self.checkequalnofix(True, '0123456789', 'isnumeric')
    self.checkequalnofix(False, '0123456789a', 'isnumeric')

    # isnumeric() accepts no arguments.
    self.assertRaises(TypeError, "abc".isnumeric, 42)
411
def test_isidentifier(self):
412
self.assertTrue("a".isidentifier())
413
self.assertTrue("Z".isidentifier())
414
self.assertTrue("_".isidentifier())
415
self.assertTrue("b0".isidentifier())
416
self.assertTrue("bc".isidentifier())
417
self.assertTrue("b_".isidentifier())
418
self.assertTrue("ļæ½".isidentifier())
420
self.assertFalse(" ".isidentifier())
421
self.assertFalse("[".isidentifier())
422
self.assertFalse("ļæ½".isidentifier())
423
self.assertFalse("0".isidentifier())
425
def test_isprintable(self):
426
self.assertTrue("".isprintable())
427
self.assertTrue("abcdefg".isprintable())
428
self.assertFalse("abcdefg\n".isprintable())
429
# some defined Unicode character
430
self.assertTrue("\u0374".isprintable())
431
# undefined character
432
self.assertFalse("\u0378".isprintable())
433
# single surrogate character
434
self.assertFalse("\ud800".isprintable())
436
def test_contains(self):
437
# Testing Unicode contains method
438
self.assert_('a' in 'abdb')
439
self.assert_('a' in 'bdab')
440
self.assert_('a' in 'bdaba')
441
self.assert_('a' in 'bdba')
442
self.assert_('a' not in 'bdb')
443
self.assert_('a' in 'bdba')
444
self.assert_('a' in ('a',1,None))
445
self.assert_('a' in (1,None,'a'))
446
self.assert_('a' in ('a',1,None))
447
self.assert_('a' in (1,None,'a'))
448
self.assert_('a' not in ('x',1,'y'))
449
self.assert_('a' not in ('x',1,None))
450
self.assert_('abcd' not in 'abcxxxx')
451
self.assert_('ab' in 'abcd')
452
self.assert_('ab' in 'abc')
453
self.assert_('ab' in (1,None,'ab'))
454
self.assert_('' in 'abc')
455
self.assert_('' in '')
456
self.assert_('' in 'abc')
457
self.assert_('\0' not in 'abc')
458
self.assert_('\0' in '\0abc')
459
self.assert_('\0' in 'abc\0')
460
self.assert_('a' in '\0abc')
461
self.assert_('asdf' in 'asdf')
462
self.assert_('asdf' not in 'asd')
463
self.assert_('asdf' not in '')
465
self.assertRaises(TypeError, "abc".__contains__)
467
def test_format(self):
468
self.assertEqual(''.format(), '')
469
self.assertEqual('a'.format(), 'a')
470
self.assertEqual('ab'.format(), 'ab')
471
self.assertEqual('a{{'.format(), 'a{')
472
self.assertEqual('a}}'.format(), 'a}')
473
self.assertEqual('{{b'.format(), '{b')
474
self.assertEqual('}}b'.format(), '}b')
475
self.assertEqual('a{{b'.format(), 'a{b')
477
# examples from the PEP:
479
self.assertEqual("My name is {0}".format('Fred'), "My name is Fred")
480
self.assertEqual("My name is {0[name]}".format(dict(name='Fred')),
482
self.assertEqual("My name is {0} :-{{}}".format('Fred'),
483
"My name is Fred :-{}")
485
d = datetime.date(2007, 8, 18)
486
self.assertEqual("The year is {0.year}".format(d),
489
# classes we'll use for testing
491
def __init__(self, x=100):
493
def __format__(self, spec):
497
def __init__(self, x):
499
def __format__(self, spec):
502
# class with __str__, but no __format__
504
def __init__(self, x):
507
return 'E(' + self.x + ')'
509
# class with __repr__, but no __format__ or __str__
511
def __init__(self, x):
514
return 'F(' + self.x + ')'
516
# class with __format__ that forwards to string, for some format_spec's
518
def __init__(self, x):
521
return "string is " + self.x
522
def __format__(self, format_spec):
523
if format_spec == 'd':
524
return 'G(' + self.x + ')'
525
return object.__format__(self, format_spec)
527
# class that returns a bad type from __format__
529
def __format__(self, format_spec):
532
class I(datetime.date):
533
def __format__(self, format_spec):
534
return self.strftime(format_spec)
537
def __format__(self, format_spec):
538
return int.__format__(self * 2, format_spec)
541
self.assertEqual(''.format(), '')
542
self.assertEqual('abc'.format(), 'abc')
543
self.assertEqual('{0}'.format('abc'), 'abc')
544
self.assertEqual('{0:}'.format('abc'), 'abc')
545
# self.assertEqual('{ 0 }'.format('abc'), 'abc')
546
self.assertEqual('X{0}'.format('abc'), 'Xabc')
547
self.assertEqual('{0}X'.format('abc'), 'abcX')
548
self.assertEqual('X{0}Y'.format('abc'), 'XabcY')
549
self.assertEqual('{1}'.format(1, 'abc'), 'abc')
550
self.assertEqual('X{1}'.format(1, 'abc'), 'Xabc')
551
self.assertEqual('{1}X'.format(1, 'abc'), 'abcX')
552
self.assertEqual('X{1}Y'.format(1, 'abc'), 'XabcY')
553
self.assertEqual('{0}'.format(-15), '-15')
554
self.assertEqual('{0}{1}'.format(-15, 'abc'), '-15abc')
555
self.assertEqual('{0}X{1}'.format(-15, 'abc'), '-15Xabc')
556
self.assertEqual('{{'.format(), '{')
557
self.assertEqual('}}'.format(), '}')
558
self.assertEqual('{{}}'.format(), '{}')
559
self.assertEqual('{{x}}'.format(), '{x}')
560
self.assertEqual('{{{0}}}'.format(123), '{123}')
561
self.assertEqual('{{{{0}}}}'.format(), '{{0}}')
562
self.assertEqual('}}{{'.format(), '}{')
563
self.assertEqual('}}x{{'.format(), '}x{')
566
self.assertEqual("{0[foo-bar]}".format({'foo-bar':'baz'}), 'baz')
567
self.assertEqual("{0[foo bar]}".format({'foo bar':'baz'}), 'baz')
568
self.assertEqual("{0[ ]}".format({' ':3}), '3')
570
self.assertEqual('{foo._x}'.format(foo=C(20)), '20')
571
self.assertEqual('{1}{0}'.format(D(10), D(20)), '2010')
572
self.assertEqual('{0._x.x}'.format(C(D('abc'))), 'abc')
573
self.assertEqual('{0[0]}'.format(['abc', 'def']), 'abc')
574
self.assertEqual('{0[1]}'.format(['abc', 'def']), 'def')
575
self.assertEqual('{0[1][0]}'.format(['abc', ['def']]), 'def')
576
self.assertEqual('{0[1][0].x}'.format(['abc', [D('def')]]), 'def')
579
self.assertEqual('{0:.3s}'.format('abc'), 'abc')
580
self.assertEqual('{0:.3s}'.format('ab'), 'ab')
581
self.assertEqual('{0:.3s}'.format('abcdef'), 'abc')
582
self.assertEqual('{0:.0s}'.format('abcdef'), '')
583
self.assertEqual('{0:3.3s}'.format('abc'), 'abc')
584
self.assertEqual('{0:2.3s}'.format('abc'), 'abc')
585
self.assertEqual('{0:2.2s}'.format('abc'), 'ab')
586
self.assertEqual('{0:3.2s}'.format('abc'), 'ab ')
587
self.assertEqual('{0:x<0s}'.format('result'), 'result')
588
self.assertEqual('{0:x<5s}'.format('result'), 'result')
589
self.assertEqual('{0:x<6s}'.format('result'), 'result')
590
self.assertEqual('{0:x<7s}'.format('result'), 'resultx')
591
self.assertEqual('{0:x<8s}'.format('result'), 'resultxx')
592
self.assertEqual('{0: <7s}'.format('result'), 'result ')
593
self.assertEqual('{0:<7s}'.format('result'), 'result ')
594
self.assertEqual('{0:>7s}'.format('result'), ' result')
595
self.assertEqual('{0:>8s}'.format('result'), ' result')
596
self.assertEqual('{0:^8s}'.format('result'), ' result ')
597
self.assertEqual('{0:^9s}'.format('result'), ' result ')
598
self.assertEqual('{0:^10s}'.format('result'), ' result ')
599
self.assertEqual('{0:10000}'.format('a'), 'a' + ' ' * 9999)
600
self.assertEqual('{0:10000}'.format(''), ' ' * 10000)
601
self.assertEqual('{0:10000000}'.format(''), ' ' * 10000000)
603
# format specifiers for user defined type
604
self.assertEqual('{0:abc}'.format(C()), 'abc')
606
# !r, !s and !a coercions
607
self.assertEqual('{0!s}'.format('Hello'), 'Hello')
608
self.assertEqual('{0!s:}'.format('Hello'), 'Hello')
609
self.assertEqual('{0!s:15}'.format('Hello'), 'Hello ')
610
self.assertEqual('{0!s:15s}'.format('Hello'), 'Hello ')
611
self.assertEqual('{0!r}'.format('Hello'), "'Hello'")
612
self.assertEqual('{0!r:}'.format('Hello'), "'Hello'")
613
self.assertEqual('{0!r}'.format(F('Hello')), 'F(Hello)')
614
self.assertEqual('{0!r}'.format('\u0378'), "'\\u0378'") # nonprintable
615
self.assertEqual('{0!r}'.format('\u0374'), "'\u0374'") # printable
616
self.assertEqual('{0!r}'.format(F('\u0374')), 'F(\u0374)')
617
self.assertEqual('{0!a}'.format('Hello'), "'Hello'")
618
self.assertEqual('{0!a}'.format('\u0378'), "'\\u0378'") # nonprintable
619
self.assertEqual('{0!a}'.format('\u0374'), "'\\u0374'") # printable
620
self.assertEqual('{0!a:}'.format('Hello'), "'Hello'")
621
self.assertEqual('{0!a}'.format(F('Hello')), 'F(Hello)')
622
self.assertEqual('{0!a}'.format(F('\u0374')), 'F(\\u0374)')
624
# test fallback to object.__format__
625
self.assertEqual('{0}'.format({}), '{}')
626
self.assertEqual('{0}'.format([]), '[]')
627
self.assertEqual('{0}'.format([1]), '[1]')
628
self.assertEqual('{0}'.format(E('data')), 'E(data)')
629
self.assertEqual('{0:^10}'.format(E('data')), ' E(data) ')
630
self.assertEqual('{0:^10s}'.format(E('data')), ' E(data) ')
631
self.assertEqual('{0:d}'.format(G('data')), 'G(data)')
632
self.assertEqual('{0:>15s}'.format(G('data')), ' string is data')
633
self.assertEqual('{0!s}'.format(G('data')), 'string is data')
635
self.assertEqual("{0:date: %Y-%m-%d}".format(I(year=2007,
640
# test deriving from a builtin type and overriding __format__
641
self.assertEqual("{0}".format(J(10)), "20")
644
# string format specifiers
645
self.assertEqual('{0:}'.format('a'), 'a')
647
# computed format specifiers
648
self.assertEqual("{0:.{1}}".format('hello world', 5), 'hello')
649
self.assertEqual("{0:.{1}s}".format('hello world', 5), 'hello')
650
self.assertEqual("{0:.{precision}s}".format('hello world', precision=5), 'hello')
651
self.assertEqual("{0:{width}.{precision}s}".format('hello world', width=10, precision=5), 'hello ')
652
self.assertEqual("{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), 'hello ')
654
# test various errors
655
self.assertRaises(ValueError, '{'.format)
656
self.assertRaises(ValueError, '}'.format)
657
self.assertRaises(ValueError, 'a{'.format)
658
self.assertRaises(ValueError, 'a}'.format)
659
self.assertRaises(ValueError, '{a'.format)
660
self.assertRaises(ValueError, '}a'.format)
661
self.assertRaises(IndexError, '{0}'.format)
662
self.assertRaises(IndexError, '{1}'.format, 'abc')
663
self.assertRaises(KeyError, '{x}'.format)
664
self.assertRaises(ValueError, "}{".format)
665
self.assertRaises(ValueError, "{".format)
666
self.assertRaises(ValueError, "}".format)
667
self.assertRaises(ValueError, "abc{0:{}".format)
668
self.assertRaises(ValueError, "{0".format)
669
self.assertRaises(IndexError, "{0.}".format)
670
self.assertRaises(ValueError, "{0.}".format, 0)
671
self.assertRaises(IndexError, "{0[}".format)
672
self.assertRaises(ValueError, "{0[}".format, [])
673
self.assertRaises(KeyError, "{0]}".format)
674
self.assertRaises(ValueError, "{0.[]}".format, 0)
675
self.assertRaises(ValueError, "{0..foo}".format, 0)
676
self.assertRaises(ValueError, "{0[0}".format, 0)
677
self.assertRaises(ValueError, "{0[0:foo}".format, 0)
678
self.assertRaises(KeyError, "{c]}".format)
679
self.assertRaises(ValueError, "{{ {{{0}}".format, 0)
680
self.assertRaises(ValueError, "{0}}".format, 0)
681
self.assertRaises(KeyError, "{foo}".format, bar=3)
682
self.assertRaises(ValueError, "{0!x}".format, 3)
683
self.assertRaises(ValueError, "{0!}".format, 0)
684
self.assertRaises(ValueError, "{0!rs}".format, 0)
685
self.assertRaises(ValueError, "{!}".format)
686
self.assertRaises(IndexError, "{:}".format)
687
self.assertRaises(IndexError, "{:s}".format)
688
self.assertRaises(IndexError, "{}".format)
690
# can't have a replacement on the field name portion
691
self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)
693
# exceed maximum recursion depth
694
self.assertRaises(ValueError, "{0:{1:{2}}}".format, 'abc', 's', '')
695
self.assertRaises(ValueError, "{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
696
0, 1, 2, 3, 4, 5, 6, 7)
698
# string format spec errors
699
self.assertRaises(ValueError, "{0:-s}".format, '')
700
self.assertRaises(ValueError, format, "", "-")
701
self.assertRaises(ValueError, "{0:=s}".format, '')
703
# Alternate formatting is not supported
704
self.assertRaises(ValueError, format, '', '#')
705
self.assertRaises(ValueError, format, '', '#20')
707
def test_format_auto_numbering(self):
709
def __init__(self, x=100):
711
def __format__(self, spec):
714
self.assertEqual('{}'.format(10), '10')
715
self.assertEqual('{:5}'.format('s'), 's ')
716
self.assertEqual('{!r}'.format('s'), "'s'")
717
self.assertEqual('{._x}'.format(C(10)), '10')
718
self.assertEqual('{[1]}'.format([1, 2]), '2')
719
self.assertEqual('{[a]}'.format({'a':4, 'b':2}), '4')
720
self.assertEqual('a{}b{}c'.format(0, 1), 'a0b1c')
722
self.assertEqual('a{:{}}b'.format('x', '^10'), 'a x b')
723
self.assertEqual('a{:{}x}b'.format(20, '#'), 'a0x14b')
725
# can't mix and match numbering and auto-numbering
726
self.assertRaises(ValueError, '{}{1}'.format, 1, 2)
727
self.assertRaises(ValueError, '{1}{}'.format, 1, 2)
728
self.assertRaises(ValueError, '{:{1}}'.format, 1, 2)
729
self.assertRaises(ValueError, '{0:{}}'.format, 1, 2)
731
# can mix and match auto-numbering and named
732
self.assertEqual('{f}{}'.format(4, f='test'), 'test4')
733
self.assertEqual('{}{f}'.format(4, f='test'), '4test')
734
self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
735
self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')
737
def test_formatting(self):
738
string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
739
# Testing Unicode formatting strings...
740
self.assertEqual("%s, %s" % ("abc", "abc"), 'abc, abc')
741
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, 2, 3), 'abc, abc, 1, 2.000000, 3.00')
742
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, -2, 3), 'abc, abc, 1, -2.000000, 3.00')
743
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.5), 'abc, abc, -1, -2.000000, 3.50')
744
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.57), 'abc, abc, -1, -2.000000, 3.57')
745
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 1003.57), 'abc, abc, -1, -2.000000, 1003.57')
746
if not sys.platform.startswith('java'):
747
self.assertEqual("%r, %r" % (b"abc", "abc"), "b'abc', 'abc'")
748
self.assertEqual("%r" % ("\u1234",), "'\u1234'")
749
self.assertEqual("%a" % ("\u1234",), "'\\u1234'")
750
self.assertEqual("%(x)s, %(y)s" % {'x':"abc", 'y':"def"}, 'abc, def')
751
self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
753
self.assertEqual('%c' % 0x1234, '\u1234')
754
self.assertEqual('%c' % 0x21483, '\U00021483')
755
self.assertRaises(OverflowError, "%c".__mod__, (0x110000,))
756
self.assertEqual('%c' % '\U00021483', '\U00021483')
757
self.assertRaises(TypeError, "%c".__mod__, "aa")
759
# formatting jobs delegated from the string implementation:
760
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
761
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
762
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
763
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
764
self.assertEqual('...%(foo)s...' % {'foo':"abc",'def':123}, '...abc...')
765
self.assertEqual('...%(foo)s...' % {'foo':"abc",'def':123}, '...abc...')
766
self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,"abc"), '...1...2...3...abc...')
767
self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,"abc"), '...%...%s...1...2...3...abc...')
768
self.assertEqual('...%s...' % "abc", '...abc...')
769
self.assertEqual('%*s' % (5,'abc',), ' abc')
770
self.assertEqual('%*s' % (-5,'abc',), 'abc ')
771
self.assertEqual('%*.*s' % (5,2,'abc',), ' ab')
772
self.assertEqual('%*.*s' % (5,3,'abc',), ' abc')
773
self.assertEqual('%i %*.*s' % (10, 5,3,'abc',), '10 abc')
774
self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, 'abc',), '103 abc')
775
self.assertEqual('%c' % 'a', 'a')
779
self.assertEqual('%s' % Wrapper(), '\u1234')
781
@support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
def test_format_float(self):
    # should not format with a comma, but always with C locale
    self.assertEqual('1.0', '%.1f' % 1.0)
786
def test_constructor(self):
787
# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
790
str('unicode remains unicode'),
791
'unicode remains unicode'
794
class UnicodeSubclass(str):
798
str(UnicodeSubclass('unicode subclass becomes unicode')),
799
'unicode subclass becomes unicode'
803
str('strings are converted to unicode'),
804
'strings are converted to unicode'
808
def __init__(self, x):
814
str(StringCompat('__str__ compatible objects are recognized')),
815
'__str__ compatible objects are recognized'
818
# unicode(obj) is compatible to str():
820
o = StringCompat('unicode(obj) is compatible to str()')
821
self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
822
self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
824
for obj in (123, 123.45, 123):
825
self.assertEqual(str(obj), str(str(obj)))
827
# unicode(obj, encoding, error) tests (this maps to
828
# PyUnicode_FromEncodedObject() at C level)
830
if not sys.platform.startswith('java'):
834
'decoding unicode is not supported',
840
str(b'strings are decoded to unicode', 'utf-8', 'strict'),
841
'strings are decoded to unicode'
844
if not sys.platform.startswith('java'):
847
memoryview(b'character buffers are decoded to unicode'),
851
'character buffers are decoded to unicode'
854
self.assertRaises(TypeError, str, 42, 42, 42)
856
def test_codecs_utf7(self):
858
('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example
859
('Hi Mom -\u263a-!', b'Hi Mom -+Jjo--!'), # RFC2152 example
860
('\u65E5\u672C\u8A9E', b'+ZeVnLIqe-'), # RFC2152 example
861
('Item 3 is \u00a31.', b'Item 3 is +AKM-1.'), # RFC2152 example
867
(r'\\?', b'+AFwAXA?'),
868
(r'\\\?', b'+AFwAXABc?'),
872
for (x, y) in utfTests:
873
self.assertEqual(x.encode('utf-7'), y)
875
# surrogates not supported
876
self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7')
878
self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd')
880
# Issue #2242: crash on some Windows/MSVC versions
881
self.assertRaises(UnicodeDecodeError, b'+\xc1'.decode, 'utf-7')
883
def test_codecs_utf8(self):
884
self.assertEqual(''.encode('utf-8'), b'')
885
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
886
self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
887
self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
888
self.assertEqual('\ud800'.encode('utf-8'), b'\xed\xa0\x80')
889
self.assertEqual('\udc00'.encode('utf-8'), b'\xed\xb0\x80')
891
('\ud800\udc02'*1000).encode('utf-8'),
892
b'\xf0\x90\x80\x82'*1000
895
'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
896
'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
897
'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
898
'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
899
'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
900
' Nunstuck git und'.encode('utf-8'),
901
b'\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
902
b'\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
903
b'\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
904
b'\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
905
b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
906
b'\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
907
b'\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
908
b'\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
909
b'\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
910
b'\xe3\x80\x8cWenn ist das Nunstuck git und'
913
# UTF-8 specific decoding tests
914
self.assertEqual(str(b'\xf0\xa3\x91\x96', 'utf-8'), '\U00023456' )
915
self.assertEqual(str(b'\xf0\x90\x80\x82', 'utf-8'), '\U00010002' )
916
self.assertEqual(str(b'\xe2\x82\xac', 'utf-8'), '\u20ac' )
918
# Other possible utf-8 test cases:
919
# * strict decoding testing for all of the
920
# UTF8_ERROR cases in PyUnicode_DecodeUTF8
922
def test_codecs_idna(self):
923
# Test whether trailing dot is preserved
924
self.assertEqual("www.python.org.".encode("idna"), b"www.python.org.")
926
def test_codecs_errors(self):
927
# Error handling (encoding)
928
self.assertRaises(UnicodeError, 'Andr\202 x'.encode, 'ascii')
929
self.assertRaises(UnicodeError, 'Andr\202 x'.encode, 'ascii','strict')
930
self.assertEqual('Andr\202 x'.encode('ascii','ignore'), b"Andr x")
931
self.assertEqual('Andr\202 x'.encode('ascii','replace'), b"Andr? x")
933
# Error handling (decoding)
934
self.assertRaises(UnicodeError, str, b'Andr\202 x', 'ascii')
935
self.assertRaises(UnicodeError, str, b'Andr\202 x', 'ascii', 'strict')
936
self.assertEqual(str(b'Andr\202 x', 'ascii', 'ignore'), "Andr x")
937
self.assertEqual(str(b'Andr\202 x', 'ascii', 'replace'), 'Andr\uFFFD x')
939
# Error handling (unknown character names)
940
self.assertEqual(b"\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")
942
# Error handling (truncated escape sequence)
943
self.assertRaises(UnicodeError, b"\\".decode, "unicode-escape")
945
self.assertRaises(TypeError, b"hello".decode, "test.unicode1")
946
self.assertRaises(TypeError, str, b"hello", "test.unicode2")
947
self.assertRaises(TypeError, "hello".encode, "test.unicode1")
948
self.assertRaises(TypeError, "hello".encode, "test.unicode2")
949
# executes PyUnicode_Encode()
954
"non-existing module",
958
# Error handling (wrong arguments)
959
self.assertRaises(TypeError, "hello".encode, 42, 42, 42)
961
# Error handling (PyUnicode_EncodeDecimal())
962
self.assertRaises(UnicodeError, int, "\u0200")
964
def test_codecs(self):
966
self.assertEqual('hello'.encode('ascii'), b'hello')
967
self.assertEqual('hello'.encode('utf-7'), b'hello')
968
self.assertEqual('hello'.encode('utf-8'), b'hello')
969
self.assertEqual('hello'.encode('utf8'), b'hello')
970
self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
971
self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
972
self.assertEqual('hello'.encode('latin-1'), b'hello')
974
# Roundtrip safety for BMP (just the first 1024 chars)
975
for c in range(1024):
977
for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
978
'utf-16-be', 'raw_unicode_escape',
979
'unicode_escape', 'unicode_internal'):
980
self.assertEqual(str(u.encode(encoding),encoding), u)
982
# Roundtrip safety for BMP (just the first 256 chars)
985
for encoding in ('latin-1',):
986
self.assertEqual(str(u.encode(encoding),encoding), u)
988
# Roundtrip safety for BMP (just the first 128 chars)
991
for encoding in ('ascii',):
992
self.assertEqual(str(u.encode(encoding),encoding), u)
994
# Roundtrip safety for non-BMP (just a few chars)
995
u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
996
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
997
#'raw_unicode_escape',
998
'unicode_escape', 'unicode_internal'):
999
self.assertEqual(str(u.encode(encoding),encoding), u)
1001
# UTF-8 must be roundtrip safe for all UCS-2 code points
1002
# This excludes surrogates: in the full range, there would be
1003
# a surrogate pair (\udbff\udc00), which gets converted back
1004
# to a non-BMP character (\U0010fc00)
1005
u = ''.join(map(chr, list(range(0,0xd800)) +
1006
list(range(0xe000,0x10000))))
1007
for encoding in ('utf-8',):
1008
self.assertEqual(str(u.encode(encoding),encoding), u)
1010
def test_codecs_charmap(self):
1012
s = bytes(range(128))
1015
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
1016
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
1017
'cp863', 'cp865', 'cp866',
1018
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
1019
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
1020
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
1021
'mac_cyrillic', 'mac_latin2',
1023
'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
1024
'cp1256', 'cp1257', 'cp1258',
1025
'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
1027
'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
1028
'cp1006', 'iso8859_8',
1030
### These have undefined mappings:
1033
### These fail the round-trip:
1037
self.assertEqual(str(s, encoding).encode(encoding), s)
1040
s = bytes(range(128, 256))
1043
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
1044
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
1045
'cp863', 'cp865', 'cp866',
1046
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
1047
'iso8859_2', 'iso8859_4', 'iso8859_5',
1048
'iso8859_9', 'koi8_r', 'latin_1',
1049
'mac_cyrillic', 'mac_latin2',
1051
### These have undefined mappings:
1052
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
1053
#'cp1256', 'cp1257', 'cp1258',
1054
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
1055
#'iso8859_3', 'iso8859_6', 'iso8859_7',
1056
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
1058
### These fail the round-trip:
1059
#'cp1006', 'cp875', 'iso8859_8',
1062
self.assertEqual(str(s, encoding).encode(encoding), s)
1064
def test_concatenation(self):
1065
self.assertEqual(("abc" "def"), "abcdef")
1066
self.assertEqual(("abc" "def"), "abcdef")
1067
self.assertEqual(("abc" "def"), "abcdef")
1068
self.assertEqual(("abc" "def" "ghi"), "abcdefghi")
1069
self.assertEqual(("abc" "def" "ghi"), "abcdefghi")
1071
def test_printing(self):
1073
def write(self, text):
1077
print('abc', file=out)
1078
print('abc', 'def', file=out)
1079
print('abc', 'def', file=out)
1080
print('abc', 'def', file=out)
1081
print('abc\n', file=out)
1082
print('abc\n', end=' ', file=out)
1083
print('abc\n', end=' ', file=out)
1084
print('def\n', file=out)
1085
print('def\n', file=out)
1087
def test_ucs4(self):
1089
y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
1090
self.assertEqual(x, y)
1094
#x = y.encode("raw-unicode-escape").decode("raw-unicode-escape")
1095
#self.assertEqual(x, y)
1097
#x = y.encode("raw-unicode-escape").decode("raw-unicode-escape")
1098
#self.assertEqual(x, y)
1101
# '\U11111111'.decode("raw-unicode-escape")
1102
#except UnicodeDecodeError as e:
1103
# self.assertEqual(e.start, 0)
1104
# self.assertEqual(e.end, 10)
1106
# self.fail("Should have raised UnicodeDecodeError")
1108
def test_conversion(self):
1109
# Make sure __str__() works properly
1148
def __new__(cls, content=""):
1149
return str.__new__(cls, 2*content)
1155
return "not unicode"
1157
self.assertEqual(str(Foo0()), "foo")
1158
self.assertEqual(str(Foo1()), "foo")
1159
self.assertEqual(str(Foo2()), "foo")
1160
self.assertEqual(str(Foo3()), "foo")
1161
self.assertEqual(str(Foo4("bar")), "foo")
1162
self.assertEqual(str(Foo5("bar")), "foo")
1163
self.assertEqual(str(Foo6("bar")), "foou")
1164
self.assertEqual(str(Foo7("bar")), "foou")
1165
self.assertEqual(str(Foo8("foo")), "foofoo")
1166
self.assertEqual(str(Foo9("foo")), "not unicode")
1168
def test_unicode_repr(self):
1177
self.assertEqual(repr(s1()), '\\n')
1178
self.assertEqual(repr(s2()), '\\n')
1180
def test_expandtabs_overflows_gracefully(self):
1181
# This test only affects 32-bit platforms because expandtabs can only take
1182
# an int as the max value, not a 64-bit C long. If expandtabs is changed
1183
# to take a 64-bit long, this test should apply to all platforms.
1184
if sys.maxsize > (1 << 32) or struct.calcsize('P') != 4:
1186
self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxsize)
1188
def test_raiseMemError(self):
1189
# Ensure that the freelist contains a consistent object, even
1190
# when a string allocation fails with a MemoryError.
1191
# This used to crash the interpreter,
1192
# or leak references when the number was smaller.
1193
charwidth = 4 if sys.maxunicode >= 0x10000 else 2
1194
# Note: sys.maxsize is half of the actual max allocation because of
1195
# the signedness of Py_ssize_t.
1196
alloc = lambda: "a" * (sys.maxsize // charwidth * 2)
1197
self.assertRaises(MemoryError, alloc)
1198
self.assertRaises(MemoryError, alloc)
1202
support.run_unittest(__name__)
1204
if __name__ == "__main__":