2
sys.path = ['.'] + sys.path
4
from test.test_support import verbose, run_unittest
7
import sys, os, traceback
8
from weakref import proxy
10
# Misc tests from Tim Peters' re.doc
12
# WARNING: Don't change details in these tests if you don't know
13
# what you're doing. Some of these tests were carefully modeled to
14
# cover most of the code.
18
class ReTests(unittest.TestCase):
20
def test_weakref(self):
22
x = re.compile('ab+c')
24
self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
26
def test_search_star_plus(self):
27
self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28
self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29
self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30
self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
31
self.assertEqual(re.search('x', 'aaa'), None)
32
self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33
self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34
self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35
self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
36
self.assertEqual(re.match('a+', 'xxx'), None)
38
def bump_num(self, matchobj):
    # Replacement callback for re.sub(): read the whole match (group 0) as
    # a decimal integer and return that value plus one, as a string.
    return str(int(matchobj.group(0)) + 1)
42
def test_basic_re_sub(self):
43
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
46
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
49
self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50
self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
53
self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54
self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55
self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
57
self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58
self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59
self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60
self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
62
self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63
'\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64
self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65
self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
68
self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
70
def test_bug_449964(self):
71
# fails for group followed by other escape
72
self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
75
def test_bug_449000(self):
76
# Test for sub() on escaped characters
77
self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
79
self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
81
self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
83
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
86
def test_bug_1140(self):
87
# re.sub(x, y, u'') should return u'', not '', and
88
# re.sub(x, y, '') should return '', not u''.
90
# re.sub(x, y, unicode(x)) should return unicode(y), and
91
# re.sub(x, y, str(x)) should return
92
# str(y) if isinstance(y, str) else unicode(y).
96
self.assertEqual(z, u'')
97
self.assertEqual(type(z), unicode)
100
self.assertEqual(z, '')
101
self.assertEqual(type(z), str)
103
z = re.sub(x, y, unicode(x))
104
self.assertEqual(z, y)
105
self.assertEqual(type(z), unicode)
107
z = re.sub(x, y, str(x))
108
self.assertEqual(z, y)
109
self.assertEqual(type(z), type(y))
111
def test_bug_1661(self):
112
# Verify that flags do not get silently ignored with compiled patterns
113
pattern = re.compile('.')
114
self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
115
self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
116
self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
117
self.assertRaises(ValueError, re.compile, pattern, re.I)
119
def test_bug_3629(self):
120
# A regex that triggered a bug in the sre-code validator
121
re.compile("(?P<quote>)(?(quote))")
123
def test_sub_template_numeric_escape(self):
124
# bug 776311 and friends
125
self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
126
self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
127
self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
128
self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
129
self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
130
self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
131
self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
133
self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
134
self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
136
self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
137
self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
138
self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
139
self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
140
self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
142
self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
143
self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
145
self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
146
self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
147
self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
148
self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
149
self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
150
self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
151
self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
152
self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
153
self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
154
self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
155
self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
156
self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
158
# in python2.3 (etc), these loop endlessly in sre_parser.py
159
self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
160
self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
162
self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
165
def test_qualified_re_sub(self):
166
self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
167
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
169
def test_bug_114660(self):
170
self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
173
def test_bug_462270(self):
174
# Test for empty sub() behaviour, see SF bug #462270
175
self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
176
self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
178
def test_symbolic_refs(self):
179
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
180
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
181
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
182
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
183
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
184
self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
185
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
186
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
187
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
189
def test_re_subn(self):
190
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
191
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
192
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
193
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
194
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
196
def test_re_split(self):
197
self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
198
self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
199
self.assertEqual(re.split("(:*)", ":a:b::c"),
200
['', ':', 'a', ':', 'b', '::', 'c'])
201
self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
202
self.assertEqual(re.split("(:)*", ":a:b::c"),
203
['', ':', 'a', ':', 'b', ':', 'c'])
204
self.assertEqual(re.split("([b:]+)", ":a:b::c"),
205
['', ':', 'a', ':b::', 'c'])
206
self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
207
['', None, ':', 'a', None, ':', '', 'b', None, '',
209
self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
210
['', 'a', '', '', 'c'])
212
def test_qualified_re_split(self):
213
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
214
self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
215
self.assertEqual(re.split("(:)", ":a:b::c", 2),
216
['', ':', 'a', ':', 'b::c'])
217
self.assertEqual(re.split("(:*)", ":a:b::c", 2),
218
['', ':', 'a', ':', 'b::c'])
220
def test_re_findall(self):
221
self.assertEqual(re.findall(":+", "abc"), [])
222
self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
223
self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
224
self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
228
def test_bug_117612(self):
229
self.assertEqual(re.findall(r"(a|(b))", "aba"),
230
[("a", ""),("b", "b"),("a", "")])
232
def test_re_match(self):
233
self.assertEqual(re.match('a', 'a').groups(), ())
234
self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
235
self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
236
self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
237
self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
239
pat = re.compile('((a)|(b))(c)?')
240
self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
241
self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
242
self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
243
self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
244
self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
247
m = re.match('(a)', 'a')
248
self.assertEqual(m.group(0), 'a')
249
self.assertEqual(m.group(0), 'a')
250
self.assertEqual(m.group(1), 'a')
251
self.assertEqual(m.group(1, 1), ('a', 'a'))
253
pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
254
self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
255
self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
257
self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
259
def test_re_groupref_exists(self):
260
self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
262
self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
264
self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
265
self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
266
self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
268
self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
270
self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
272
self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
275
# Tests for bug #1177831: exercise groups other than the first group
276
p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
277
self.assertEqual(p.match('abc').groups(),
279
self.assertEqual(p.match('ad').groups(),
281
self.assertEqual(p.match('abd'), None)
282
self.assertEqual(p.match('ac'), None)
285
def test_re_groupref(self):
286
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
288
self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
290
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
291
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
292
self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
294
self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
297
def test_groupdict(self):
298
self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
299
'first second').groupdict(),
300
{'first':'first', 'second':'second'})
302
def test_expand(self):
303
self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
305
.expand(r"\2 \1 \g<second> \g<first>"),
306
"second first second first")
308
def test_repeat_minmax(self):
309
self.assertEqual(re.match("^(\w){1}$", "abc"), None)
310
self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
311
self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
312
self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
314
self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
315
self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
316
self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
317
self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
318
self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
319
self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
320
self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
321
self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
323
self.assertEqual(re.match("^x{1}$", "xxx"), None)
324
self.assertEqual(re.match("^x{1}?$", "xxx"), None)
325
self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
326
self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
328
self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
329
self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
330
self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
331
self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
332
self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
333
self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
334
self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
335
self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
337
self.assertEqual(re.match("^x{}$", "xxx"), None)
338
self.assertNotEqual(re.match("^x{}$", "x{}"), None)
340
def test_getattr(self):
341
self.assertEqual(re.match("(a)", "a").pos, 0)
342
self.assertEqual(re.match("(a)", "a").endpos, 1)
343
self.assertEqual(re.match("(a)", "a").string, "a")
344
self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
345
self.assertNotEqual(re.match("(a)", "a").re, None)
347
def test_special_escapes(self):
    # \b, \B, \A, \Z and the character-category escapes, exercised with
    # default flags, re.LOCALE and re.UNICODE, on both byte strings and
    # unicode subjects.

    # Word boundary / non-boundary on byte strings.
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx", re.LOCALE).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd", re.LOCALE).group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx", re.UNICODE).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd", re.UNICODE).group(1), "bx")

    # Line anchors versus string anchors under re.M.
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)

    # The same checks with unicode subjects.
    self.assertEqual(re.search(r"\b(b.)\b",
                               u"abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               u"abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)

    # Character categories: digit, non-digit, word, non-word, space,
    # non-space.
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a").group(0), "1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a", re.LOCALE).group(0), "1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a", re.UNICODE).group(0), "1aa! a")
377
def test_bigcharset(self):
378
self.assertEqual(re.match(u"([\u2222\u2223])",
379
u"\u2222").group(1), u"\u2222")
380
self.assertEqual(re.match(u"([\u2222\u2223])",
381
u"\u2222", re.UNICODE).group(1), u"\u2222")
383
def test_anyall(self):
384
self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
386
self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
389
def test_non_consuming(self):
390
self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
391
self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
392
self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
393
self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
394
self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
395
self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
396
self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
398
self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
399
self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
400
self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
401
self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
403
def test_ignore_case(self):
404
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
405
self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
406
self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
407
self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
408
self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
409
self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
410
self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
411
self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
412
self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
413
self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
415
def test_category(self):
416
self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
418
def test_getlower(self):
420
self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
421
self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
422
self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
424
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
425
self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
427
def test_not_literal(self):
428
self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
429
self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
431
def test_search_coverage(self):
432
self.assertEqual(re.search("\s(b)", " b").group(1), "b")
433
self.assertEqual(re.search("a\s", "a ").group(0), "a ")
435
def test_re_escape(self):
437
for i in range(0, 256):
439
self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
441
self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
443
pat=re.compile(re.escape(p))
444
self.assertEqual(pat.match(p) is not None, True)
445
self.assertEqual(pat.match(p).span(), (0,256))
447
def test_pickling(self):
449
self.pickle_test(pickle)
451
self.pickle_test(cPickle)
452
# old pickles expect the _compile() reconstructor in sre module
454
with warnings.catch_warnings():
455
warnings.filterwarnings("ignore", "The sre module is deprecated",
457
from sre import _compile
459
def pickle_test(self, pickle):
    # Round-trip a compiled pattern through the given pickle implementation
    # (any module offering dumps()/loads()) and check that the restored
    # pattern compares equal to the original.
    oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    newpat = pickle.loads(pickle.dumps(oldpat))
    self.assertEqual(oldpat, newpat)
465
def test_constants(self):
466
self.assertEqual(re.I, re.IGNORECASE)
467
self.assertEqual(re.L, re.LOCALE)
468
self.assertEqual(re.M, re.MULTILINE)
469
self.assertEqual(re.S, re.DOTALL)
470
self.assertEqual(re.X, re.VERBOSE)
472
def test_flags(self):
    # Every individual flag must be accepted by re.compile().
    for flag in [re.I, re.M, re.X, re.S, re.L]:
        self.assertNotEqual(re.compile('^pattern$', flag), None)
476
def test_sre_character_literals(self):
477
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
478
self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
479
self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
480
self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
481
self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
482
self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
483
self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
484
self.assertRaises(re.error, re.match, "\911", "")
486
def test_sre_character_class_literals(self):
487
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
488
self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
489
self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
490
self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
491
self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
492
self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
493
self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
494
self.assertRaises(re.error, re.match, "[\911]", "")
496
def test_bug_113254(self):
497
self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
498
self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
499
self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
501
def test_bug_527371(self):
502
# bug described in patches 527371/672491
503
self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
504
self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
505
self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
506
self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
507
self.assertEqual(re.match("((a))", "a").lastindex, 1)
509
def test_bug_545855(self):
510
# bug 545855 -- This pattern failed to cause a compile error as it
511
# should, instead provoking a TypeError.
512
self.assertRaises(re.error, re.compile, 'foo[a-')
514
def test_bug_418626(self):
515
# bugs 418626 et al. -- Testing Greg Chapman's addition of op code
516
# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
517
# pattern '*?' on a long string.
518
self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
519
self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
521
self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
522
# non-simple '*?' still used to hit the recursion limit, before the
523
# non-recursive scheme was implemented.
524
self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
526
def test_bug_612074(self):
527
pat=u"["+re.escape(u"\u2039")+u"]"
528
self.assertEqual(re.compile(pat) and 1, 1)
530
def test_stack_overflow(self):
531
# nasty cases that used to overflow the straightforward recursive
532
# implementation of repeated groups.
533
self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
534
self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
535
self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
537
def test_scanner(self):
538
def s_ident(scanner, token): return token
539
def s_operator(scanner, token): return "op%s" % token
540
def s_float(scanner, token): return float(token)
541
def s_int(scanner, token): return int(token)
544
(r"[a-zA-Z_]\w*", s_ident),
545
(r"\d+\.\d*", s_float),
547
(r"=|\+|-|\*|/", s_operator),
551
self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
553
self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
554
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
557
def test_bug_448951(self):
558
# bug 448951 (similar to 429357, but with single char match)
559
# (Also test greedy matches.)
560
for op in '','?','*':
561
self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
563
self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
566
def test_bug_725106(self):
567
# capturing groups in alternatives in repeats
568
self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
570
self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
572
self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
574
self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
576
self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
578
self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
580
self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
582
self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
585
def test_bug_725149(self):
586
# mark_stack_base restoring before restoring marks
587
self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
589
self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
592
def test_bug_764548(self):
593
# bug 764548, re.compile() barfs on str/unicode subclasses
597
return # no problem if we have no unicode
598
class my_unicode(unicode): pass
599
pat = re.compile(my_unicode("abc"))
600
self.assertEqual(pat.match("xyz"), None)
602
def test_finditer(self):
603
iter = re.finditer(r":+", "a:b::c:::d")
604
self.assertEqual([item.group(0) for item in iter],
607
def test_bug_926075(self):
611
return # no problem if we have no unicode
612
self.assert_(re.compile('bug_926075') is not
613
re.compile(eval("u'bug_926075'")))
615
def test_bug_931848(self):
620
pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
621
self.assertEqual(re.compile(pattern).split("a.b.c"),
624
def test_bug_581080(self):
625
iter = re.finditer(r"\s", "a b")
626
self.assertEqual(iter.next().span(), (1,2))
627
self.assertRaises(StopIteration, iter.next)
629
scanner = re.compile(r"\s").scanner("a b")
630
self.assertEqual(scanner.search().span(), (1, 2))
631
self.assertEqual(scanner.search(), None)
633
def test_bug_817234(self):
634
iter = re.finditer(r".*", "asdf")
635
self.assertEqual(iter.next().span(), (0, 4))
636
self.assertEqual(iter.next().span(), (4, 4))
637
self.assertRaises(StopIteration, iter.next)
639
def test_empty_array(self):
642
for typecode in 'cbBuhHiIlLfd':
643
a = array.array(typecode)
644
self.assertEqual(re.compile("bla").match(a), None)
645
self.assertEqual(re.compile("").match(a).groups(), ())
647
def test_inline_flags(self):
649
upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
650
lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
652
p = re.compile(upper_char, re.I | re.U)
653
q = p.match(lower_char)
654
self.assertNotEqual(q, None)
656
p = re.compile(lower_char, re.I | re.U)
657
q = p.match(upper_char)
658
self.assertNotEqual(q, None)
660
p = re.compile('(?i)' + upper_char, re.U)
661
q = p.match(lower_char)
662
self.assertNotEqual(q, None)
664
p = re.compile('(?i)' + lower_char, re.U)
665
q = p.match(upper_char)
666
self.assertNotEqual(q, None)
668
p = re.compile('(?iu)' + upper_char)
669
q = p.match(lower_char)
670
self.assertNotEqual(q, None)
672
p = re.compile('(?iu)' + lower_char)
673
q = p.match(upper_char)
674
self.assertNotEqual(q, None)
676
def test_dollar_matches_twice(self):
677
"$ matches the end of string, and just before the terminating \n"
678
pattern = re.compile('$')
679
self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
680
self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
681
self.assertEqual(pattern.sub('#', '\n'), '#\n#')
683
pattern = re.compile('$', re.MULTILINE)
684
self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
685
self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
686
self.assertEqual(pattern.sub('#', '\n'), '#\n#')
690
from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
692
print 'Running re_tests test suite'
694
# To save time, only run the first and last 10 tests
695
#tests = tests[:10] + tests[-10:]
700
pattern = s = outcome = repl = expected = None
702
pattern, s, outcome, repl, expected = t
704
pattern, s, outcome = t
706
raise ValueError, ('Test tuples should have 3 or 5 fields', t)
709
obj = re.compile(pattern)
711
if outcome == SYNTAX_ERROR: pass # Expected a syntax error
713
print '=== Syntax error:', t
714
except KeyboardInterrupt: raise KeyboardInterrupt
716
print '*** Unexpected error ***', t
718
traceback.print_exc(file=sys.stdout)
721
result = obj.search(s)
722
except re.error, msg:
723
print '=== Unexpected exception', t, repr(msg)
724
if outcome == SYNTAX_ERROR:
725
# This should have been a syntax error; forget it.
727
elif outcome == FAIL:
728
if result is None: pass # No match, as expected
729
else: print '=== Succeeded incorrectly', t
730
elif outcome == SUCCEED:
731
if result is not None:
732
# Matched, as expected, so now we compute the
733
# result string and compare it to our expected result.
734
start, end = result.span(0)
735
vardict={'found': result.group(0),
736
'groups': result.group(),
737
'flags': result.re.flags}
738
for i in range(1, 100):
741
# Special hack because else the string concat fails:
746
vardict['g%d' % i] = gi
747
for i in result.re.groupindex.keys():
755
repl = eval(repl, vardict)
757
print '=== grouping error', t,
758
print repr(repl) + ' should be ' + repr(expected)
760
print '=== Failed incorrectly', t
762
# Try the match on a unicode string, and check that it
765
result = obj.search(unicode(s, "latin-1"))
767
print '=== Fails on unicode match', t
771
continue # unicode test case
773
# Try the match on a unicode pattern, and check that it
775
obj=re.compile(unicode(pattern, "latin-1"))
776
result = obj.search(s)
778
print '=== Fails on unicode pattern match', t
780
# Try the match with the search area limited to the extent
781
# of the match and see if it still succeeds. \B will
782
# break (because it won't match at the end or start of a
783
# string), so we'll ignore patterns that feature it.
785
if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
786
and result is not None:
787
obj = re.compile(pattern)
788
result = obj.search(s, result.start(0), result.end(0) + 1)
790
print '=== Failed on range-limited match', t
792
# Try the match with IGNORECASE enabled, and check that it
794
obj = re.compile(pattern, re.IGNORECASE)
795
result = obj.search(s)
797
print '=== Fails on case-insensitive match', t
799
# Try the match with LOCALE enabled, and check that it
801
obj = re.compile(pattern, re.LOCALE)
802
result = obj.search(s)
804
print '=== Fails on locale-sensitive match', t
806
# Try the match with UNICODE locale enabled, and check
807
# that it still succeeds.
808
obj = re.compile(pattern, re.UNICODE)
809
result = obj.search(s)
811
print '=== Fails on unicode-sensitive match', t
814
run_unittest(ReTests)
817
if __name__ == "__main__":