"""Interp-level rsre tests."""
import sys

from py.test import raises

from pypy.rlib.rsre.rsre import SimpleStringState, set_unicode_db
from pypy.rlib.rsre import rsre_char
6
# Short alias for the rsre core object whose at_* predicates are tested below.
isre = SimpleStringState.rsre_core

# Install the Unicode 3.2.0 database at import time, before any test runs.
from pypy.module.unicodedata import unicodedb_3_2_0
set_unicode_db(unicodedb_3_2_0)

# Non-ASCII sample characters shared by the character-class tests.
# NOTE(review): EM_SPACE is referenced by test_is_uni_word and
# test_is_uni_space but its definition was not present in this file as
# received; U+2003 (EM SPACE) is assumed -- confirm against the upstream file.
EM_SPACE = u"\u2003"
INDIAN_DIGIT = u"\u0966"    # DEVANAGARI DIGIT ZERO
14
def create_context(string, string_position, end):
    """Return a match context over `string` positioned at `string_position`.

    A SimpleStringState is created for the range 0..end, its
    string_position is overridden with the requested value, and a
    _MatchContext with an empty pattern-code list is built on top of it.
    """
    matcher_state = SimpleStringState(string, 0, end)
    matcher_state.string_position = string_position
    empty_pattern = []
    return matcher_state._MatchContext(matcher_state, empty_pattern)
19
def test_is_uni_linebreak():
    """is_uni_linebreak accepts "\\n", "\\r" and U+2028, rejects " " and "b"."""
    for newline in ("\n", "\r"):
        code = ord(newline)
        assert rsre_char.is_uni_linebreak(code)
    for ordinary in (" ", "b"):
        code = ord(ordinary)
        assert not rsre_char.is_uni_linebreak(code)
    # 0x2028 == 8232, the Unicode LINE SEPARATOR code point.
    assert rsre_char.is_uni_linebreak(0x2028)
26
def test_is_uni_word():
    """is_uni_word accepts letters and "_", and rejects punctuation and spaces.

    The ASCII-only loops that used to precede each list were strict subsets
    of the lists below, so they have been folded in; the set of characters
    checked is unchanged.
    """
    # "\xe4" and u"\u00e4" are both kept: under Python 2 they are a byte
    # string and a unicode string with the same ordinal (228).
    for char in ["a", "_", "\xe4", u"\u00e4", u"\u03a0"]:
        assert rsre_char.is_uni_word(ord(char))
    # EM_SPACE is a module-level constant expected to be defined above.
    for char in [".", " ", EM_SPACE]:
        assert not rsre_char.is_uni_word(ord(char))
36
def test_is_loc_word():
    """is_loc_word accepts ASCII digits and rejects " ", "." and U+03A0."""
    # should also test chars actually affected by locale (between 128 and 256)
    for char in ["1", "2"]:
        # The original repeated this exact assertion twice in a row; one
        # copy is enough.
        assert rsre_char.is_loc_word(ord(char))
    for char in [" ", u".", u"\u03a0"]:
        assert not rsre_char.is_loc_word(ord(char))
44
def test_is_uni_digit():
    """is_uni_digit accepts ASCII and non-ASCII digits, rejects " " and "s".

    The separate ["0", "9"] loop was a strict subset of the list below and
    has been merged into it; the set of characters checked is unchanged.
    """
    for char in ["0", "9", INDIAN_DIGIT]:
        assert rsre_char.is_uni_digit(ord(char))
    for char in [" ", "s"]:
        assert not rsre_char.is_uni_digit(ord(char))
52
def test_is_uni_space():
    """is_uni_space accepts whitespace characters and rejects "a" and "1"."""
    ascii_blanks = [" ", "\t"]
    other_blanks = ["\v", "\n", EM_SPACE]
    # Same characters, same order as checking the two lists one after another.
    for blank in ascii_blanks + other_blanks:
        assert rsre_char.is_uni_space(ord(blank))
    for visible in ["a", "1"]:
        assert not rsre_char.is_uni_space(ord(visible))
60
def test_at_beginning():
    """at_beginning holds at position 0 of "" and fails at position 1 of "a"."""
    empty_start = create_context("", 0, 0)
    assert isre.at_beginning(empty_start)
    past_first_char = create_context("a", 1, 1)
    assert not isre.at_beginning(past_first_char)
64
def test_at_beginning_line():
    """at_beginning_line holds at the string start and right after "\\n"."""
    line_starts = [("", 0, 0), ("\na", 1, 3)]
    for text, position, limit in line_starts:
        assert isre.at_beginning_line(create_context(text, position, limit))
    # Position 1 of "a" follows a non-newline character.
    mid_line = create_context("a", 1, 2)
    assert not isre.at_beginning_line(mid_line)
70
# NOTE(review): the `def` line for these assertions was missing from the file
# as received; the name follows the test_<function> pattern of the sibling
# tests -- confirm against the upstream file.
def test_at_end():
    """at_end holds for the listed end positions and fails at 0 of "a"."""
    for string, pos, end in [("", 0, 0), ("a", 1, 1), ("a\n", 1, 2)]:
        assert isre.at_end(create_context(string, pos, end))
    assert not isre.at_end(create_context("a", 0, 1))
74
def test_at_boundary():
    """at_boundary holds between "a" and "." and fails for the other cases."""
    boundary_cases = [("a.", 1, 2), (".a", 1, 2)]
    non_boundary_cases = [(".", 0, 1), (".", 1, 1), ("ab", 1, 2)]
    for text, position, limit in boundary_cases:
        context = create_context(text, position, limit)
        assert isre.at_boundary(context)
    for text, position, limit in non_boundary_cases:
        context = create_context(text, position, limit)
        assert not isre.at_boundary(context)
81
# NOTE(review): the `def` line for this assertion was missing from the file as
# received; the name follows the test_<function> pattern of the sibling
# tests -- confirm against the upstream file.
def test_getlower():
    """getlower maps ord("A") to ord("a") when called with flags 0."""
    assert rsre_char.getlower(ord("A"), 0) == ord("a")
83
def test_get_byte_array():
    """to_byte_array splits a code into CODESIZE bytes in host byte order.

    The expected lists depend on both sys.byteorder and rsre_char.CODESIZE,
    so exactly one of the four groups of assertions runs on a given host.
    The `else:` branches were missing from the file as received and are
    restored here; without them the mutually exclusive expectations would
    all be evaluated.
    """
    if sys.byteorder == "big":
        if rsre_char.CODESIZE == 2:
            assert [0, 1] == rsre_char.to_byte_array(1)
            assert [1, 0] == rsre_char.to_byte_array(256)
            assert [1, 2] == rsre_char.to_byte_array(258)
        else:
            assert [0, 0, 0, 1] == rsre_char.to_byte_array(1)
            assert [0, 0, 1, 0] == rsre_char.to_byte_array(256)
            assert [1, 2, 3, 4] == rsre_char.to_byte_array(0x01020304)
    else:
        # Little-endian hosts: same values, bytes in reverse order.
        if rsre_char.CODESIZE == 2:
            assert [1, 0] == rsre_char.to_byte_array(1)
            assert [0, 1] == rsre_char.to_byte_array(256)
            assert [2, 1] == rsre_char.to_byte_array(258)
        else:
            assert [1, 0, 0, 0] == rsre_char.to_byte_array(1)
            assert [0, 1, 0, 0] == rsre_char.to_byte_array(256)
            assert [4, 3, 2, 1] == rsre_char.to_byte_array(0x01020304)
103
# ____________________________________________________________
105
# XXX no matching/searching tests here, they are in pypy/module/_sre for now