1
###############################################################################
4
# Copyright (c) 2011 Ruslan Spivak
6
# Permission is hereby granted, free of charge, to any person obtaining a copy
7
# of this software and associated documentation files (the "Software"), to deal
8
# in the Software without restriction, including without limitation the rights
9
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
# copies of the Software, and to permit persons to whom the Software is
11
# furnished to do so, subject to the following conditions:
13
# The above copyright notice and this permission notice shall be included in
14
# all copies or substantial portions of the Software.
16
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24
###############################################################################
26
__author__ = 'Ruslan Spivak <ruslan.spivak@gmail.com>'
33
from slimit.lexer import Lexer
36
# The structure and some test cases are taken
37
# from https://bitbucket.org/ned/jslex
38
class LexerTestCase(unittest.TestCase):
44
def assertListEqual(self, first, second):
45
"""Assert that two lists are equal.
47
Prints differences on error.
48
This method is similar to that of Python 2.7 'assertListEqual'
52
difflib.ndiff(pprint.pformat(first).splitlines(),
53
pprint.pformat(second).splitlines())
55
self.fail('Lists differ:\n' + message)
57
def test_illegal_unicode_char_in_identifier(self):
58
lexer = self._get_lexer()
59
lexer.input(u'\u0036_tail')
61
# \u0036_tail is the same as 6_tail and that's not a correct ID
62
# Check that the token is NUMBER and not an ID
63
self.assertEqual(token.type, 'NUMBER')
64
self.assertEqual(token.value, '6')
68
('i my_variable_name c17 _dummy $str $ _ CamelCase class2type',
69
['ID i', 'ID my_variable_name', 'ID c17', 'ID _dummy',
70
'ID $str', 'ID $', 'ID _', 'ID CamelCase', 'ID class2type']
72
(ur'\u03c0 \u03c0_tail var\ua67c',
73
[ur'ID \u03c0', ur'ID \u03c0_tail', ur'ID var\ua67c']),
74
# https://github.com/rspivak/slimit/issues/2
75
('nullify truelie falsepositive',
76
['ID nullify', 'ID truelie', 'ID falsepositive']),
79
# ('break case ...', ['BREAK break', 'CASE case', ...])
80
(' '.join(kw.lower() for kw in Lexer.keywords),
81
['%s %s' % (kw, kw.lower()) for kw in Lexer.keywords]
83
('break Break BREAK', ['BREAK break', 'ID Break', 'ID BREAK']),
86
('null true false Null True False',
87
['NULL null', 'TRUE true', 'FALSE false',
88
'ID Null', 'ID True', 'ID False']
92
('a /= b', ['ID a', 'DIVEQUAL /=', 'ID b']),
93
(('= == != === !== < > <= >= || && ++ -- << >> '
94
'>>> += -= *= <<= >>= >>>= &= %= ^= |='),
95
['EQ =', 'EQEQ ==', 'NE !=', 'STREQ ===', 'STRNEQ !==', 'LT <',
96
'GT >', 'LE <=', 'GE >=', 'OR ||', 'AND &&', 'PLUSPLUS ++',
97
'MINUSMINUS --', 'LSHIFT <<', 'RSHIFT >>', 'URSHIFT >>>',
98
'PLUSEQUAL +=', 'MINUSEQUAL -=', 'MULTEQUAL *=', 'LSHIFTEQUAL <<=',
99
'RSHIFTEQUAL >>=', 'URSHIFTEQUAL >>>=', 'ANDEQUAL &=', 'MODEQUAL %=',
100
'XOREQUAL ^=', 'OREQUAL |=',
103
('. , ; : + - * % & | ^ ~ ? ! ( ) { } [ ]',
104
['PERIOD .', 'COMMA ,', 'SEMI ;', 'COLON :', 'PLUS +', 'MINUS -',
105
'MULT *', 'MOD %', 'BAND &', 'BOR |', 'BXOR ^', 'BNOT ~',
106
'CONDOP ?', 'NOT !', 'LPAREN (', 'RPAREN )', 'LBRACE {', 'RBRACE }',
107
'LBRACKET [', 'RBRACKET ]']
109
('a / b', ['ID a', 'DIV /', 'ID b']),
112
(('3 3.3 0 0. 0.0 0.001 010 3.e2 3.e-2 3.e+2 3E2 3E+2 3E-2 '
113
'0.5e2 0.5e+2 0.5e-2 33 128.15 0x001 0X12ABCDEF 0xabcdef'),
114
['NUMBER 3', 'NUMBER 3.3', 'NUMBER 0', 'NUMBER 0.', 'NUMBER 0.0',
115
'NUMBER 0.001', 'NUMBER 010', 'NUMBER 3.e2', 'NUMBER 3.e-2',
116
'NUMBER 3.e+2', 'NUMBER 3E2', 'NUMBER 3E+2', 'NUMBER 3E-2',
117
'NUMBER 0.5e2', 'NUMBER 0.5e+2', 'NUMBER 0.5e-2', 'NUMBER 33',
118
'NUMBER 128.15', 'NUMBER 0x001', 'NUMBER 0X12ABCDEF',
123
(""" '"' """, ["""STRING '"'"""]),
124
(r'''"foo" 'foo' "x\";" 'x\';' "foo\tbar"''',
125
['STRING "foo"', """STRING 'foo'""", r'STRING "x\";"',
126
r"STRING 'x\';'", r'STRING "foo\tbar"']
128
(r"""'\x55' "\x12ABCDEF" '!@#$%^&*()_+{}[]\";?'""",
129
[r"STRING '\x55'", r'STRING "\x12ABCDEF"',
130
r"STRING '!@#$%^&*()_+{}[]\";?'"]
132
(r"""'\u0001' "\uFCEF" 'a\\\b\n'""",
133
[r"STRING '\u0001'", r'STRING "\uFCEF"', r"STRING 'a\\\b\n'"]
135
(ur'"тест строки\""', [ur'STRING "тест строки\""']),
136
# Bug - https://github.com/rspivak/slimit/issues/5
137
(r"var tagRegExp = new RegExp('<(\/*)(FooBar)', 'gi');",
138
['VAR var', 'ID tagRegExp', 'EQ =',
139
'NEW new', 'ID RegExp', 'LPAREN (',
140
r"STRING '<(\/*)(FooBar)'", 'COMMA ,', "STRING 'gi'",
141
'RPAREN )', 'SEMI ;']
143
# same as above but inside double quotes
144
(r'"<(\/*)(FooBar)"', [r'STRING "<(\/*)(FooBar)"']),
151
# """, ['LINE_COMMENT //comment', 'ID a', 'EQ =', 'NUMBER 5', 'SEMI ;']
153
# ('a//comment', ['ID a', 'LINE_COMMENT //comment']),
154
# ('/***/b/=3//line',
155
# ['BLOCK_COMMENT /***/', 'ID b', 'DIVEQUAL /=',
156
# 'NUMBER 3', 'LINE_COMMENT //line']
158
# ('/*\n * Copyright LGPL 2011 \n*/\na = 1;',
159
# ['BLOCK_COMMENT /*\n * Copyright LGPL 2011 \n*/',
160
# 'ID a', 'EQ =', 'NUMBER 1', 'SEMI ;']
164
(r'a=/a*/,1', ['ID a', 'EQ =', 'REGEX /a*/', 'COMMA ,', 'NUMBER 1']),
166
['ID a', 'EQ =', 'REGEX /a*[^/]+/', 'COMMA ,', 'NUMBER 1']
169
['ID a', 'EQ =', r'REGEX /a*\[^/', 'COMMA ,', 'NUMBER 1']
171
(r'a=/\//,1', ['ID a', 'EQ =', r'REGEX /\//', 'COMMA ,', 'NUMBER 1']),
172
# not a regex, just a division
173
# https://github.com/rspivak/slimit/issues/6
175
['ID x', 'EQ =', 'THIS this', r'DIV /', r'ID y', r'SEMI ;']),
178
# http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
179
("""for (var x = a in foo && "</x>" || mot ? z:/x:3;x<5;y</g/i) {xyz(x++);}""",
180
["FOR for", "LPAREN (", "VAR var", "ID x", "EQ =", "ID a", "IN in",
181
"ID foo", "AND &&", 'STRING "</x>"', "OR ||", "ID mot", "CONDOP ?",
182
"ID z", "COLON :", "REGEX /x:3;x<5;y</g", "DIV /", "ID i", "RPAREN )",
183
"LBRACE {", "ID xyz", "LPAREN (", "ID x", "PLUSPLUS ++", "RPAREN )",
184
"SEMI ;", "RBRACE }"]
187
("""for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y</g/i) {xyz(x++);}""",
188
["FOR for", "LPAREN (", "VAR var", "ID x", "EQ =", "ID a", "IN in",
189
"ID foo", "AND &&", 'STRING "</x>"', "OR ||", "ID mot", "CONDOP ?",
190
"ID z", "DIV /", "ID x", "COLON :", "NUMBER 3", "SEMI ;", "ID x",
191
"LT <", "NUMBER 5", "SEMI ;", "ID y", "LT <", "REGEX /g/i",
192
"RPAREN )", "LBRACE {", "ID xyz", "LPAREN (", "ID x", "PLUSPLUS ++",
193
"RPAREN )", "SEMI ;", "RBRACE }"]
196
# Various "illegal" regexes that are valid according to the std.
197
(r"""/????/, /++++/, /[----]/ """,
198
['REGEX /????/', 'COMMA ,',
199
'REGEX /++++/', 'COMMA ,', 'REGEX /[----]/']
202
# Stress cases from http://stackoverflow.com/questions/5533925/what-javascript-constructs-does-jslex-incorrectly-lex/5573409#5573409
203
(r"""/\[/""", [r"""REGEX /\[/"""]),
204
(r"""/[i]/""", [r"""REGEX /[i]/"""]),
205
(r"""/[\]]/""", [r"""REGEX /[\]]/"""]),
206
(r"""/a[\]]/""", [r"""REGEX /a[\]]/"""]),
207
(r"""/a[\]]b/""", [r"""REGEX /a[\]]b/"""]),
208
(r"""/[\]/]/gi""", [r"""REGEX /[\]/]/gi"""]),
209
(r"""/\[[^\]]+\]/gi""", [r"""REGEX /\[[^\]]+\]/gi"""]),
212
NAME: /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/,
213
UNQUOTED_LITERAL: /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
214
QUOTED_LITERAL: /^'(?:[^']|'')*'/,
215
NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
216
SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
219
["ID rexl", "PERIOD .", "ID re", "EQ =", "LBRACE {",
220
"ID NAME", "COLON :",
221
r"""REGEX /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/""", "COMMA ,",
222
"ID UNQUOTED_LITERAL", "COLON :",
223
r"""REGEX /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""",
224
"COMMA ,", "ID QUOTED_LITERAL", "COLON :",
225
r"""REGEX /^'(?:[^']|'')*'/""", "COMMA ,", "ID NUMERIC_LITERAL",
227
r"""REGEX /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "COMMA ,",
228
"ID SYMBOL", "COLON :",
229
r"""REGEX /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
230
"RBRACE }", "SEMI ;"]
234
NAME: /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/,
235
UNQUOTED_LITERAL: /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
236
QUOTED_LITERAL: /^'(?:[^']|'')*'/,
237
NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
238
SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
242
["ID rexl", "PERIOD .", "ID re", "EQ =", "LBRACE {",
243
"ID NAME", "COLON :", r"""REGEX /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/""",
244
"COMMA ,", "ID UNQUOTED_LITERAL", "COLON :",
245
r"""REGEX /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""",
246
"COMMA ,", "ID QUOTED_LITERAL", "COLON :",
247
r"""REGEX /^'(?:[^']|'')*'/""", "COMMA ,",
248
"ID NUMERIC_LITERAL", "COLON :",
249
r"""REGEX /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "COMMA ,",
250
"ID SYMBOL", "COLON :",
251
r"""REGEX /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
252
"RBRACE }", "SEMI ;",
253
"ID str", "EQ =", """STRING '"'""", "SEMI ;",
255
(r""" this._js = "e.str(\"" + this.value.replace(/\\/g, "\\\\").replace(/"/g, "\\\"") + "\")"; """,
256
["THIS this", "PERIOD .", "ID _js", "EQ =",
257
r'''STRING "e.str(\""''', "PLUS +", "THIS this", "PERIOD .",
258
"ID value", "PERIOD .", "ID replace", "LPAREN (", r"REGEX /\\/g",
259
"COMMA ,", r'STRING "\\\\"', "RPAREN )", "PERIOD .", "ID replace",
260
"LPAREN (", r'REGEX /"/g', "COMMA ,", r'STRING "\\\""', "RPAREN )",
261
"PLUS +", r'STRING "\")"', "SEMI ;"]),
265
def make_test_function(input, expected):
268
lexer = self._get_lexer()
270
result = ['%s %s' % (token.type, token.value) for token in lexer]
271
self.assertListEqual(result, expected)
275
# Attach one generated method per TEST_CASES entry to LexerTestCase.
# Entry number N is stored as the attribute ``test_case_N``; the method itself
# is built by make_test_function from the entry's (source text, expected
# token list) pair.
#
# The loop variables deliberately avoid the name ``input``: a module-level
# ``for`` target stays bound as a global after the loop ends, which would
# otherwise leave the builtin ``input`` shadowed for the rest of the module.
for case_index, (case_source, case_expected) in enumerate(
        LexerTestCase.TEST_CASES):
    setattr(LexerTestCase, 'test_case_%d' % case_index,
            make_test_function(case_source, case_expected))
281
return unittest.TestSuite((
282
unittest.makeSuite(LexerTestCase),
283
doctest.DocFileSuite(
285
optionflags=doctest.NORMALIZE_WHITESPACE|doctest.ELLIPSIS