2
from pypy.rlib.parsing.lexer import SourcePos
3
from pypy.rlib.parsing.tree import Node, Symbol, Nonterminal
6
def __init__(self, nonterminal, expansions):
7
self.nonterminal = nonterminal
8
self.expansions = expansions
11
return (self.nonterminal, tuple(self.expansions))
14
# return hash(self.getkey())
16
def __eq__(self, other):
17
return self.getkey() == other.getkey()
19
def __ne__(self, other):
20
return not self == other
24
self.nonterminal, " | ".join([repr(e) for e in self.expansions]))
27
return "Rule(%r, %r)" % (self.nonterminal, self.expansions)
29
class LazyInputStream(object):
30
def __init__(self, iterator):
31
self.iterator = iter(iterator)
34
def __getitem__(self, index):
36
while len(self.data) <= index:
38
self.data.append(self.iterator.next())
40
raise IndexError("index out of range")
41
return self.data[index]
43
class ParseError(Exception):
44
def __init__(self, source_pos, errorinformation):
45
self.source_pos = source_pos
46
self.errorinformation = errorinformation
47
self.args = (source_pos, errorinformation)
49
def nice_error_message(self, filename="<unknown>", source=""):
50
result = [" File %s, line %s" % (filename, self.source_pos.lineno)]
52
result.append(source.split("\n")[self.source_pos.lineno])
53
result.append(" " * self.source_pos.columnno + "^")
55
result.append("<couldn't get source>")
56
if self.errorinformation:
57
failure_reasons = self.errorinformation.failure_reasons
58
if len(failure_reasons) > 1:
59
all_but_one = failure_reasons[:-1]
60
last = failure_reasons[-1]
61
expected = "%s or '%s'" % (
62
", ".join(["'%s'" % e for e in all_but_one]), last)
64
expected = failure_reasons[0]
65
result.append("ParseError: expected %s" % (expected, ))
67
result.append("ParseError")
68
return "\n".join(result)
71
class ErrorInformation(object):
72
def __init__(self, pos, failure_reasons=None):
73
if failure_reasons is None:
75
self.failure_reasons = failure_reasons
78
def combine_errors(self, other):
81
if (other is None or self.pos > other.pos or
82
len(other.failure_reasons) == 0):
84
elif other.pos > self.pos or len(self.failure_reasons) == 0:
88
for fr in [self.failure_reasons, other.failure_reasons]:
90
if reason not in already_there:
91
already_there[reason] = True
92
failure_reasons.append(reason)
93
return ErrorInformation(self.pos, failure_reasons)
95
class LazyParseTable(object):
96
def __init__(self, input, parser):
100
self.errorinformation = {}
102
def match_symbol(self, i, symbol):
104
#print self.matched.keys()
105
if (i, symbol) in self.matched:
106
return self.matched[i, symbol]
107
error = None # for the annotator
108
if self.parser.is_nonterminal(symbol):
109
rule = self.parser.get_rule(symbol)
110
lastexpansion = len(rule.expansions) - 1
113
for expansionindex in range(len(rule.expansions)):
114
expansion = rule.expansions[expansionindex]
117
for j in range(len(expansion)):
118
subsymbol = expansion[j]
119
node, next, error2 = self.match_symbol(curr, subsymbol)
121
error = combine_errors(error, error2)
123
children.append(node)
126
assert len(expansion) == len(children)
127
result = (Nonterminal(symbol, children), curr, error)
128
self.matched[i, symbol] = result
130
self.matched[i, symbol] = None, 0, error
131
return None, 0, error
134
input = self.input[i]
135
if self.terminal_equality(symbol, input):
136
result = (Symbol(symbol, input.source, input), i + 1, error)
137
self.matched[i, symbol] = result
140
# XXX hack unnice: handles the sort of token names that
142
if (symbol.startswith("__") and
143
symbol.split("_")[2][0] in "0123456789"):
144
expected = symbol.split("_")[-1]
147
error = ErrorInformation(i, [expected])
149
error = ErrorInformation(i)
150
return None, 0, error
152
def terminal_equality(self, symbol, input):
153
return symbol == input.name
156
class PackratParser(object):
157
def __init__(self, rules, startsymbol, parsetablefactory=LazyParseTable,
158
check_for_left_recursion=True):
160
self.nonterminal_to_rule = {}
162
self.nonterminal_to_rule[rule.nonterminal] = rule
163
self.startsymbol = startsymbol
164
if check_for_left_recursion:
165
assert not self.has_left_recursion()
166
self.parsetablefactory = parsetablefactory
168
def is_nonterminal(self, symbol):
169
return symbol in self.nonterminal_to_rule
171
def get_rule(self, symbol):
172
return self.nonterminal_to_rule[symbol]
174
def parse(self, tokeniterator, lazy=False):
176
input = LazyInputStream(tokeniterator)
178
input = list(tokeniterator)
179
table = self.parsetablefactory(input, self)
180
result = table.match_symbol(0, self.startsymbol)
181
if result[0] is None:
183
raise ParseError(input[error.pos].source_pos, error)
186
def has_left_recursion(self):
189
for rule in self.rules:
191
follows[rule.nonterminal] = follow
192
for expansion in rule.expansions:
193
if expansion and self.is_nonterminal(expansion[0]):
194
follow.add(expansion[0])
198
for nonterminal, follow in follows.iteritems():
200
subfollow = follows[nt]
201
update = subfollow - follow
204
follow.update(update)
206
for nonterminal, follow in follows.iteritems():
207
if nonterminal in follow:
208
print "nonterminal %s is in its own follow %s" % (nonterminal, follow)
213
from pprint import pformat
214
return "%s%s" % (self.__class__.__name__,
215
pformat((self.rules, self.startsymbol)), )
217
class ParserCompiler(object):
218
def __init__(self, parser):
221
self.symbol_to_number = {}
225
from pypy.tool.sourcetools import func_with_new_name
226
self.allcode.append("class CompileableParser(baseclass):")
227
self.make_matcher(self.parser.startsymbol)
229
miniglobals = globals().copy()
230
miniglobals["baseclass"] = self.parser.__class__
231
#print "\n".join(self.allcode)
232
exec py.code.Source("\n".join(self.allcode)).compile() in miniglobals
233
kls = miniglobals["CompileableParser"]
235
parsetable = self.parser.parsetablefactory([], self.parser)
236
kls.terminal_equality = func_with_new_name(
237
parsetable.terminal_equality.im_func,
238
"terminal_equality_compileable")
241
def get_number(self, symbol):
242
if symbol in self.symbol_to_number:
243
return self.symbol_to_number[symbol]
244
result = len(self.symbol_to_number)
245
self.symbol_to_number[symbol] = result
248
def make_matcher(self, symbol):
249
if symbol not in self.made:
250
self.made[symbol] = True
251
if self.parser.is_nonterminal(symbol):
252
self.make_nonterminal_matcher(symbol)
254
self.make_terminal_matcher(symbol)
256
def make_terminal_matcher(self, symbol):
257
number = self.get_number(symbol)
258
self.allcode.append("""
259
def match_terminal%(number)s(self, i):
260
# matcher for terminal %(number)s %(symbol)r
261
if i in self.matched_terminals%(number)s:
262
return self.matched_terminals%(number)s[i]
264
input = self.input[i]
265
if self.terminal_equality(%(symbol)r, input):
266
symbol = Symbol(%(symbol)r, input.name, input)
267
result = (symbol, i + 1)
268
self.matched_terminals%(number)s[i] = result
272
return None, i""" % vars())
274
def make_nonterminal_matcher(self, symbol):
275
number = self.get_number(symbol)
276
rule = self.parser.nonterminal_to_rule[symbol]
279
def match_nonterminal%(number)s(self, i):
280
# matcher for nonterminal %(number)s %(symbol)s
281
if i in self.matched_nonterminals%(number)s:
282
return self.matched_nonterminals%(number)s[i]
283
last_failed_position = 0
286
while 1:""" % vars())
287
for expansionindex, expansion in enumerate(rule.expansions):
288
nextindex = expansionindex + 1
290
if expansionindex == %s:""" % (expansionindex, ))
293
result = (Nonterminal(symbol, []), i)
294
self.matched_nonterminals%(number)s[i] = result
295
return result""" % vars())
300
for subsymbol in expansion:
301
self.make_matcher(subsymbol)
302
if self.parser.is_nonterminal(subsymbol):
303
match = "match_nonterminal%s" % self.get_number(subsymbol)
305
match = "match_terminal%s" % self.get_number(subsymbol)
307
node, next = self.%(match)s(curr)
309
last_failed_position = next
310
expansionindex = %(nextindex)s
312
curr = next""" % vars())
314
result = (Nonterminal(%(symbol)r, children), curr)
315
self.matched_nonterminals%(number)s[i] = result
316
return result""" % vars())
318
if expansionindex == %(nextindex)s:
319
result = None, last_failed_position
320
self.matched_nonterminals%(number)s[i] = result
321
return result""" % vars())
322
self.allcode.extend(code)
324
def make_fixed(self):
328
self.rules = [] # dummy
329
self.nonterminal_to_rule = {} # dummy
330
self.startsymbol = "" # dummy
331
self.parsetablefactory = None # dummy"""]
332
for symbol, number in self.symbol_to_number.iteritems():
333
if self.parser.is_nonterminal(symbol):
334
name = "matched_nonterminals%s" % number
336
name = "matched_terminals%s" % number
338
self.%(name)s = {}""" % vars())
340
startsymbol = self.get_number(self.parser.startsymbol)
342
def parse(self, tokenlist, lazy=True):
343
self.input = tokenlist
344
result = self.match_nonterminal%(startsymbol)s(0)
345
if result[0] is None:
346
raise ParseError(None, self.input[result[1]])
347
return result[0]""" % (vars()))
348
self.allcode.extend(code)