~ubuntu-branches/ubuntu/karmic/pypy/karmic

« back to all changes in this revision

Viewing changes to pypy/rlib/parsing/parsing.py

  • Committer: Bazaar Package Importer
  • Author(s): Alexandre Fayolle
  • Date: 2007-04-13 09:33:09 UTC
  • Revision ID: james.westby@ubuntu.com-20070413093309-yoojh4jcoocu2krz
Tags: upstream-1.0.0
ImportĀ upstreamĀ versionĀ 1.0.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
import py
 
2
from pypy.rlib.parsing.lexer import SourcePos
 
3
from pypy.rlib.parsing.tree import Node, Symbol, Nonterminal
 
4
 
 
5
class Rule(object):
 
6
    def __init__(self, nonterminal, expansions):
 
7
        self.nonterminal = nonterminal
 
8
        self.expansions = expansions
 
9
 
 
10
    def getkey(self):
 
11
        return (self.nonterminal, tuple(self.expansions))
 
12
 
 
13
#    def __hash__(self):
 
14
#        return hash(self.getkey())
 
15
 
 
16
    def __eq__(self, other):
 
17
        return self.getkey() == other.getkey()
 
18
 
 
19
    def __ne__(self, other):
 
20
        return not self == other
 
21
 
 
22
    def __str__(self):
 
23
        return "%s: %s" % (
 
24
            self.nonterminal, " | ".join([repr(e) for e in self.expansions]))
 
25
 
 
26
    def __repr__(self):
 
27
        return "Rule(%r, %r)" % (self.nonterminal, self.expansions)
 
28
 
 
29
class LazyInputStream(object):
 
30
    def __init__(self, iterator):
 
31
        self.iterator = iter(iterator)
 
32
        self.data = []
 
33
 
 
34
    def __getitem__(self, index):
 
35
        assert index >= 0
 
36
        while len(self.data) <= index:
 
37
            try:
 
38
                self.data.append(self.iterator.next())
 
39
            except StopIteration:
 
40
                raise IndexError("index out of range")
 
41
        return self.data[index]
 
42
 
 
43
class ParseError(Exception):
 
44
    def __init__(self, source_pos, errorinformation):
 
45
        self.source_pos = source_pos
 
46
        self.errorinformation = errorinformation
 
47
        self.args = (source_pos, errorinformation)
 
48
 
 
49
    def nice_error_message(self, filename="<unknown>", source=""):
 
50
        result = ["  File %s, line %s" % (filename, self.source_pos.lineno)]
 
51
        if source:
 
52
            result.append(source.split("\n")[self.source_pos.lineno])
 
53
            result.append(" " * self.source_pos.columnno + "^")
 
54
        else:
 
55
            result.append("<couldn't get source>")
 
56
        if self.errorinformation:
 
57
            failure_reasons = self.errorinformation.failure_reasons
 
58
            if len(failure_reasons) > 1:
 
59
                all_but_one = failure_reasons[:-1]
 
60
                last = failure_reasons[-1]
 
61
                expected = "%s or '%s'" % (
 
62
                    ", ".join(["'%s'" % e for e in all_but_one]), last)
 
63
            else:
 
64
                expected = failure_reasons[0]
 
65
            result.append("ParseError: expected %s" % (expected, ))
 
66
        else:
 
67
            result.append("ParseError")
 
68
        return "\n".join(result)
 
69
 
 
70
 
 
71
class ErrorInformation(object):
 
72
    def __init__(self, pos, failure_reasons=None):
 
73
        if failure_reasons is None:
 
74
            failure_reasons = []
 
75
        self.failure_reasons = failure_reasons
 
76
        self.pos = pos
 
77
 
 
78
def combine_errors(self, other):
 
79
    if self is None:
 
80
        return other
 
81
    if (other is None or self.pos > other.pos or
 
82
        len(other.failure_reasons) == 0):
 
83
        return self
 
84
    elif other.pos > self.pos or len(self.failure_reasons) == 0:
 
85
        return other
 
86
    failure_reasons = []
 
87
    already_there = {}
 
88
    for fr in [self.failure_reasons, other.failure_reasons]:
 
89
        for reason in fr:
 
90
            if reason not in already_there:
 
91
                already_there[reason] = True
 
92
                failure_reasons.append(reason)
 
93
    return ErrorInformation(self.pos, failure_reasons)
 
94
 
 
95
class LazyParseTable(object):
 
96
    def __init__(self, input, parser):
 
97
        self.parser = parser
 
98
        self.input = input
 
99
        self.matched = {}
 
100
        self.errorinformation = {}
 
101
 
 
102
    def match_symbol(self, i, symbol):
 
103
        #print i, symbol
 
104
        #print self.matched.keys()
 
105
        if (i, symbol) in self.matched:
 
106
            return self.matched[i, symbol]
 
107
        error = None # for the annotator
 
108
        if self.parser.is_nonterminal(symbol):
 
109
            rule = self.parser.get_rule(symbol)
 
110
            lastexpansion = len(rule.expansions) - 1
 
111
            subsymbol = None
 
112
            error = None
 
113
            for expansionindex in range(len(rule.expansions)):
 
114
                expansion = rule.expansions[expansionindex]
 
115
                curr = i
 
116
                children = []
 
117
                for j in range(len(expansion)):
 
118
                    subsymbol = expansion[j]
 
119
                    node, next, error2 = self.match_symbol(curr, subsymbol)
 
120
                    if node is None:
 
121
                        error = combine_errors(error, error2)
 
122
                        break
 
123
                    children.append(node)
 
124
                    curr = next
 
125
                else:
 
126
                    assert len(expansion) == len(children)
 
127
                    result = (Nonterminal(symbol, children), curr, error)
 
128
                    self.matched[i, symbol] = result
 
129
                    return result
 
130
            self.matched[i, symbol] = None, 0, error
 
131
            return None, 0, error
 
132
        else:
 
133
            try:
 
134
                input = self.input[i]
 
135
                if self.terminal_equality(symbol, input):
 
136
                    result = (Symbol(symbol, input.source, input), i + 1, error)
 
137
                    self.matched[i, symbol] = result
 
138
                    return result
 
139
                else:
 
140
                    # XXX hack unnice: handles the sort of token names that
 
141
                    # ebnfparse produces
 
142
                    if (symbol.startswith("__") and
 
143
                        symbol.split("_")[2][0] in "0123456789"):
 
144
                        expected = symbol.split("_")[-1]
 
145
                    else:
 
146
                        expected = symbol
 
147
                    error = ErrorInformation(i, [expected])
 
148
            except IndexError:
 
149
                error = ErrorInformation(i)
 
150
        return None, 0, error
 
151
    
 
152
    def terminal_equality(self, symbol, input):
 
153
        return symbol == input.name
 
154
 
 
155
 
 
156
class PackratParser(object):
 
157
    def __init__(self, rules, startsymbol, parsetablefactory=LazyParseTable,
 
158
                 check_for_left_recursion=True):
 
159
        self.rules = rules
 
160
        self.nonterminal_to_rule = {}
 
161
        for rule in rules:
 
162
            self.nonterminal_to_rule[rule.nonterminal] = rule
 
163
        self.startsymbol = startsymbol
 
164
        if check_for_left_recursion:
 
165
            assert not self.has_left_recursion()
 
166
        self.parsetablefactory = parsetablefactory
 
167
 
 
168
    def is_nonterminal(self, symbol):
 
169
        return symbol in self.nonterminal_to_rule
 
170
 
 
171
    def get_rule(self, symbol):
 
172
        return self.nonterminal_to_rule[symbol]
 
173
 
 
174
    def parse(self, tokeniterator, lazy=False):
 
175
        if lazy:
 
176
            input = LazyInputStream(tokeniterator)
 
177
        else:
 
178
            input = list(tokeniterator)
 
179
        table = self.parsetablefactory(input, self)
 
180
        result = table.match_symbol(0, self.startsymbol)
 
181
        if result[0] is None:
 
182
            error = result[2]
 
183
            raise ParseError(input[error.pos].source_pos, error)
 
184
        return result[0]
 
185
 
 
186
    def has_left_recursion(self):
 
187
        """NOT_RPYTHON"""
 
188
        follows = {}
 
189
        for rule in self.rules:
 
190
            follow = set()
 
191
            follows[rule.nonterminal] = follow
 
192
            for expansion in rule.expansions:
 
193
                if expansion and self.is_nonterminal(expansion[0]):
 
194
                    follow.add(expansion[0])
 
195
        changed = True
 
196
        while changed:
 
197
            changed = False
 
198
            for nonterminal, follow in follows.iteritems():
 
199
                for nt in follow:
 
200
                    subfollow = follows[nt]
 
201
                    update = subfollow - follow
 
202
                    if update:
 
203
                        changed = True
 
204
                        follow.update(update)
 
205
                        break
 
206
        for nonterminal, follow in follows.iteritems():
 
207
            if nonterminal in follow:
 
208
                print "nonterminal %s is in its own follow %s" % (nonterminal, follow)
 
209
                return True
 
210
        return False
 
211
 
 
212
    def __repr__(self):
 
213
        from pprint import pformat
 
214
        return "%s%s" % (self.__class__.__name__,
 
215
                         pformat((self.rules, self.startsymbol)), )
 
216
 
 
217
class ParserCompiler(object):
 
218
    def __init__(self, parser):
 
219
        self.parser = parser
 
220
        self.allcode = []
 
221
        self.symbol_to_number = {}
 
222
        self.made = {}
 
223
 
 
224
    def compile(self):
 
225
        from pypy.tool.sourcetools import func_with_new_name
 
226
        self.allcode.append("class CompileableParser(baseclass):")
 
227
        self.make_matcher(self.parser.startsymbol)
 
228
        self.make_fixed()
 
229
        miniglobals = globals().copy()
 
230
        miniglobals["baseclass"] = self.parser.__class__
 
231
        #print "\n".join(self.allcode)
 
232
        exec py.code.Source("\n".join(self.allcode)).compile() in miniglobals
 
233
        kls = miniglobals["CompileableParser"]
 
234
        # XXX
 
235
        parsetable = self.parser.parsetablefactory([], self.parser)
 
236
        kls.terminal_equality = func_with_new_name(
 
237
            parsetable.terminal_equality.im_func,
 
238
            "terminal_equality_compileable")
 
239
        return kls
 
240
 
 
241
    def get_number(self, symbol):
 
242
        if symbol in self.symbol_to_number:
 
243
            return self.symbol_to_number[symbol]
 
244
        result = len(self.symbol_to_number)
 
245
        self.symbol_to_number[symbol] = result
 
246
        return result
 
247
 
 
248
    def make_matcher(self, symbol):
 
249
        if symbol not in self.made:
 
250
            self.made[symbol] = True
 
251
            if self.parser.is_nonterminal(symbol):
 
252
                self.make_nonterminal_matcher(symbol)
 
253
            else:
 
254
                self.make_terminal_matcher(symbol)
 
255
 
 
256
    def make_terminal_matcher(self, symbol):
 
257
        number = self.get_number(symbol)
 
258
        self.allcode.append("""
 
259
    def match_terminal%(number)s(self, i):
 
260
        # matcher for terminal %(number)s %(symbol)r
 
261
        if i in self.matched_terminals%(number)s:
 
262
            return self.matched_terminals%(number)s[i]
 
263
        try:
 
264
            input = self.input[i]
 
265
            if self.terminal_equality(%(symbol)r, input):
 
266
                symbol = Symbol(%(symbol)r, input.name, input)
 
267
                result = (symbol, i + 1)
 
268
                self.matched_terminals%(number)s[i] = result
 
269
                return result
 
270
        except IndexError:
 
271
            pass
 
272
        return None, i""" % vars())
 
273
 
 
274
    def make_nonterminal_matcher(self, symbol):
 
275
        number = self.get_number(symbol)
 
276
        rule = self.parser.nonterminal_to_rule[symbol]
 
277
        code = []
 
278
        code.append("""
 
279
    def match_nonterminal%(number)s(self, i):
 
280
        # matcher for nonterminal %(number)s %(symbol)s
 
281
        if i in self.matched_nonterminals%(number)s:
 
282
            return self.matched_nonterminals%(number)s[i]
 
283
        last_failed_position = 0
 
284
        subsymbol = None
 
285
        expansionindex = 0
 
286
        while 1:""" % vars())
 
287
        for expansionindex, expansion in enumerate(rule.expansions):
 
288
            nextindex = expansionindex + 1
 
289
            code.append("""\
 
290
            if expansionindex == %s:""" % (expansionindex, ))
 
291
            if not expansion:
 
292
                code.append("""\
 
293
                result = (Nonterminal(symbol, []), i)
 
294
                self.matched_nonterminals%(number)s[i] = result
 
295
                return result""" % vars())
 
296
                continue
 
297
            code.append("""\
 
298
                curr = i
 
299
                children = []""")
 
300
            for subsymbol in expansion:
 
301
                self.make_matcher(subsymbol)
 
302
                if self.parser.is_nonterminal(subsymbol):
 
303
                    match = "match_nonterminal%s" % self.get_number(subsymbol)
 
304
                else:
 
305
                    match = "match_terminal%s" % self.get_number(subsymbol)
 
306
                code.append("""\
 
307
                node, next = self.%(match)s(curr)
 
308
                if node is None:
 
309
                    last_failed_position = next
 
310
                    expansionindex = %(nextindex)s
 
311
                    continue
 
312
                curr = next""" % vars())
 
313
            code.append("""\
 
314
                result = (Nonterminal(%(symbol)r, children), curr)
 
315
                self.matched_nonterminals%(number)s[i] = result
 
316
                return result""" % vars())
 
317
        code.append("""\
 
318
            if expansionindex == %(nextindex)s:
 
319
                result = None, last_failed_position
 
320
                self.matched_nonterminals%(number)s[i] = result
 
321
                return result""" % vars())
 
322
        self.allcode.extend(code)
 
323
 
 
324
    def make_fixed(self):
 
325
        # __init__
 
326
        code = ["""
 
327
    def __init__(self):
 
328
        self.rules = [] # dummy
 
329
        self.nonterminal_to_rule = {} # dummy
 
330
        self.startsymbol = "" # dummy
 
331
        self.parsetablefactory = None # dummy"""]
 
332
        for symbol, number in self.symbol_to_number.iteritems():
 
333
            if self.parser.is_nonterminal(symbol):
 
334
                name = "matched_nonterminals%s" % number
 
335
            else:
 
336
                name = "matched_terminals%s" % number
 
337
            code.append("""\
 
338
        self.%(name)s = {}""" % vars())
 
339
        # parse
 
340
        startsymbol = self.get_number(self.parser.startsymbol)
 
341
        code.append("""
 
342
    def parse(self, tokenlist, lazy=True):
 
343
        self.input = tokenlist
 
344
        result = self.match_nonterminal%(startsymbol)s(0)
 
345
        if result[0] is None:
 
346
            raise ParseError(None, self.input[result[1]])
 
347
        return result[0]""" % (vars()))
 
348
        self.allcode.extend(code)
 
349