1
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2
# Licensed to PSF under a Contributor Agreement.
4
"""Parser engine for the grammar tables generated by pgen.
6
The grammar table must be loaded first.
8
See Parser/parser.c in the Python distribution for additional info on
9
how this parsing engine works.
16
class ParseError(Exception):
17
"""Exception to signal the parser is stuck."""
19
def __init__(self, msg, type, value, context):
20
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
21
(msg, type, value, context))
25
self.context = context
30
The proper usage sequence is:
32
p = Parser(grammar, [converter]) # create instance
33
p.setup([start]) # prepare for parsing
34
<for each input token>:
35
if p.addtoken(...): # parse a token; may raise ParseError
37
root = p.rootnode # root of abstract syntax tree
39
A Parser instance may be reused by calling setup() repeatedly.
41
A Parser instance contains state pertaining to the current token
42
sequence, and should not be used concurrently by different threads
43
to parse separate token sequences.
45
See driver.py for how to get input tokens by tokenizing a file or
48
Parsing is complete when addtoken() returns True; the root of the
49
abstract syntax tree can then be retrieved from the rootnode
50
instance variable. When a syntax error occurs, addtoken() raises
51
the ParseError exception. There is no error recovery; the parser
52
cannot be used after a syntax error was reported (but it can be
53
reinitialized by calling setup()).
57
def __init__(self, grammar, convert=None):
60
The grammar argument is a grammar.Grammar instance; see the
61
grammar module for more information.
63
The parser is not ready yet for parsing; you must call the
64
setup() method to get it started.
66
The optional convert argument is a function mapping concrete
67
syntax tree nodes to abstract syntax tree nodes. If not
68
given, no conversion is done and the syntax tree produced is
69
the concrete syntax tree. If given, it must be a function of
70
two arguments, the first being the grammar (a grammar.Grammar
71
instance), and the second being the concrete syntax tree node
72
to be converted. The syntax tree is converted from the bottom
75
A concrete syntax tree node is a (type, value, context, nodes)
76
tuple, where type is the node type (a token or symbol number),
77
value is None for symbols and a string for tokens, context is
78
None or an opaque value used for error reporting (typically a
79
(lineno, offset) pair), and nodes is a list of children for
80
symbols, and None for tokens.
82
An abstract syntax tree node may be anything; this is entirely
83
up to the converter function.
86
self.grammar = grammar
87
self.convert = convert or (lambda grammar, node: node)
89
def setup(self, start=None):
90
"""Prepare for parsing.
92
This *must* be called before starting to parse.
94
The optional argument is an alternative start symbol; it
95
defaults to the grammar's start symbol.
97
You can use a Parser instance to parse any number of programs;
98
each time you call setup() the parser is reset to an initial
99
state determined by the (implicit or explicit) start symbol.
103
start = self.grammar.start
104
# Each stack entry is a tuple: (dfa, state, node).
105
# A node is a tuple: (type, value, context, children),
106
# where children is a list of nodes or None, and context may be None.
107
newnode = (start, None, None, [])
108
stackentry = (self.grammar.dfas[start], 0, newnode)
109
self.stack = [stackentry]
111
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
113
def addtoken(self, type, value, context):
114
"""Add a token; return True iff this is the end of the program."""
115
# Map from token to label
116
ilabel = self.classify(type, value, context)
117
# Loop until the token is shifted; may raise exceptions
119
dfa, state, node = self.stack[-1]
122
# Look for a state with this label
123
for i, newstate in arcs:
124
t, v = self.grammar.labels[i]
126
# Look it up in the list of labels
128
# Shift a token; we're done with it
129
self.shift(type, value, newstate, context)
130
# Pop while we are in an accept-only state
132
while states[state] == [(0, state)]:
137
dfa, state, node = self.stack[-1]
139
# Done with this token
142
# See if it's a symbol and if we're in its first set
143
itsdfa = self.grammar.dfas[t]
144
itsstates, itsfirst = itsdfa
145
if ilabel in itsfirst:
147
self.push(t, self.grammar.dfas[t], newstate, context)
148
break # To continue the outer while loop
150
if (0, state) in arcs:
151
# An accepting state, pop it and try something else
154
# Done parsing, but another token is input
155
raise ParseError("too much input",
156
type, value, context)
158
# No success finding a transition
159
raise ParseError("bad input", type, value, context)
161
def classify(self, type, value, context):
162
"""Turn a token into a label. (Internal)"""
163
if type == token.NAME:
164
# Keep a listing of all used names
165
self.used_names.add(value)
166
# Check for reserved words
167
ilabel = self.grammar.keywords.get(value)
168
if ilabel is not None:
170
ilabel = self.grammar.tokens.get(type)
172
raise ParseError("bad token", type, value, context)
175
def shift(self, type, value, newstate, context):
176
"""Shift a token. (Internal)"""
177
dfa, state, node = self.stack[-1]
178
newnode = (type, value, context, None)
179
newnode = self.convert(self.grammar, newnode)
180
if newnode is not None:
181
node[-1].append(newnode)
182
self.stack[-1] = (dfa, newstate, node)
184
def push(self, type, newdfa, newstate, context):
185
"""Push a nonterminal. (Internal)"""
186
dfa, state, node = self.stack[-1]
187
newnode = (type, None, context, [])
188
self.stack[-1] = (dfa, newstate, node)
189
self.stack.append((newdfa, 0, newnode))
192
"""Pop a nonterminal. (Internal)"""
193
popdfa, popstate, popnode = self.stack.pop()
194
newnode = self.convert(self.grammar, popnode)
195
if newnode is not None:
197
dfa, state, node = self.stack[-1]
198
node[-1].append(newnode)
200
self.rootnode = newnode
201
self.rootnode.used_names = self.used_names