# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""
import pickle

from . import token, tokenize
class Grammar(object):

    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """

    def __init__(self):
        # All tables start empty; subclasses (or load()) fill them in.
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        # Label 0 is reserved for the special EMPTY label.
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        # 256 is the lowest possible symbol number, hence the default start.
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file."""
        with open(filename, "wb") as f:
            # Protocol 2 keeps the pickle compact and loadable by older Pythons.
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """Return a new Grammar with copies of this grammar's tables."""
        new = self.__class__()
        # Shallow-copy each dict table so the copy can be mutated independently.
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

opmap = {}
for line in opmap_raw.splitlines():
    # Skip the blank lines produced by the leading/trailing newlines of
    # opmap_raw; without this guard line.split() unpacking raises ValueError.
    if line:
        op, name = line.split()
        opmap[op] = getattr(token, name)