1
# -----------------------------------------------------------------------------
4
# Copyright (C) 2001-2009,
5
# David M. Beazley (Dabeaz LLC)
8
# Redistribution and use in source and binary forms, with or without
9
# modification, are permitted provided that the following conditions are
12
# * Redistributions of source code must retain the above copyright notice,
13
# this list of conditions and the following disclaimer.
14
# * Redistributions in binary form must reproduce the above copyright notice,
15
# this list of conditions and the following disclaimer in the documentation
16
# and/or other materials provided with the distribution.
17
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
18
# endorse or promote products derived from this software without
19
# specific prior written permission.
21
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
# -----------------------------------------------------------------------------
34
# This implements an LR parser that is constructed from grammar rules defined
35
# as Python functions. The grammar is specified by supplying the BNF inside
36
# Python documentation strings. The inspiration for this technique was borrowed
37
# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
38
# Spark and the GNU bison utility.
40
# The current implementation is only somewhat object-oriented. The
41
# LR parser itself is defined in terms of an object (which allows multiple
42
# parsers to co-exist). However, most of the variables used during table
43
# construction are defined in terms of global variables. Users shouldn't
44
# notice unless they are trying to define multiple parsers at the same
45
# time using threads (in which case they should have their head examined).
47
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
48
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
49
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
50
# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
51
# by the more efficient DeRemer and Pennello algorithm.
53
# :::::::: WARNING :::::::
55
# Construction of LR parsing tables is fairly complicated and expensive.
56
# To make this module run fast, a *LOT* of work has been put into
57
# optimization---often at the expense of readability and what might
# normally be considered good Python "coding style." Modify the code at your
60
# ----------------------------------------------------------------------------
63
__tabversion__ = "3.2"        # Table version

#-----------------------------------------------------------------------------
#                === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1               # Debugging mode. If set, yacc generates a
                              # 'parser.out' file in the current directory

debug_file  = 'parser.out'    # Default name of the debugging file
tab_module  = 'parsetab'      # Default name of the table module
default_lr  = 'LALR'          # Default LR table generation method

error_count = 3               # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = 0               # Set to True if developing yacc. This turns off optimized
                              # implementations of certain functions.

resultlimit = 40              # Size limit of results when running in debug mode.

pickle_protocol = 0           # Protocol to use when writing pickle files
87
import re, types, sys, os.path
89
# Compatibility function for python 2.6/3.0
90
if sys.version_info[0] < 3:
100
except AttributeError:
103
# Python 2.x/3.0 compatibility.
105
if sys.version_info[0] < 3:
108
import ply.lex as lex
111
# This object is a stand-in for a logging object created by the
112
# logging module. PLY will use this by default to create things
113
# such as the parser.out file. If a user wants more detailed
114
# information, they can create their own logging object and pass
117
class PlyLogger(object):
    """Minimal stand-in for a logging.Logger that writes to a file object.

    PLY uses this by default to produce output such as the parser.out
    file; a user wanting more detailed control can pass in a real
    logging object instead.
    """
    def __init__(self, f):
        self.f = f      # destination file-like object (must support .write)

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + "\n")

    # The parse engine calls debug.info(...); it produces the same output
    # as debug() on this minimal logger.
    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write("WARNING: " + (msg % args) + "\n")

    warn = warning      # alias matching the logging module API

    def error(self, msg, *args, **kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    # NOTE(review): critical assumed equivalent to debug, mirroring upstream
    # PLY — confirm against the original distribution.
    critical = debug
132
# Null logger is used when no output is generated. Does nothing.
133
class NullLogger(object):
    """Logger used when no output is generated. Does nothing.

    Every attribute access returns the instance itself, and calling the
    instance is a no-op that also returns it, so arbitrary chained
    logger-style calls succeed silently.
    """
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self
139
# Exception raised for yacc-related errors
140
class YaccError(Exception):
    """Base exception raised for yacc-related errors."""
142
# Format the result message that the parser produces when running in debug mode.
143
def format_result(r):
    """Format a parse-result object for debug-mode output.

    Returns "<TypeName @ 0xADDR> (repr)", truncating the repr to the
    module-level ``resultlimit`` so large results stay readable.
    """
    repr_str = repr(r)
    # Collapse multi-line reprs onto a single line by re-repr'ing them.
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + " ..."
    result = "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), repr_str)
    return result
152
# Format stack entries when the parser is running in debug mode
153
def format_stack_entry(r):
    """Format a parser-stack entry for debug-mode output.

    Short reprs (fewer than 16 characters) are shown directly; anything
    longer is summarized as "<TypeName @ 0xADDR>".
    """
    repr_str = repr(r)
    # Collapse multi-line reprs onto a single line by re-repr'ing them.
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    return "<%s @ 0x%x>" % (type(r).__name__, id(r))
161
#-----------------------------------------------------------------------------
162
# === LR Parsing Engine ===
164
# The following classes are used for the LR parser itself. These are not
165
# used during table construction and are independent of the actual LR
166
# table generation algorithm
167
#-----------------------------------------------------------------------------
169
# This class is used to hold non-terminal grammar symbols during parsing.
170
# It normally has the following attributes set:
171
# .type = Grammar symbol type
172
# .value = Symbol value
173
# .lineno = Starting line number
174
# .endlineno = Ending line number (optional, set automatically)
175
# .lexpos = Starting lex position
176
# .endlexpos = Ending lex position (optional, set automatically)
179
class YaccSymbol:
    """Holder for a grammar symbol on the parsing stack.

    The parse engine sets attributes on instances dynamically: .type,
    .value, .lineno/.endlineno and .lexpos/.endlexpos (see the comment
    block above for their meanings).
    """
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)
182
# This class is a wrapper around the objects actually passed to each
183
# grammar rule. Index lookup and assignment actually assign the
184
# .value attribute of the underlying YaccSymbol object.
185
# The lineno() method returns the line number of a given
186
# item (or 0 if not defined). The linespan() method returns
187
# a tuple of (startline,endline) representing the range of lines
188
# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
189
# representing the range of positional information for a symbol.
191
class YaccProduction:
    """Wrapper around the symbol objects passed to each grammar rule.

    Index lookup and assignment operate on the .value attribute of the
    underlying YaccSymbol objects. lineno()/linespan() expose line-number
    information and lexpos()/lexspan() expose lexer positions, as
    described in the comment block above.
    """
    def __init__(self, s, stack=None):
        self.slice = s          # symbols for the production being reduced
        self.stack = stack      # full parser symbol stack (for negative indexing)
        self.lexer = None       # filled in by the parse engine before rules run
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            # Python 3 delivers slice objects here instead of calling
            # __getslice__; without this branch p[i:j] breaks on Python 3.
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            # Negative indices reach back into the parser stack.
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 only; Python 3 routes slicing through __getitem__.
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        """Starting line number of item n (0 if not recorded)."""
        return getattr(self.slice[n], "lineno", 0)

    def set_lineno(self, n, lineno):
        """Explicitly set the line number of item n."""
        self.slice[n].lineno = lineno

    def linespan(self, n):
        """Return (startline, endline) for item n."""
        startline = getattr(self.slice[n], "lineno", 0)
        endline = getattr(self.slice[n], "endlineno", startline)
        return startline, endline

    def lexpos(self, n):
        """Starting lex position of item n (0 if not recorded)."""
        return getattr(self.slice[n], "lexpos", 0)

    def lexspan(self, n):
        """Return (lexpos, endlexpos) for item n."""
        startpos = getattr(self.slice[n], "lexpos", 0)
        endpos = getattr(self.slice[n], "endlexpos", startpos)
        return startpos, endpos
233
# -----------------------------------------------------------------------------
236
# The LR Parsing engine.
237
# -----------------------------------------------------------------------------
240
def __init__(self,lrtab,errorf):
241
self.productions = lrtab.lr_productions
242
self.action = lrtab.lr_action
243
self.goto = lrtab.lr_goto
244
self.errorfunc = errorf
250
del self.statestack[:]
254
self.symstack.append(sym)
255
self.statestack.append(0)
257
def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
258
if debug or yaccdevel:
259
if isinstance(debug,int):
260
debug = PlyLogger(sys.stderr)
261
return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
263
return self.parseopt(input,lexer,debug,tracking,tokenfunc)
265
return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
268
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
271
# This is the debugging enabled version of parse(). All changes made to the
272
# parsing engine should be made here. For the non-debugging version,
273
# copy this code to a method parseopt() and delete all of the sections
280
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
282
def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
283
lookahead = None # Current lookahead symbol
284
lookaheadstack = [ ] # Stack of lookahead symbols
285
actions = self.action # Local reference to action table (to avoid lookup on self.)
286
goto = self.goto # Local reference to goto table (to avoid lookup on self.)
287
prod = self.productions # Local reference to production list (to avoid lookup on self.)
288
pslice = YaccProduction(None) # Production object passed to grammar rules
289
errorcount = 0 # Used during error recovery
292
debug.info("PLY: PARSE DEBUG START")
295
# If no lexer was given, we will try to use the lex module
300
# Set up the lexer and parser objects on pslice
304
# If input was supplied, pass to lexer
305
if input is not None:
308
if tokenfunc is None:
310
get_token = lexer.token
312
get_token = tokenfunc
314
# Set up the state and symbol stacks
316
statestack = [ ] # Stack of parsing states
317
self.statestack = statestack
318
symstack = [ ] # Stack of grammar symbols
319
self.symstack = symstack
321
pslice.stack = symstack # Put in the production
322
errtoken = None # Err token
324
# The start state is assumed to be (0,$end)
332
# Get the next symbol on the input. If a lookahead symbol
333
# is already set, we just use that. Otherwise, we'll pull
334
# the next token off of the lookaheadstack or from the lexer
338
debug.debug('State : %s', state)
342
if not lookaheadstack:
343
lookahead = get_token() # Get the next token
345
lookahead = lookaheadstack.pop()
347
lookahead = YaccSymbol()
348
lookahead.type = "$end"
351
debug.debug('Stack : %s',
352
("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
355
# Check the action table
356
ltype = lookahead.type
357
t = actions[state].get(ltype)
361
# shift a symbol on the stack
366
debug.debug("Action : Shift and goto state %s", t)
369
symstack.append(lookahead)
372
# Decrease error count on successful shift
373
if errorcount: errorcount -=1
377
# reduce a symbol on the stack, emit a production
382
# Get production function
384
sym.type = pname # Production name
389
debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
391
debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
396
targ = symstack[-plen-1:]
402
sym.lineno = t1.lineno
403
sym.lexpos = t1.lexpos
405
sym.endlineno = getattr(t1,"endlineno",t1.lineno)
406
sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
410
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
411
# The code enclosed in this section is duplicated
412
# below as a performance optimization. Make sure
413
# changes get made in both locations.
418
# Call the grammar rule with our special slice object
420
del statestack[-plen:]
423
debug.info("Result : %s", format_result(pslice[0]))
426
state = goto[statestack[-1]][pname]
427
statestack.append(state)
429
# If an error was set. Enter error recovery state
430
lookaheadstack.append(lookahead)
433
state = statestack[-1]
436
errorcount = error_count
439
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
445
sym.lineno = lexer.lineno
446
sym.lexpos = lexer.lexpos
451
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
452
# The code enclosed in this section is duplicated
453
# above as a performance optimization. Make sure
454
# changes get made in both locations.
459
# Call the grammar rule with our special slice object
462
debug.info("Result : %s", format_result(pslice[0]))
465
state = goto[statestack[-1]][pname]
466
statestack.append(state)
468
# If an error was set. Enter error recovery state
469
lookaheadstack.append(lookahead)
472
state = statestack[-1]
475
errorcount = error_count
478
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
482
result = getattr(n,"value",None)
484
debug.info("Done : Returning %s", format_result(result))
485
debug.info("PLY: PARSE DEBUG END")
492
debug.error('Error : %s',
493
("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
496
# We have some kind of parsing error here. To handle
497
# this, we are going to push the current token onto
498
# the tokenstack and replace it with an 'error' token.
499
# If there are any synchronization rules, they may
502
# In addition to pushing the error token, we call
503
# the user defined p_error() function if this is the
504
# first syntax error. This function is only called if
506
if errorcount == 0 or self.errorok:
507
errorcount = error_count
510
if errtoken.type == "$end":
511
errtoken = None # End of file!
513
global errok,token,restart
514
errok = self.errok # Set some special functions available in error recovery
516
restart = self.restart
517
if errtoken and not hasattr(errtoken,'lexer'):
518
errtoken.lexer = lexer
519
tok = self.errorfunc(errtoken)
520
del errok, token, restart # Delete special functions
523
# User must have done some kind of panic
524
# mode recovery on their own. The
525
# returned token is the next lookahead
531
if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
534
sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
536
sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
538
sys.stderr.write("yacc: Parse error in input. EOF\n")
542
errorcount = error_count
544
# case 1: the statestack only has 1 entry on it. If we're in this state, the
545
# entire parse has been rolled back and we're completely hosed. The token is
546
# discarded and we just keep going.
548
if len(statestack) <= 1 and lookahead.type != "$end":
552
# Nuke the pushback stack
553
del lookaheadstack[:]
556
# case 2: the statestack has a couple of entries on it, but we're
557
# at the end of the file. nuke the top entry and generate an error token
559
# Start nuking entries on the stack
560
if lookahead.type == "$end":
561
# Whoa. We're really hosed here. Bail out
564
if lookahead.type != 'error':
566
if sym.type == 'error':
567
# Hmmm. Error is on top of stack, we'll just nuke input
568
# symbol and continue
573
if hasattr(lookahead,"lineno"):
574
t.lineno = lookahead.lineno
576
lookaheadstack.append(lookahead)
581
state = statestack[-1] # Potential bug fix
585
# Call an error function here
586
raise RuntimeError("yacc: internal parser error!!!\n")
588
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
591
# Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.
592
# Edit the debug version above, then copy any modifications to the method
593
# below while removing #--! DEBUG sections.
594
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
597
def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
598
lookahead = None # Current lookahead symbol
599
lookaheadstack = [ ] # Stack of lookahead symbols
600
actions = self.action # Local reference to action table (to avoid lookup on self.)
601
goto = self.goto # Local reference to goto table (to avoid lookup on self.)
602
prod = self.productions # Local reference to production list (to avoid lookup on self.)
603
pslice = YaccProduction(None) # Production object passed to grammar rules
604
errorcount = 0 # Used during error recovery
606
# If no lexer was given, we will try to use the lex module
611
# Set up the lexer and parser objects on pslice
615
# If input was supplied, pass to lexer
616
if input is not None:
619
if tokenfunc is None:
621
get_token = lexer.token
623
get_token = tokenfunc
625
# Set up the state and symbol stacks
627
statestack = [ ] # Stack of parsing states
628
self.statestack = statestack
629
symstack = [ ] # Stack of grammar symbols
630
self.symstack = symstack
632
pslice.stack = symstack # Put in the production
633
errtoken = None # Err token
635
# The start state is assumed to be (0,$end)
643
# Get the next symbol on the input. If a lookahead symbol
644
# is already set, we just use that. Otherwise, we'll pull
645
# the next token off of the lookaheadstack or from the lexer
648
if not lookaheadstack:
649
lookahead = get_token() # Get the next token
651
lookahead = lookaheadstack.pop()
653
lookahead = YaccSymbol()
654
lookahead.type = '$end'
656
# Check the action table
657
ltype = lookahead.type
658
t = actions[state].get(ltype)
662
# shift a symbol on the stack
666
symstack.append(lookahead)
669
# Decrease error count on successful shift
670
if errorcount: errorcount -=1
674
# reduce a symbol on the stack, emit a production
679
# Get production function
681
sym.type = pname # Production name
685
targ = symstack[-plen-1:]
691
sym.lineno = t1.lineno
692
sym.lexpos = t1.lexpos
694
sym.endlineno = getattr(t1,"endlineno",t1.lineno)
695
sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
699
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
700
# The code enclosed in this section is duplicated
701
# below as a performance optimization. Make sure
702
# changes get made in both locations.
707
# Call the grammar rule with our special slice object
709
del statestack[-plen:]
712
state = goto[statestack[-1]][pname]
713
statestack.append(state)
715
# If an error was set. Enter error recovery state
716
lookaheadstack.append(lookahead)
719
state = statestack[-1]
722
errorcount = error_count
725
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
731
sym.lineno = lexer.lineno
732
sym.lexpos = lexer.lexpos
737
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
738
# The code enclosed in this section is duplicated
739
# above as a performance optimization. Make sure
740
# changes get made in both locations.
745
# Call the grammar rule with our special slice object
748
state = goto[statestack[-1]][pname]
749
statestack.append(state)
751
# If an error was set. Enter error recovery state
752
lookaheadstack.append(lookahead)
755
state = statestack[-1]
758
errorcount = error_count
761
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
765
return getattr(n,"value",None)
769
# We have some kind of parsing error here. To handle
770
# this, we are going to push the current token onto
771
# the tokenstack and replace it with an 'error' token.
772
# If there are any synchronization rules, they may
775
# In addition to pushing the error token, we call
776
# the user defined p_error() function if this is the
777
# first syntax error. This function is only called if
779
if errorcount == 0 or self.errorok:
780
errorcount = error_count
783
if errtoken.type == '$end':
784
errtoken = None # End of file!
786
global errok,token,restart
787
errok = self.errok # Set some special functions available in error recovery
789
restart = self.restart
790
if errtoken and not hasattr(errtoken,'lexer'):
791
errtoken.lexer = lexer
792
tok = self.errorfunc(errtoken)
793
del errok, token, restart # Delete special functions
796
# User must have done some kind of panic
797
# mode recovery on their own. The
798
# returned token is the next lookahead
804
if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
807
sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
809
sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
811
sys.stderr.write("yacc: Parse error in input. EOF\n")
815
errorcount = error_count
817
# case 1: the statestack only has 1 entry on it. If we're in this state, the
818
# entire parse has been rolled back and we're completely hosed. The token is
819
# discarded and we just keep going.
821
if len(statestack) <= 1 and lookahead.type != '$end':
825
# Nuke the pushback stack
826
del lookaheadstack[:]
829
# case 2: the statestack has a couple of entries on it, but we're
830
# at the end of the file. nuke the top entry and generate an error token
832
# Start nuking entries on the stack
833
if lookahead.type == '$end':
834
# Whoa. We're really hosed here. Bail out
837
if lookahead.type != 'error':
839
if sym.type == 'error':
840
# Hmmm. Error is on top of stack, we'll just nuke input
841
# symbol and continue
846
if hasattr(lookahead,"lineno"):
847
t.lineno = lookahead.lineno
849
lookaheadstack.append(lookahead)
854
state = statestack[-1] # Potential bug fix
858
# Call an error function here
859
raise RuntimeError("yacc: internal parser error!!!\n")
861
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
862
# parseopt_notrack().
864
# Optimized version of parseopt() with line number tracking removed.
865
# DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
866
# code in the #--! TRACKING sections
867
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
869
def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
870
lookahead = None # Current lookahead symbol
871
lookaheadstack = [ ] # Stack of lookahead symbols
872
actions = self.action # Local reference to action table (to avoid lookup on self.)
873
goto = self.goto # Local reference to goto table (to avoid lookup on self.)
874
prod = self.productions # Local reference to production list (to avoid lookup on self.)
875
pslice = YaccProduction(None) # Production object passed to grammar rules
876
errorcount = 0 # Used during error recovery
878
# If no lexer was given, we will try to use the lex module
883
# Set up the lexer and parser objects on pslice
887
# If input was supplied, pass to lexer
888
if input is not None:
891
if tokenfunc is None:
893
get_token = lexer.token
895
get_token = tokenfunc
897
# Set up the state and symbol stacks
899
statestack = [ ] # Stack of parsing states
900
self.statestack = statestack
901
symstack = [ ] # Stack of grammar symbols
902
self.symstack = symstack
904
pslice.stack = symstack # Put in the production
905
errtoken = None # Err token
907
# The start state is assumed to be (0,$end)
915
# Get the next symbol on the input. If a lookahead symbol
916
# is already set, we just use that. Otherwise, we'll pull
917
# the next token off of the lookaheadstack or from the lexer
920
if not lookaheadstack:
921
lookahead = get_token() # Get the next token
923
lookahead = lookaheadstack.pop()
925
lookahead = YaccSymbol()
926
lookahead.type = '$end'
928
# Check the action table
929
ltype = lookahead.type
930
t = actions[state].get(ltype)
934
# shift a symbol on the stack
938
symstack.append(lookahead)
941
# Decrease error count on successful shift
942
if errorcount: errorcount -=1
946
# reduce a symbol on the stack, emit a production
951
# Get production function
953
sym.type = pname # Production name
957
targ = symstack[-plen-1:]
960
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
961
# The code enclosed in this section is duplicated
962
# below as a performance optimization. Make sure
963
# changes get made in both locations.
968
# Call the grammar rule with our special slice object
970
del statestack[-plen:]
973
state = goto[statestack[-1]][pname]
974
statestack.append(state)
976
# If an error was set. Enter error recovery state
977
lookaheadstack.append(lookahead)
980
state = statestack[-1]
983
errorcount = error_count
986
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
992
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
993
# The code enclosed in this section is duplicated
994
# above as a performance optimization. Make sure
995
# changes get made in both locations.
1000
# Call the grammar rule with our special slice object
1002
symstack.append(sym)
1003
state = goto[statestack[-1]][pname]
1004
statestack.append(state)
1006
# If an error was set. Enter error recovery state
1007
lookaheadstack.append(lookahead)
1010
state = statestack[-1]
1013
errorcount = error_count
1016
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1020
return getattr(n,"value",None)
1024
# We have some kind of parsing error here. To handle
1025
# this, we are going to push the current token onto
1026
# the tokenstack and replace it with an 'error' token.
1027
# If there are any synchronization rules, they may
1030
# In addition to pushing the error token, we call
1031
# the user defined p_error() function if this is the
1032
# first syntax error. This function is only called if
1034
if errorcount == 0 or self.errorok:
1035
errorcount = error_count
1037
errtoken = lookahead
1038
if errtoken.type == '$end':
1039
errtoken = None # End of file!
1041
global errok,token,restart
1042
errok = self.errok # Set some special functions available in error recovery
1044
restart = self.restart
1045
if errtoken and not hasattr(errtoken,'lexer'):
1046
errtoken.lexer = lexer
1047
tok = self.errorfunc(errtoken)
1048
del errok, token, restart # Delete special functions
1051
# User must have done some kind of panic
1052
# mode recovery on their own. The
1053
# returned token is the next lookahead
1059
if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
1062
sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
1064
sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
1066
sys.stderr.write("yacc: Parse error in input. EOF\n")
1070
errorcount = error_count
1072
# case 1: the statestack only has 1 entry on it. If we're in this state, the
1073
# entire parse has been rolled back and we're completely hosed. The token is
1074
# discarded and we just keep going.
1076
if len(statestack) <= 1 and lookahead.type != '$end':
1080
# Nuke the pushback stack
1081
del lookaheadstack[:]
1084
# case 2: the statestack has a couple of entries on it, but we're
1085
# at the end of the file. nuke the top entry and generate an error token
1087
# Start nuking entries on the stack
1088
if lookahead.type == '$end':
1089
# Whoa. We're really hosed here. Bail out
1092
if lookahead.type != 'error':
1094
if sym.type == 'error':
1095
# Hmmm. Error is on top of stack, we'll just nuke input
1096
# symbol and continue
1101
if hasattr(lookahead,"lineno"):
1102
t.lineno = lookahead.lineno
1104
lookaheadstack.append(lookahead)
1109
state = statestack[-1] # Potential bug fix
1113
# Call an error function here
1114
raise RuntimeError("yacc: internal parser error!!!\n")
1116
# -----------------------------------------------------------------------------
1117
# === Grammar Representation ===
1119
# The following functions, classes, and variables are used to represent and
1120
# manipulate the rules that make up a grammar.
1121
# -----------------------------------------------------------------------------
1125
# regex matching identifiers
1126
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1128
# -----------------------------------------------------------------------------
1131
# This class stores the raw information about a single production or grammar rule.
1132
# A grammar rule refers to a specification such as this:
1134
# expr : expr PLUS term
1136
# Here are the basic attributes defined on all productions
1138
# name - Name of the production. For example 'expr'
1139
# prod - A list of symbols on the right side ['expr','PLUS','term']
1140
# prec - Production precedence level
1141
# number - Production number.
1142
# func - Function that executes on reduce
1143
# file - File where production function is defined
1144
# lineno - Line number where production function is defined
1146
# The following attributes are defined or optional.
1148
# len - Length of the production (number of symbols on right hand side)
1149
# usyms - Set of unique symbols found in the production
1150
# -----------------------------------------------------------------------------
1152
class Production(object):
    """Raw information about a single production/grammar rule.

    The attributes (name, prod, prec, number, func, file, line, len,
    usyms) are described in the comment block above.
    """
    def __init__(self, number, name, prod, precedence=('right', 0),
                 func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction
        self.len = len(self.prod)       # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name, " ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production(" + str(self) + ")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Python 2 truth value; productions always test true.
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)

        # Precompute the list of productions immediately following.
        # Hack. Remove later (relies on the module-level Prodnames table).
        try:
            p.lr_after = Prodnames[p.prod[n + 1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n - 1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
1221
# This class serves as a minimal standin for Production objects when
1222
# reading table data from files. It only contains information
1223
# actually used by the LR parsing engine, plus some additional
1224
# debugging information.
1225
class MiniProduction(object):
    """Minimal stand-in for Production when reading table data from files.

    Carries only the information used by the LR parsing engine, plus
    some additional debugging information.
    """
    def __init__(self, str, name, len, func, file, line):
        # Parameter names shadow builtins to mirror the attribute names
        # used by Production.
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
1245
# -----------------------------------------------------------------------------
1248
# This class represents a specific stage of parsing a production rule. For
1251
# expr : expr . PLUS term
1253
# In the above, the "." represents the current location of the parse. Here
1256
# name - Name of the production. For example 'expr'
1257
# prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
1258
# number - Production number.
1260
# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term'
1261
# then lr_next refers to 'expr -> expr PLUS . term'
1262
# lr_index - LR item index (location of the ".") in the prod list.
1263
# lookaheads - LALR lookahead symbols for this item
1264
# len - Length of the production (number of symbols on right hand side)
1265
# lr_after - List of all productions that immediately follow
1266
# lr_before - Grammar symbol immediately before
1267
# -----------------------------------------------------------------------------
1269
class LRItem(object):
    """One stage of parsing a production rule, e.g. 'expr -> expr . PLUS term'.

    The "." marks the current parse position; see the comment block above
    for the attribute descriptions (name, prod, number, lr_index,
    lookaheads, len, lr_after, lr_before).
    """
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n           # location of the "." in the prod list
        self.lookaheads = {}
        self.prod.insert(n, ".")
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = "%s -> %s" % (self.name, " ".join(self.prod))
        else:
            s = "%s -> <empty>" % self.name
        return s

    def __repr__(self):
        return "LRItem(" + str(self) + ")"
1291
# -----------------------------------------------------------------------------
1292
# rightmost_terminal()
1294
# Return the rightmost terminal from a list of symbols. Used in add_production()
1295
# -----------------------------------------------------------------------------
1296
def rightmost_terminal(symbols, terminals):
1297
i = len(symbols) - 1
1299
if symbols[i] in terminals:
1304
# -----------------------------------------------------------------------------
1305
# === GRAMMAR CLASS ===
1307
# The following class represents the contents of the specified grammar along
1308
# with various computed properties such as first sets, follow sets, LR items, etc.
1309
# This data is used for critical parts of the table generation process later.
1310
# -----------------------------------------------------------------------------
1312
class GrammarError(YaccError): pass    # Raised for problems detected in the grammar specification (bad rule names, duplicate rules, precedence errors, etc.)
1314
class Grammar(object):
1315
def __init__(self,terminals):
1316
self.Productions = [None] # A list of all of the productions. The first
1317
# entry is always reserved for the purpose of
1318
# building an augmented grammar
1320
self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all
1321
# productions of that nonterminal.
1323
self.Prodmap = { } # A dictionary that is only used to detect duplicate
1326
self.Terminals = { } # A dictionary mapping the names of terminal symbols to a
1327
# list of the rules where they are used.
1329
for term in terminals:
1330
self.Terminals[term] = []
1332
self.Terminals['error'] = []
1334
self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list
1335
# of rule numbers where they are used.
1337
self.First = { } # A dictionary of precomputed FIRST(x) symbols
1339
self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols
1341
self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the
1342
# form ('right',level) or ('nonassoc', level) or ('left',level)
1344
self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer.
1345
# This is only used to provide error checking and to generate
1346
# a warning about unused precedence rules.
1348
self.Start = None # Starting symbol for the grammar
1352
return len(self.Productions)
1354
def __getitem__(self,index):
    """Support grammar[n] indexing; returns production number *index*."""
    productions = self.Productions
    return productions[index]
1357
# -----------------------------------------------------------------------------
1360
# Sets the precedence for a given terminal. assoc is the associativity such as
1361
# 'left','right', or 'nonassoc'. level is a numeric level.
1363
# -----------------------------------------------------------------------------
1365
def set_precedence(self,term,assoc,level):
    """Register the precedence and associativity for terminal *term*.

    assoc is one of 'left', 'right', or 'nonassoc'; level is the numeric
    precedence level.  Must be called before any add_production() call.
    Raises GrammarError on a duplicate terminal or a bad associativity.
    """
    assert self.Productions == [None],"Must call set_precedence() before add_production()"
    if term in self.Precedence:
        raise GrammarError("Precedence already specified for terminal '%s'" % term)
    if assoc not in ('left','right','nonassoc'):
        raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
    self.Precedence[term] = (assoc,level)
1373
# -----------------------------------------------------------------------------
1376
# Given an action function, this function assembles a production rule and
1377
# computes its precedence level.
1379
# The production rule is supplied as a list of symbols. For example,
1380
# a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1381
# symbols ['expr','PLUS','term'].
1383
# Precedence is determined by the precedence of the right-most non-terminal
1384
# or the precedence of a terminal specified by %prec.
1386
# A variety of error checks are performed to make sure production symbols
1387
# are valid and that %prec is used correctly.
1388
# -----------------------------------------------------------------------------
1390
def add_production(self,prodname,syms,func=None,file='',line=0):
1392
if prodname in self.Terminals:
1393
raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
1394
if prodname == 'error':
1395
raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
1396
if not _is_identifier.match(prodname):
1397
raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
1399
# Look for literal tokens
1400
for n,s in enumerate(syms):
1405
raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
1406
if not c in self.Terminals:
1407
self.Terminals[c] = []
1412
if not _is_identifier.match(s) and s != '%prec':
1413
raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
1415
# Determine the precedence level
1417
if syms[-1] == '%prec':
1418
raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
1419
if syms[-2] != '%prec':
1420
raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
1422
prodprec = self.Precedence.get(precname,None)
1424
raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
1426
self.UsedPrecedence[precname] = 1
1427
del syms[-2:] # Drop %prec from the rule
1429
# If no %prec, precedence is determined by the rightmost terminal symbol
1430
precname = rightmost_terminal(syms,self.Terminals)
1431
prodprec = self.Precedence.get(precname,('right',0))
1433
# See if the rule is already in the rulemap
1434
map = "%s -> %s" % (prodname,syms)
1435
if map in self.Prodmap:
1436
m = self.Prodmap[map]
1437
raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
1438
"Previous definition at %s:%d" % (m.file, m.line))
1440
# From this point on, everything is valid. Create a new Production instance
1441
pnumber = len(self.Productions)
1442
if not prodname in self.Nonterminals:
1443
self.Nonterminals[prodname] = [ ]
1445
# Add the production number to Terminals and Nonterminals
1447
if t in self.Terminals:
1448
self.Terminals[t].append(pnumber)
1450
if not t in self.Nonterminals:
1451
self.Nonterminals[t] = [ ]
1452
self.Nonterminals[t].append(pnumber)
1454
# Create a production and add it to the list of productions
1455
p = Production(pnumber,prodname,syms,prodprec,func,file,line)
1456
self.Productions.append(p)
1457
self.Prodmap[map] = p
1459
# Add to the global productions list
1461
self.Prodnames[prodname].append(p)
1463
self.Prodnames[prodname] = [ p ]
1466
# -----------------------------------------------------------------------------
1469
# Sets the starting symbol and creates the augmented grammar. Production
1470
# rule 0 is S' -> start where start is the start symbol.
1471
# -----------------------------------------------------------------------------
1473
def set_start(self,start=None):
1475
start = self.Productions[1].name
1476
if start not in self.Nonterminals:
1477
raise GrammarError("start symbol %s undefined" % start)
1478
self.Productions[0] = Production(0,"S'",[start])
1479
self.Nonterminals[start].append(0)
1482
# -----------------------------------------------------------------------------
1483
# find_unreachable()
1485
# Find all of the nonterminal symbols that can't be reached from the starting
1486
# symbol. Returns a list of nonterminals that can't be reached.
1487
# -----------------------------------------------------------------------------
1489
def find_unreachable(self):
1491
# Mark all symbols that are reachable from a symbol s
1492
def mark_reachable_from(s):
1494
# We've already reached symbol s.
1497
for p in self.Prodnames.get(s,[]):
1499
mark_reachable_from(r)
1502
for s in list(self.Terminals) + list(self.Nonterminals):
1505
mark_reachable_from( self.Productions[0].prod[0] )
1507
return [s for s in list(self.Nonterminals)
1508
if not reachable[s]]
1510
# -----------------------------------------------------------------------------
1513
# This function looks at the various parsing rules and tries to detect
1514
# infinite recursion cycles (grammar rules where there is no possible way
1515
# to derive a string of only terminals).
1516
# -----------------------------------------------------------------------------
1518
def infinite_cycles(self):
1522
for t in self.Terminals:
1525
terminates['$end'] = 1
1529
# Initialize to false:
1530
for n in self.Nonterminals:
1533
# Then propagate termination until no change:
1536
for (n,pl) in self.Prodnames.items():
1537
# Nonterminal n terminates iff any of its productions terminates.
1539
# Production p terminates iff all of its rhs symbols terminate.
1541
if not terminates[s]:
1542
# The symbol s does not terminate,
1543
# so production p does not terminate.
1547
# didn't break from the loop,
1548
# so every symbol s terminates
1549
# so production p terminates.
1553
# symbol n terminates!
1554
if not terminates[n]:
1557
# Don't need to consider any more productions for this n.
1564
for (s,term) in terminates.items():
1566
if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1567
# s is used-but-not-defined, and we've already warned of that,
1568
# so it would be overkill to say that it's also non-terminating.
1576
# -----------------------------------------------------------------------------
1577
# undefined_symbols()
1579
# Find all symbols that were used the grammar, but not defined as tokens or
1580
# grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol
1581
# and prod is the production where the symbol was used.
1582
# -----------------------------------------------------------------------------
1583
def undefined_symbols(self):
1585
for p in self.Productions:
1589
if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1590
result.append((s,p))
1593
# -----------------------------------------------------------------------------
1594
# unused_terminals()
1596
# Find all terminals that were defined, but not used by the grammar. Returns
1597
# a list of all symbols.
1598
# -----------------------------------------------------------------------------
1599
def unused_terminals(self):
1601
for s,v in self.Terminals.items():
1602
if s != 'error' and not v:
1603
unused_tok.append(s)
1607
# ------------------------------------------------------------------------------
1610
# Find all grammar rules that were defined, but not used (maybe not reachable)
1611
# Returns a list of productions.
1612
# ------------------------------------------------------------------------------
1614
def unused_rules(self):
1616
for s,v in self.Nonterminals.items():
1618
p = self.Prodnames[s][0]
1619
unused_prod.append(p)
1622
# -----------------------------------------------------------------------------
1623
# unused_precedence()
1625
# Returns a list of tuples (term,precedence) corresponding to precedence
1626
# rules that were never used by the grammar. term is the name of the terminal
1627
# on which precedence was applied and precedence is a string such as 'left' or
1628
# 'right' corresponding to the type of precedence.
1629
# -----------------------------------------------------------------------------
1631
def unused_precedence(self):
1633
for termname in self.Precedence:
1634
if not (termname in self.Terminals or termname in self.UsedPrecedence):
1635
unused.append((termname,self.Precedence[termname][0]))
1639
# -------------------------------------------------------------------------
1642
# Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1644
# During execution of compute_first1, the result may be incomplete.
1645
# Afterward (e.g., when called from compute_follow()), it will be complete.
1646
# -------------------------------------------------------------------------
1647
def _first(self,beta):
1649
# We are computing First(x1,x2,x3,...,xn)
1652
x_produces_empty = 0
1654
# Add all the non-<empty> symbols of First[x] to the result.
1655
for f in self.First[x]:
1657
x_produces_empty = 1
1659
if f not in result: result.append(f)
1661
if x_produces_empty:
1662
# We have to consider the next x in beta,
1663
# i.e. stay in the loop.
1666
# We don't have to consider any further symbols in beta.
1669
# There was no 'break' from the loop,
1670
# so x_produces_empty was true for all x in beta,
1671
# so beta produces empty as well.
1672
result.append('<empty>')
1676
# -------------------------------------------------------------------------
1679
# Compute the value of FIRST1(X) for all symbols
1680
# -------------------------------------------------------------------------
1681
def compute_first(self):
1686
for t in self.Terminals:
1689
self.First['$end'] = ['$end']
1693
# Initialize to the empty set:
1694
for n in self.Nonterminals:
1697
# Then propagate symbols until no change:
1700
for n in self.Nonterminals:
1701
for p in self.Prodnames[n]:
1702
for f in self._first(p.prod):
1703
if f not in self.First[n]:
1704
self.First[n].append( f )
1711
# ---------------------------------------------------------------------
1714
# Computes all of the follow sets for every non-terminal symbol. The
1715
# follow set is the set of all symbols that might follow a given
1716
# non-terminal. See the Dragon book, 2nd Ed. p. 189.
1717
# ---------------------------------------------------------------------
1718
def compute_follow(self,start=None):
1719
# If already computed, return the result
1723
# If first sets not computed yet, do that first.
1725
self.compute_first()
1727
# Add '$end' to the follow list of the start symbol
1728
for k in self.Nonterminals:
1729
self.Follow[k] = [ ]
1732
start = self.Productions[1].name
1734
self.Follow[start] = [ '$end' ]
1738
for p in self.Productions[1:]:
1739
# Here is the production set
1740
for i in range(len(p.prod)):
1742
if B in self.Nonterminals:
1743
# Okay. We got a non-terminal in a production
1744
fst = self._first(p.prod[i+1:])
1747
if f != '<empty>' and f not in self.Follow[B]:
1748
self.Follow[B].append(f)
1752
if hasempty or i == (len(p.prod)-1):
1753
# Add elements of follow(a) to follow(b)
1754
for f in self.Follow[p.name]:
1755
if f not in self.Follow[B]:
1756
self.Follow[B].append(f)
1758
if not didadd: break
1762
# -----------------------------------------------------------------------------
1765
# This function walks the list of productions and builds a complete set of the
1766
# LR items. The LR items are stored in two ways: First, they are uniquely
1767
# numbered and placed in the list _lritems. Second, a linked list of LR items
1768
# is built for each production. For example:
1774
# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1775
# -----------------------------------------------------------------------------
1777
def build_lritems(self):
1778
for p in self.Productions:
1787
# Precompute the list of productions immediately following
1789
lri.lr_after = self.Prodnames[lri.prod[i+1]]
1790
except (IndexError,KeyError):
1793
lri.lr_before = lri.prod[i-1]
1795
lri.lr_before = None
1797
lastlri.lr_next = lri
1799
lr_items.append(lri)
1802
p.lr_items = lr_items
1804
# -----------------------------------------------------------------------------
1805
# == Class LRTable ==
1807
# This basic class represents a basic table of LR parsing information.
1808
# Methods for generating the tables are not defined here. They are defined
1809
# in the derived class LRGeneratedTable.
1810
# -----------------------------------------------------------------------------
1812
class VersionError(YaccError): pass    # Raised when a cached parse table was written by an incompatible table-format version
1814
class LRTable(object):
1816
self.lr_action = None
1818
self.lr_productions = None
1819
self.lr_method = None
1821
def read_table(self,module):
1822
if isinstance(module,types.ModuleType):
1825
if sys.version_info[0] < 3:
1826
exec("import %s as parsetab" % module)
1829
exec("import %s as parsetab" % module, env, env)
1830
parsetab = env['parsetab']
1832
if parsetab._tabversion != __tabversion__:
1833
raise VersionError("yacc table file version is out of date")
1835
self.lr_action = parsetab._lr_action
1836
self.lr_goto = parsetab._lr_goto
1838
self.lr_productions = []
1839
for p in parsetab._lr_productions:
1840
self.lr_productions.append(MiniProduction(*p))
1842
self.lr_method = parsetab._lr_method
1843
return parsetab._lr_signature
1845
def read_pickle(self,filename):
1847
import cPickle as pickle
1851
in_f = open(filename,"rb")
1853
tabversion = pickle.load(in_f)
1854
if tabversion != __tabversion__:
1855
raise VersionError("yacc table file version is out of date")
1856
self.lr_method = pickle.load(in_f)
1857
signature = pickle.load(in_f)
1858
self.lr_action = pickle.load(in_f)
1859
self.lr_goto = pickle.load(in_f)
1860
productions = pickle.load(in_f)
1862
self.lr_productions = []
1863
for p in productions:
1864
self.lr_productions.append(MiniProduction(*p))
1869
# Bind all production function names to callable objects in pdict
1870
def bind_callables(self,pdict):
1871
for p in self.lr_productions:
1874
# -----------------------------------------------------------------------------
1875
# === LR Generator ===
1877
# The following classes and functions are used to generate LR parsing tables on
1879
# -----------------------------------------------------------------------------
1881
# -----------------------------------------------------------------------------
1885
# The following two functions are used to compute set valued functions
1888
# F(x) = F'(x) U U{F(y) | x R y}
1890
# This is used to compute the values of Read() sets as well as FOLLOW sets
1891
# in LALR(1) generation.
1893
# Inputs: X - An input set
1895
# FP - Set-valued function
1896
# ------------------------------------------------------------------------------
1898
def digraph(X,R,FP):
1905
if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
1908
def traverse(x,N,stack,F,X,R,FP):
1912
F[x] = FP(x) # F(X) <- F'(x)
1914
rel = R(x) # Get y's related to x
1917
traverse(y,N,stack,F,X,R,FP)
1918
N[x] = min(N[x],N[y])
1919
for a in F.get(y,[]):
1920
if a not in F[x]: F[x].append(a)
1922
N[stack[-1]] = MAXINT
1924
element = stack.pop()
1926
N[stack[-1]] = MAXINT
1928
element = stack.pop()
1930
class LALRError(YaccError): pass    # Raised for errors during LALR/SLR table generation (e.g. unsupported method name)
1932
# -----------------------------------------------------------------------------
1933
# == LRGeneratedTable ==
1935
# This class implements the LR table generation algorithm. There are no
1936
# public methods except for write()
1937
# -----------------------------------------------------------------------------
1939
class LRGeneratedTable(LRTable):
1940
def __init__(self,grammar,method='LALR',log=None):
1941
if method not in ['SLR','LALR']:
1942
raise LALRError("Unsupported method %s" % method)
1944
self.grammar = grammar
1945
self.lr_method = method
1952
# Internal attributes
1953
self.lr_action = {} # Action table
1954
self.lr_goto = {} # Goto table
1955
self.lr_productions = grammar.Productions # Copy of grammar Production array
1956
self.lr_goto_cache = {} # Cache of computed gotos
1957
self.lr0_cidhash = {} # Cache of closures
1959
self._add_count = 0 # Internal counter used to detect cycles
1961
# Diagonistic information filled in by the table generator
1962
self.sr_conflict = 0
1963
self.rr_conflict = 0
1964
self.conflicts = [] # List of conflicts
1966
self.sr_conflicts = []
1967
self.rr_conflicts = []
1970
self.grammar.build_lritems()
1971
self.grammar.compute_first()
1972
self.grammar.compute_follow()
1973
self.lr_parse_table()
1975
# Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1977
def lr0_closure(self,I):
1978
self._add_count += 1
1980
# Add everything in I to J
1986
for x in j.lr_after:
1987
if getattr(x,"lr0_added",0) == self._add_count: continue
1990
x.lr0_added = self._add_count
1995
# Compute the LR(0) goto function goto(I,X) where I is a set
1996
# of LR(0) items and X is a grammar symbol. This function is written
1997
# in a way that guarantees uniqueness of the generated goto sets
1998
# (i.e. the same goto set will never be returned as two different Python
1999
# objects). With uniqueness, we can later do fast set comparisons using
2000
# id(obj) instead of element-wise comparison.
2002
def lr0_goto(self,I,x):
2003
# First we look for a previously cached entry
2004
g = self.lr_goto_cache.get((id(I),x),None)
2007
# Now we generate the goto set in a way that guarantees uniqueness
2010
s = self.lr_goto_cache.get(x,None)
2013
self.lr_goto_cache[x] = s
2018
if n and n.lr_before == x:
2019
s1 = s.get(id(n),None)
2025
g = s.get('$end',None)
2028
g = self.lr0_closure(gs)
2032
self.lr_goto_cache[(id(I),x)] = g
2035
# Compute the LR(0) sets of item function
2036
def lr0_items(self):
2038
C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
2041
self.lr0_cidhash[id(I)] = i
2044
# Loop over the items in C and each grammar symbols
2050
# Collect all of the symbols that could possibly be in the goto(I,X) sets
2057
g = self.lr0_goto(I,x)
2059
if id(g) in self.lr0_cidhash: continue
2060
self.lr0_cidhash[id(g)] = len(C)
2065
# -----------------------------------------------------------------------------
2066
# ==== LALR(1) Parsing ====
2068
# LALR(1) parsing is almost exactly the same as SLR except that instead of
2069
# relying upon Follow() sets when performing reductions, a more selective
2070
# lookahead set that incorporates the state of the LR(0) machine is utilized.
2071
# Thus, we mainly just have to focus on calculating the lookahead sets.
2073
# The method used here is due to DeRemer and Pennelo (1982).
2075
# DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
2076
# Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2077
# Vol. 4, No. 4, Oct. 1982, pp. 615-649
2079
# Further details can also be found in:
2081
# J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2082
# McGraw-Hill Book Company, (1985).
2084
# -----------------------------------------------------------------------------
2086
# -----------------------------------------------------------------------------
2087
# compute_nullable_nonterminals()
2089
# Creates a dictionary containing all of the non-terminals that might produce
2090
# an empty production.
2091
# -----------------------------------------------------------------------------
2093
def compute_nullable_nonterminals(self):
2097
for p in self.grammar.Productions[1:]:
2099
nullable[p.name] = 1
2102
if not t in nullable: break
2104
nullable[p.name] = 1
2105
if len(nullable) == num_nullable: break
2106
num_nullable = len(nullable)
2109
# -----------------------------------------------------------------------------
2110
# find_nonterminal_trans(C)
2112
# Given a set of LR(0) items, this functions finds all of the non-terminal
2113
# transitions. These are transitions in which a dot appears immediately before
2114
# a non-terminal. Returns a list of tuples of the form (state,N) where state
2115
# is the state number and N is the nonterminal symbol.
2117
# The input C is the set of LR(0) items.
2118
# -----------------------------------------------------------------------------
2120
def find_nonterminal_transitions(self,C):
2122
for state in range(len(C)):
2124
if p.lr_index < p.len - 1:
2125
t = (state,p.prod[p.lr_index+1])
2126
if t[1] in self.grammar.Nonterminals:
2127
if t not in trans: trans.append(t)
2131
# -----------------------------------------------------------------------------
2134
# Computes the DR(p,A) relationships for non-terminal transitions. The input
2135
# is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2137
# Returns a list of terminals.
2138
# -----------------------------------------------------------------------------
2140
def dr_relation(self,C,trans,nullable):
2145
g = self.lr0_goto(C[state],N)
2147
if p.lr_index < p.len - 1:
2148
a = p.prod[p.lr_index+1]
2149
if a in self.grammar.Terminals:
2150
if a not in terms: terms.append(a)
2152
# This extra bit is to handle the start state
2153
if state == 0 and N == self.grammar.Productions[0].prod[0]:
2154
terms.append('$end')
2158
# -----------------------------------------------------------------------------
2161
# Computes the READS() relation (p,A) READS (t,C).
2162
# -----------------------------------------------------------------------------
2164
def reads_relation(self,C, trans, empty):
2165
# Look for empty transitions
2169
g = self.lr0_goto(C[state],N)
2170
j = self.lr0_cidhash.get(id(g),-1)
2172
if p.lr_index < p.len - 1:
2173
a = p.prod[p.lr_index + 1]
2179
# -----------------------------------------------------------------------------
2180
# compute_lookback_includes()
2182
# Determines the lookback and includes relations
2186
# This relation is determined by running the LR(0) state machine forward.
2187
# For example, starting with a production "N : . A B C", we run it forward
2188
# to obtain "N : A B C ." We then build a relationship between this final
2189
# state and the starting state. These relationships are stored in a dictionary
2194
# Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2196
# This relation is used to determine non-terminal transitions that occur
2197
# inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2198
# if the following holds:
2200
# B -> LAT, where T -> epsilon and p' -L-> p
2202
# L is essentially a prefix (which may be empty), T is a suffix that must be
2203
# able to derive an empty string. State p' must lead to state p with the string L.
2205
# -----------------------------------------------------------------------------
2207
def compute_lookback_includes(self,C,trans,nullable):
2209
lookdict = {} # Dictionary of lookback relations
2210
includedict = {} # Dictionary of include relations
2212
# Make a dictionary of non-terminal transitions
2217
# Loop over all transitions and compute lookbacks and includes
2218
for state,N in trans:
2222
if p.name != N: continue
2224
# Okay, we have a name match. We now follow the production all the way
2225
# through the state machine until we get the . on the right hand side
2227
lr_index = p.lr_index
2229
while lr_index < p.len - 1:
2230
lr_index = lr_index + 1
2231
t = p.prod[lr_index]
2233
# Check to see if this symbol and state are a non-terminal transition
2235
# Yes. Okay, there is some chance that this is an includes relation
2236
# the only way to know for certain is whether the rest of the
2237
# production derives empty
2241
if p.prod[li] in self.grammar.Terminals: break # No forget it
2242
if not p.prod[li] in nullable: break
2245
# Appears to be a relation between (j,t) and (state,N)
2246
includes.append((j,t))
2248
g = self.lr0_goto(C[j],t) # Go to next set
2249
j = self.lr0_cidhash.get(id(g),-1) # Go to next state
2251
# When we get here, j is the final state, now we have to locate the production
2253
if r.name != p.name: continue
2254
if r.len != p.len: continue
2256
# This look is comparing a production ". A B C" with "A B C ."
2257
while i < r.lr_index:
2258
if r.prod[i] != p.prod[i+1]: break
2263
if not i in includedict: includedict[i] = []
2264
includedict[i].append((state,N))
2265
lookdict[(state,N)] = lookb
2267
return lookdict,includedict
2269
# -----------------------------------------------------------------------------
2270
# compute_read_sets()
2272
# Given a set of LR(0) items, this function computes the read sets.
2274
# Inputs: C = Set of LR(0) items
2275
# ntrans = Set of nonterminal transitions
2276
# nullable = Set of empty transitions
2278
# Returns a set containing the read sets
2279
# -----------------------------------------------------------------------------
2281
def compute_read_sets(self,C, ntrans, nullable):
    """Compute the Read() sets for the nonterminal transitions.

    C        -- the LR(0) item sets
    ntrans   -- the set of nonterminal transitions (state, N) tuples
    nullable -- the set of nullable nonterminals

    Returns the dictionary produced by digraph() mapping each transition
    to its read set of terminals.
    """
    FP = lambda x: self.dr_relation(C,x,nullable)
    R = lambda x: self.reads_relation(C,x,nullable)
    F = digraph(ntrans,R,FP)
    # Bug fix: the computed sets were being discarded -- the caller in
    # add_lalr_lookaheads() assigns this function's result to readsets.
    return F
2287
# -----------------------------------------------------------------------------
2288
# compute_follow_sets()
2290
# Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2291
# and an include set, this function computes the follow sets
2293
# Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2296
# ntrans = Set of nonterminal transitions
2297
# readsets = Readset (previously computed)
2298
# inclsets = Include sets (previously computed)
2300
# Returns a set containing the follow sets
2301
# -----------------------------------------------------------------------------
2303
def compute_follow_sets(self,ntrans,readsets,inclsets):
    """Compute the LALR FOLLOW sets for the nonterminal transitions.

    Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}

    ntrans   -- set of nonterminal transitions
    readsets -- previously computed read sets
    inclsets -- previously computed include sets

    Returns the dictionary produced by digraph() containing the follow sets.
    """
    FP = lambda x: readsets[x]
    R = lambda x: inclsets.get(x,[])
    F = digraph(ntrans,R,FP)
    # Bug fix: the computed sets were being discarded -- the caller in
    # add_lalr_lookaheads() assigns this function's result to followsets.
    return F
2309
# -----------------------------------------------------------------------------
2312
# Attaches the lookahead symbols to grammar rules.
2314
# Inputs: lookbacks - Set of lookback relations
2315
# followset - Computed follow set
2317
# This function directly attaches the lookaheads to productions contained
2318
# in the lookbacks set
2319
# -----------------------------------------------------------------------------
2321
def add_lookaheads(self,lookbacks,followset):
2322
for trans,lb in lookbacks.items():
2323
# Loop over productions in lookback
2325
if not state in p.lookaheads:
2326
p.lookaheads[state] = []
2327
f = followset.get(trans,[])
2329
if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
2331
# -----------------------------------------------------------------------------
2332
# add_lalr_lookaheads()
2334
# This function does all of the work of adding lookahead information for use
2336
# -----------------------------------------------------------------------------
2338
def add_lalr_lookaheads(self,C):
    """Attach LALR(1) lookahead information to the grammar productions.

    C is the collection of LR(0) item sets.  Runs the full pipeline:
    nullable nonterminals -> nonterminal transitions -> read sets ->
    lookback/includes relations -> follow sets -> per-state lookaheads.
    """
    # Determine all of the nullable nonterminals
    nullable = self.compute_nullable_nonterminals()

    # Find all non-terminal transitions
    trans = self.find_nonterminal_transitions(C)

    # Compute read sets
    readsets = self.compute_read_sets(C,trans,nullable)

    # Compute lookback/includes relations
    lookd, included = self.compute_lookback_includes(C,trans,nullable)

    # Compute LALR FOLLOW sets
    followsets = self.compute_follow_sets(trans,readsets,included)

    # Add all of the lookaheads
    self.add_lookaheads(lookd,followsets)
2357
# -----------------------------------------------------------------------------
2360
# This function constructs the parse tables for SLR or LALR
2361
# -----------------------------------------------------------------------------
2362
def lr_parse_table(self):
    """Construct the LR parse tables (action and goto) for SLR or LALR.

    Fills in self.lr_action and self.lr_goto, records shift/reduce and
    reduce/reduce conflicts in self.sr_conflicts / self.rr_conflicts, and
    writes a human-readable trace to the attached logger.
    """
    # NOTE(review): extraction garbling removed the state loop, the
    # per-state table initializations, and several conflict branches;
    # reconstructed around the visible statements — verify against the
    # original before relying on details.
    Productions = self.grammar.Productions
    Precedence = self.grammar.Precedence
    goto = self.lr_goto         # Goto array
    action = self.lr_action     # Action array
    log = self.log              # Logger for output

    actionp = { }               # Action production array (temporary)

    log.info("Parsing method: %s", self.lr_method)

    # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
    # This determines the number of states

    C = self.lr0_items()

    if self.lr_method == 'LALR':
        self.add_lalr_lookaheads(C)

    # Build the parser table, state by state
    st = 0
    for I in C:
        # Loop over each production in I
        actlist = [ ]              # List of actions
        st_action = { }
        st_actionp = { }
        st_goto = { }
        log.info("")
        log.info("state %d", st)
        log.info("")
        for p in I:
            log.info("    (%d) %s", p.number, str(p))
        log.info("")

        for p in I:
            if p.len == p.lr_index + 1:
                if p.name == "S'":
                    # Start symbol. Accept!
                    st_action["$end"] = 0
                    st_actionp["$end"] = p
                else:
                    # We are at the end of a production. Reduce!
                    if self.lr_method == 'LALR':
                        laheads = p.lookaheads[st]
                    else:
                        laheads = self.grammar.Follow[p.name]
                    for a in laheads:
                        actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                        r = st_action.get(a,None)
                        if r is not None:
                            # Whoa. Have a shift/reduce or reduce/reduce conflict
                            if r > 0:
                                # Need to decide on shift or reduce here
                                # By default we favor shifting. Need to add
                                # some precedence rules here.
                                sprec,slevel = Productions[st_actionp[a].number].prec
                                rprec,rlevel = Precedence.get(a,('right',0))
                                if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                    # We really need to reduce here.
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    if not slevel and not rlevel:
                                        log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                        self.sr_conflicts.append((st,a,'reduce'))
                                    Productions[p.number].reduced += 1
                                elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                    st_action[a] = None
                                else:
                                    # Hmmm. Guess we'll keep the shift
                                    if not rlevel:
                                        log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                        self.sr_conflicts.append((st,a,'shift'))
                            elif r < 0:
                                # Reduce/reduce conflict. In this case, we favor the rule
                                # that was defined first in the grammar file
                                oldp = Productions[-r]
                                pp = Productions[p.number]
                                if oldp.line > pp.line:
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    chosenp,rejectp = pp,oldp
                                    Productions[p.number].reduced += 1
                                    Productions[oldp.number].reduced -= 1
                                else:
                                    chosenp,rejectp = oldp,pp
                                self.rr_conflicts.append((st,chosenp,rejectp))
                                log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
                            else:
                                raise LALRError("Unknown conflict in state %d" % st)
                        else:
                            st_action[a] = -p.number
                            st_actionp[a] = p
                            Productions[p.number].reduced += 1
            else:
                i = p.lr_index
                a = p.prod[i+1]       # Get symbol right after the "."
                if a in self.grammar.Terminals:
                    g = self.lr0_goto(I,a)
                    j = self.lr0_cidhash.get(id(g),-1)
                    if j >= 0:
                        # We are in a shift state
                        actlist.append((a,p,"shift and go to state %d" % j))
                        r = st_action.get(a,None)
                        if r is not None:
                            # Whoa have a shift/reduce or shift/shift conflict
                            if r > 0:
                                if r != j:
                                    raise LALRError("Shift/shift conflict in state %d" % st)
                            elif r < 0:
                                # Do a precedence check.
                                #   -  if precedence of reduce rule is higher, we reduce.
                                #   -  if precedence of reduce is same and left assoc, we reduce.
                                #   -  otherwise we shift
                                rprec,rlevel = Productions[st_actionp[a].number].prec
                                sprec,slevel = Precedence.get(a,('right',0))
                                if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                    # We decide to shift here... highest precedence to shift
                                    Productions[st_actionp[a].number].reduced -= 1
                                    st_action[a] = j
                                    st_actionp[a] = p
                                    if not rlevel:
                                        log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                        self.sr_conflicts.append((st,a,'shift'))
                                elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                    st_action[a] = None
                                else:
                                    # Hmmm. Guess we'll keep the reduce
                                    if not slevel and not rlevel:
                                        log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                        self.sr_conflicts.append((st,a,'reduce'))
                            else:
                                raise LALRError("Unknown conflict in state %d" % st)
                        else:
                            st_action[a] = j
                            st_actionp[a] = p

        # Print the actions associated with each terminal
        _actprint = { }
        for a,p,m in actlist:
            if a in st_action:
                if p is st_actionp[a]:
                    log.info("    %-15s %s",a,m)
                    _actprint[(a,m)] = 1
        log.info("")
        # Print the actions that were not used. (debugging)
        not_used = 0
        for a,p,m in actlist:
            if a in st_action:
                if p is not st_actionp[a]:
                    if not (a,m) in _actprint:
                        log.debug("  ! %-15s [ %s ]",a,m)
                        not_used = 1
                        _actprint[(a,m)] = 1
        if not_used:
            log.debug("")

        # Construct the goto table for this state
        nkeys = { }
        for ii in I:
            for s in ii.usyms:
                if s in self.grammar.Nonterminals:
                    nkeys[s] = None
        for n in nkeys:
            g = self.lr0_goto(I,n)
            j = self.lr0_cidhash.get(id(g),-1)
            if j >= 0:
                st_goto[n] = j
                log.info("    %-30s shift and go to state %d",n,j)

        action[st] = st_action
        actionp[st] = st_actionp
        goto[st] = st_goto
        st += 1
2539
# -----------------------------------------------------------------------------
# write_table()
#
# This function writes the LR parsing tables to a file
# -----------------------------------------------------------------------------
2545
def write_table(self, modulename, outputdir='', signature=""):
    """Write the LR parsing tables to a generated Python module.

    modulename - dotted name of the table module; only the last component
                 is used as the filename.
    outputdir  - directory in which the file is created.
    signature  - grammar signature embedded in the file so stale tables
                 can be detected on the next load.

    On I/O failure, reports the error to stderr and returns (best-effort).
    """
    # NOTE(review): the table-factoring loops were partially destroyed by
    # extraction garbling; reconstructed around the visible statements.
    basemodulename = modulename.split(".")[-1]
    filename = os.path.join(outputdir,basemodulename) + ".py"
    try:
        f = open(filename,"w")

        f.write("""
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    """ % (filename, __tabversion__, self.lr_method, signature))

        # Change smaller to 0 to go back to original tables
        smaller = 1

        # Factor out names to try and make smaller
        if smaller:
            items = { }

            for s,nd in self.lr_action.items():
                for name,v in nd.items():
                    i = items.get(name)
                    if not i:
                        i = ([],[])
                        items[name] = i
                    i[0].append(s)
                    i[1].append(v)

            f.write("\n_lr_action_items = {")
            for k,v in items.items():
                f.write("%r:([" % k)
                for i in v[0]:
                    f.write("%r," % i)
                f.write("],[")
                for i in v[1]:
                    f.write("%r," % i)
                f.write("]),")
            f.write("}\n")

            f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
      if not _x in _lr_action:  _lr_action[_x] = { }
      _lr_action[_x][_k] = _y
del _lr_action_items
""")
        else:
            f.write("\n_lr_action = { ");
            for k,v in self.lr_action.items():
                f.write("(%r,%r):%r," % (k[0],k[1],v))
            f.write("}\n");

        if smaller:
            # Factor out names to try and make smaller
            items = { }

            for s,nd in self.lr_goto.items():
                for name,v in nd.items():
                    i = items.get(name)
                    if not i:
                        i = ([],[])
                        items[name] = i
                    i[0].append(s)
                    i[1].append(v)

            f.write("\n_lr_goto_items = {")
            for k,v in items.items():
                f.write("%r:([" % k)
                for i in v[0]:
                    f.write("%r," % i)
                f.write("],[")
                for i in v[1]:
                    f.write("%r," % i)
                f.write("]),")
            f.write("}\n")

            f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       if not _x in _lr_goto: _lr_goto[_x] = { }
       _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
        else:
            f.write("\n_lr_goto = { ");
            for k,v in self.lr_goto.items():
                f.write("(%r,%r):%r," % (k[0],k[1],v))
            f.write("}\n");

        # Write production table
        f.write("_lr_productions = [\n")
        for p in self.lr_productions:
            if p.func:
                f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
            else:
                f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
        f.write("]\n")
        f.close()

    except IOError:
        e = sys.exc_info()[1]
        sys.stderr.write("Unable to create '%s'\n" % filename)
        sys.stderr.write(str(e)+"\n")
        return
2660
# -----------------------------------------------------------------------------
# pickle_table()
#
# This function pickles the LR parsing tables to a supplied file object
# -----------------------------------------------------------------------------
2666
def pickle_table(self, filename, signature=""):
    """Pickle the LR parsing tables to the file named by `filename`.

    Writes, in order: table version, LR method, grammar signature, action
    table, goto table, and a list of production tuples. Callable rule
    actions are stored by name and rebound later by bind_callables().
    """
    # cPickle only exists on Python 2; fall back to pickle on Python 3.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    outf = open(filename,"wb")
    pickle.dump(__tabversion__,outf,pickle_protocol)
    pickle.dump(self.lr_method,outf,pickle_protocol)
    pickle.dump(signature,outf,pickle_protocol)
    pickle.dump(self.lr_action,outf,pickle_protocol)
    pickle.dump(self.lr_goto,outf,pickle_protocol)
    outp = []
    for p in self.lr_productions:
        if p.func:
            outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
        else:
            outp.append((str(p),p.name,p.len,None,None,None))
    pickle.dump(outp,outf,pickle_protocol)
    outf.close()
2687
# -----------------------------------------------------------------------------
#                            === INTROSPECTION ===
#
# The following functions and classes are used to implement the PLY
# introspection features followed by the yacc() function itself.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# get_caller_module_dict()
#
# This function returns a dictionary containing all of the symbols defined within
# a caller further down the call stack.  This is used to get the environment
# associated with the yacc() call if none was provided.
# -----------------------------------------------------------------------------
2702
def get_caller_module_dict(levels):
    """Return a dict of all symbols defined in a caller `levels` frames
    down the call stack (globals updated with locals).

    Used to recover the environment of the yacc() call when no module
    object is supplied explicitly.
    """
    # Raise and immediately catch an exception purely to obtain a
    # traceback object, from which we can walk the frame stack.
    try:
        raise RuntimeError
    except RuntimeError:
        e,b,t = sys.exc_info()
        f = t.tb_frame
        while levels > 0:
            f = f.f_back
            levels -= 1
        ldict = f.f_globals.copy()
        if f.f_globals != f.f_locals:
            ldict.update(f.f_locals)
        return ldict
2717
# -----------------------------------------------------------------------------
# parse_grammar()
#
# This takes a raw grammar rule string and parses it into production data
# -----------------------------------------------------------------------------
2722
def parse_grammar(doc, file, line):
    """Parse a raw grammar rule docstring into production data.

    doc  - the docstring text containing one or more BNF rules
    file - filename of the defining function (for error messages)
    line - line number of the defining function; rule lines are numbered
           relative to it

    Returns a list of (file, line, prodname, syms) tuples.
    Raises SyntaxError on a misplaced '|' or a malformed rule.
    """
    grammar = []
    # Split the doc string into lines
    pstrings = doc.splitlines()
    lastp = None
    dline = line
    for ps in pstrings:
        dline += 1
        p = ps.split()
        if not p: continue
        try:
            if p[0] == '|':
                # This is a continuation of a previous rule
                if not lastp:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
                prodname = lastp
                syms = p[1:]
            else:
                prodname = p[0]
                lastp = prodname
                syms = p[2:]
                assign = p[1]
                if assign != ':' and assign != '::=':
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
            grammar.append((file,dline,prodname,syms))
        except SyntaxError:
            raise
        except Exception:
            # e.g. IndexError on a rule with no ':' token at all
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))
    return grammar
2755
# -----------------------------------------------------------------------------
# ParserReflect()
#
# This class represents information extracted for building a parser including
# start symbol, error function, tokens, precedence list, action functions,
# etc.
# -----------------------------------------------------------------------------
2762
class ParserReflect(object):
    """Collects and validates parser information extracted from a module
    dictionary: start symbol, error function, tokens, precedence list,
    and p_*() action functions.

    NOTE(review): extraction garbling removed many statements from this
    class; method bodies were reconstructed around the visible lines —
    verify against the original before relying on details.
    """
    def __init__(self,pdict,log=None):
        self.pdict      = pdict
        self.start      = None
        self.error_func = None
        self.tokens     = None
        self.files      = {}
        self.grammar    = []
        self.error      = 0

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_files()
        return self.error

    # Compute a signature over the grammar
    def signature(self):
        try:
            from hashlib import md5
        except ImportError:
            from md5 import md5
        try:
            sig = md5()
            if self.start:
                sig.update(self.start.encode('latin-1'))
            if self.prec:
                sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
            if self.tokens:
                sig.update(" ".join(self.tokens).encode('latin-1'))
            for f in self.pfuncs:
                if f[3]:
                    sig.update(f[3].encode('latin-1'))
        except (TypeError,ValueError):
            pass
        return sig.digest()

    # -----------------------------------------------------------------------------
    # validate_files()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------
    def validate_files(self):
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for filename in self.files.keys():
            base,ext = os.path.splitext(filename)
            if ext != '.py': return 1          # No idea. Assume it's okay.

            try:
                f = open(filename)
                lines = f.readlines()
                f.close()
            except IOError:
                continue

            counthash = { }
            for linen,l in enumerate(lines):
                linen += 1
                m = fre.match(l)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start,str):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        if self.error_func:
            if isinstance(self.error_func,types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = 1
                return

            eline = func_code(self.error_func).co_firstlineno
            efile = func_code(self.error_func).co_filename
            self.files[efile] = 1

            if (func_code(self.error_func).co_argcount != 1+ismethod):
                self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
                self.error = 1

    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get("tokens",None)
        if not tokens:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens,(list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = 1
            return

        terminals = {}
        for n in self.tokens:
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get("precedence",None)

    # Validate and parse the precedence map
    def validate_precedence(self):
        preclist = []
        if self.prec:
            if not isinstance(self.prec,(list,tuple)):
                self.log.error("precedence must be a list or tuple")
                self.error = 1
                return
            for level,p in enumerate(self.prec):
                if not isinstance(p,(list,tuple)):
                    self.log.error("Bad precedence table")
                    self.error = 1
                    return

                if len(p) < 2:
                    self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
                    self.error = 1
                    return
                assoc = p[0]
                if not isinstance(assoc,str):
                    self.log.error("precedence associativity must be a string")
                    self.error = 1
                    return
                for term in p[1:]:
                    if not isinstance(term,str):
                        self.log.error("precedence items must be strings")
                        self.error = 1
                        return
                    preclist.append((term,assoc,level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        p_functions = []
        for name, item in self.pdict.items():
            if name[:2] != 'p_': continue
            if name == 'p_error': continue
            if isinstance(item,(types.FunctionType,types.MethodType)):
                line = func_code(item).co_firstlineno
                file = func_code(item).co_filename
                p_functions.append((line,file,name,item.__doc__))

        # Sort all of the actions by line number
        p_functions.sort()
        self.pfuncs = p_functions

    # Validate all of the p_functions
    def validate_pfunctions(self):
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error("no rules of the form p_rulename are defined")
            self.error = 1
            return

        for line, file, name, doc in self.pfuncs:
            func = self.pdict[name]
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func_code(func).co_argcount > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
                self.error = 1
            elif func_code(func).co_argcount < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
                self.error = 1
            elif not func.__doc__:
                self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc,file,line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError:
                    e = sys.exc_info()[1]
                    self.log.error(str(e))
                    self.error = 1

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.files[file] = 1

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n,v in self.pdict.items():
            if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
            if n[0:2] == 't_': continue
            if n[0:2] == 'p_' and n != 'p_error':
                self.log.warning("'%s' not defined as a function", n)
            if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
                (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
                try:
                    doc = v.__doc__.split(" ")
                    if doc[1] == ':':
                        self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
                                         func_code(v).co_filename, func_code(v).co_firstlineno,n)
                except Exception:
                    pass

        self.grammar = grammar
3030
# -----------------------------------------------------------------------------
# yacc(module)
#
# Build a parser
# -----------------------------------------------------------------------------
3036
def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3037
check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
3038
debuglog=None, errorlog = None, picklefile=None):
3040
global parse # Reference to the parsing method of the last built parser
3042
# If pickling is enabled, table files are not created
3047
if errorlog is None:
3048
errorlog = PlyLogger(sys.stderr)
3050
# Get the module dictionary used for the parser
3052
_items = [(k,getattr(module,k)) for k in dir(module)]
3053
pdict = dict(_items)
3055
pdict = get_caller_module_dict(2)
3057
# Collect parser information from the dictionary
3058
pinfo = ParserReflect(pdict,log=errorlog)
3062
raise YaccError("Unable to build parser")
3064
# Check signature against table files (if any)
3065
signature = pinfo.signature()
3071
read_signature = lr.read_pickle(picklefile)
3073
read_signature = lr.read_table(tabmodule)
3074
if optimize or (read_signature == signature):
3076
lr.bind_callables(pinfo.pdict)
3077
parser = LRParser(lr,pinfo.error_func)
3078
parse = parser.parse
3081
e = sys.exc_info()[1]
3082
errorlog.warning("There was a problem loading the table file: %s", repr(e))
3083
except VersionError:
3085
errorlog.warning(str(e))
3089
if debuglog is None:
3091
debuglog = PlyLogger(open(debugfile,"w"))
3093
debuglog = NullLogger()
3095
debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
3100
# Validate the parser information
3101
if pinfo.validate_all():
3102
raise YaccError("Unable to build parser")
3104
if not pinfo.error_func:
3105
errorlog.warning("no p_error() function is defined")
3107
# Create a grammar object
3108
grammar = Grammar(pinfo.tokens)
3110
# Set precedence level for terminals
3111
for term, assoc, level in pinfo.preclist:
3113
grammar.set_precedence(term,assoc,level)
3114
except GrammarError:
3115
e = sys.exc_info()[1]
3116
errorlog.warning("%s",str(e))
3118
# Add productions to the grammar
3119
for funcname, gram in pinfo.grammar:
3120
file, line, prodname, syms = gram
3122
grammar.add_production(prodname,syms,funcname,file,line)
3123
except GrammarError:
3124
e = sys.exc_info()[1]
3125
errorlog.error("%s",str(e))
3128
# Set the grammar start symbols
3131
grammar.set_start(pinfo.start)
3133
grammar.set_start(start)
3134
except GrammarError:
3135
e = sys.exc_info()[1]
3136
errorlog.error(str(e))
3140
raise YaccError("Unable to build parser")
3142
# Verify the grammar structure
3143
undefined_symbols = grammar.undefined_symbols()
3144
for sym, prod in undefined_symbols:
3145
errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
3148
unused_terminals = grammar.unused_terminals()
3149
if unused_terminals:
3151
debuglog.info("Unused terminals:")
3153
for term in unused_terminals:
3154
errorlog.warning("Token '%s' defined, but not used", term)
3155
debuglog.info(" %s", term)
3157
# Print out all productions to the debug log
3160
debuglog.info("Grammar")
3162
for n,p in enumerate(grammar.Productions):
3163
debuglog.info("Rule %-5d %s", n, p)
3165
# Find unused non-terminals
3166
unused_rules = grammar.unused_rules()
3167
for prod in unused_rules:
3168
errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)
3170
if len(unused_terminals) == 1:
3171
errorlog.warning("There is 1 unused token")
3172
if len(unused_terminals) > 1:
3173
errorlog.warning("There are %d unused tokens", len(unused_terminals))
3175
if len(unused_rules) == 1:
3176
errorlog.warning("There is 1 unused rule")
3177
if len(unused_rules) > 1:
3178
errorlog.warning("There are %d unused rules", len(unused_rules))
3182
debuglog.info("Terminals, with rules where they appear")
3184
terms = list(grammar.Terminals)
3187
debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
3190
debuglog.info("Nonterminals, with rules where they appear")
3192
nonterms = list(grammar.Nonterminals)
3194
for nonterm in nonterms:
3195
debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
3199
unreachable = grammar.find_unreachable()
3200
for u in unreachable:
3201
errorlog.warning("Symbol '%s' is unreachable",u)
3203
infinite = grammar.infinite_cycles()
3204
for inf in infinite:
3205
errorlog.error("Infinite recursion detected for symbol '%s'", inf)
3208
unused_prec = grammar.unused_precedence()
3209
for term, assoc in unused_prec:
3210
errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
3214
raise YaccError("Unable to build parser")
3216
# Run the LRGeneratedTable on the grammar
3218
errorlog.debug("Generating %s tables", method)
3220
lr = LRGeneratedTable(grammar,method,debuglog)
3223
num_sr = len(lr.sr_conflicts)
3225
# Report shift/reduce and reduce/reduce conflicts
3227
errorlog.warning("1 shift/reduce conflict")
3229
errorlog.warning("%d shift/reduce conflicts", num_sr)
3231
num_rr = len(lr.rr_conflicts)
3233
errorlog.warning("1 reduce/reduce conflict")
3235
errorlog.warning("%d reduce/reduce conflicts", num_rr)
3237
# Write out conflicts to the output file
3238
if debug and (lr.sr_conflicts or lr.rr_conflicts):
3239
debuglog.warning("")
3240
debuglog.warning("Conflicts:")
3241
debuglog.warning("")
3243
for state, tok, resolution in lr.sr_conflicts:
3244
debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)
3246
already_reported = {}
3247
for state, rule, rejected in lr.rr_conflicts:
3248
if (state,id(rule),id(rejected)) in already_reported:
3250
debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3251
debuglog.warning("rejected rule (%s) in state %d", rejected,state)
3252
errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3253
errorlog.warning("rejected rule (%s) in state %d", rejected, state)
3254
already_reported[state,id(rule),id(rejected)] = 1
3257
for state, rule, rejected in lr.rr_conflicts:
3258
if not rejected.reduced and (rejected not in warned_never):
3259
debuglog.warning("Rule (%s) is never reduced", rejected)
3260
errorlog.warning("Rule (%s) is never reduced", rejected)
3261
warned_never.append(rejected)
3263
# Write the table file if requested
3265
lr.write_table(tabmodule,outputdir,signature)
3267
# Write a pickled version of the tables
3269
lr.pickle_table(picklefile,signature)
3272
lr.bind_callables(pinfo.pdict)
3273
parser = LRParser(lr,pinfo.error_func)
3275
parse = parser.parse