3
# Copyright (c) 2003,2004 Paul T. McGuire
5
# Permission is hereby granted, free of charge, to any person obtaining
6
# a copy of this software and associated documentation files (the
7
# "Software"), to deal in the Software without restriction, including
8
# without limitation the rights to use, copy, modify, merge, publish,
9
# distribute, sublicense, and/or sell copies of the Software, and to
10
# permit persons to whom the Software is furnished to do so, subject to
11
# the following conditions:
13
# The above copyright notice and this permission notice shall be
14
# included in all copies or substantial portions of the Software.
16
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
# - add pprint() - pretty-print output of defined BNF
27
from __future__ import generators
30
pyparsing module - Classes and methods to define and execute parsing grammars
32
The pyparsing module is an alternative approach to creating and executing simple grammars,
33
vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
34
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
35
provides a library of classes that you use to construct the grammar directly in Python.
37
Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
39
from pyparsing import Word, alphas
41
# define grammar of a greeting
42
greet = Word( alphas ) + "," + Word( alphas ) + "!"
44
hello = "Hello, World!"
45
print hello, "->", greet.parseString( hello )
47
The program outputs the following::
49
Hello, World! -> ['Hello', ',', 'World', '!']
51
The Python representation of the grammar is quite readable, owing to the self-explanatory
52
class names, and the use of '+', '|' and '^' operators.
54
The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
55
object with named attributes.
57
The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
58
- extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
63
__versionTime__ = "27 September 04 00:22"
64
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
68
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
70
class ParseException(Exception):
    """Exception raised when a parse expression does not match the input.

    Stored attributes:
     - pstr - the string that was being parsed
     - loc - the offset within pstr associated with the failure
     - msg - the error message

    The attributes lineno, col/column and line are computed on demand
    from loc and pstr (see __getattr__).
    """
    __slots__ = ( "loc","msg","pstr" )
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc, msg ):
        self.loc = loc
        self.msg = msg
        self.pstr = pstr

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        # Only reached when normal attribute lookup fails, so the slot
        # attributes above never pass through here.
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        # call form of raise: valid on both Python 2 and Python 3
        raise AttributeError( aname )

    def __str__( self ):
        """Format as "<msg> (<loc>), (<lineno>,<column>)"."""
        return "%s (%d), (%d,%d)" % ( self.msg, self.loc, self.lineno, self.column )
100
class RecursiveGrammarException(Exception):
    """Exception raised by validate() if the grammar could be improperly recursive.

    parseElementTrace holds the sequence of parse elements that was passed
    to the constructor.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple so that a tuple trace is formatted as
        # a single value instead of being unpacked as the % arguments.
        return "RecursiveGrammarException: %s" % ( self.parseElementTrace, )
108
class ParseResults(object):
109
"""Structured parse results, to provide multiple means of access to the parsed data:
110
- as a list (len(results))
111
- by list index (results[0], results[1], etc.)
112
- by attribute (results.<resultsName>)
114
__slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__modal" )
115
def __new__(cls, toklist, name=None, asList=True, modal=True ):
116
if isinstance(toklist, cls):
118
retobj = object.__new__(cls)
119
retobj.__doinit = True
122
# Performance tuning: we construct a *lot* of these, so keep this
123
# constructor as small and fast as possible
124
def __init__( self, toklist, name=None, asList=True, modal=True ):
126
self.__doinit = False
130
if type(toklist) is list:
131
self.__toklist = toklist[:]
133
self.__toklist = [toklist]
137
if isinstance(name,int):
141
if isinstance(toklist,str):
142
toklist = [ toklist ]
144
if isinstance(toklist,ParseResults):
145
self[name] = (toklist.copy(),-1)
147
self[name] = (ParseResults(toklist[0]),-1)
148
self[name].__name = name
151
self[name] = toklist[0]
155
def __getitem__( self, i ):
156
if isinstance( i, (int,slice) ):
157
return self.__toklist[i]
160
return self.__tokdict[i][-1][0]
162
return ParseResults([ v[0] for v in self.__tokdict[i] ])
164
def __setitem__( self, k, v ):
165
if isinstance(v,tuple):
166
self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
169
self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
171
if isinstance(sub,ParseResults):
174
def __delitem__( self, i ):
175
del self.__toklist[i]
177
def __contains__( self, k ):
178
return self.__tokdict.has_key(k)
180
def __len__( self ): return len( self.__toklist )
181
def __iter__( self ): return iter( self.__toklist )
183
"""Returns all named result keys."""
184
return self.__tokdict.keys()
187
"""Returns all named result keys and values as a list of tuples."""
188
return [(k,v[-1][0]) for k,v in self.__tokdict.items()]
191
"""Returns all named result values."""
192
return [ v[-1][0] for v in self.__tokdict.values() ]
194
def __getattr__( self, name ):
195
if name not in self.__slots__:
196
if self.__tokdict.has_key( name ):
198
return self.__tokdict[name][-1][0]
200
return ParseResults([ v[0] for v in self.__tokdict[name] ])
205
def __iadd__( self, other ):
206
offset = len(self.__toklist)
207
self.__toklist += other.__toklist
209
addOffset = lambda a: (a<0 and offset) or (a + offset)
210
otherdictitems = [(k,(v[0],addOffset(v[1])) ) for (k,vlist) in other.__tokdict.items() for v in vlist]
211
for k,v in otherdictitems:
213
if isinstance(v[0],ParseResults):
217
def __repr__( self ):
218
return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
223
for i in self.__toklist:
224
if isinstance(i, ParseResults):
232
def _asStringList( self, sep='' ):
234
for item in self.__toklist:
237
if isinstance( item, ParseResults ):
238
out += item._asStringList()
240
out.append( str(item) )
244
"""Returns the parse results as a nested list of matching tokens, all converted to strings."""
246
for res in self.__toklist:
247
if isinstance(res,ParseResults):
248
out.append( res.asList() )
254
"""Returns a new copy of a ParseResults object."""
255
ret = ParseResults( self.__toklist )
256
ret.__tokdict = self.__tokdict.copy()
257
ret.__parent = self.__parent
258
ret.__modal = self.__modal
259
ret.__name = self.__name
262
def asXML( self, doctag=None, namedItemsOnly=False, indent="" ):
263
"""Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
266
namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] )
269
if doctag is not None:
273
selfTag = self.__name
281
out += [ nl, indent, "<", selfTag, ">" ]
283
worklist = self.__toklist
284
for i,res in enumerate(worklist):
285
if isinstance(res,ParseResults):
287
out += [ res.asXML(namedItems[i], namedItemsOnly and doctag is None, indent+" ")]
289
out += [ res.asXML(None, namedItemsOnly and doctag is None, indent+" ")]
291
# individual token, see if there is a name for it
294
resTag = namedItems[i]
300
out += [ nl, indent, " ", "<", resTag, ">", str(res), "</", resTag, ">" ]
302
out += [ nl, indent, "</", selfTag, ">" ]
306
def __lookup(self,sub):
307
for k,vlist in self.__tokdict.items():
314
"""Returns the results name for this token expression."""
320
return par.__lookup(self)
323
elif (len(self) == 1 and
324
len(self.__tokdict) == 1 and
325
self.__tokdict.values()[0][0][1] in (0,-1)):
326
return self.__tokdict.keys()[0]
330
def col( loc, strg ):
    """Returns current column within a string, counting newlines as line separators
       The first column is number 1.

       loc is an offset into strg; the result is the distance from the last
       newline found before loc (or from just before the start of the string
       when there is none).
       """
    # rfind returns -1 when no newline precedes loc, which makes the first
    # character of the string column 1.
    return loc - strg.rfind("\n", 0, loc)
335
def lineno( loc, strg ):
    """Returns current line number within a string, counting newlines as line separators
       The first line is number 1.

       loc is an offset into strg; only newlines before loc are counted.
       """
    return strg.count("\n", 0, loc) + 1
340
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators

       The returned text excludes the surrounding newline characters.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() returns -1 when no newline follows loc; slicing with -1 would
    # silently drop the last character, so only use nextCR when it was found.
    # Index 0 is a valid hit (string starting with a newline), hence ">= 0".
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    return strg[lastCR+1:]
352
class ParserElement(object):
353
"""Abstract base level parser element class."""
354
def __init__( self, savelist=False ):
355
self.parseAction = None
356
#~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
358
self.resultsName = None
359
self.saveList = savelist
360
self.skipWhitespace = True
361
self.whiteChars = " \n\t\r"
362
self.mayReturnEmpty = False
363
self.keepTabs = False
364
self.ignoreExprs = []
366
self.streamlined = False
367
self.mayIndexError = True
369
self.modalResults = True
371
def setName( self, name ):
372
"""Define name for this expression, for use in debugging."""
374
self.errmsg = "Expected " + self.name
377
def setResultsName( self, name, listAllMatches=False ):
378
"""Define name for referencing matching tokens as a nested attribute
379
of the returned parse results.
380
NOTE: this returns a *copy* of the original ParseElement object;
381
this is so that the client can define a basic element, such as an
382
integer, and reference it in multiple places with different names.
384
newself = copy.copy( self )
385
newself.resultsName = name
386
newself.modalResults = not listAllMatches
389
def setParseAction( self, fn ):
390
"""Define action to perform when successfully matching parse element definition.
391
Parse action fn is a callable method with the arguments (s, loc, toks) where:
392
- s = the original string being parsed
393
- loc = the location of the matching substring
394
- toks = a list of the matched tokens
395
If the function fn modifies the tokens, it can return them as the return
396
value from fn, and the modified list of tokens will replace the original.
397
Otherwise, fn does not need to return any value.
399
self.parseAction = fn
402
def skipIgnorables( self, instring, loc ):
406
for e in self.ignoreExprs:
409
loc,dummy = e.parse( instring, loc )
411
except ParseException:
415
def preParse( self, instring, loc ):
417
loc = self.skipIgnorables( instring, loc )
419
if self.skipWhitespace:
421
instrlen = len(instring)
422
while loc < instrlen and instring[loc] in wt:
427
def parseImpl( self, instring, loc, doActions=True ):
430
def postParse( self, instring, loc, tokenlist ):
433
def parse( self, instring, loc, doActions=True ):
434
debugging = ( self.debug and doActions )
437
print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
438
loc = self.preParse( instring, loc )
441
loc,tokens = self.parseImpl( instring, loc, doActions )
443
raise ParseException, ( instring, len(instring), self.errmsg )
444
except ParseException, err:
445
print "Exception raised:", err
448
loc = self.preParse( instring, loc )
450
if self.mayIndexError or loc >= len(instring):
452
loc,tokens = self.parseImpl( instring, loc, doActions )
454
raise ParseException, ( instring, len(instring), self.errmsg )
456
loc,tokens = self.parseImpl( instring, loc, doActions )
458
tokens = self.postParse( instring, loc, tokens )
460
retTokens = ParseResults( tokens, self.resultsName, asList=self.saveList, modal=self.modalResults )
461
if self.parseAction and doActions:
464
tokens = self.parseAction( instring, tokensStart, retTokens )
465
if tokens is not None:
466
if isinstance(tokens,tuple):
468
retTokens = ParseResults( tokens, self.resultsName, asList=self.saveList, modal=self.modalResults )
469
except ParseException, err:
470
print "Exception raised in user parse action:", err
473
tokens = self.parseAction( instring, tokensStart, retTokens )
474
if tokens is not None:
475
if isinstance(tokens,tuple):
477
retTokens = ParseResults( tokens,
479
asList=self.saveList and isinstance(tokens,(ParseResults,list)),
480
modal=self.modalResults )
483
print "Matched",self,"->",retTokens.asList()
485
return loc, retTokens
487
def tryParse( self, instring, loc ):
488
return self.parse( instring, loc, doActions=False )[0]
490
def parseString( self, instring ):
491
"""Execute the parse expression with the given string.
492
This is the main interface to the client code, once the complete
493
expression has been built.
495
if not self.streamlined:
498
for e in self.ignoreExprs:
501
loc, tokens = self.parse( instring, 0 )
503
loc, tokens = self.parse( instring.expandtabs(), 0 )
506
def scanString( self, instring ):
507
"""Scan the input string for expression matches. Each match will return the matching tokens, start location, and end location."""
508
if not self.streamlined:
510
for e in self.ignoreExprs:
513
if not self.keepTabs:
514
instring = instring.expandtabs()
515
instrlen = len(instring)
517
preparseFn = self.preParse
519
while loc < instrlen:
521
loc = preparseFn( instring, loc )
522
nextLoc,tokens = parseFn( instring, loc )
523
except ParseException:
526
yield tokens, loc, nextLoc
529
def transformString( self, instring ):
530
"""Extension to scanString, to modify matching text with modified tokens that may
531
be returned from a parse action. To use transformString, define a grammar and
532
attach a parse action to it that modifies the returned token list.
533
Invoking transformString() on a target string will then scan for matches,
534
and replace the matched text patterns according to the logic in the parse
535
action. transformString() returns the resulting transformed string."""
538
# force preservation of <TAB>s, to minimize unwanted transformation of string, and to
539
# keep string locs straight between transformString and scanString
541
for t,s,e in self.scanString( instring ):
542
out.append( instring[lastE:s] )
544
if isinstance(t,ParseResults):
546
elif isinstance(t,list):
551
out.append(instring[lastE:])
554
def __add__(self, other ):
555
"""Implementation of + operator - returns And"""
556
if isinstance( other, str ):
557
other = Literal( other )
558
return And( [ self, other ] )
560
def __radd__(self, other ):
561
"""Implementation of the reflected + operator (other + self), which Python calls when the left operand does not handle the addition itself, e.g. a plain str; a str operand is wrapped in a Literal"""
562
if isinstance( other, str ):
563
other = Literal( other )
566
def __or__(self, other ):
567
"""Implementation of | operator - returns MatchFirst"""
568
if isinstance( other, str ):
569
other = Literal( other )
570
return MatchFirst( [ self, other ] )
572
def __ror__(self, other ):
573
"""Implementation of the reflected | operator (other | self), which Python calls when the left operand does not handle the operation itself, e.g. a plain str; a str operand is wrapped in a Literal"""
574
if isinstance( other, str ):
575
other = Literal( other )
578
def __xor__(self, other ):
579
"""Implementation of ^ operator - returns Or"""
580
if isinstance( other, str ):
581
other = Literal( other )
582
return Or( [ self, other ] )
584
def __rxor__(self, other ):
585
"""Implementation of the reflected ^ operator (other ^ self), which Python calls when the left operand does not handle the operation itself, e.g. a plain str; a str operand is wrapped in a Literal"""
586
if isinstance( other, str ):
587
other = Literal( other )
590
def __invert__( self ):
591
"""Implementation of ~ operator - returns NotAny"""
592
return NotAny( self )
594
def suppress( self ):
595
"""Suppresses the output of this ParseElement; useful to keep punctuation from
596
cluttering up returned output.
598
return Suppress( self )
600
def leaveWhitespace( self ):
601
self.skipWhitespace = False
604
def parseWithTabs( self ):
605
"""Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
606
Must be called before parseString when the input grammar contains elements that
607
match <TAB> characters."""
611
def ignore( self, other ):
612
"""Define expression to be ignored (e.g., comments) while doing pattern
613
matching; may be called repeatedly, to define multiple comment or other
616
if isinstance( other, Suppress ):
617
if other not in self.ignoreExprs:
618
self.ignoreExprs.append( other )
620
self.ignoreExprs.append( Suppress( other ) )
623
def setDebug( self, flag=True ):
624
"""Enable display of debugging messages while doing pattern matching."""
631
def __repr__( self ):
634
def streamline( self ):
635
self.streamlined = True
639
def checkRecursion( self, parseElementList ):
642
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    validateTrace is accepted for signature compatibility with overriding
    implementations; this base implementation does not read it, so the
    default is None rather than a shared mutable list.
    """
    self.checkRecursion( [] )
646
def parseFile( self, file_or_filename ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       Returns whatever parseString() returns for the file contents.
       """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # Not a file-like object: treat the argument as a path.
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # Release the handle even if read() fails.
            f.close()
    return self.parseString(file_contents)
660
class Token(ParserElement):
661
"""Abstract ParserElement subclass, for defining atomic matching patterns."""
662
def __init__( self ):
663
super(Token,self).__init__( savelist=False )
664
self.myException = ParseException("",0,"")
666
def setName(self, name):
667
s = super(Token,self).setName(name)
668
self.errmsg = "Expected " + self.name
669
s.myException.msg = self.errmsg
674
"""An empty token, will always match."""
675
def __init__( self ):
676
super(Empty,self).__init__()
678
self.mayReturnEmpty = True
679
self.mayIndexError = False
682
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"
        # Previously written as "s.myException.msg", but no name "s" exists
        # in this scope, so constructing a NoMatch raised NameError.
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this token's shared exception for the current position."""
        # Reuse the exception object held by the token, filling in the
        # location and input string for this attempt.
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
699
class Literal(Token):
700
"""Token to exactly match a specified string."""
701
def __init__( self, matchString ):
702
super(Literal,self).__init__()
703
self.match = matchString
704
self.matchLen = len(matchString)
706
self.firstMatchChar = matchString[0]
708
sys.stderr.write("\nnull string passed to Literal; use Empty() instead\n")
710
self.name = '"%s"' % self.match
711
self.errmsg = "Expected " + self.name
712
self.mayReturnEmpty = False
713
self.myException.msg = self.errmsg
714
self.mayIndexError = False
716
# Performance tuning: this routine gets called a *lot*
717
# if this is a single character match string and the first character matches,
718
# short-circuit as quickly as possible, and avoid calling startswith
719
def parseImpl( self, instring, loc, doActions=True ):
720
if (instring[loc] == self.firstMatchChar and
721
(self.matchLen==1 or instring.startswith(self.match,loc)) ):
722
return loc+self.matchLen, [ self.match ]
723
#~ raise ParseException, ( instring, loc, self.errmsg )
724
exc = self.myException
730
class CaselessLiteral(Literal):
731
"""Token to match a specified string, ignoring case of letters.
732
Note: the matched results will always be in the case of the given
733
match string, NOT the case of the input text.
735
def __init__( self, matchString ):
736
super(CaselessLiteral,self).__init__( matchString.upper() )
737
# Preserve the defining literal.
738
self.returnString = matchString
739
self.name = "'%s'" % self.returnString
740
self.errmsg = "Expected " + self.name
741
self.myException.msg = self.errmsg
743
def parseImpl( self, instring, loc, doActions=True ):
744
if instring[ loc:loc+self.matchLen ].upper() == self.match:
745
return loc+self.matchLen, [ self.returnString ]
746
#~ raise ParseException, ( instring, loc, self.errmsg )
747
exc = self.myException
754
"""Token for matching words composed of allowed character sets.
755
Defined with string containing all allowed initial characters,
756
an optional string containing allowed body characters (if omitted,
757
defaults to the initial character set), and an optional minimum,
758
maximum, and/or exact length.
760
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0 ):
761
super(Word,self).__init__()
762
self.initChars = initChars
764
self.bodyChars = bodyChars
766
self.bodyChars = initChars
773
self.maxLen = sys.maxint
779
self.name = str(self)
780
self.errmsg = "Expected " + self.name
781
self.myException.msg = self.errmsg
782
self.mayIndexError = False
784
def parseImpl( self, instring, loc, doActions=True ):
785
if not(instring[ loc ] in self.initChars):
786
#~ raise ParseException, ( instring, loc, self.errmsg )
787
exc = self.myException
793
bodychars = self.bodyChars
794
maxloc = start + self.maxLen
795
maxloc = min( maxloc, len(instring) )
796
while loc < maxloc and instring[loc] in bodychars:
799
if loc - start < self.minLen:
800
#~ raise ParseException, ( instring, loc, self.errmsg )
801
exc = self.myException
806
return loc, [ instring[start:loc] ]
810
return super(Word,self).__str__()
815
if self.strRepr is None:
823
if ( self.initChars != self.bodyChars ):
824
self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initChars), charsAsStr(self.bodyChars) )
826
self.strRepr = "W:(%s)" % charsAsStr(self.initChars)
831
class CharsNotIn(Token):
832
"""Token for matching words composed of characters *not* in a given set.
833
Defined with string containing all disallowed characters, and an optional
834
minimum, maximum, and/or exact length.
836
def __init__( self, notChars, min=1, max=0, exact=0 ):
837
super(CharsNotIn,self).__init__()
838
self.skipWhitespace = False
839
self.notChars = notChars
846
self.maxLen = sys.maxint
852
self.name = str(self)
853
self.errmsg = "Expected " + self.name
854
self.mayReturnEmpty = ( self.minLen == 0 )
855
self.myException.msg = self.errmsg
856
self.mayIndexError = False
858
def parseImpl( self, instring, loc, doActions=True ):
859
if instring[loc] in self.notChars:
860
#~ raise ParseException, ( instring, loc, self.errmsg )
861
exc = self.myException
868
notchars = self.notChars
869
maxlen = min( start+self.maxLen, len(instring) )
870
while loc < maxlen and \
871
(instring[loc] not in notchars):
874
if loc - start < self.minLen:
875
#~ raise ParseException, ( instring, loc, self.errmsg )
876
exc = self.myException
881
return loc, [ instring[start:loc] ]
885
return super(CharsNotIn, self).__str__()
889
if self.strRepr is None:
890
if len(self.notChars) > 4:
891
self.strRepr = "!W:(%s...)" % self.notChars[:4]
893
self.strRepr = "!W:(%s)" % self.notChars
898
"""Special matching class for matching whitespace. Normally, whitespace is ignored
899
by pyparsing grammars. This class is included when some whitespace structures
900
are significant. Define with a string containing the whitespace characters to be
901
matched; default is " \\t\\r\\n". Also takes optional min, max, and exact arguments,
902
as defined for the Word class."""
909
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
910
super(White,self).__init__()
912
self.whiteChars = "".join([c for c in self.whiteChars if c not in self.matchWhite])
913
#~ self.leaveWhitespace()
914
self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
915
self.mayReturnEmpty = True
916
self.errmsg = "Expected " + self.name
917
self.myException.msg = self.errmsg
924
self.maxLen = sys.maxint
930
def parseImpl( self, instring, loc, doActions=True ):
931
if not(instring[ loc ] in self.matchWhite):
932
#~ raise ParseException, ( instring, loc, self.errmsg )
933
exc = self.myException
939
maxloc = start + self.maxLen
940
maxloc = min( maxloc, len(instring) )
941
while loc < maxloc and instring[loc] in self.matchWhite:
944
if loc - start < self.minLen:
945
#~ raise ParseException, ( instring, loc, self.errmsg )
946
exc = self.myException
951
return loc, [ instring[start:loc] ]
954
class PositionToken(Token):
955
def __init__( self ):
956
super(PositionToken,self).__init__()
957
self.name=self.__class__.__name__
958
self.mayReturnEmpty = True
960
class GoToColumn(PositionToken):
961
"""Token to advance to a specific column of input text; useful for tabular report scraping."""
962
def __init__( self, colno ):
963
super(GoToColumn,self).__init__()
966
def preParse( self, instring, loc ):
967
if col(loc,instring) != self.col:
968
instrlen = len(instring)
970
loc = self.skipIgnorables( instring, loc )
971
while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
975
def parseImpl( self, instring, loc, doActions=True ):
    """Consume the text between loc and the target column self.col.

    Returns (newloc, [text]), where newloc is loc advanced by the distance
    from the current column to self.col and text is instring[loc:newloc].
    Raises ParseException if loc is already beyond self.col.
    """
    thiscol = col( loc, instring )
    if thiscol > self.col:
        # call form of raise: valid on both Python 2 and Python 3
        raise ParseException( instring, loc, "Text not in expected column" )
    newloc = loc + self.col - thiscol
    ret = instring[ loc: newloc ]
    return newloc, [ ret ]
983
class LineStart(PositionToken):
984
"""Matches if current position is at the beginning of a line within the parse string"""
985
def __init__( self ):
986
super(LineStart,self).__init__()
987
self.whiteChars = " \t"
988
self.errmsg = "Expected start of line"
989
self.myException.msg = self.errmsg
991
def preParse( self, instring, loc ):
992
loc = super(LineStart,self).preParse(instring,loc)
993
if instring[loc] == "\n":
997
def parseImpl( self, instring, loc, doActions=True ):
998
if not( loc==0 or ( loc<len(instring) and instring[loc-1] == "\n" ) ): #col(loc, instring) != 1:
999
#~ raise ParseException, ( instring, loc, "Expected start of line" )
1000
exc = self.myException
1006
class LineEnd(PositionToken):
1007
"""Matches if current position is at the end of a line within the parse string"""
1008
def __init__( self ):
1009
super(LineEnd,self).__init__()
1010
self.whiteChars = " \t"
1011
self.errmsg = "Expected end of line"
1012
self.myException.msg = self.errmsg
1014
def parseImpl( self, instring, loc, doActions=True ):
1015
if loc<len(instring):
1016
if instring[loc] == "\n":
1017
return loc+1, ["\n"]
1019
#~ raise ParseException, ( instring, loc, "Expected end of line" )
1020
exc = self.myException
1027
class StringStart(PositionToken):
1028
"""Matches if current position is at the beginning of the parse string"""
1029
def __init__( self ):
1030
super(StringStart,self).__init__()
1031
self.errmsg = "Expected start of text"
1032
self.myException.msg = self.errmsg
1034
def parseImpl( self, instring, loc, doActions=True ):
1036
# see if entire string up to here is just whitespace and ignoreables
1037
if loc != self.preParse( instring, 0 ):
1038
#~ raise ParseException, ( instring, loc, "Expected start of text" )
1039
exc = self.myException
1045
class StringEnd(PositionToken):
1046
"""Matches if current position is at the end of the parse string"""
1047
def __init__( self ):
1048
super(StringEnd,self).__init__()
1049
self.errmsg = "Expected end of text"
1050
self.myException.msg = self.errmsg
1052
def parseImpl( self, instring, loc, doActions=True ):
1053
if loc < len(instring):
1054
#~ raise ParseException, ( instring, loc, "Expected end of text" )
1055
exc = self.myException
1062
class ParseExpression(ParserElement):
1063
"""Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
1064
def __init__( self, exprs, savelist = False ):
1065
super(ParseExpression,self).__init__(savelist)
1066
if isinstance( exprs, list ):
1068
elif isinstance( exprs, str ):
1069
self.exprs = [ Literal( exprs ) ]
1071
self.exprs = [ exprs ]
1073
def __getitem__( self, i ):
1074
return self.exprs[i]
1076
def append( self, other ):
1077
self.exprs.append( other )
1081
def leaveWhitespace( self ):
1082
self.skipWhitespace = False
1083
self.exprs = [ copy.copy(e) for e in self.exprs ]
1084
for e in self.exprs:
1088
def ignore( self, other ):
1089
if isinstance( other, Suppress ):
1090
if other not in self.ignoreExprs:
1091
super( ParseExpression, self).ignore( other )
1092
for e in self.exprs:
1093
e.ignore( self.ignoreExprs[-1] )
1095
super( ParseExpression, self).ignore( other )
1096
for e in self.exprs:
1097
e.ignore( self.ignoreExprs[-1] )
1100
def __str__( self ):
1102
return super(ParseExpression,self).__str__()
1106
if self.strRepr is None:
1107
self.strRepr = "%s:(%s)" % ( self.__class__.__name__, str(self.exprs) )
1110
def streamline( self ):
1111
super(ParseExpression,self).streamline()
1113
for e in self.exprs:
1116
# collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
1117
# but only if there are no parse actions or resultsNames on the nested And's
1118
# (likewise for Or's and MatchFirst's)
1119
if ( len(self.exprs) == 2 ):
1120
other = self.exprs[0]
1121
if ( isinstance( other, self.__class__ ) and
1122
other.parseAction is None and
1123
other.resultsName is None and
1125
self.exprs = other.exprs[:] + [ self.exprs[1] ]
1128
other = self.exprs[-1]
1129
if ( isinstance( other, self.__class__ ) and
1130
other.parseAction is None and
1131
other.resultsName is None and
1133
self.exprs = self.exprs[:-1] + other.exprs[:]
1138
def setResultsName( self, name, listAllMatches=False ):
1139
ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
1143
def validate( self, validateTrace=None ):
    """Validate every contained expression, then check this expression for recursion.

    validateTrace is the list of expressions already being validated by
    the callers; it is copied, never modified. Defaults to an empty trace.
    """
    if validateTrace is None:
        validateTrace = []
    # Extend a copy of the trace with this expression and hand it down to
    # each sub-expression.
    tmp = validateTrace[:]+[self]
    for e in self.exprs:
        e.validate(tmp)
    self.checkRecursion( [] )
1150
class And(ParseExpression):
1151
"""Requires all given ParseExpressions to be found in the given order.
1152
Expressions may be separated by whitespace.
1153
May be constructed using the '+' operator.
1155
def __init__( self, exprs, savelist = False ):
1156
super(And,self).__init__(exprs, savelist)
1157
self.mayReturnEmpty = True
1159
if not e.mayReturnEmpty:
1160
self.mayReturnEmpty = False
1162
self.skipWhitespace = exprs[0].skipWhitespace
1163
self.whiteChars = exprs[0].whiteChars
1166
def parseImpl( self, instring, loc, doActions=True ):
1167
loc, resultlist = self.exprs[0].parse( instring, loc, doActions )
1168
for e in self.exprs[1:]:
1169
loc, exprtokens = e.parse( instring, loc, doActions )
1170
if exprtokens or exprtokens.keys():
1171
resultlist += exprtokens
1172
return loc, resultlist
1174
def __iadd__(self, other ):
1175
if isinstance( other, str ):
1176
other = Literal( other )
1177
return self.append( other ) #And( [ self, other ] )
1179
def checkRecursion( self, parseElementList ):
1180
subRecCheckList = parseElementList[:] + [ self ]
1181
for e in self.exprs:
1182
e.checkRecursion( subRecCheckList )
1183
if not e.mayReturnEmpty:
1186
def __str__( self ):
1187
if hasattr(self,"name"):
1190
if self.strRepr is None:
1191
self.strRepr = "{" + " ".join( [ str(e) for e in self.exprs ] ) + "}"
1196
class Or(ParseExpression):
1197
"""Requires that at least one ParseExpression is found.
1198
If two expressions match, the expression that matches the longest string will be used.
1199
May be constructed using the '^' operator.
1201
def __init__( self, exprs, savelist = False ):
1202
super(Or,self).__init__(exprs, savelist)
1203
self.mayReturnEmpty = False
1205
if e.mayReturnEmpty:
1206
self.mayReturnEmpty = True
1209
def parseImpl( self, instring, loc, doActions=True ):
1212
for e in self.exprs:
1214
loc2 = e.tryParse( instring, loc )
1215
except ParseException, err:
1216
if err.loc > maxExcLoc:
1219
except IndexError, err:
1220
if len(instring) > maxExcLoc:
1221
maxException = ParseException(instring,len(instring),e.errmsg)
1222
maxExcLoc = len(instring)
1224
if loc2 > maxMatchLoc:
1231
return maxMatchExp.parse( instring, loc, doActions )
1233
def __ixor__(self, other ):
1234
if isinstance( other, str ):
1235
other = Literal( other )
1236
return self.append( other ) #Or( [ self, other ] )
1238
def __str__( self ):
1239
if hasattr(self,"name"):
1242
if self.strRepr is None:
1243
self.strRepr = "{" + " ^ ".join( [ str(e) for e in self.exprs ] ) + "}"
1247
def checkRecursion( self, parseElementList ):
1248
subRecCheckList = parseElementList[:] + [ self ]
1249
for e in self.exprs:
1250
e.checkRecursion( subRecCheckList )
1253
class MatchFirst(ParseExpression):
1254
"""Requires that at least one ParseExpression is found.
1255
If two expressions match, the first one listed is the one that will match.
1256
May be constructed using the '|' operator.
1258
def __init__( self, exprs, savelist = False ):
1259
super(MatchFirst,self).__init__(exprs, savelist)
1260
self.mayReturnEmpty = False
1262
if e.mayReturnEmpty:
1263
self.mayReturnEmpty = True
1266
def parseImpl( self, instring, loc, doActions=True ):
1268
for e in self.exprs:
1270
return e.parse( instring, loc, doActions )
1271
except ParseException, err:
1272
if err.loc > maxExcLoc:
1275
except IndexError, err:
1276
if len(instring) > maxExcLoc:
1277
maxException = ParseException(instring,len(instring),e.errmsg)
1278
maxExcLoc = len(instring)
1280
# only got here if no expression matched, raise exception for match that made it the furthest
1285
def __ior__(self, other ):
1286
if isinstance( other, str ):
1287
other = Literal( other )
1288
return self.append( other ) #MatchFirst( [ self, other ] )
1290
def __str__( self ):
1291
if hasattr(self,"name"):
1294
if self.strRepr is None:
1295
self.strRepr = "{" + " | ".join( [ str(e) for e in self.exprs ] ) + "}"
1299
def checkRecursion( self, parseElementList ):
1300
subRecCheckList = parseElementList[:] + [ self ]
1301
for e in self.exprs:
1302
e.checkRecursion( subRecCheckList )
1305
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, str ):
            expr = Literal(expr)
        # the wrapped expression (may be None) and the cached __str__ text
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # take over the wrapped expression's parsing characteristics
            self.mayIndexError = expr.mayIndexError
            self.skipWhitespace = expr.skipWhitespace
            self.whiteChars = expr.whiteChars

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression.

        Raises ParseException when no expression has been set.
        """
        if self.expr is not None:
            return self.expr.parse( instring, loc, doActions )
        # arguments follow the (pstr, loc, msg) order used by every other
        # ParseException call in this module
        raise ParseException( instring, loc, "" )

    def leaveWhitespace( self ):
        """Turn off whitespace skipping here and on a copy of the wrapped expression."""
        self.skipWhitespace = False
        # work on a copy so the original expression object is left unchanged
        self.expr = copy.copy(self.expr)
        if self.expr is not None:
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression here and on the wrapped expression.

        A Suppress instance that is already registered is not added again.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element, then the wrapped expression."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if self is already in the chain being checked."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then check this element for recursion.

        The default list is only copied, never modified.
        """
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base class's string form, or "ClassName:(expr)" if that fails."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # the base class could not produce a string; build one below
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, str(self.expr) )
        return self.strRepr
1374
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Verify that the wrapped expression matches at loc.

        Returns the unchanged loc with an empty token list; a failure in
        tryParse() propagates to the caller.
        """
        self.expr.tryParse( instring, loc )
        return loc, []
1388
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        #~ self.leaveWhitespace()
        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.mayReturnEmpty = True
        self.errmsg = "Found unexpected token, "+str(self.expr)
        # one exception object is built here and reused for every failure
        self.myException = ParseException("",0,self.errmsg)

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed, consuming nothing, only if the wrapped expression fails at loc.

        Returns the unchanged loc with an empty token list.  Raises the
        prebuilt ParseException, updated with the current location and
        input string, when the wrapped expression does match.
        """
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression did not match: this is the success case
            pass
        else:
            #~ raise ParseException, (instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []

    def __str__( self ):
        """Return the assigned name if there is one, else "~{expr}" (cached)."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + str(self.expr) + "}"

        return self.strRepr
1425
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression as many times as it will match.

        Returns (loc, tokens); tokens is an empty list and loc is unchanged
        when there is no match at all.
        """
        tokens = []
        try:
            loc, tokens = self.expr.parse( instring, loc, doActions )
            hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
            # the loop ends when a further match attempt raises
            while 1:
                if hasIgnoreExprs:
                    loc = self.skipIgnorables( instring, loc )
                loc, tmptokens = self.expr.parse( instring, loc, doActions )
                # an empty token list may still carry keyed results
                if tmptokens or tmptokens.keys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, else "[expr]..." (cached)."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + str(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and flag the returned element to save its results as a list."""
        ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        ret.saveList = True
        return ret
1462
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression once, then as many more times as it will match.

        Returns (loc, tokens).  A failure of the first, mandatory match
        propagates to the caller; a failure of a later match ends the
        repetition.
        """
        # must be at least one
        loc, tokens = self.expr.parse( instring, loc, doActions )
        try:
            hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
            # the loop ends when a further match attempt raises
            while 1:
                if hasIgnoreExprs:
                    loc = self.skipIgnorables( instring, loc )
                loc, tmptokens = self.expr.parse( instring, loc, doActions )
                # an empty token list may still carry keyed results
                if tmptokens or tmptokens.keys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, else "{expr}..." (cached)."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + str(self.expr) + "}..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and flag the returned element to save its results as a list."""
        ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
        ret.saveList = True
        return ret
1495
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__( self, exprs, default=None ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression if possible.

        When it does not match, loc is returned unchanged together with
        [defaultValue] if a default was given, or an empty list otherwise.
        """
        try:
            loc, tokens = self.expr.parse( instring, loc, doActions )
        except (ParseException,IndexError):
            if self.defaultValue is not None:
                tokens = [ self.defaultValue ]
            else:
                tokens = []

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, else "[expr]" (cached)."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + str(self.expr) + "]"

        return self.strRepr
1526
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # attach the ignorable to a copy, leaving the caller's expression untouched
            self.expr = copy.copy( self.expr )
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.errmsg = "No match found for "+str(self.expr)
        # one exception object is built here and reused for every failure
        self.myException = ParseException("",0,self.errmsg)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time until the target expression matches.

        Returns (loc, [skippedText]).  With include set, the target is
        parsed as well: loc is then the position after it, and its tokens
        are appended as a second list element when they are non-empty.
        Raises the prebuilt ParseException if the end of the input is
        reached without a match.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        while loc < instrlen:
            try:
                expr.tryParse(instring, loc)
                if self.includeMatch:
                    skipText = instring[startLoc:loc]
                    loc,mat = expr.parse(instring,loc)
                    if mat:
                        return loc, [ skipText, mat ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ instring[startLoc:loc] ]
            except (ParseException,IndexError):
                # no match here; try again one character further on
                loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1566
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Assign the real expression ('fwd << expr') and return self."""
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        # drop any cached string form built for the previous expression
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this element only."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the wrapped expression once.

        The streamlined flag is set before descending, so a grammar that
        refers back to this element does not recurse without end.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression unless self is already in the trace.

        The default list is only copied, never modified.
        """
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name if there is one, else "Forward: <expr>"."""
        if hasattr(self,"name"):
            return self.name

        # While the wrapped expression is rendered, this object is switched
        # to _ForwardNoRecurse, so a nested reference back to it is rendered
        # by that class's __str__ instead of re-entering this method.
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = str(self.expr)
            else:
                retString = "None"
        finally:
            # always switch back, even if rendering the expression failed
            self.__class__ = Forward
        return "Forward: "+retString
1611
class _ForwardNoRecurse(Forward):
    """Stand-in class that Forward.__str__ assigns to an instance while it renders the wrapped expression."""
    def __str__( self ):
        # fixed placeholder; does not look at self.expr, so it cannot recurse
        return "..."
1615
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseExpression, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but not forwarded to the base class
        super(TokenConverter,self).__init__( expr )#, savelist )
1621
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens."""
    def postParse( self, instring, loc, tokenlist ):
        """Return a list holding the upper-cased form of every token."""
        return [ tok.upper() for tok in tokenlist ]
1627
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.joinString = joinString

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with one concatenated string token.

        The result starts as a copy of tokenlist with its list items
        removed, so that only the single joined string is added back.  It
        is wrapped in a list when this element has a results name and the
        result reports keys.
        """
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        else:
            return retToks
1651
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the matched tokens in a one-element list."""
        return [ tokenlist ]
1660
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a keyed entry to tokenlist for each of its items.

        The key is the stripped string form of the item's first token.  The
        value is "" for a one-token item, the second token for a two-token
        item, and otherwise a copy of the item without its first token.
        Each value is stored together with the item's index.
        """
        for i,tok in enumerate(tokenlist):
            ikey = str(tok[0]).strip()
            if len(tok)==1:
                tokenlist[ikey] = ("",i)
            elif len(tok)==2:
                tokenlist[ikey] = (tok[1],i)
            else:
                dictvalue = tok.copy() #ParseResults(i)
                del dictvalue[0]
                tokenlist[ikey] = (dictvalue,i)

        if self.resultsName:
            return [ tokenlist ]
        else:
            return tokenlist
1687
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        """Discard the matched tokens."""
        return []

    def suppress( self ):
        """Return self; this element already suppresses its results."""
        return self
1698
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing 'combine=True' in the constructor.
       If combine is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    if combine:
        return Combine( expr + ZeroOrMore( delim + expr ) ).setName(str(expr)+str(delim)+"...")
    else:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(str(expr)+str(delim)+"...")
1711
def oneOf( strs, caseless=False ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       strs is a whitespace-separated string of the alternatives; with
       caseless set, symbols are compared without regard to case and
       CaselessLiteral is used in place of Literal.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        parseElementClass = Literal

    symbols = strs.split()
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                # duplicate of the current symbol: drop it and rescan
                del symbols[i+j+1]
                break
            elif ( isequal(other[:len(cur)],cur) ):
                # a later symbol starts with the current one: move it in
                # front so that the longer symbol is tried first
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no conflict left for position i
            i += 1

    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
1741
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the Dict results can include named token
       fields.
    """
    return Dict( ZeroOrMore( Group ( key + value ) ) )
1751
# character sets for building Word expressions
alphas     = string.letters
nums       = string.digits
alphanums  = alphas + nums
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
empty      = Empty().setName("empty")

# building blocks for the quoted-string expressions
_bslash = "\\"
_escapables = "tnrfbacdeghijklmopqsuvwxyz" + _bslash
_octDigits = "01234567"
# a backslash followed by one escapable character, or by 1 to 3 octal digits
_escapedChar = ( Word( _bslash, _escapables, exact=2 ) |
                 Word( _bslash, _octDigits, min=2, max=4 ) )
_sglQuote = Literal("'")
_dblQuote = Literal('"')
dblQuotedString = Combine( _dblQuote + ZeroOrMore( CharsNotIn('\\"\n\r') | _escapedChar ) + _dblQuote ).streamline().setName("string enclosed in double quotes")
sglQuotedString = Combine( _sglQuote + ZeroOrMore( CharsNotIn("\\'\n\r") | _escapedChar ) + _sglQuote ).streamline().setName("string enclosed in single quotes")
quotedString = ( dblQuotedString | sglQuotedString ).setName("quotedString using single or double quotes")

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Combine( Literal("/*") +
                         ZeroOrMore( CharsNotIn("*") | ( "*" + ~Literal("/") ) ) +
                         Literal("*/") ).streamline().setName("cStyleComment enclosed in /* ... */")
htmlComment = Combine( Literal("<!--") + ZeroOrMore( CharsNotIn("-") |
                       (~Literal("-->") + Literal("-").leaveWhitespace() ) ) +
                       Literal("-->") ).streamline().setName("htmlComment enclosed in <!-- ... -->")
restOfLine = Optional( CharsNotIn( "\n\r" ), default="" ).setName("rest of line up to \\n").leaveWhitespace()

# comma-separated items: runs of non-comma printables, optionally joined by
# spaces/tabs that are not followed by a comma or the end of the line
_noncomma = "".join( [ c for c in printables if c != "," ] )
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
1783
if __name__ == "__main__":
1785
def test( teststring ):
1786
print teststring,"->",
1788
tokens = simpleSQL.parseString( teststring )
1789
tokenlist = tokens.asList()
1791
print "tokens = ", tokens
1792
print "tokens.columns =", tokens.columns
1793
print "tokens.tables =", tokens.tables
1794
print tokens.asXML("SQL",True)
1795
except ParseException, err:
1797
print " "*(err.column-1) + "^"
1801
selectToken = CaselessLiteral( "select" )
1802
fromToken = CaselessLiteral( "from" )
1804
ident = Word( alphas, alphanums + "_$" )
1805
columnName = Upcase( delimitedList( ident, ".", combine=True ) )
1806
columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
1807
tableName = Upcase( delimitedList( ident, ".", combine=True ) )
1808
tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
1809
simpleSQL = ( selectToken + \
1810
( '*' | columnNameList ).setResultsName( "columns" ) + \
1812
tableNameList.setResultsName( "tables" ) )
1814
test( "SELECT * from XYZZY, ABC" )
1815
test( "select * from SYS.XYZZY" )
1816
test( "Select A from Sys.dual" )
1817
test( "Select AA,BB,CC from Sys.dual" )
1818
test( "Select A, B, C from Sys.dual" )
1819
test( "Select A, B, C from Sys.dual" )
1820
test( "Xelect A, B, C from Sys.dual" )
1821
test( "Select A, B, C frox Sys.dual" )
1823
test( "Select ^^^ frox Sys.dual" )
1824
test( "Select A, B, C from Sys.dual, Table2 " )