~ibmcharmers/charms/xenial/ibm-cinder-storwize-svc/trunk

The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
object with named attributes.

The pyparsing module handles some of the problems that are typically vexing when writing text parsers:

- extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)

- quoted strings

- embedded comments

"""

# Release metadata: version string, release timestamp, and author contact.
__version__ = "2.1.10"

__versionTime__ = "07 Oct 2016 01:31 UTC"

__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"

import string

from weakref import ref as wkref

import copy

import sys

import warnings

import re

import sre_constants

import collections

import pprint

import traceback

import types

from datetime import datetime

# Prefer the RLock exported by the low-level _thread module; fall back to
# the threading module when _thread cannot be imported.
try:
    from _thread import RLock
except ImportError:
    from threading import RLock

# Use the stdlib OrderedDict when available, then the third-party
# ``ordereddict`` backport; _OrderedDict is None when neither can be imported.
try:
    from collections import OrderedDict as _OrderedDict
except ImportError:
    try:
        from ordereddict import OrderedDict as _OrderedDict
    except ImportError:
        _OrderedDict = None

#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )

# Public API of the module: the names exported by ``from pyparsing import *``.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common',
]

115

116

# Interpreter detection and Python 2/3 compatibility aliases.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
           str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
           then < returns the unicode object | encodes it with the default encoding | ... >.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # Else encode it
            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # raw string: \d is a regex escape, not a Python string escape
            xmlcharref = Regex(r'&#\d+;')
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(ret)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__,fname))
        except AttributeError:
            continue

# type object of a generator expression, used for isinstance checks
_generatorType = type((y for y in range(1)))

161

162

def _xml_escape(data):
    """Escape &, <, >, ", ', etc. in a string of data.

    Each of the five characters is replaced by its named XML entity
    (``&amp;``, ``&gt;``, ``&lt;``, ``&quot;``, ``&apos;``) and the
    resulting string is returned.
    """

    # ampersand must be replaced first
    from_symbols = '&><"\''
    to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
    for from_,to_ in zip(from_symbols, to_symbols):
        data = data.replace(from_, to_)
    return data

171

172

class _Constants(object):
    """Empty class; it defines no attributes or methods of its own."""
    pass

174

175

# Character-set constants.
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
# every printable ASCII character that is not whitespace
printables = "".join(c for c in string.printable if c not in string.whitespace)

181

182

class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        # When only one positional argument is given it is treated as the
        # message and the parsed string is recorded as empty.
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException 
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        elif aname in ("col", "column"):
            return col( self.loc, self.pstr )
        elif aname == "line":
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join((line_str[:line_column],
                                markerString, line_str[line_column:]))
        return line_str.strip()

    def __dir__(self):
        # advertise the computed attributes served by __getattr__
        return "lineno col line".split() + dir(type(self))

237

238

class ParseException(ParseBaseException):
    """
    Exception thrown when parse expressions don't match class;
    supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
    pass

258

259

class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
    pass

263

264

class ParseSyntaxException(ParseFatalException):
    """just like L{ParseFatalException}, but thrown internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop 
       immediately because an unbacktrackable syntax error has been found"""
    pass

269

270

#~ class ReparseException(ParseBaseException):

271

#~ """Experimental class - parse actions can raise this exception to cause

272

#~ pyparsing to reparse the input string:

273

#~ - with a modified input string, and/or

274

#~ - with a modified start location

275

#~ Set the values of the ReparseException in the constructor, and raise the

276

#~ exception in a parse action to cause pyparsing to use the new string/location.

277

#~ Setting the values as None causes no change to be made.

278

#~ """

279

#~ def __init_( self, newstring, restartLoc ):

280

#~ self.newParseText = newstring

281

#~ self.reparseLoc = restartLoc

282

283

class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple so that a tuple-valued trace is
        # formatted as a single value instead of being unpacked by '%'.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)

290

291

class _ParseResultsWithOffset(object):
    """Pair of values stored as a 2-tuple and read by index (0 or 1).

    C{repr()} shows only the first value; C{setOffset} replaces the second.
    """
    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        return self.tup[i]

    def __repr__(self):
        return repr(self.tup[0])

    def setOffset(self,i):
        self.tup = (self.tup[0],i)

300

301

class ParseResults(object):
    """
    Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})

    Example::
        integer = Word(nums)
        date_str = (integer.setResultsName("year") + '/'
                        + integer.setResultsName("month") + '/'
                        + integer.setResultsName("day"))
        # equivalent form:
        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        # parseString returns a ParseResults object
        result = date_str.parseString("1999/12/31")

        def test(s, fn=repr):
            print("%s -> %s" % (s, fn(eval(s))))
        test("list(result)")
        test("result[0]")
        test("result['month']")
        test("result.day")
        test("'month' in result")
        test("'minutes' in result")
        test("result.dump()", str)
    prints::
        list(result) -> ['1999', '/', '12', '/', '31']
        result[0] -> '1999'
        result['month'] -> '12'
        result.day -> '31'
        'month' in result -> True
        'minutes' in result -> False
        result.dump() -> ['1999', '/', '12', '/', '31']
        - day: 31
        - month: 12
        - year: 1999
    """
    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
        # Passing an existing instance returns that same instance; the
        # __doinit flag tells __init__ whether initialization is still needed.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # int/slice -> list access; anything else -> named-result access
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,(int,slice)):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ):
        return len( self.__toklist )

    def __bool__(self):
        return ( not not self.__toklist )

    __nonzero__ = __bool__

    def __iter__( self ):
        return iter( self.__toklist )

    def __reversed__( self ):
        return iter( self.__toklist[::-1] )

    def _iterkeys( self ):
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def _itervalues( self ):
        return (self[k] for k in self._iterkeys())

    def _iteritems( self ):
        return ((k, self[k]) for k in self._iterkeys())

    if PY_3:
        keys = _iterkeys
        """Returns an iterator of all named result keys (Python 3.x only)."""

        values = _itervalues
        """Returns an iterator of all named result values (Python 3.x only)."""

        items = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""

    else:
        iterkeys = _iterkeys
        """Returns an iterator of all named result keys (Python 2.x only)."""

        itervalues = _itervalues
        """Returns an iterator of all named result values (Python 2.x only)."""

        iteritems = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

        def keys( self ):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """
        Removes and returns item at specified index (default=C{last}).
        Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
        argument or an integer argument, it will use C{list} semantics
        and pop tokens from the list of parsed tokens. If passed a
        non-integer argument (most likely a string), it will use C{dict}
        semantics and pop the corresponding value from any defined
        results names. A second default return value argument is
        supported, just as in C{dict.pop()}.

        Example::
            def remove_first(tokens):
                tokens.pop(0)
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

            label = Word(alphas)
            patt = label("LABEL") + OneOrMore(Word(nums))
            print(patt.parseString("AAB 123 321").dump())

            # Use pop() in a parse action to remove named result (note that corresponding value is not
            # removed from list form of results)
            def remove_LABEL(tokens):
                tokens.pop("LABEL")
                return tokens
            patt.addParseAction(remove_LABEL)
            print(patt.parseString("AAB 123 321").dump())
        prints::
            ['AAB', '123', '321']
            - LABEL: AAB

            ['AAB', '123', '321']
        """
        if not args:
            args = [-1]
        for k,v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """
        Returns named result matching the given key, or if there is no
        such name, then returns the given C{defaultValue} or C{None} if no
        C{defaultValue} is specified.

        Similar to C{dict.get()}.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString("1999/12/31")
            print(result.get("year")) # -> '1999'
            print(result.get("hour", "not specified")) # -> 'not specified'
            print(result.get("hour")) # -> None
        """
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """
        Inserts new element at location index in the list of parsed tokens.

        Similar to C{list.insert()}.

        Example::
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to insert the parse location in the front of the parsed results
            def insert_locn(locn, tokens):
                tokens.insert(0, locn)
            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
        """
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name,occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """
        Add single element to end of ParseResults list of elements.

        Example::
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to compute the sum of the parsed integers, and add it to the end
            def append_sum(tokens):
                tokens.append(sum(map(int, tokens)))
            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
        """
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """
        Add sequence of elements to end of ParseResults list of elements.

        Example::
            patt = OneOrMore(Word(alphas))

            # use a parse action to append the reverse of the matched strings, to make a palindrome
            def make_palindrome(tokens):
                tokens.extend(reversed([t[::-1] for t in tokens]))
                return ''.join(tokens)
            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
        """
        if isinstance(itemseq, ParseResults):
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """
        Clear all elements and results names.
        """
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        # Attribute access falls back to named-result lookup; an unknown
        # name yields an empty string rather than raising AttributeError.
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            # shift the other results' token positions past our existing tokens
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        if isinstance(other,int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """
        Returns the parse results as a nested list of matching tokens, all converted to strings.

        Example::
            patt = OneOrMore(Word(alphas))
            result = patt.parseString("sldkj lsdkj sldkj")
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

            # Use asList() to create an actual list
            result_list = result.asList()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

    def asDict( self ):
        """
        Returns the named parse results as a nested dictionary.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

            result_dict = result.asDict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
        """
        if PY_3:
            item_fn = self.items
        else:
            item_fn = self.iteritems

        def toItem(obj):
            if isinstance(obj, ParseResults):
                if obj.haskeys():
                    return obj.asDict()
                else:
                    return [toItem(v) for v in obj]
            else:
                return obj

        return dict((k,toItem(v)) for k,v in item_fn())

    def copy( self ):
        """
        Returns a new copy of a C{ParseResults} object.
        """
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """
        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
        """
        nl = "\n"
        out = []
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        # NOTE(review): indent unit reproduced as seen; extraction may have
        # collapsed repeated spaces - confirm against upstream.
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object `sub` is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """
        Returns the results name for this token expression. Useful when several
        different expressions might match at a particular location.

        Example::
            integer = Word(nums)
            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number")
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = OneOrMore(user_data)

            result = user_info.parseString("22 111-22-3333 #221B")
            for item in result:
                print(item.getName(), ':', item[0])
        prints::
            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self, indent='', depth=0, full=True):
        """
        Diagnostic method for listing out the contents of a C{ParseResults}.
        Accepts an optional C{indent} argument so that this string can be embedded
        in a nested display of other data.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(result.dump())
        prints::
            ['12', '/', '31', '/', '1999']
            - day: 1999
            - month: 31
            - year: 12
        """
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        if full:
            # NOTE(review): the ' ' depth unit below is reproduced as seen;
            # extraction may have collapsed repeated spaces - confirm against upstream.
            if self.haskeys():
                items = sorted((str(k), v) for k,v in self.items())
                for k,v in items:
                    if out:
                        out.append(NL)
                    out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
                    if isinstance(v,ParseResults):
                        if v:
                            out.append( v.dump(indent,depth+1) )
                        else:
                            out.append(_ustr(v))
                    else:
                        out.append(repr(v))
            elif any(isinstance(vv,ParseResults) for vv in self):
                v = self
                for i,vv in enumerate(v):
                    if isinstance(vv,ParseResults):
                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
                    else:
                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """
        Pretty-printer for parsed results as a list, using the C{pprint} module.
        Accepts additional positional or keyword args as defined for the
        C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

        Example::
            ident = Word(alphas, alphanums)
            num = Word(nums)
            func = Forward()
            term = ident | num | Group('(' + func + ')')
            func <<= ident + Group(Optional(delimitedList(term)))
            result = func.parseString("fna a,b,(fnb c,d,200),100")
            result.pprint(width=40)
        prints::
            ['fna',
             ['a',
              'b',
              ['(', 'fnb', ['c', 'd', '200'], ')'],
              '100']]
        """
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __getnewargs__(self):
        return self.__toklist, self.__name, self.__asList, self.__modal

    def __dir__(self):
        return (dir(type(self)) + list(self.keys()))

942

943

# Register ParseResults as a virtual MutableMapping subclass. The ABC lives
# in collections.abc; the collections.MutableMapping alias was removed in
# Python 3.10, so fall back to it only where collections.abc is unavailable.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)

944

945

def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    s = strg
    # a location just after a newline is column 1; otherwise measure the
    # distance back to the preceding newline (rfind gives -1 when none)
    return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)

957

958

def lineno(loc, strg):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>}
    for more information on parsing strings containing C{<TAB>}s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before

969

970

def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    # rfind yields -1 when there is no earlier newline, so start becomes 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]

979

980

def _defaultStartDebugAction(instring, loc, expr):
    """Default debug action run before a match attempt: print the expression and its (line,col) position."""
    position = "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    print("Match " + _ustr(expr) + " at loc " + _ustr(loc) + position)

982

983

def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Default debug action run after a successful match: print the expression and its tokens."""
    matched_tokens = str(toks.asList())
    print("Matched " + _ustr(expr) + " -> " + matched_tokens)

985

986

def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Default debug action run after a failed match: print the exception."""
    report = "Exception raised:" + _ustr(exc)
    print(report)

988

989

def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them and returns C{None}.
    """
    return None

992

993

# Only works on Python 3.x - nonlocal is toxic to Python 2 installs

994

#~ 'decorator to trim function calls to match the arity of the target'

995

#~ def _trim_arity(func, maxargs=3):

996

#~ if func in singleArgBuiltins:

997

#~ return lambda s,l,t: func(t)

998

#~ limit = 0

999

#~ foundArity = False

1000

#~ def wrapper(*args):

1001

#~ nonlocal limit,foundArity

1002

#~ while 1:

1003

#~ try:

1004

#~ ret = func(*args[limit:])

1005

#~ foundArity = True

1006

#~ return ret

1007

#~ except TypeError:

1008

#~ if limit == maxargs or foundArity:

1009

#~ raise

1010

#~ limit += 1

1011

#~ continue

1012

#~ return wrapper

1013

1014

# this version is Python 2.x-3.x cross-compatible

1015

'decorator to trim function calls to match the arity of the target'

1016

def _trim_arity(func, maxargs=2):
    """Wrap C{func} so it can be called with more positional arguments than it accepts.

    The returned wrapper first calls C{func} with all the arguments it was
    given.  When that call itself raises C{TypeError}, the wrapper drops one
    more leading argument and retries, until more than C{maxargs} arguments
    have been dropped.  Once a call succeeds, later C{TypeError}s are
    re-raised untouched.  Callables listed in C{singleArgBuiltins} are wrapped
    so that only the third argument is passed.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists serve as mutable cells shared with wrapper()
    # (Python 2 has no 'nonlocal')
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # compare the (file, line) of the last traceback entry with the
                        # synthesized location of the func() call above; anything else
                        # was raised elsewhere and is propagated
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the traceback reference explicitly
                        del tb

                # drop one more leading argument and try again
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper

1079

1080

class ParserElement(object):

1081

"""Abstract base level parser element class."""

1082

DEFAULT_WHITE_CHARS = " \n\t\r"

1083

verbose_stacktrace = False

1084

1085

@staticmethod

1086

def setDefaultWhitespaceChars( chars ):

1087

r"""

1088

Overrides the default whitespace chars

1089

1090

Example::

1091

# default whitespace chars are space, <TAB> and newline

1092

OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']

1093

1094

# change to just treat newline as significant

1095

ParserElement.setDefaultWhitespaceChars(" \t")

1096

OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']

1097

"""

1098

ParserElement.DEFAULT_WHITE_CHARS = chars

1099

1100

@staticmethod

1101

def inlineLiteralsUsing(cls):

1102

"""

1103

Set class to be used for inclusion of string literals into a parser.

1104

1105

Example::

1106

# default literal class used is Literal

1107

integer = Word(nums)

1108

date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

1109

1110

date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']

1111

1112

1113

# change to Suppress

1114

ParserElement.inlineLiteralsUsing(Suppress)

1115

date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

1116

1117

date_str.parseString("1999/12/31") # -> ['1999', '12', '31']

1118

"""

1119

ParserElement._literalStringClass = cls

1120

1121

def __init__( self, savelist=False ):

1122

self.parseAction = list()

1123

self.failAction = None

1124

#~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall

1125

self.strRepr = None

1126

self.resultsName = None

1127

self.saveAsList = savelist

1128

self.skipWhitespace = True

1129

self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS

1130

self.copyDefaultWhiteChars = True

1131

self.mayReturnEmpty = False # used when checking for left-recursion

1132

self.keepTabs = False

1133

self.ignoreExprs = list()

1134

self.debug = False

1135

self.streamlined = False

1136

self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index

1137

self.errmsg = ""

1138

self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)

1139

self.debugActions = ( None, None, None ) #custom debug actions

1140

self.re = None

1141

self.callPreparse = True # used to avoid redundant calls to preParse

1142

self.callDuringTry = False

1143

1144

def copy(self):
    """
    Make a copy of this C{ParserElement}.  Useful for defining different parse actions
    for the same parsing pattern, using copies of the original parse element.

    The copy is shallow, except that the parse-action and ignore-expression
    lists are duplicated so they can be changed independently of the original.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    # elements still tracking the defaults pick up the current default whitespace
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate

1166

1167

def setName(self, name):
    """
    Define name for this expression, makes debugging and exception messages clearer.

    Returns C{self} so that calls can be chained.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    message = "Expected " + self.name
    self.errmsg = message
    # keep an already-built exception object in step with the new message
    if hasattr(self, "exception"):
        self.exception.msg = message
    return self

1180

1181

def setResultsName(self, name, listAllMatches=False):
    """
    Define name for referencing matching tokens as a nested attribute
    of the returned parse results.
    NOTE: this returns a *copy* of the original C{ParserElement} object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    A name ending in C{"*"} has the asterisk removed and behaves as if
    C{listAllMatches=True} had been passed.

    You can also set results names using the abbreviated syntax,
    C{expr("name")} in place of C{expr.setResultsName("name")} -
    see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    clone = self.copy()
    if name.endswith("*"):
        name, listAllMatches = name[:-1], True
    clone.resultsName = name
    clone.modalResults = not listAllMatches
    return clone

1208

1209

def setBreak(self, breakFlag=True):
    """Method to invoke the Python pdb debugger when this element is
    about to be parsed. Set C{breakFlag} to True to enable, False to
    disable.

    Enabling replaces C{self._parse} with a wrapper that enters pdb and then
    delegates to the previous parse method; disabling restores the method
    remembered by such a wrapper, if one is installed.  Returns C{self}.
    """
    if not breakFlag:
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped(instring, loc, doActions, callPreParse)

    # remember what was wrapped so that setBreak(False) can undo it
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self

1226

1227

def setParseAction(self, *fns, **kwargs):
    """
    Define action to perform when successfully matching parse element definition.
    Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Any parse actions set previously are replaced.  Returns C{self}.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # every action is wrapped so it can always be invoked as fn(s, loc, toks)
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self

1264

1265

def addParseAction(self, *fns, **kwargs):
    """
    Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.

    Unlike C{setParseAction}, existing actions are kept and C{callDuringTry}
    is only changed when it is not already set.  Returns C{self}.

    See examples in L{I{copy}<copy>}.
    """
    wrapped_actions = [_trim_arity(fn) for fn in fns]
    self.parseAction += wrapped_actions
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self

1274

1275

def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Each predicate becomes its own parse action, so when several predicates
    are given all of them are checked.  Returns C{self}.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = (default=C{False}) indicate if the condition should be run during lookaheads and alternate testing

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(condition):
        # Build the action in its own scope so that it keeps *this* predicate.
        # A closure defined directly inside the loop would look up the loop
        # variable when called and every action would test the last predicate.
        def pa(s, l, t):
            if not bool(condition(s, l, t)):
                raise exc_type(s, l, msg)
        return pa

    for fn in fns:
        # wrap the predicate once here rather than on every parse attempt
        self.parseAction.append(make_condition_action(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self

1301

1302

def setFailAction(self, fn):
    """Define action to perform if parsing fails at this expression.
    Fail action fn is a callable function that takes the arguments
    C{fn(s,loc,expr,err)} where:
     - s = string being parsed
     - loc = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err = the exception thrown
    The function returns no value.  It may throw C{L{ParseFatalException}}
    if it is desired to stop parsing immediately.

    Returns C{self} so that calls can be chained.
    """
    self.failAction = fn
    return self

1314

1315

def _skipIgnorables( self, instring, loc ):

1316

exprsFound = True

1317

while exprsFound:

1318

exprsFound = False

1319

for e in self.ignoreExprs:

1320

try:

1321

while 1:

1322

loc,dummy = e._parse( instring, loc )

1323

exprsFound = True

1324

except ParseException:

1325

pass

1326

return loc

1327

1328

def preParse(self, instring, loc):
    """Return the location at which matching should start.

    Ignorable expressions are skipped first (when any are defined), then
    leading characters found in C{self.whiteChars} are skipped unless
    C{self.skipWhitespace} is false.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc

1339

1340

def parseImpl(self, instring, loc, doActions=True):
    """Base implementation of the match step: consume nothing and return C{(loc, [])}."""
    tokens = []
    return loc, tokens

1342

1343

def postParse(self, instring, loc, tokenlist):
    """Base implementation of the post-match hook: return C{tokenlist} unchanged."""
    return tokenlist

1345

1346

#~ @profile

1347

def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at C{loc} and return C{(new location, ParseResults)}.

    Runs, in order: the optional pre-parse step, C{parseImpl}, C{postParse},
    and then the parse actions (only when C{doActions} or
    C{self.callDuringTry} is set).  A parse action that returns something
    other than C{None} replaces the results.  An C{IndexError} escaping from
    C{parseImpl} is reported as a C{ParseException} at the end of the input.
    The debug and fail callbacks are only consulted on the slower first
    branch, taken when debugging is on or a fail action is set.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slow path: report the attempt and any failure to the callbacks
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no callbacks to notify
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only pay for the IndexError guard where it can actually trigger
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            # same loop as below, but failures are reported to the exception debug action
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a non-None return value replaces the results seen by later actions
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens

1418

1419

def tryParse(self, instring, loc):
    """Attempt a match at C{loc} with parse actions disabled and return the end location.

    A C{ParseFatalException} raised by the attempt is converted into an
    ordinary C{ParseException} at C{loc}.
    """
    try:
        outcome = self._parse(instring, loc, doActions=False)
        return outcome[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)

1424

1425

def canParseNext(self, instring, loc):

1426

try:

1427

self.tryParse(instring, loc)

1428

except (ParseException, IndexError):

1429

return False

1430

else:

1431

return True

1432

1433

class _UnboundedCache(object):

1434

def __init__(self):

1435

cache = {}

1436

self.not_in_cache = not_in_cache = object()

1437

1438

def get(self, key):

1439

return cache.get(key, not_in_cache)

1440

1441

def set(self, key, value):

1442

cache[key] = value

1443

1444

def clear(self):

1445

cache.clear()

1446

1447

self.get = types.MethodType(get, self)

1448

self.set = types.MethodType(set, self)

1449

self.clear = types.MethodType(clear, self)

1450

1451

if _OrderedDict is not None:

1452

class _FifoCache(object):

1453

def __init__(self, size):

1454

self.not_in_cache = not_in_cache = object()

1455

1456

cache = _OrderedDict()

1457

1458

def get(self, key):

1459

return cache.get(key, not_in_cache)

1460

1461

def set(self, key, value):

1462

cache[key] = value

1463

if len(cache) > size:

1464

cache.popitem(False)

1465

1466

def clear(self):

1467

cache.clear()

1468

1469

self.get = types.MethodType(get, self)

1470

self.set = types.MethodType(set, self)

1471

self.clear = types.MethodType(clear, self)

1472

1473

else:

1474

class _FifoCache(object):

1475

def __init__(self, size):

1476

self.not_in_cache = not_in_cache = object()

1477

1478

cache = {}

1479

key_fifo = collections.deque([], size)

1480

1481

def get(self, key):

1482

return cache.get(key, not_in_cache)

1483

1484

def set(self, key, value):

1485

cache[key] = value

1486

if len(cache) > size:

1487

cache.pop(key_fifo.popleft(), None)

1488

key_fifo.append(key)

1489

1490

def clear(self):

1491

cache.clear()

1492

key_fifo.clear()

1493

1494

self.get = types.MethodType(get, self)

1495

self.set = types.MethodType(set, self)

1496

self.clear = types.MethodType(clear, self)

1497

1498

# argument cache for optimizing repeated calls when backtracking through recursive expressions

1499

packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail

1500

packrat_cache_lock = RLock()

1501

packrat_cache_stats = [0, 0]

1502

1503

# this method gets repeatedly called during backtracking with the same arguments -

1504

# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression

1505

def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing front end for C{_parseNoCache}.

    Results and parse exceptions are cached under the key
    C{(self, instring, loc, callPreParse, doActions)}.  A cached exception is
    raised again; a cached result is returned with a copy of its tokens.
    The whole lookup runs while holding C{packrat_cache_lock}.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        memo = ParserElement.packrat_cache
        cached = memo.get(key)
        if cached is not memo.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            memo.set(key, pe.__class__(*pe.args))
            raise
        # store a copy of the tokens; the caller receives the original result
        memo.set(key, (result[0], result[1].copy()))
        return result

# parsing is uncached until enablePackrat() swaps in _parseCache
_parse = _parseNoCache

1529

1530

@staticmethod

1531

def resetCache():

1532

ParserElement.packrat_cache.clear()

1533

ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)

1534

1535

_packratEnabled = False
@staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Memoizing is done of
    both valid results and parsing exceptions.

    Parameters:
     - cache_size_limit - (default=C{128}) - if an integer value is provided
       will limit the size of the packrat cache; if None is passed, then
       the cache size will be unbounded; if 0 is passed, the cache will
       be effectively disabled.

    Only the first call has any effect; later calls return without
    changing the cache.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the class method C{ParserElement.enablePackrat()}.  If
    your program uses C{psyco} to "compile as you go", you must call
    C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
    Python will crash.  For best results, call C{enablePackrat()} immediately
    after importing pyparsing.

    Example::
        import pyparsing
        pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        ParserElement.packrat_cache = ParserElement._UnboundedCache()
    else:
        ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
    ParserElement._parse = ParserElement._parseCache

1570

1571

def parseString(self, instring, parseAll=False):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            # nothing but skippable text may remain after the match
            loc = self.preParse(instring, loc)
            end_of_text = Empty() + StringEnd()
            end_of_text._parse(instring, loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens

1620

1621

def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator: each match is yielded as a
    C{(tokens, start, end)} tuple as soon as it is found.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character past the pre-parsed location
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (pre-parsed loc) and 'nextLoc' (end of
                        # the match) are different variables - confirm that jumping to
                        # the end of the match here is the intended overlap behaviour
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not advance past loc: step forward to avoid looping forever
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc

1691

1692

def transformString(self, instring):
    """
    Extension to C{L{scanString}}, to modify matching text with modified tokens that may
    be returned from a parse action.  To use C{transformString}, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  C{transformString()} returns the resulting transformed string.

    Note that calling this method sets C{keepTabs} on the expression.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString(instring):
            # untouched text between the previous match and this one
            pieces.append(instring[prev_end:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces += toks.asList()
                elif isinstance(toks, list):
                    pieces += toks
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        pieces = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(pieces)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc

1734

1735

def searchString(self, instring, maxMatches=_MAX_INT):
    """
    Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression.  May be called with optional
    C{maxMatches} argument, to clip searching after 'n' matches are found.

    The tokens of every match are collected into a single C{ParseResults};
    the match locations are discarded.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
    prints::
        ['More', 'Iron', 'Lead', 'Gold', 'I']
    """
    try:
        found = [tokens for tokens, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc

1757

1758

def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    The text before each match is yielded first, then (optionally) the first
    token of the match, and finally the text after the last match.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    last = 0
    for t, s, e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    yield instring[last:]

1779

1780

def __add__(self, other):
    """
    Implementation of + operator - returns C{L{And}}. A string operand is first
    converted using C{ParserElement._literalStringClass} (L{Literal} by default).
    If the operand is not a C{ParserElement} after that conversion, a
    C{SyntaxWarning} is issued and C{None} is returned.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    message = "Cannot combine element of type %s with ParserElement" % type(other)
    warnings.warn(message, SyntaxWarning, stacklevel=2)
    return None

1799

1800

def __radd__(self, other):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted with C{ParserElement._literalStringClass}; any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other + self
    message = "Cannot combine element of type %s with ParserElement" % type(other)
    warnings.warn(message, SyntaxWarning, stacklevel=2)
    return None

1811

1812

def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop, i.e. an
    C{And} of this expression, an C{And._ErrorStop()} marker, and C{other}.
    A string operand is converted with C{ParserElement._literalStringClass}; any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    message = "Cannot combine element of type %s with ParserElement" % type(other)
    warnings.warn(message, SyntaxWarning, stacklevel=2)
    return None

1823

1824

def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted with C{ParserElement._literalStringClass}; any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other - self
    message = "Cannot combine element of type %s with ParserElement" % type(other)
    warnings.warn(message, SyntaxWarning, stacklevel=2)
    return None

1835

1836

def __mul__(self,other):

1837

"""

1838

Implementation of * operator, allows use of C{expr * 3} in place of

1839

C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer

1840

tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples

1841

may also include C{None} as in:

1842

- C{expr*(n,None)} or C{expr*(n,)} is equivalent

1843

to C{expr*n + L{ZeroOrMore}(expr)}

1844

(read as "at least n instances of C{expr}")

1845

- C{expr*(None,n)} is equivalent to C{expr*(0,n)}

1846

(read as "0 to n instances of C{expr}")

1847

- C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}

1848

- C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

1849

1850

Note that C{expr*(None,n)} does not raise an exception if

1851

more than n exprs exist in the input stream; that is,

1852

C{expr*(None,n)} does not enforce a maximum number of expr

1853

occurrences. If this behavior is desired, then write

1854

C{expr*(None,n) + ~expr}

1855

"""

1856

if isinstance(other,int):

1857

minElements, optElements = other,0

1858

elif isinstance(other,tuple):

1859

other = (other + (None, None))[:2]

1860

if other[0] is None:

1861

other = (0, other[1])

1862

if isinstance(other[0],int) and other[1] is None:

1863

if other[0] == 0:

1864

return ZeroOrMore(self)

1865

if other[0] == 1:

1866

return OneOrMore(self)

1867

else:

1868

return self*other[0] + ZeroOrMore(self)

1869

elif isinstance(other[0],int) and isinstance(other[1],int):

1870

minElements, optElements = other

1871

optElements -= minElements

1872

else:

1873

raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))

1874

else:

1875

raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))

1876

1877

if minElements < 0:

1878

raise ValueError("cannot multiply ParserElement by negative value")

1879

if optElements < 0:

1880

raise ValueError("second tuple value must be greater or equal to first tuple value")

1881

if minElements == optElements == 0:

1882

raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

1883

1884

if (optElements):

1885

def makeOptionalList(n):

1886

if n>1:

1887

return Optional(self + makeOptionalList(n-1))

1888

else:

1889

return Optional(self)

1890

if minElements:

1891

if minElements == 1:

1892

ret = self + makeOptionalList(optElements)

1893

else:

1894

ret = And([self]*minElements) + makeOptionalList(optElements)

1895

else:

1896

ret = makeOptionalList(optElements)

1897

else:

1898

if minElements == 1:

1899

ret = self

1900

else:

1901

ret = And([self]*minElements)

1902

return ret

1903

1904

def __rmul__(self, other):

1905

return self.__mul__(other)

1906

1907

def __or__(self, other):
    """
    Implementation of | operator - returns C{L{MatchFirst}}.
    A string operand is converted with C{ParserElement._literalStringClass}; any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    message = "Cannot combine element of type %s with ParserElement" % type(other)
    warnings.warn(message, SyntaxWarning, stacklevel=2)
    return None

1918

1919

def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted with C{ParserElement._literalStringClass}; any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other | self
    message = "Cannot combine element of type %s with ParserElement" % type(other)
    warnings.warn(message, SyntaxWarning, stacklevel=2)
    return None

1930

1931

def __xor__(self, other):
    """
    Implementation of ^ operator - returns C{L{Or}}.
    A string operand is converted with C{ParserElement._literalStringClass}; any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    message = "Cannot combine element of type %s with ParserElement" % type(other)
    warnings.warn(message, SyntaxWarning, stacklevel=2)
    return None

1942

1943

def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted with C{ParserElement._literalStringClass}; any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other ^ self
    message = "Cannot combine element of type %s with ParserElement" % type(other)
    warnings.warn(message, SyntaxWarning, stacklevel=2)
    return None

1954

1955

def __and__(self, other):
    """
    Implementation of & operator - returns C{L{Each}}.
    A string operand is converted with C{ParserElement._literalStringClass}; any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    message = "Cannot combine element of type %s with ParserElement" % type(other)
    warnings.warn(message, SyntaxWarning, stacklevel=2)
    return None

1966

1967

def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted with C{ParserElement._literalStringClass}; any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other & self
    message = "Cannot combine element of type %s with ParserElement" % type(other)
    warnings.warn(message, SyntaxWarning, stacklevel=2)
    return None

1978

1979

def __invert__(self):
    """
    Implementation of ~ operator - wraps this expression in a C{L{NotAny}}.
    """
    negated = NotAny(self)
    return negated

1984

1985

def __call__(self, name=None):

1986

"""

1987

Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

1988

1989

If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be

1990

passed as C{True}.

1991

1992

If C{name} is omitted, same as calling C{L{copy}}.

1993

1994

Example::

1995

# these are equivalent

1996

userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")

1997

userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

1998

"""

1999

if name is not None:

2000

return self.setResultsName(name)

2001

else:

2002

return self.copy()

2003

2004

def suppress(self):
    """
    Wraps this C{ParserElement} in a C{L{Suppress}} so that its output is
    suppressed; useful to keep punctuation from cluttering up returned output.
    """
    suppressed = Suppress(self)
    return suppressed

2010

2011

def leaveWhitespace(self):
    """
    Turns off whitespace skipping ahead of this C{ParserElement}'s defined
    pattern by clearing C{skipWhitespace}, and returns C{self} for chaining.
    This is normally only used internally by the pyparsing module, but may be
    needed in some whitespace-sensitive grammars.
    """
    setattr(self, "skipWhitespace", False)
    return self

2019

2020

def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars for this element: stores C{chars} as
    C{whiteChars}, enables whitespace skipping, and clears C{copyDefaultWhiteChars}.
    Returns C{self} for chaining.
    """
    self.whiteChars = chars
    self.copyDefaultWhiteChars = False
    self.skipWhitespace = True
    return self

2028

2029

def parseWithTabs(self):
    """
    Overrides the default behavior of expanding C{<TAB>}s to spaces before parsing
    the input string, by setting C{keepTabs} on this element. Must be called before
    C{parseString} when the input grammar contains elements that match C{<TAB>}
    characters. Returns C{self} for chaining.
    """
    setattr(self, "keepTabs", True)
    return self

2037

2038

def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns. A string is wrapped in C{Suppress}; a C{Suppress}
    expression is added only if not already present; any other expression is
    copied and wrapped in C{Suppress}. Returns C{self} for chaining.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self

2060

2061

def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching, using
    the given callbacks. Any action that is falsy (e.g. C{None}) is replaced by
    the corresponding module default debug action. Returns C{self} for chaining.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self

2070

2071

def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable (installing the default debug actions),
    False to disable. Returns C{self} for chaining.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using L{setDebugActions}. Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        self.debug = False
    else:
        self.setDebugActions(_defaultStartDebugAction,
                             _defaultSuccessDebugAction,
                             _defaultExceptionDebugAction)
    return self

2111

2112

def __str__( self ):

2113

return self.name

2114

2115

def __repr__(self):
    """Return the C{_ustr} conversion of this element."""
    text = _ustr(self)
    return text

2117

2118

def streamline(self):
    """
    Mark this element as streamlined and reset C{strRepr} to C{None}.
    Returns C{self} for chaining.
    """
    self.strRepr = None
    self.streamlined = True
    return self

2122

2123

def checkRecursion(self, parseElementList):
    """No-op in this implementation; always returns C{None}."""
    return None

2125

2126

def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted but not used by this implementation; its
    default is C{None} rather than a shared mutable list. The check itself is
    delegated to C{checkRecursion}, started with an empty element list.
    """
    self.checkRecursion([])

2131

2132

def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    The argument's C{read()} method is tried first; if that raises
    C{AttributeError} the argument is treated as a filename, and the entire
    file is opened, read, and closed before parsing.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as err:
        if not ParserElement.verbose_stacktrace:
            # re-raise from here so pyparsing's internal frames are cleared from the trace
            raise err
        raise

2151

2152

def __eq__(self, other):
    """
    Compare with another object: two C{ParserElement}s are equal when they are
    the same object or have equal instance attributes; a string is equal when
    this expression C{matches} it; anything else is compared through C{super}.
    """
    if isinstance(other, ParserElement):
        if self is other:
            return True
        return vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement, self) == other

2159

2160

def __ne__(self,other):

2161

return not (self == other)

2162

2163

def __hash__(self):

2164

return hash(id(self))

2165

2166

def __req__(self,other):

2167

return self == other

2168

2169

def __rne__(self,other):

2170

return not (self == other)

2171

2172

def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.
    Returns C{True} if parsing succeeds and C{False} if a C{ParseBaseException}
    is raised.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True

2190

2191

def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and the results contain a list of
    (test string, parse results or exception) pairs, one per test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)

    allResults = []
    pending_comments = []
    success = True
    for t in tests:
        # comment lines, and blank lines that follow comments, are buffered
        # and emitted as the heading of the next real test
        if (comment is not None and comment.matches(t, False)) or (pending_comments and not t):
            pending_comments.append(t)
            continue
        if not t:
            continue

        out = ['\n'.join(pending_comments), t]
        pending_comments = []
        try:
            # a literal backslash-n in the test text stands for a real newline
            t = t.replace(r'\n','\n')
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multi-line input: show the offending line, with the caret under its column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults

2321

2322

2323

class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass that serves as the base for atomic matching patterns.
    """
    def __init__(self):
        # the base ParserElement is initialized with savelist turned off
        super(Token, self).__init__(savelist=False)

2329

2330

2331

class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"

2340

2341

2342

class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally fails at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure

2355

2356

2357

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string turns this instance into an Empty token
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        quoted = '"%s"' % _ustr(self.match)
        self.name = quoted
        self.errmsg = "Expected " + quoted
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        if (instring[loc] != self.firstMatchChar or
                not (self.matchLen == 1 or instring.startswith(self.match, loc))):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

_L = Literal
ParserElement._literalStringClass = Literal

2397

2398

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # NOTE(review): only a warning is issued here, so firstMatchChar stays
            # unset for an empty keyword - confirm whether that is intended
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        quoted = '"%s"' % self.match
        self.name = quoted
        self.errmsg = "Expected " + quoted
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if self.caseless:
            if instring[loc:end].upper() == self.caselessmatch:
                # must not be followed, nor preceded, by an identifier character
                if loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars:
                    if loc == 0 or instring[loc-1].upper() not in self.identChars:
                        return end, self.match
        else:
            if instring[loc] == self.firstMatchChar:
                if self.matchLen == 1 or instring.startswith(self.match, loc):
                    # must not be followed, nor preceded, by an identifier character
                    if loc >= len(instring)-self.matchLen or instring[end] not in self.identChars:
                        if loc == 0 or instring[loc-1] not in self.identChars:
                            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        duplicate = super(Keyword, self).copy()
        # NOTE(review): the copy's identChars is reset to the class-level default
        # string rather than this instance's set - confirm this is intended
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

2462

2463

class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the base Literal stores the upper-cased text for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        quoted = "'%s'" % self.returnString
        self.name = quoted
        self.errmsg = "Expected " + quoted

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2485

2486

class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__(self, matchString, identChars=None):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() == self.caselessmatch:
            # accept only if at end of input or not followed by an identifier character
            if loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars:
                return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)

2503

2504

class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """
        Compare C{match_string} against the input starting at C{loc}.

        Returns C{(end_loc, results)} when no more than C{maxMismatches}
        characters differ, where C{end_loc} is the input location just past the
        compared text; raises C{ParseException} at C{loc} otherwise, including
        when fewer than C{len(match_string)} characters remain in the input.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches

            for offset, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(offset)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # the end location must be relative to the input, not to the
                # match string: the whole candidate start..maxloc was compared
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

2563

2564

2565

class Word(Token):

2566

"""

2567

Token for matching words composed of allowed character sets.

2568

Defined with string containing all allowed initial characters,

2569

an optional string containing allowed body characters (if omitted,

2570

defaults to the initial character set), and an optional minimum,

2571

maximum, and/or exact length. The default value for C{min} is 1 (a

2572

minimum value < 1 is not valid); the default values for C{max} and C{exact}

2573

are 0, meaning no maximum or exact length restriction. An optional

2574

C{excludeChars} parameter can list characters that might be found in

2575

the input C{bodyChars} string; useful to define a word of all printables

2576

except for one or two characters, for instance.

2577

2578

L{srange} is useful for defining custom character set strings for defining

2579

C{Word} expressions, using range notation from regular expression character sets.

2580

2581

A common mistake is to use C{Word} to match a specific literal string, as in

2582

C{Word("Address")}. Remember that C{Word} uses the string argument to define

2583

I{sets} of matchable characters. This expression would match "Add", "AAA",

2584

"dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.

2585

To match an exact literal string, use L{Literal} or L{Keyword}.

2586

2587

pyparsing includes helper strings for building Words:

2588

- L{alphas}

2589

- L{nums}

2590

- L{alphanums}

2591

- L{hexnums}

2592

- L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)

2593

- L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)

2594

- L{printables} (any non-whitespace character)

2595

2596

Example::

2597

# a word composed of digits

2598

integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

2599

2600

# a word with a leading capital, and zero or more lowercase

2601

capital_word = Word(alphas.upper(), alphas.lower())

2602

2603

# hostnames are alphanumeric, with leading alpha, and '-'

2604

hostname = Word(alphas, alphanums+'-')

2605

2606

# roman numeral (not a strict parser, accepts invalid mix of characters)

2607

roman = Word("IVXLCDM")

2608

2609

# any string of non-whitespace characters, except for ','

2610

csv_value = Word(printables, excludeChars=",")

2611

"""

2612

def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """
    Configure the character sets and length limits for this C{Word}.

    Parameters:
     - initChars - characters allowed as the first character of the word
     - bodyChars - characters allowed after the first one; when omitted or empty,
       C{initChars} is used for the body as well
     - min - minimum length; values below 1 raise C{ValueError}
     - max - maximum length; 0 means no maximum
     - exact - exact length; when > 0 it overrides both C{min} and C{max}
     - asKeyword - when true, the word must not be adjacent to other body characters
     - excludeChars - characters removed from C{initChars} and C{bodyChars}
    """
    super(Word,self).__init__()

    # strip excluded characters from both sets before anything is stored
    if excludeChars:
        initChars = ''.join(ch for ch in initChars if ch not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(ch for ch in bodyChars if ch not in excludeChars)

    # an absent/empty body set falls back to the initial characters
    bodySource = bodyChars if bodyChars else initChars
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    self.bodyCharsOrig = bodySource
    self.bodyChars = set(bodySource)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT
    if exact > 0:
        self.maxLen = self.minLen = exact

    # the name is derived from the character sets, so it must be computed after them
    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # with default length limits and no space in either set, matching is
    # delegated to an equivalent compiled regular expression
    defaultLengths = (min==1 and max==0 and exact==0)
    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and defaultLengths:
        initOrig = self.initCharsOrig
        bodyOrig = self.bodyCharsOrig
        if bodyOrig == initOrig:
            regexText = "[%s]+" % _escapeRegexRangeChars(initOrig)
        elif len(initOrig) == 1:
            regexText = "%s[%s]*" % (re.escape(initOrig),
                                     _escapeRegexRangeChars(bodyOrig),)
        else:
            regexText = "[%s][%s]*" % (_escapeRegexRangeChars(initOrig),
                                       _escapeRegexRangeChars(bodyOrig),)
        if self.asKeyword:
            regexText = r"\b" + regexText + r"\b"
        self.reString = regexText
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # best effort: fall back to the character-by-character matcher
            self.re = None

2665

2666

def parseImpl( self, instring, loc, doActions=True ):
    """
    Match a word at C{loc}; return C{(new_loc, matched_text)} or raise C{ParseException}.

    Uses the precompiled regular expression when one is available, otherwise
    scans character by character against the init/body character sets.
    """
    if self.re:
        found = self.re.match(instring,loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)

    if instring[ loc ] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    loc += 1
    total = len(instring)
    body = self.bodyChars
    # never scan past the maximum word length or the end of the input
    limit = min( begin + self.maxLen, total )
    while loc < limit and instring[loc] in body:
        loc += 1

    tooShort = loc - begin < self.minLen
    # an explicit max was given but the word keeps going past it
    exceedsMax = self.maxSpecified and loc < total and instring[loc] in body
    # keyword mode: reject when a body character touches either end of the match
    touchesBody = self.asKeyword and (
        (begin>0 and instring[begin-1] in body) or
        (loc<total and instring[loc] in body))

    if tooShort or exceedsMax or touchesBody:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]

2700

2701

def __str__( self ):
    """
    Return the base-class string form if it is available, otherwise a cached
    C{W:(...)} summary showing at most four characters of each character set.
    """
    try:
        return super(Word,self).__str__()
    except Exception:
        pass

    if self.strRepr is None:

        def abbreviate(chars):
            # show only the first four characters of long sets
            return chars[:4]+"..." if len(chars)>4 else chars

        initText = abbreviate(self.initCharsOrig)
        if self.initCharsOrig == self.bodyCharsOrig:
            self.strRepr = "W:(%s)" % initText
        else:
            self.strRepr = "W:(%s,%s)" % ( initText, abbreviate(self.bodyCharsOrig) )

    return self.strRepr

2722

2723

2724

class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    # type of a compiled regular expression, used to recognize precompiled patterns
    compiledREtype = type(re.compile("[A-Z]"))

    def __init__( self, pattern, flags=0):
        """
        The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is.
        See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled regular expression, in which case it is
        used directly and its source text is recorded as C{self.pattern}.

        Raises C{re.error} (after a C{SyntaxWarning}) for an invalid pattern string, and
        C{ValueError} when C{pattern} is neither a string nor a compiled RE object.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same exception class as sre_constants.error
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the regex source text; str(pattern) would give the
            # "re.compile(...)" repr, which is not itself a usable pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the regular expression at C{loc}; return the end location and a
        C{ParseResults} holding the matched text plus one named result per named group.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        # expose named groups as named results
        for groupName, groupValue in result.groupdict().items():
            ret[groupName] = groupValue
        return loc,ret

    def __str__( self ):
        """Return the base-class string form if available, else a cached C{Re:(...)} summary."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

2795

2796

2797

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=C{None})
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """
        Build the regular expression that recognizes the quoted string.

        Raises C{SyntaxError} (after a C{SyntaxWarning}) when C{quoteChar} or
        C{endQuoteChar} is empty once surrounding whitespace is stripped, and
        re-raises C{re.error} if the generated pattern does not compile.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # the escape character (if any) is excluded from the "ordinary character" set
        escInRange = ''
        if escChar is not None:
            escInRange = _escapeRegexRangeChars(escChar) or ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escInRange )
        else:
            # single-line strings additionally stop at newline / carriage return
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escInRange )
        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote that are
            # followed by a character that does not continue the end quote
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the same exception class as sre_constants.error
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a quoted string at C{loc}; return the end location and the matched
        text, unquoted and unescaped when C{unquoteResults} is set.
        """
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit,wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a normal string literal
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base-class string form if available, else a cached description of the quote characters."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr

2932

2933

2934

class CharsNotIn(Token):
    """
    Token for matching words composed of characters I{not} in a given set (will
    include whitespace in matched characters if not listed in the provided exclusion set - see example).
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the excluded characters and length limits; C{min} < 1 raises C{ValueError}."""
        super(CharsNotIn,self).__init__()
        # whitespace can be part of the match, so it is not skipped beforehand
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters not in C{notChars}, up to C{maxLen}; fail if fewer than C{minLen} were taken."""
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        loc += 1
        limit = min( begin+self.maxLen, len(instring) )
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def __str__( self ):
        """Return the base-class string form if available, else a cached C{!W:(...)} summary."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                # abbreviate long exclusion sets to their first four characters
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr

3004

3005

class White(Token):
    """
    Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # display names used to build the element name from the matched characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Set the whitespace characters to match and the length limits (0 means unlimited/unset)."""
        super(White,self).__init__()
        self.matchWhite = ws
        # characters this token matches must not be skipped as leading whitespace
        keptWhite = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars( keptWhite )
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of C{matchWhite} characters, up to C{maxLen}; fail if shorter than C{minLen}."""
        if instring[ loc ] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        loc += 1
        limit = min( begin + self.maxLen, len(instring) )
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3054

3055

3056

class _PositionToken(Token):
    """
    Base class for the position-testing tokens (C{GoToColumn}, C{LineStart},
    C{LineEnd}, C{StringStart}, C{StringEnd}, C{WordStart}, C{WordEnd}).
    Names the element after its concrete class and flags it as able to
    return an empty match.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = type(self).__name__

3062

3063

class GoToColumn(_PositionToken):
    """
    Token to advance to a specific column of input text; useful for tabular report scraping.
    """
    def __init__( self, colno ):
        """Remember the target column number C{colno}."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """
        Skip ignorable expressions and then whitespace until the target column
        is reached, a non-space character is found, or the input ends.
        """
        if col(loc,instring) == self.col:
            # already at the requested column
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the text between C{loc} and the target column, advancing to that
        column; fail if the current position is already past it.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]

3087

3088

3089

class LineStart(_PositionToken):
    r"""
    Matches if current position is at the beginning of a line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    # NOTE: the docstring is raw so that the backslash-newline in the example
    # is shown literally instead of being swallowed as a line continuation.
    def __init__( self ):
        super(LineStart,self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when C{loc} is in column 1, otherwise raise C{ParseException}."""
        if col(loc, instring) == 1:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

3118

3119

class LineEnd(_PositionToken):
    """
    Matches if current position is at the end of a line within the parse string
    """
    def __init__( self ):
        super(LineEnd,self).__init__()
        # a newline is what this token matches, so it must not be skipped as whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a newline at C{loc} (returned as the token) or the end of the
        input (no tokens); any other position raises C{ParseException}.
        """
        end = len(instring)
        if loc == end:
            return loc+1, []
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3138

3139

class StringStart(_PositionToken):
    """
    Matches if current position is at the beginning of the parse string
    """
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed at location 0, or at the location that pre-parsing from 0
        arrives at (i.e. everything before C{loc} is whitespace/ignorables).
        """
        # see if entire string up to here is just whitespace and ignoreables
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3153

3154

class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Raise C{ParseException} if input remains at C{loc}; otherwise succeed
        with no tokens.  Exactly at the end the returned location is C{loc+1};
        past the end it is returned unchanged.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen; the original trailing "else" could never be reached
        return loc, []

3171

3172

class WordStart(_PositionToken):
    r"""
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    # NOTE: the docstring is raw so that C{\b} is shown literally rather than
    # being turned into a backspace character.
    def __init__(self, wordChars = printables):
        """Store C{wordChars} as the set of characters that make up a word."""
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with no tokens at location 0, or where the previous character
        is not a word character and the current one is; otherwise raise
        C{ParseException}.
        """
        if loc != 0:
            # No word can start at or past the end of the input; fail with a
            # ParseException rather than letting instring[loc] raise IndexError
            # (this class declares mayIndexError = False).
            if loc >= len(instring):
                raise ParseException(instring, len(instring), self.errmsg, self)
            if (instring[loc-1] in self.wordChars or
                instring[loc] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3191

3192

class WordEnd(_PositionToken):
    r"""
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    # NOTE: the docstring is raw so that C{\b} is shown literally rather than
    # being turned into a backspace character.
    def __init__(self, wordChars = printables):
        """Store C{wordChars} as the set of characters that make up a word."""
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        # the test looks at the characters immediately around loc, so leading
        # whitespace must not be skipped first
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with no tokens at or past the end of the input, or where the
        current character is not a word character and the previous one is;
        otherwise raise C{ParseException}.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            if (instring[loc] in self.wordChars or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3213

3214

3215

class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
    """
    def __init__( self, exprs, savelist = False ):
        """
        Normalize C{exprs} into the list C{self.exprs}.

        C{exprs} may be a single string (wrapped with the literal string class),
        a generator or other iterable of expressions (an iterable made up only of
        strings is wrapped element by element), or a single non-iterable object,
        which becomes a one-element list.
        """
        # The ABC aliases in the top-level collections module were removed in
        # Python 3.10; fall back to the old location only where collections.abc
        # does not exist.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Iterable ):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(ParserElement._literalStringClass, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the C{i}'th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, invalidate the cached string form, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the contained expressions passed in by the caller are not modified
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression on this element and on all
        contained expressions.  A C{Suppress} already present in C{ignoreExprs}
        is not added again.  Returns C{self}.
        """
        if not isinstance( other, Suppress ) or other not in self.ignoreExprs:
            super( ParseExpression, self).ignore( other )
            # propagate the expression that was just registered on this element
            for e in self.exprs:
                e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class string form if available, else a cached C{ClassName:(exprs)} summary."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """
        Streamline this element and all contained expressions, flattening a
        directly nested expression of the same class, and return C{self}.
        """
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class C{setResultsName} and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then check this expression for recursion."""
        # the default list is only copied, never mutated
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are copies as well."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

3326

3327

class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}; failures after it are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Build the sequence; whitespace handling is taken from the first expression."""
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        leading = self.exprs[0]
        self.setWhitespaceChars( leading.whiteChars )
        self.skipWhitespace = leading.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse each contained expression in order, accumulating their tokens.
        Once an C{_ErrorStop} marker has been passed, any parse failure or
        C{IndexError} is re-raised as a C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        pastErrorStop = False
        for expr in self.exprs[1:]:
            if isinstance(expr, And._ErrorStop):
                pastErrorStop = True
                continue
            if not pastErrorStop:
                loc, tokens = expr._parse( instring, loc, doActions )
            else:
                try:
                    loc, tokens = expr._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            if tokens or tokens.haskeys():
                resultlist += tokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} (strings are wrapped with the literal string class) in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions for recursion, stopping after the first that cannot match empty."""
        trail = parseElementList[:] + [ self ]
        for expr in self.exprs:
            expr.checkRecursion( trail )
            if not expr.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the element name if set, else a cached C{{a b c}} rendering of the sequence."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            joined = " ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr

3401

3402

3403

class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; with no alternatives it is flagged as able to match empty."""
        super(Or,self).__init__(exprs, savelist)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Try every alternative, then re-parse the successful ones from longest
        to shortest match and return the first that succeeds.  If none does,
        raise the failure that got furthest into the input.
        """
        furthestLoc = -1
        furthestErr = None
        candidates = []
        for expr in self.exprs:
            try:
                endLoc = expr.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),expr.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, expr))

        # stable sort: alternatives with equal match length keep their listed order
        candidates.sort(key=lambda match: match[0], reverse=True)
        for _, expr in candidates:
            try:
                return expr._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ixor__(self, other ):
        """Append C{other} (strings are wrapped with the literal string class) in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the element name if set, else a cached C{{a ^ b}} rendering of the alternatives."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            joined = " ^ ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [ self ]
        for expr in self.exprs:
            expr.checkRecursion( trail )

3480

3481

3482

class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; with no alternatives it is flagged as able to match empty."""
        super(MatchFirst,self).__init__(exprs, savelist)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the result of the first alternative that parses.  If none does,
        raise the failure that got furthest into the input.
        """
        furthestLoc = -1
        furthestErr = None
        for expr in self.exprs:
            try:
                return expr._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),expr.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other ):
        """Append C{other} (strings are wrapped with the literal string class) in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the element name if set, else a cached C{{a | b}} rendering of the alternatives."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            joined = " | ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [ self ]
        for expr in self.exprs:
            expr.checkRecursion( trail )

3548

3549

3550

class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints (first test shown; the others follow the same layout)::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE
    """
    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(member.mayReturnEmpty for member in self.exprs)
        self.skipWhitespace = True
        # the required/optional groupings are computed on the first parseImpl call
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the member expressions in whatever order they occur at C{loc}.

        A first pass uses C{tryParse} to discover the order in which the
        members match; a second pass re-parses in that order (honouring
        C{doActions}) and sums the results.  Raises C{ParseException} when
        a required member never matched.
        """
        if self.initExprGroups:
            members = self.exprs
            wrapped = [m for m in members if isinstance(m, Optional)]
            # maps id(inner expression) -> its Optional wrapper
            self.opt1map = dict((id(m.expr), m) for m in wrapped)
            emptiable = [m for m in members
                         if m.mayReturnEmpty and not isinstance(m, Optional)]
            self.optionals = [m.expr for m in wrapped] + emptiable
            self.multioptionals = [m.expr for m in members if isinstance(m, ZeroOrMore)]
            self.multirequired = [m.expr for m in members if isinstance(m, OneOrMore)]
            self.required = [m for m in members
                             if not isinstance(m, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        match_order = []

        # keep sweeping over the candidates until a full sweep matches nothing
        while True:
            candidates = (pending_required + pending_optional
                          + self.multioptionals + self.multirequired)
            failures = 0
            for candidate in candidates:
                try:
                    scan_loc = candidate.tryParse(instring, scan_loc)
                except ParseException:
                    failures += 1
                    continue
                match_order.append(self.opt1map.get(id(candidate), candidate))
                if candidate in pending_required:
                    pending_required.remove(candidate)
                elif candidate in pending_optional:
                    pending_optional.remove(candidate)
            if failures == len(candidates):
                break

        if pending_required:
            missing = ", ".join(_ustr(m) for m in pending_required)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        match_order += [m for m in self.exprs
                        if isinstance(m, Optional) and m.expr in pending_optional]

        pieces = []
        for matcher in match_order:
            loc, parsed = matcher._parse(instring, loc, doActions)
            pieces.append(parsed)

        return loc, sum(pieces, ParseResults([]))

    def __str__(self):
        """Return C{name} if set, else the cached C{'{a & b & ...}'} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                rendered = [_ustr(member) for member in self.exprs]
                self.strRepr = "{" + " & ".join(rendered) + "}"
            return self.strRepr
        return self.name

    def checkRecursion(self, parseElementList):
        """Check each member, extending a copy of the trail with self."""
        trail = parseElementList[:]
        trail.append(self)
        for member in self.exprs:
            member.checkRecursion(trail)

3672

3673

3674

class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored in C{self.expr} (which may be
    C{None}); most methods simply forward to that expression when it is set.
    """
    def __init__( self, expr, savelist=False ):
        """
        Parameters:
         - expr - the expression to wrap; a string is converted with
           C{ParserElement._literalStringClass}; may be C{None}
         - savelist - passed through to C{ParserElement.__init__}
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                # the configured literal class is not a Token, so hand it a Literal to wrap
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the wrapped expression's parsing settings on the wrapper
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise C{ParseException} if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """
        Turn off whitespace skipping on this element and on a private copy of
        the wrapped expression.  Returns C{self}.
        """
        self.skipWhitespace = False
        # Guard before copying: the original copied first and so raised
        # AttributeError whenever no expression had been assigned.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression on this element and on
        the wrapped expression.  A C{Suppress} that is already present in
        C{self.ignoreExprs} is not added again.  Returns C{self}.
        """
        already_ignored = isinstance( other, Suppress ) and other in self.ignoreExprs
        if not already_ignored:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                # forward the entry that the base class just appended
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the wrapped expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """
        Raise C{RecursiveGrammarException} if this element already appears in
        C{parseElementList}; otherwise continue the check in the wrapped
        expression with this element appended to a copy of the list.
        """
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """
        Validate the wrapped expression (passing a copy of the trace extended
        with this element) and then run the recursion check from here.
        """
        # the shared default list is never mutated: a new list is built below
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """
        Return the base-class string when that works; otherwise fall back to
        a cached C{'ClassName:(expr)'} rendering (C{None} if there is no
        wrapped expression and nothing is cached).
        """
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # deliberate best-effort: any failure in the base rendering falls
            # through to the generic form below
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr

3749

3750

3751

class FollowedBy(ParseElementEnhance):
    """
    Positive lookahead on the given parse expression.  C{FollowedBy} does
    I{not} advance the parsing position within the input string; it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Try the wrapped expression at C{loc}; any exception it raises
        propagates.  On success return the unchanged location and no tokens.
        """
        self.expr.tryParse(instring, loc)
        no_tokens = []
        return loc, no_tokens

3775

3776

3777

class NotAny(ParseElementEnhance):
    """
    Negative lookahead on the given parse expression.  C{NotAny} does
    I{not} advance the parsing position within the input string; it only
    verifies that the specified parse expression does I{not} match at the
    current position.  Also, C{NotAny} does I{not} skip over leading
    whitespace.  C{NotAny} always returns a null token list.  May be
    constructed using the '~' operator.

    Example::
        # match a word only when it is not the keyword 'end'
        not_end = ~Literal("end") + Word(alphas)
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # Set the flag directly rather than calling self.leaveWhitespace():
        # the setting must not be propagated to the wrapped expression.
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Fail if the wrapped expression can parse at C{loc}; else return C{(loc, [])}."""
        unwanted_present = self.expr.canParseNext(instring, loc)
        if unwanted_present:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__(self):
        """Return C{name} if set, else the cached C{'~{expr}'} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr
        return self.name

3808

3809

class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation of repetition: matches the wrapped expression once
    and then as many further times as it keeps matching, optionally stopping
    in front of a C{stopOn} sentinel.
    """
    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # negated sentinel, or None when no stopOn was given
        if sentinel is not None:
            self.not_ender = ~sentinel
        else:
            self.not_ender = None

    def parseImpl(self, instring, loc, doActions=True):
        """
        Parse one mandatory occurrence, then repeat until the wrapped
        expression (or the sentinel check) fails.  A failure of the first
        occurrence propagates; failures of later ones just end the loop.
        """
        parse_once = self.expr._parse
        skip_ignorables = self._skipIgnorables
        guard = None
        if self.not_ender is not None:
            guard = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if guard is not None:
            guard(instring, loc)
        loc, tokens = parse_once(instring, loc, doActions, callPreParse=False)
        try:
            have_ignorables = bool(self.ignoreExprs)
            while True:
                if guard is not None:
                    guard(instring, loc)
                start = skip_ignorables(instring, loc) if have_ignorables else loc
                loc, more = parse_once(instring, start, doActions)
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            # no further repetition available; keep what was collected
            pass

        return loc, tokens

3846

3847

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        """Return C{name} if set, else the cached C{'{expr}...'} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr
        return self.name

3881

3882

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Same as the one-or-more parse, except that a failure to match even
        once yields the unchanged location and an empty token list.
        """
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = (loc, [])
        return outcome

    def __str__(self):
        """Return C{name} if set, else the cached C{'[expr]...'} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr
        return self.name

3912

3913

class _NullToken(object):
    """Placeholder value that is always false and renders as an empty string."""

    def __bool__(self):
        # Python 3 truth-value hook
        return False

    # Python 2 spelling of the same hook
    __nonzero__ = __bool__

    def __str__(self):
        return ""


# module-wide sentinel instance of _NullToken
_optionalNotMatched = _NullToken()

3921

class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may be present at most once
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Parse the wrapped expression if it matches at C{loc}.  Otherwise
        return the unchanged location together with the default value (also
        stored under the wrapped expression's results name, if it has one),
        or with no tokens when no default was supplied.
        """
        try:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            pass

        fallback = self.defaultValue
        if fallback is _optionalNotMatched:
            return loc, []
        results_name = self.expr.resultsName
        if not results_name:
            return loc, [fallback]
        tokens = ParseResults([fallback])
        tokens[results_name] = fallback
        return loc, tokens

    def __str__(self):
        """Return C{name} if set, else the cached C{'[expr]'} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name

3984

3985

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; scanning stops as soon as one is found
          before the target expression

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        fail_expr = failOn
        if isinstance(fail_expr, basestring):
            fail_expr = ParserElement._literalStringClass(fail_expr)
        self.failOn = fail_expr
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Scan forward from C{loc} one character at a time until the target
        expression matches (or the C{failOn} expression is seen), and return
        the skipped text; with C{include} set, the target's own tokens are
        appended and the location moves past the target.  Raises
        C{ParseException} when the end of the input is passed without a match.
        """
        input_len = len(instring)
        parse_target = self.expr._parse
        hits_fail_on = None if self.failOn is None else self.failOn.canParseNext
        skip_ignored = None if self.ignoreExpr is None else self.ignoreExpr.tryParse

        cursor = loc
        while cursor <= input_len:
            # stop scanning if the failOn expression matches here
            if hits_fail_on is not None and hits_fail_on(instring, cursor):
                break

            if skip_ignored is not None:
                # advance past any run of ignore expressions
                while True:
                    try:
                        cursor = skip_ignored(instring, cursor)
                    except ParseBaseException:
                        break

            try:
                parse_target(instring, cursor, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match here, try the next character
                cursor += 1
            else:
                # target matches at cursor, done scanning
                break
        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        skipped = ParseResults(instring[loc:cursor])
        if not self.includeMatch:
            return cursor, skipped

        end_loc, target_tokens = parse_target(instring, cursor, doActions, callPreParse=False)
        skipped += target_tokens
        return end_loc, skipped

4099

4100

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Assign the real expression (a string is converted to a literal first)
        and copy its parsing settings onto this placeholder.  Returns C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # drop any cached string form built for a previous expression
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """Implement C{self <<= other}; same as C{self << other}."""
        return self << other

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this element only.  Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """
        Streamline the contained expression once.  The C{streamlined} flag is
        set before descending, so a second call on the same element (for
        example from a self-referencing grammar) does nothing further.
        Returns C{self}.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """
        Validate the contained expression unless this element is already in
        C{validateTrace}, then run the recursion check from here.
        """
        # the shared default list is never mutated: a new list is built below
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return C{name} if set, else the class name followed by C{': ...'}."""
        if hasattr(self,"name"):
            return self.name
        # The contained expression is deliberately not rendered.  The code
        # that did so sat after this return and could never run; its own
        # comment said it was stubbed out because it "creates awful memory
        # and perf issues".  That dead code has been removed.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Copy this element.  When no expression has been assigned yet, return
        a new C{Forward} that refers to this one instead of a plain copy.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

4180

4181

class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the placeholder C{'...'}."""

    def __str__(self):
        placeholder = "..."
        return placeholder

4184

4185

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # savelist is accepted for signature compatibility but is not
        # forwarded to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False

4192

4193

class Combine(TokenConverter):
    """
    Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """
        Register an ignorable expression.  In adjacent mode it is applied to
        this element only (via C{ParserElement.ignore}); otherwise the
        inherited behaviour is used.  Returns C{self}.
        """
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one concatenated string token."""
        joined = "".join(tokenlist._asStringList(self.joinString))
        # start from a copy so named results are kept, then swap the list part
        combined = tokenlist.copy()
        del combined[:]
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

4236

4237

class Group(TokenConverter):
    """
    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens in a one-element list."""
        grouped = [tokenlist]
        return grouped

4257

4258

class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """
        Add a named entry to C{tokenlist} for every non-empty element, keyed
        by that element's first token (integer keys are converted to stripped
        strings).  The value is C{""} for a lone key, the second token for a
        simple pair, and otherwise the remaining tokens.
        """
        for index, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = _ustr(entry[0]).strip()

            if len(entry) == 1:
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys())
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, index)

        if self.resultsName:
            return [tokenlist]
        return tokenlist

4321

4322

4323

class Suppress(TokenConverter):
    """
    Converter for ignoring the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse(self, instring, loc, tokenlist):
        """Discard whatever was matched and return an empty token list."""
        nothing = []
        return nothing

    def suppress(self):
        """Already a suppressing element, so return C{self} unchanged."""
        return self

4347

4348

4349

class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.
    Call C{reset()} to allow the wrapped action to run again.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """
        Run the wrapped action on the first call and return its result;
        later calls raise C{ParseException} until C{reset()} is called.
        """
        if self.called:
            raise ParseException(s,l,"")
        outcome = self.callable(s, l, t)
        # only marked as used once the action has returned without raising
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped action to be called once more."""
        self.called = False

4364

4365

def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator will write C{">>entering I{method-name}(line: I{current_source_line}, I{parse_location}, I{matched_tokens})"} to C{sys.stderr}.
    When the parse action completes, the decorator will write C{"<<leaving"} followed by the returned value, or any exception that the parse action raised.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        emit = sys.stderr.write
        label = f.__name__
        # the last three positional arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the owning instance
            label = paArgs[0].__class__.__name__ + '.' + label
        emit(">>entering %s(line: '%s', %d, %r)\n" % (label, line(l, s), l, t))
        try:
            outcome = f(*paArgs)
        except Exception as exc:
            emit("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        emit("<<leaving %s (ret: %r)\n" % (label, outcome))
        return outcome

    # present the wrapper under the wrapped action's name when it has one
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

4405

4406

4407

# global helpers

4408

4409

def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to build an expression matching one or more C{expr} items separated
    by C{delim} (a comma unless specified otherwise).

    With the default C{combine=False}, the list elements and delimiters can have
    intervening whitespace and comments, the delimiters are suppressed, and the
    matching tokens are returned as a list of tokens.  Passing C{combine=True}
    wraps the whole list in a C{L{Combine}}, so the matching tokens are returned
    as a single token string with the delimiters included.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    dlName = "%s [%s %s]..." % (_ustr(expr), _ustr(delim), _ustr(expr))
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)

4427

4428

def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens are returned as the array of expr tokens, as a list - the
    leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces
    an integer value; a copy of it is used, so the caller's expression is not modified.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # redefine the array body on the fly, now that the count is known
        count = t[0]
        arrayExpr << (Group(And([expr]*count)) if count else Group(empty))
        # drop the count token itself from the results
        return []
    intExpr = Word(nums).setParseAction(lambda t:int(t[0])) if intExpr is None else intExpr.copy()
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')

4458

4459

def _flatten(L):

4460

ret = []

4461

for i in L:

4462

if isinstance(i,list):

4463

ret.extend(_flatten(i))

4464

else:

4465

ret.append(i)

4466

return ret

4467

4468

def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # each time expr matches, redefine rep to match exactly those tokens
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            rep << And(Literal(tt) for tt in _flatten(t.asList()))
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

4495

4496

def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()
    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, then make rep reject anything different
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            if _flatten(t.asList()) != expected:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

4523

4524

def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regex C{[...]} character class."""
    #~  escape these chars: \ ^ - ]   (backslash first, so added escapes are not re-escaped)
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    # spell newline and tab as their two-character escape sequences
    s = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(s)

4531

4532

def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Returns C{NoMatch()} if no symbols are given; an argument that is neither a
    string nor an iterable also issues a C{SyntaxWarning}.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # The ABC aliases in the top-level collections module were removed in
    # Python 3.10; fall back to the old location only where collections.abc
    # does not exist (Python 2).
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix (e.g. "<=" after "<") ahead of that prefix.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for this position - advance to the next symbol
            i += 1

    if not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters - a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))

4604

4605

def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
    in the proper order.  The key pattern can include delimiting markers or punctuation,
    as long as they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the C{Dict} results can include named token
    fields.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one group per key/value pair, repeated zero or more times
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )

4639

4640

def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers that record the parse location before and after expr
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")
    if not asString:
        def extractText(s,l,t):
            # replace all tokens with the raw slice, removing the two helper names
            begin = t.pop('_original_start')
            finish = t.pop('_original_end')
            t[:] = [s[begin:finish]]
    else:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    wrapped.setParseAction(extractText)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped

4676

4677

def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.

    Wraps C{expr} in a C{TokenConverter} whose parse action returns only the
    first matched token.
    """
    converter = TokenConverter(expr)
    return converter.setParseAction(lambda t:t[0])

4683

4684

def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expressions whose only token is the current parse location
    startLoc = Empty().setParseAction(lambda s,l,t: l)
    endLoc = startLoc.copy().leaveWhitespace()
    return Group(startLoc("locn_start") + expr("value") + endLoc("locn_end"))

4706

4707

4708

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# internal grammar for the regex-like '[...]' bracket expressions parsed by srange()
# a backslash followed by one punctuation character stands for that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Hex escape (\x##, \X## or \0x##): take the digits after the x marker.  The hex
# digits themselves can never contain 'x', so splitting on the first 'x' is exact;
# str.lstrip(r'\0x') would also eat leading zero digits (turning '\x00' into '')
# and would not remove an uppercase 'X'.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x',1)[1],16)))
# octal escape (\0##): everything after the backslash is the octal number
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

4721

4722

def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.  If the input cannot be processed,
    an empty string is returned.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    """
    def _expanded(part):
        # a grouped pair is a range 'first-last'; anything else is already one character
        if isinstance(part,ParseResults):
            return ''.join(unichr(c) for c in range(ord(part[0]),ord(part[1])+1))
        return part
    try:
        body = _reBracketExpr.parseString(s).body
        return "".join(_expanded(part) for part in body)
    except Exception:
        return ""

4745

4746

def matchOnlyAtCol(n):
    """
    Helper method for defining parse actions that require matching at a specific
    column in the input text.

    Returns a parse action that raises C{ParseException} unless the match
    location is at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol

4755

4756

def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    The returned parse action ignores its arguments and returns a new
    one-element list holding C{replStr} on every call.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    action = lambda s,l,t: [replStr]
    return action

4769

4770

def removeQuotes(s,l,t):
    """
    Helper parse action for removing quotation marks from parsed quoted strings.
    Returns the first token with its first and last characters dropped.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]

4783

4784

def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
    args are passed, they are forwarded to the given function as additional arguments after
    the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
    parsed data to an integer using base 16.

    The returned parse action is named after C{func} (its C{__name__}, or else its class name).

    Example (compare the last to example in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        return [func(tokn, *args) for tokn in t]

    try:
        fallback = func.__class__.__name__
        func_name = getattr(func, '__name__', fallback)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa

4827

4828

upcaseTokens = tokenMap(lambda t: _ustr(t).upper())

4829

"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

4830

4831

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())

4832

"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""

4833

4834

def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name

    Parameters:
     - tagStr - the tag name as a string, or an expression matching it (its C{name}
          attribute is then used to build the results names)
     - xml - if true, build XML-style tags: the tag keyword is case-sensitive and
          attribute values must be double-quoted; otherwise build HTML-style tags

    Returns a 2-tuple C{(openTag, closeTag)}; both expressions carry a C{tag}
    attribute holding the tag name.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        # HTML tag names are matched caselessly, XML tag names exactly
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        # XML: every attribute has the form name="value"
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        openTag = Suppress("<") + tagStr("tag") + \
                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    else:
        # HTML: values may be quoted or bare, the '=value' part is optional,
        # and attribute names are lower-cased
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        openTag = Suppress("<") + tagStr("tag") + \
                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the title-cased tag name, with any ':' removed
    openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag

4862

4863

def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.

    Returns the 2-tuple C{(openTag, closeTag)} built by C{_makeTags(tagStr, False)}.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags( tagStr, False )

4881

4882

def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
    tags only in the given upper/lower case.

    Returns the 2-tuple C{(openTag, closeTag)} built by C{_makeTags(tagStr, True)}.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags( tagStr, True )

4890

4891

def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    If any name-value tuples are given positionally, only those are checked and
    keyword arguments are ignored.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    The returned parse action raises C{ParseException} if a required attribute
    is missing or has a different value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # snapshot the requirements now, so the parse action does not depend on
    # the caller's containers afterwards
    required = args if args else attrDict.items()
    attrs = [(k,v) for k,v in required]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# unique sentinel meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()

4955

4956

def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} when matching on a div class - made
    difficult because C{class} is a reserved word in Python.

    Parameters:
     - classname - required value of the C{class} attribute, or
          C{withAttribute.ANY_VALUE} to accept any value
     - namespace - optional namespace prefix; if given, the attribute tested is
          C{"<namespace>:class"} instead of C{"class"}

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    classattr = "%s:class" % namespace if namespace else "class"
    # 'class' cannot be written as a keyword argument, so pass it via ** expansion
    return withAttribute(**{classattr : classname})

4990

4991

opAssoc = _Constants()

4992

opAssoc.LEFT = object()

4993

opAssoc.RIGHT = object()

4994

4995

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the nested
       expression
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted)
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if numTerms is not 1, 2 or 3, if numTerms is 3 and opExpr
    is not a pair of expressions, or if rightLeftAssoc is neither C{opAssoc.LEFT}
    nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the optional parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # Validate before building the display name: formatting a malformed
            # opExpr with "%s%s" would fail with an unrelated TypeError and hide
            # this message (and a list of two expressions could never be used).
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % opExpr
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator form, or falls through to the
        # level built in the previous iteration
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

5101

5102

operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions.  Inside the quotes the regexes accept any character
# other than the quote, newline, carriage return or backslash; a doubled quote
# character; or a backslash escape (\x must be followed by hex digits).
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quoted string with a leading 'u' prefix, combined into a single token
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")

5110

5111

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Build an expression that matches arbitrarily nested lists bracketed by
    an opening and a closing delimiter (C{"("} and C{")"} unless overridden).

    Parameters:
     - opener - opening delimiter of a nested list (default=C{"("}); may also be a pyparsing expression
     - closer - closing delimiter of a nested list (default=C{")"}); may also be a pyparsing expression
     - content - expression matching the items found inside the nested lists (default=C{None})
     - ignoreExpr - expression whose matches may contain delimiter characters that
          must not count as nesting delimiters (default=C{quotedString})

    When no C{content} expression is supplied, everything between the
    delimiters is captured as a list of whitespace-separated values.  In that
    case both C{opener} and C{closer} must be plain strings.

    C{ignoreExpr} is intended for things such as quoted strings or comments,
    inside which an opening or closing character is just text.  To ignore
    several kinds of expression, combine them with an C{L{Or}} or
    C{L{MatchFirst}}.  Pass C{None} if nothing should be ignored.

    Raises C{ValueError} if C{opener} equals C{closer}, or if C{content} is
    omitted while C{opener}/C{closer} are not both strings.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whitespace = ParserElement.DEFAULT_WHITE_CHARS
        strip_token = lambda t: t[0].strip()

        if len(opener) == 1 and len(closer) == 1:
            # single-character delimiters can simply be excluded character-wise
            excluded = opener + closer + whitespace
            if ignoreExpr is None:
                content = empty.copy() + CharsNotIn(excluded).setParseAction(strip_token)
            else:
                one_char = ~ignoreExpr + CharsNotIn(excluded, exact=1)
                content = Combine(OneOrMore(one_char)).setParseAction(strip_token)
        else:
            # multi-character delimiters need a negative lookahead before every character
            if ignoreExpr is None:
                one_char = (~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whitespace, exact=1))
            else:
                one_char = (~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whitespace, exact=1))
            content = Combine(OneOrMore(one_char)).setParseAction(strip_token)

    # the expression refers to itself, so it is declared first and defined afterwards
    ret = Forward()
    if ignoreExpr is None:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    else:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener, closer))
    return ret

5200

5201

def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
            current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that this function also calls C{blockStatementExpr.ignore(...)} on the
    expression passed in, so the caller's expression is modified, and that the
    parse actions installed here append to and pop from C{indentStack}.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # Parse action for a statement at the current level: its column must equal
    # the top of the indent stack.
    def checkPeerIndent(s,l,t):
        # nothing to check once the location is at/after the end of the input
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            # deeper than expected is a fatal error; shallower is an ordinary failure
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    # Parse action for the start of a nested block: the column must be strictly
    # greater than the current level, and it becomes the new top of the stack.
    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    # Parse action for the end of a nested block: the column must be left of the
    # current level and no deeper than the enclosing one; the current level is popped.
    def checkUnindent(s,l,t):
        if l >= len(s): return
        curCol = col(l,s)
        # NOTE(review): the guard only checks that indentStack is non-empty, but
        # indentStack[-2] needs at least two entries; with a single-entry stack and
        # curCol < indentStack[-1] this would raise IndexError - confirm whether
        # that state is reachable.
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more suppressed line ends; only tab and space are skipped before each
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    # zero-width markers that exist only to run the indentation checks above
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # block of left-most statements: no INDENT/UNDENT bracketing
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # ignore a backslash immediately followed by a line end inside statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

5314

5315

alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")

5316

punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

5317

5318

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))

5319

_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))

5320

commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")

5321

def replaceHTMLEntity(t):
    """Parse action that swaps a matched common HTML entity for the character it stands for.

    Reads the C{entity} named result from the tokens and looks it up in the
    module's entity table; returns C{None} when the name is not in the table.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

5324

5325

# it's easy to get these comment structures wrong - they're very common, so may as well make them available

# "/*", then any run of non-'*' characters or '*' not followed by '/', then the
# closing "*/" literal; Combine returns the whole comment as one token
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

# The pattern must be spelled out here: an empty pattern cannot recognise an
# HTML comment.  [\s\S] lets the comment span lines, and the non-greedy *?
# stops at the first closing "-->".
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# everything up to (not including) the next newline, without skipping leading whitespace
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# "//" to end of line; a backslash directly before the newline continues the comment
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

# '+' binds tighter than '|': Combine( (c-comment-body + '*/') | dblSlashComment )
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

# same expression object as cppStyleComment, not a copy
javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one comma-free item: printable words, optionally separated by spaces/tabs as
# long as the whitespace is not followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# each list element is optional and defaults to "", so empty fields are kept
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""

5351

5352

# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    # tokenMap(int,16): each token is converted with int(token, 16)
    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # signed_integer() makes a copy, so replacing the parse action with
    # convertToFloat here does not affect signed_integer itself
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # first token (numerator) divided by last token (denominator); the '/' in between is skipped
    fraction.addParseAction(lambda t: t[0]/t[-1])

    # either a bare fraction, or an integer optionally followed by a fraction
    # (an optional '-' separator between them is matched and suppressed)
    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the integer part and the fraction part are added together
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    # digits followed by either an exponent, or a decimal point with optional exponent
    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    # four dot-separated groups, each 250-255, 200-249, or an optional '1' plus one or two digits
    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    # one group of 1 to 4 hex digits
    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    # exactly eight colon-separated groups
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    # "::" with up to seven groups allowed on either side by the grammar ...
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # ... but the match is rejected unless fewer than 8 of its tokens are hex groups
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    # literal "::ffff:" prefix followed by an IPv4 address
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    # alternatives are tried in this order: full, mixed, short; result is one combined token
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # The first delimiter is captured and the backreference \1 requires the same
    # delimiter between all remaining groups.
    # NOTE(review): the pattern matches six two-digit groups (2 + 4 repeats), while
    # the string below shows only five "xx" groups - the text looks like a typo.
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        The returned parse action converts only the first token, and reports a
        C{ValueError} from C{strptime} as a L{ParseException} carrying the same message.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                # turn a format mismatch into a normal parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        The returned parse action converts only the first token, and reports a
        C{ValueError} from C{strptime} as a L{ParseException} carrying the same message.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # turn a format mismatch into a normal parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    # year is required; "-mm" and then "-dd" are each optional in the pattern
    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    # 8-4-4-4-12 hex digits separated by '-'
    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # any opening or closing tag, suppressed so that transformString drops it
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Only the first token is processed; the tag-free text is returned.

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    # one comma-free item: printable words (never starting at a comma or line end),
    # each optionally followed by spaces/tabs
    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
                                        + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
    # each list element is optional and defaults to "", so empty fields are kept
    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    # wrapped in staticmethod so the function is not bound when reached through an instance
    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""

5625

5626

5627

# Self-test / demo, executed only when this module is run as a script: builds a
# tiny SELECT-statement grammar and exercises runTests() on it and on several
# pyparsing_common expressions.
if __name__ == "__main__":

    selectToken    = CaselessLiteral("select")
    fromToken      = CaselessLiteral("from")

    ident          = Word(alphas, alphanums + "_$")

    # dotted names are combined into one token and upper-cased
    columnName     = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec     = ('*' | columnNameList)

    tableName      = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList  = Group(delimitedList(tableName)).setName("tables")

    simpleSQL      = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # the same sample values are run through both numeric expressions
    numeric_samples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """

    # any int or real number, returned as the appropriate type
    pyparsing_common.number.runTests(numeric_samples)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests(numeric_samples)

    # "FF" is included so that the test covers a value containing hex letters,
    # not only one that is also a valid decimal number
    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    # convert matched UUID strings into uuid.UUID objects for the demo
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)

Older »