~ubuntu-branches/ubuntu/trusty/ply/trusty-proposed

Viewing changes to .pc/01_fix-lex-tabversion.patch/ply/lex.py

Committer: Bazaar Package Importer
Author(s): Arnaud Fontaine, Arnaud Fontaine, Daniele Tricoli
Date: 2011-06-12 20:27:05 UTC
mfrom: (1.1.8 upstream) (3.1.5 sid)
Revision ID: james.westby@ubuntu.com-20110612202705-sv3fhvuoc979h8fr

Tags: 3.4-1

http://bugs.debian.org/605659

http://bugs.debian.org/627862

[ Arnaud Fontaine ]
* New upstream release.
* Switch to dh_python2.
  + debian/control:
    - Bump cdbs dependency to 0.4.90-1~.
    - Bump python-all dependency to 2.6.6-9~.
    - Remove Build-Depends on python-support.
    - Add X-Python-Version field.
    - Add ${python:Breaks}.
* Add python3-ply binary package. Closes: #605659.
  + debian/control:
    - Add python3-ply binary package.
    - Build-Depends on python3-all and add X-Python3-Version field.
* debian/copyright:
  + Update copyright years.
* Provide virtual packages for lex and yacc tabversions. Thanks to Jakub
  Wilk. Closes: #627862.

[ Daniele Tricoli ]
* debian/control:
  - Bumped Standards-Version to 3.9.2. No changes needed.
* debian/patches/01_fix-lex-tabversion.patch:
  - Set proper _tabversion in ply.lex.
* debian/patches/02_relax-lex-tabversion-check.patch:
  - Don't break already built packages with wrong _tabversion.
* debian/source/format:
  - Switched to dpkg-source 3.0 (quilt) format.

files added:
.pc

.pc/.version

.pc/01_fix-lex-tabversion.patch

.pc/01_fix-lex-tabversion.patch/ply

.pc/01_fix-lex-tabversion.patch/ply/lex.py

.pc/02_relax-lex-tabversion-check.patch

.pc/02_relax-lex-tabversion-check.patch/ply

.pc/02_relax-lex-tabversion-check.patch/ply/lex.py

.pc/applied-patches

PKG-INFO

debian/dh_python-ply

debian/patches

debian/patches/01_fix-lex-tabversion.patch

debian/patches/02_relax-lex-tabversion-check.patch

debian/patches/series

debian/python-ply.install

debian/python3-ply.docs

debian/python3-ply.install

debian/source

debian/source/format

debian/virtual-packages.py

files removed:
debian/pycompat

debian/pyversions

files modified:
ANNOUNCE

CHANGES

README

debian/changelog

debian/control

debian/copyright

debian/rules

doc/ply.html

ply/cpp.py

ply/lex.py

ply/yacc.py

setup.py

test/README

test/cleanup.sh

test/testlex.py

test/testyacc.py

Show diffs side-by-side

added added

removed removed

.pc/01_fix-lex-tabversion.patch/ply/lex.py

# -----------------------------------------------------------------------------

# ply: lex.py

# David M. Beazley (Dabeaz LLC)

# Redistribution and use in source and binary forms, with or without

# modification, are permitted provided that the following conditions are

# met:

# * Redistributions of source code must retain the above copyright notice,

# this list of conditions and the following disclaimer.

# * Redistributions in binary form must reproduce the above copyright notice,

# this list of conditions and the following disclaimer in the documentation

# and/or other materials provided with the distribution.

# * Neither the name of the David Beazley or Dabeaz LLC may be used to

# endorse or promote products derived from this software without

# specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# -----------------------------------------------------------------------------

# PLY release string; writetab() embeds it in the header of generated table files
__version__ = "3.4"

__tabversion__ = "3.2" # Version of table file used

import re, sys, types, copy, os

# This tuple contains known string types

# Python 2 exposes StringType/UnicodeType on the types module; Python 3
# dropped both, in which case str and bytes are used instead.
if hasattr(types, "StringType") and hasattr(types, "UnicodeType"):
    # Python 2.6
    StringTypes = (types.StringType, types.UnicodeType)
else:
    # Python 3.0
    StringTypes = (str, bytes)

# Extract the code attribute of a function. Different implementations

# are for Python 2/3 compatibility.

if sys.version_info[0] >= 3:
    def func_code(f):
        """Return the code object of function *f* (Python 3 spelling)."""
        return f.__code__
else:
    def func_code(f):
        """Return the code object of function *f* (Python 2 spelling)."""
        return f.func_code

# This regular expression is used to match valid token names: one or more
# ASCII letters, digits or underscores (a leading digit is accepted)

_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')

# Exception thrown when invalid token encountered and no default error

# handler is defined.

class LexError(Exception):
    """Raised when the lexer hits input it cannot tokenize.

    Attributes:
        args: one-element tuple holding the error message.
        text: the input text handed over together with the message.
    """

    def __init__(self, message, s):
        # Exception.__init__ stores its positional arguments in self.args.
        Exception.__init__(self, message)
        self.text = s

# Token class. This class is used to represent the tokens produced.

class LexToken(object):
    """A single token.

    The attributes type, value, lineno and lexpos are assigned by the code
    that creates the token; this class only provides the printable form.
    """

    def __str__(self):
        fields = (self.type, self.value, self.lineno, self.lexpos)
        return "LexToken(%s,%r,%d,%d)" % fields

    def __repr__(self):
        # repr() and str() render identically.
        return self.__str__()

# This object is a stand-in for a logging object created by the

# logging module.

class PlyLogger(object):
    """Minimal stand-in for a logging-module logger.

    Every message is %-formatted with its positional arguments and written,
    followed by a newline, to the file-like object given at construction.
    Keyword arguments are accepted and ignored.
    """

    def __init__(self, f):
        self.f = f

    def _emit(self, prefix, msg, args):
        # Shared writer: optional severity prefix, formatted text, newline.
        self.f.write(prefix + (msg % args) + "\n")

    def critical(self, msg, *args, **kwargs):
        self._emit("", msg, args)

    def warning(self, msg, *args, **kwargs):
        self._emit("WARNING: ", msg, args)

    def error(self, msg, *args, **kwargs):
        self._emit("ERROR: ", msg, args)

    # info and debug produce the same unprefixed output as critical.
    info = critical
    debug = critical

# Null logger is used when no output is generated. Does nothing.

class NullLogger(object):
    """Logger that discards everything.

    Any attribute lookup yields the logger itself, and calling it yields the
    logger again, so an expression such as ``log.warning("...")`` is a no-op.
    """

    def __getattribute__(self, name):
        # Whatever is asked for, answer with this same object.
        return self

    def __call__(self, *args, **kwargs):
        # Swallow the call and stay chainable.
        return self

100

# -----------------------------------------------------------------------------

101

# === Lexing Engine ===

102

103

# The following Lexer class implements the lexer runtime. There are only

104

# a few public methods and attributes:

105

106

# input() - Store a new string in the lexer

107

# token() - Get the next token

108

# clone() - Clone the lexer

109

110

# lineno - Current line number

111

# lexpos - Current position in the input string

112

# -----------------------------------------------------------------------------

113

114

class Lexer:
    """Lexer runtime.

    Public methods: input(), token(), clone(), begin(), push_state(),
    pop_state(), current_state(), skip(), plus the iterator protocol.
    Public attributes: lineno (current line number) and lexpos (current
    position in the input string).
    """

    def __init__(self):
        self.lexre = None             # Master regular expression. This is a list of
                                      # tuples (re,findex) where re is a compiled
                                      # regular expression and findex is a list
                                      # mapping regex group numbers to rules
        self.lexretext = None         # Current regular expression strings
        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
        self.lexstaterenames = {}     # Dictionary mapping lexer states to symbol names
        self.lexstate = "INITIAL"     # Current lexer state
        self.lexstatestack = []       # Stack of lexer states
        self.lexstateinfo = None      # State information
        self.lexstateignore = {}      # Dictionary of ignored characters for each state
        self.lexstateerrorf = {}      # Dictionary of error functions for each state
        self.lexreflags = 0           # Optional re compile flags
        self.lexdata = None           # Actual input data (as a string)
        self.lexpos = 0               # Current position in input text
        self.lexlen = 0               # Length of the input text
        self.lexerrorf = None         # Error rule (if any)
        self.lextokens = None         # List of valid tokens
        self.lexignore = ""           # Ignored characters
        self.lexliterals = ""         # Literal characters that can be passed through
        self.lexmodule = None         # Module
        self.lineno = 1               # Current line number
        self.lexoptimize = 0          # Optimized mode

    def clone(self, object=None):
        """Return a shallow copy of this lexer.

        If *object* is supplied, the copy is attached to it: every rule
        function in the per-state regex tables and every error function is
        replaced by the attribute of the same name looked up on *object*.
        """
        c = copy.copy(self)

        # If the object parameter has been supplied, it means we are attaching the
        # lexer to a new object.  In this case, we have to rebind all methods in
        # the lexstatere and lexstateerrorf tables.
        if object:
            newtab = {}
            for key, ritem in self.lexstatere.items():
                newre = []
                for cre, findex in ritem:
                    newfindex = []
                    for f in findex:
                        if not f or not f[0]:
                            # No function attached to this group; keep as is
                            newfindex.append(f)
                            continue
                        newfindex.append((getattr(object, f[0].__name__), f[1]))
                    newre.append((cre, newfindex))
                newtab[key] = newre
            c.lexstatere = newtab
            c.lexstateerrorf = {}
            for key, ef in self.lexstateerrorf.items():
                if ef:
                    c.lexstateerrorf[key] = getattr(object, ef.__name__)
                else:
                    # A state may carry no error function; nothing to rebind
                    c.lexstateerrorf[key] = ef
            c.lexmodule = object
        return c

    # ------------------------------------------------------------
    # writetab() - Write lexer information to a table file
    # ------------------------------------------------------------
    def writetab(self, tabfile, outputdir=""):
        """Write the lexer tables to ``<outputdir>/<last component of tabfile>.py``.

        Does nothing when *tabfile* is a module object.  The tables are
        assembled before the file is opened, and the file is closed even if
        a write fails.
        """
        if isinstance(tabfile, types.ModuleType):
            return
        basetabfilename = tabfile.split(".")[-1]
        filename = os.path.join(outputdir, basetabfilename) + ".py"

        # Replace function objects by their symbol names so the table can
        # be written out with repr()
        tabre = {}
        for key, lre in self.lexstatere.items():
            titem = []
            for i in range(len(lre)):
                titem.append((self.lexstateretext[key][i],
                              _funcs_to_names(lre[i][1], self.lexstaterenames[key][i])))
            tabre[key] = titem

        taberr = {}
        for key, ef in self.lexstateerrorf.items():
            if ef:
                taberr[key] = ef.__name__
            else:
                taberr[key] = None

        tf = open(filename, "w")
        try:
            tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile, __version__))
            # The table *format* version is recorded, not the PLY release
            tf.write("_tabversion = %s\n" % repr(__tabversion__))
            tf.write("_lextokens = %s\n" % repr(self.lextokens))
            tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
            tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
            tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))
            tf.write("_lexstatere = %s\n" % repr(tabre))
            tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))
            tf.write("_lexstateerrorf = %s\n" % repr(taberr))
        finally:
            tf.close()

    # ------------------------------------------------------------
    # readtab() - Read lexer information from a tab file
    # ------------------------------------------------------------
    def readtab(self, tabfile, fdict):
        """Load lexer tables from *tabfile* (a module or a module name).

        *fdict* maps rule symbol names to the objects that implement them.
        Raises ImportError if the table's ``_tabversion`` is neither
        ``__tabversion__`` nor ``__version__``; the latter is still accepted
        because earlier table files were stamped with the release string.
        """
        if isinstance(tabfile, types.ModuleType):
            lextab = tabfile
        else:
            # NOTE(review): tabfile is interpolated into an import statement
            # that is exec'd; it must come from a trusted caller.
            if sys.version_info[0] < 3:
                exec("import %s as lextab" % tabfile)
            else:
                env = {}
                exec("import %s as lextab" % tabfile, env, env)
                lextab = env['lextab']

        tabversion = getattr(lextab, "_tabversion", "0.0")
        if tabversion != __tabversion__ and tabversion != __version__:
            raise ImportError("Inconsistent PLY version")

        self.lextokens = lextab._lextokens
        self.lexreflags = lextab._lexreflags
        self.lexliterals = lextab._lexliterals
        self.lexstateinfo = lextab._lexstateinfo
        self.lexstateignore = lextab._lexstateignore
        self.lexstatere = {}
        self.lexstateretext = {}
        for key, lre in lextab._lexstatere.items():
            titem = []
            txtitem = []
            for i in range(len(lre)):
                titem.append((re.compile(lre[i][0], lextab._lexreflags | re.VERBOSE),
                              _names_to_funcs(lre[i][1], fdict)))
                txtitem.append(lre[i][0])
            self.lexstatere[key] = titem
            self.lexstateretext[key] = txtitem
        self.lexstateerrorf = {}
        for key, ef in lextab._lexstateerrorf.items():
            if ef:
                self.lexstateerrorf[key] = fdict[ef]
            else:
                # writetab() stores None for states without an error function
                self.lexstateerrorf[key] = None
        self.begin('INITIAL')

    # ------------------------------------------------------------
    # input() - Push a new string into the lexer
    # ------------------------------------------------------------
    def input(self, s):
        """Store *s* as the text to tokenize and rewind to position 0.

        Raises ValueError if *s* does not look like a string.
        """
        # Pull off the first character to see if s looks like a string
        c = s[:1]
        if not isinstance(c, StringTypes):
            raise ValueError("Expected a string")
        self.lexdata = s
        self.lexpos = 0
        self.lexlen = len(s)

    # ------------------------------------------------------------
    # begin() - Changes the lexing state
    # ------------------------------------------------------------
    def begin(self, state):
        """Switch to lexing state *state*; raise ValueError if it is unknown."""
        if not state in self.lexstatere:
            raise ValueError("Undefined state")
        self.lexre = self.lexstatere[state]
        self.lexretext = self.lexstateretext[state]
        self.lexignore = self.lexstateignore.get(state, "")
        self.lexerrorf = self.lexstateerrorf.get(state, None)
        self.lexstate = state

    # ------------------------------------------------------------
    # push_state() - Changes the lexing state and saves old on stack
    # ------------------------------------------------------------
    def push_state(self, state):
        """Save the current state on the stack and switch to *state*."""
        self.lexstatestack.append(self.lexstate)
        self.begin(state)

    # ------------------------------------------------------------
    # pop_state() - Restores the previous state
    # ------------------------------------------------------------
    def pop_state(self):
        """Switch back to the most recently pushed state."""
        self.begin(self.lexstatestack.pop())

    # ------------------------------------------------------------
    # current_state() - Returns the current lexing state
    # ------------------------------------------------------------
    def current_state(self):
        """Return the name of the current lexing state."""
        return self.lexstate

    # ------------------------------------------------------------
    # skip() - Skip ahead n characters
    # ------------------------------------------------------------
    def skip(self, n):
        """Advance the input position by *n* characters."""
        self.lexpos += n

    # ------------------------------------------------------------
    # token() - Return the next token from the Lexer
    #
    # Note: This function has been carefully implemented to be as fast
    # as possible.  Don't make changes unless you really know what
    # you are doing
    # ------------------------------------------------------------
    def token(self):
        """Return the next token, or None once the input is exhausted.

        Raises LexError for unmatched input that the error rule does not
        consume (or when there is no error rule), and for a rule function
        returning a token type not in lextokens (non-optimized mode only).
        Raises RuntimeError if no input was ever supplied.
        """
        # Make local copies of frequently referenced attributes
        lexpos = self.lexpos
        lexlen = self.lexlen
        lexignore = self.lexignore
        lexdata = self.lexdata

        while lexpos < lexlen:
            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
            if lexdata[lexpos] in lexignore:
                lexpos += 1
                continue

            # Look for a regular expression match
            for lexre, lexindexfunc in self.lexre:
                m = lexre.match(lexdata, lexpos)
                if not m:
                    continue

                # Create a token for return
                tok = LexToken()
                tok.value = m.group()
                tok.lineno = self.lineno
                tok.lexpos = lexpos

                i = m.lastindex
                func, tok.type = lexindexfunc[i]

                if not func:
                    # If no token type was set, it's an ignored token
                    if tok.type:
                        self.lexpos = m.end()
                        return tok
                    else:
                        lexpos = m.end()
                        break

                lexpos = m.end()

                # If token is processed by a function, call it

                tok.lexer = self      # Set additional attributes useful in token rules
                self.lexmatch = m
                self.lexpos = lexpos

                newtok = func(tok)

                # Every function must return a token, if nothing, we just move to next token
                if not newtok:
                    lexpos = self.lexpos         # This is here in case user has updated lexpos.
                    lexignore = self.lexignore   # This is here in case there was a state change
                    break

                # Verify type of the token.  If not in the token map, raise an error
                if not self.lexoptimize:
                    if not newtok.type in self.lextokens:
                        raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                            func_code(func).co_filename, func_code(func).co_firstlineno,
                            func.__name__, newtok.type), lexdata[lexpos:])

                return newtok
            else:
                # No match, see if in literals
                if lexdata[lexpos] in self.lexliterals:
                    tok = LexToken()
                    tok.value = lexdata[lexpos]
                    tok.lineno = self.lineno
                    tok.type = tok.value
                    tok.lexpos = lexpos
                    self.lexpos = lexpos + 1
                    return tok

                # No match. Call t_error() if defined.
                if self.lexerrorf:
                    tok = LexToken()
                    tok.value = self.lexdata[lexpos:]
                    tok.lineno = self.lineno
                    tok.type = "error"
                    tok.lexer = self
                    tok.lexpos = lexpos
                    self.lexpos = lexpos
                    newtok = self.lexerrorf(tok)
                    if lexpos == self.lexpos:
                        # Error method didn't change text position at all. This is an error.
                        raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                    lexpos = self.lexpos
                    if not newtok:
                        continue
                    return newtok

                self.lexpos = lexpos
                raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:])

        self.lexpos = lexpos + 1
        if self.lexdata is None:
            raise RuntimeError("No input string given with input()")
        return None

    # Iterator interface
    def __iter__(self):
        return self

    def next(self):
        """Return the next token; raise StopIteration at end of input."""
        t = self.token()
        if t is None:
            raise StopIteration
        return t

    __next__ = next

411

412

# -----------------------------------------------------------------------------

413

# ==== Lex Builder ===

414

415

# The functions and classes below are used to collect lexing information

416

# and build a Lexer object from it.

417

# -----------------------------------------------------------------------------

418

419

# -----------------------------------------------------------------------------

420

# get_caller_module_dict()

421

422

# This function returns a dictionary containing all of the symbols defined within

423

# a caller further down the call stack. This is used to get the environment

424

# associated with the lex() call if none was provided.

425

# -----------------------------------------------------------------------------

426

427

def get_caller_module_dict(levels):
    """Return the symbols visible in a frame further up the call stack.

    *levels* counts frames upward from this function (1 is the direct
    caller).  The result is a copy of that frame's globals, overlaid with
    its locals when the two are different dictionaries.
    """
    # sys._getframe(0) is this function's own frame, which is the frame the
    # former raise/except idiom started walking from.
    f = sys._getframe(levels)
    ldict = f.f_globals.copy()
    # Identity test: at module level both names refer to the same dict.
    # Comparing by value would compare every global with ==, which is
    # wasted work and can raise for values with unusual __eq__.
    if f.f_globals is not f.f_locals:
        ldict.update(f.f_locals)

    return ldict

441

442

# -----------------------------------------------------------------------------

443

# _funcs_to_names()

444

445

# Given a list of regular expression functions, this converts it to a list

446

# suitable for output to a table file

447

# -----------------------------------------------------------------------------

448

449

def _funcs_to_names(funclist, namelist):
    """Pair each rule entry with its symbol name for output to a table file.

    Entries of *funclist* whose first element is truthy become
    ``(name, entry[1])`` using the matching item of *namelist*; every
    other entry is passed through unchanged.
    """
    converted = []
    for entry, symbol in zip(funclist, namelist):
        if not entry or not entry[0]:
            converted.append(entry)
            continue
        converted.append((symbol, entry[1]))
    return converted

457

458

# -----------------------------------------------------------------------------

459

# _names_to_funcs()

460

461

# Given a list of regular expression function names, this converts it back to

462

# functions.

463

# -----------------------------------------------------------------------------

464

465

def _names_to_funcs(namelist, fdict):
    """Inverse of _funcs_to_names(): turn symbol names back into functions.

    Entries of *namelist* whose first element is truthy become
    ``(fdict[entry[0]], entry[1])``; every other entry is passed through
    unchanged.
    """
    restored = []
    for entry in namelist:
        if not entry or not entry[0]:
            restored.append(entry)
            continue
        restored.append((fdict[entry[0]], entry[1]))
    return restored

473

474

# -----------------------------------------------------------------------------

475

# _form_master_re()

476

477

# This function takes a list of all of the regex components and attempts to

478

# form the master regular expression. Given limitations in the Python re

479

# module, it may be necessary to break the master regex into separate expressions.

480

# -----------------------------------------------------------------------------

481

482

def _form_master_re(relist, reflags, ldict, toknames):
    """Combine the regex components in *relist* into master expression(s).

    Returns three parallel lists: ``[(compiled_re, index_to_rule)]``, the
    regex source strings, and the per-group symbol names.  If the combined
    pattern cannot be built, *relist* is split in half and each half is
    processed separately, which may yield several master expressions.
    A single component that cannot be built on its own re-raises the
    underlying error.

    NOTE(review): an empty *relist* returns a bare ``[]`` rather than three
    lists; callers that unpack the result must not pass an empty list.
    """
    if not relist:
        return []
    regex = "|".join(relist)
    try:
        lexre = re.compile(regex, re.VERBOSE | reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1)
        lexindexnames = lexindexfunc[:]

        for f, i in lexre.groupindex.items():
            handle = ldict.get(f, None)
            if isinstance(handle, (types.FunctionType, types.MethodType)):
                lexindexfunc[i] = (handle, toknames[f])
                lexindexnames[i] = f
            elif handle is not None:
                lexindexnames[i] = f
                if f.find("ignore_") > 0:
                    lexindexfunc[i] = (None, None)
                else:
                    lexindexfunc[i] = (None, toknames[f])

        return [(lexre, lexindexfunc)], [regex], [lexindexnames]
    except Exception:
        # A lone component cannot be split any further; halving it would
        # hand the same list back to this function and recurse forever.
        if len(relist) == 1:
            raise
        m = int(len(relist) / 2)
        llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames)
        rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames)
        return llist + rlist, lre + rre, lnames + rnames

511

512

# -----------------------------------------------------------------------------

513

# def _statetoken(s,names)

514

515

# Given a declaration name s of the form "t_<state>_..._<token>" and a dictionary whose keys are

516

# state names, this function returns a tuple (states,tokenname) where states

517

# is a tuple of state names and tokenname is the name of the token. For example,

518

# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')

519

# -----------------------------------------------------------------------------

520

521

def _statetoken(s, names):
    """Split a rule name into ``(states, tokenname)``.

    *s* is split on underscores.  The parts after the first are consumed as
    state names for as long as they are keys of *names* or the word 'ANY';
    the last part is never consumed.  The remaining parts, re-joined with
    underscores, form the token name.  With no state parts the states
    default to ``('INITIAL',)``; if 'ANY' is among them, the states become
    all keys of *names*.

    *s* must contain at least one underscore.
    """
    parts = s.split("_")
    for i in range(1, len(parts)):
        part = parts[i]
        if part not in names and part != 'ANY':
            break
    # i now indexes the first part of the token name

    if i > 1:
        states = tuple(parts[1:i])
    else:
        states = ('INITIAL',)

    if 'ANY' in states:
        states = tuple(names)

    tokenname = "_".join(parts[i:])
    return (states, tokenname)

536

537

538

# -----------------------------------------------------------------------------

539

# LexerReflect()

540

541

# This class represents information needed to build a lexer as extracted from a

542

# user's input file.

543

# -----------------------------------------------------------------------------

544

class LexerReflect(object):
    """Collects and validates the lexer specification found in a dictionary.

    *ldict* holds the user's symbols (``tokens``, ``literals``, ``states``
    and the ``t_`` rules).  Problems are reported through *log*; when any
    is found, the ``error`` attribute is set to 1.
    """

    def __init__(self, ldict, log=None, reflags=0):
        self.ldict = ldict
        self.error_func = None
        self.tokens = []
        self.reflags = reflags
        self.stateinfo = {'INITIAL': 'inclusive'}
        self.files = {}
        self.error = 0

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        self.get_tokens()
        self.get_literals()
        self.get_states()
        self.get_rules()

    # Validate all of the information
    def validate_all(self):
        """Run every validation step and return the error flag."""
        self.validate_tokens()
        self.validate_literals()
        self.validate_rules()
        return self.error

    # Get the tokens map
    def get_tokens(self):
        """Read ``tokens`` from the dictionary into self.tokens.

        Reports an error if it is missing, is not a list or tuple, or is
        empty; self.tokens is left untouched in those cases.
        """
        tokens = self.ldict.get("tokens", None)
        # Only a missing entry counts as "not defined", so that an empty
        # list is reported by the more specific check below.
        if tokens is None:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        """Report malformed token names (error) and duplicates (warning)."""
        terminals = {}
        for n in self.tokens:
            if not _is_identifier.match(n):
                self.log.error("Bad token name '%s'", n)
                self.error = 1
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    # Get the literals specifier
    def get_literals(self):
        self.literals = self.ldict.get("literals", "")

    # Validate literals
    def validate_literals(self):
        """Check that ``literals`` is a sequence of single characters."""
        try:
            for c in self.literals:
                if not isinstance(c, StringTypes) or len(c) > 1:
                    self.log.error("Invalid literal %s. Must be a single character", repr(c))
                    self.error = 1

        except TypeError:
            self.log.error("Invalid literals specification. literals must be a sequence of characters")
            self.error = 1

    def get_states(self):
        """Read ``states`` and record each valid entry in self.stateinfo."""
        self.states = self.ldict.get("states", None)
        # Build statemap
        if not self.states:
            return
        if not isinstance(self.states, (tuple, list)):
            self.log.error("states must be defined as a tuple or list")
            self.error = 1
            return
        for s in self.states:
            if not isinstance(s, tuple) or len(s) != 2:
                self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')", repr(s))
                self.error = 1
                continue
            name, statetype = s
            if not isinstance(name, StringTypes):
                self.log.error("State name %s must be a string", repr(name))
                self.error = 1
                continue
            if not (statetype == 'inclusive' or statetype == 'exclusive'):
                self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name)
                self.error = 1
                continue
            if name in self.stateinfo:
                self.log.error("State '%s' already defined", name)
                self.error = 1
                continue
            self.stateinfo[name] = statetype

    # Get all of the symbols with a t_ prefix and sort them into various
    # categories (functions, strings, error functions, and ignore characters)
    def get_rules(self):
        tsymbols = [f for f in self.ldict if f[:2] == 't_']

        # Now build up a list of functions and a list of strings

        self.toknames = {}        # Mapping of symbols to token names
        self.funcsym = {}         # Symbols defined as functions
        self.strsym = {}          # Symbols defined as strings
        self.ignore = {}          # Ignore strings by state
        self.errorf = {}          # Error functions by state

        for s in self.stateinfo:
            self.funcsym[s] = []
            self.strsym[s] = []

        if len(tsymbols) == 0:
            self.log.error("No rules of the form t_rulename are defined")
            self.error = 1
            return

        for f in tsymbols:
            t = self.ldict[f]
            states, tokname = _statetoken(f, self.stateinfo)
            self.toknames[f] = tokname

            if hasattr(t, "__call__"):
                if tokname == 'error':
                    for s in states:
                        self.errorf[s] = t
                elif tokname == 'ignore':
                    line = func_code(t).co_firstlineno
                    srcfile = func_code(t).co_filename
                    self.log.error("%s:%d: Rule '%s' must be defined as a string", srcfile, line, t.__name__)
                    self.error = 1
                else:
                    for s in states:
                        self.funcsym[s].append((f, t))
            elif isinstance(t, StringTypes):
                if tokname == 'ignore':
                    for s in states:
                        self.ignore[s] = t
                    if "\\" in t:
                        self.log.warning("%s contains a literal backslash '\\'", f)

                elif tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", f)
                    self.error = 1
                else:
                    for s in states:
                        self.strsym[s].append((f, t))
            else:
                self.log.error("%s not defined as a function or string", f)
                self.error = 1

        # Sort the functions by line number
        for f in self.funcsym.values():
            if sys.version_info[0] < 3:
                f.sort(lambda x, y: cmp(func_code(x[1]).co_firstlineno, func_code(y[1]).co_firstlineno))
            else:
                # Python 3.0
                f.sort(key=lambda x: func_code(x[1]).co_firstlineno)

        # Sort the strings by regular expression length (longest first)
        for s in self.strsym.values():
            if sys.version_info[0] < 3:
                s.sort(lambda x, y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))
            else:
                # Python 3.0
                s.sort(key=lambda x: len(x[1]), reverse=True)

    def _check_argcount(self, f, srcfile, line):
        # Report a rule/error function taking the wrong number of arguments
        # (2 for bound methods, 1 otherwise); return True when it is fine.
        if isinstance(f, types.MethodType):
            reqargs = 2
        else:
            reqargs = 1
        nargs = func_code(f).co_argcount
        ok = True
        if nargs > reqargs:
            self.log.error("%s:%d: Rule '%s' has too many arguments", srcfile, line, f.__name__)
            self.error = 1
            ok = False
        if nargs < reqargs:
            self.log.error("%s:%d: Rule '%s' requires an argument", srcfile, line, f.__name__)
            self.error = 1
            ok = False
        return ok

    # Validate all of the t_rules collected
    def validate_rules(self):
        """Check every collected rule, state by state, then scan the source
        files of the rule functions for duplicate definitions."""
        for state in self.stateinfo:
            # Validate all rules defined by functions

            for fname, f in self.funcsym[state]:
                line = func_code(f).co_firstlineno
                srcfile = func_code(f).co_filename
                self.files[srcfile] = 1

                if not self._check_argcount(f, srcfile, line):
                    continue

                if not f.__doc__:
                    self.log.error("%s:%d: No regular expression defined for rule '%s'", srcfile, line, f.__name__)
                    self.error = 1
                    continue

                try:
                    c = re.compile("(?P<%s>%s)" % (fname, f.__doc__), re.VERBOSE | self.reflags)
                    if c.match(""):
                        self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", srcfile, line, f.__name__)
                        self.error = 1
                except re.error:
                    _etype, e, _etrace = sys.exc_info()
                    self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", srcfile, line, f.__name__, e)
                    if '#' in f.__doc__:
                        self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", srcfile, line, f.__name__)
                    self.error = 1

            # Validate all rules defined by strings
            for name, r in self.strsym[state]:
                tokname = self.toknames[name]
                if tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", name)
                    self.error = 1
                    continue

                if not tokname in self.tokens and tokname.find("ignore_") < 0:
                    self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname)
                    self.error = 1
                    continue

                try:
                    c = re.compile("(?P<%s>%s)" % (name, r), re.VERBOSE | self.reflags)
                    if (c.match("")):
                        self.log.error("Regular expression for rule '%s' matches empty string", name)
                        self.error = 1
                except re.error:
                    _etype, e, _etrace = sys.exc_info()
                    self.log.error("Invalid regular expression for rule '%s'. %s", name, e)
                    if '#' in r:
                        self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name)
                    self.error = 1

            if not self.funcsym[state] and not self.strsym[state]:
                self.log.error("No rules defined for state '%s'", state)
                self.error = 1

            # Validate the error function
            efunc = self.errorf.get(state, None)
            if efunc:
                line = func_code(efunc).co_firstlineno
                srcfile = func_code(efunc).co_filename
                self.files[srcfile] = 1
                self._check_argcount(efunc, srcfile, line)

        for f in self.files:
            self.validate_file(f)

    # -----------------------------------------------------------------------------
    # validate_file()
    #
    # This checks to see if there are duplicated t_rulename() functions or strings
    # in the parser input file.  This is done using a simple regular expression
    # match on each line in the given file.
    # -----------------------------------------------------------------------------
    def validate_file(self, filename):
        """Report ``t_`` names that are defined more than once in *filename*.

        Files without a ``.py`` extension and files that cannot be opened
        are skipped silently.
        """
        import os.path
        base, ext = os.path.splitext(filename)
        if ext != '.py':
            return         # No idea what the file is. Return OK

        try:
            f = open(filename)
            try:
                lines = f.readlines()
            finally:
                # Close the handle even if reading fails
                f.close()
        except IOError:
            return                      # Couldn't find the file.  Don't worry about it

        fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
        sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')

        counthash = {}
        linen = 1
        for l in lines:
            m = fre.match(l)
            if not m:
                m = sre.match(l)
            if m:
                name = m.group(1)
                prev = counthash.get(name)
                if not prev:
                    counthash[name] = linen
                else:
                    self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d", filename, linen, name, prev)
                    self.error = 1
            linen += 1

859

860

# -----------------------------------------------------------------------------

861

# lex(module)

862

863

# Build all of the regular expression rules from definitions in the supplied module

864

# -----------------------------------------------------------------------------

865

def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None):
    """Build a Lexer from the rule definitions found in a module or object.

    Parameters:
        module    -- object whose attributes (as listed by dir()) supply the
                     lexer definitions.  When neither module nor object is
                     given, the dictionary returned by
                     get_caller_module_dict(2) is used instead.
        object    -- if true, used in place of module.
        debug     -- if true, rule and master-regex information is written to
                     debuglog.
        optimize  -- if true, validation is skipped, a previously written
                     table named by lextab is tried first, and the table is
                     (re)written once the lexer has been built.
        lextab    -- table name handed to readtab()/writetab().
        reflags   -- regular expression flags passed to LexerReflect and
                     _form_master_re and stored on the lexer.
        nowarn    -- accepted for interface compatibility; not used here.
        outputdir -- passed to writetab() together with lextab.
        debuglog  -- logger for debug output; defaults to
                     PlyLogger(sys.stderr) when debug is true.
        errorlog  -- logger for errors/warnings; defaults to
                     PlyLogger(sys.stderr).

    Returns the Lexer instance.  As a side effect the module-level names
    ``lexer``, ``token`` and ``input`` are rebound to the new lexer and its
    token()/input() methods.

    Raises SyntaxError when not optimizing and validate_all() reports a
    problem.
    """
    global lexer, token, input

    lexobj = Lexer()
    lexobj.lexoptimize = optimize

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    if debug:
        if debuglog is None:
            debuglog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the lexer
    if object:
        module = object

    if module:
        _items = [(k,getattr(module,k)) for k in dir(module)]
        ldict = dict(_items)
    else:
        ldict = get_caller_module_dict(2)

    # Collect lexer information from the dictionary
    linfo = LexerReflect(ldict,log=errorlog,reflags=reflags)
    linfo.get_all()
    if not optimize:
        if linfo.validate_all():
            raise SyntaxError("Can't build lexer")

    if optimize and lextab:
        try:
            lexobj.readtab(lextab,ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj

        except ImportError:
            # No usable table could be imported; fall through and build one
            pass

    # Dump some basic debugging information
    if debug:
        debuglog.info("lex: tokens = %r", linfo.tokens)
        debuglog.info("lex: literals = %r", linfo.literals)
        debuglog.info("lex: states = %r", linfo.stateinfo)

    # Build a dictionary of valid token names
    lexobj.lextokens = { }
    for n in linfo.tokens:
        lexobj.lextokens[n] = 1

    # Get literals specification.  A list/tuple is joined into one string
    # using an empty instance of its first element's type as the separator;
    # an empty sequence has no first element, so it maps to "".
    if isinstance(linfo.literals,(list,tuple)):
        if linfo.literals:
            lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
        else:
            lexobj.lexliterals = ""
    else:
        lexobj.lexliterals = linfo.literals

    # Get the stateinfo dictionary
    stateinfo = linfo.stateinfo

    # Collect the named-group regex fragments for every state
    regexs = { }
    for state in stateinfo:
        regex_list = []

        # Add rules defined by functions first
        for fname, f in linfo.funcsym[state]:
            regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)

        # Now add all of the simple rules
        for name,r in linfo.strsym[state]:
            regex_list.append("(?P<%s>%s)" % (name,r))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)

        regexs[state] = regex_list

    # Build the master regular expressions

    if debug:
        debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")

    for state in regexs:
        lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
        lexobj.lexstatere[state] = lexre
        lexobj.lexstateretext[state] = re_text
        lexobj.lexstaterenames[state] = re_names
        if debug:
            for i in range(len(re_text)):
                debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i])

    # For inclusive states, we need to add the regular expressions from the INITIAL state
    for state,stype in stateinfo.items():
        if state != "INITIAL" and stype == 'inclusive':
            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
            lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])

    lexobj.lexstateinfo = stateinfo
    lexobj.lexre = lexobj.lexstatere["INITIAL"]
    lexobj.lexretext = lexobj.lexstateretext["INITIAL"]
    lexobj.lexreflags = reflags

    # Set up ignore variables
    lexobj.lexstateignore = linfo.ignore
    lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")

    # Set up error functions
    lexobj.lexstateerrorf = linfo.errorf
    lexobj.lexerrorf = linfo.errorf.get("INITIAL",None)
    if not lexobj.lexerrorf:
        errorlog.warning("No t_error rule is defined")

    # Check state information for ignore and error rules.  Exclusive states
    # only get a warning; inclusive states inherit the INITIAL settings.
    # linfo.errorf / linfo.ignore are the same dictionaries stored on lexobj
    # above, so the inherited entries are visible to the lexer as well.
    for s,stype in stateinfo.items():
        if stype == 'exclusive':
            if s not in linfo.errorf:
                errorlog.warning("No error rule is defined for exclusive state '%s'", s)
            if s not in linfo.ignore and lexobj.lexignore:
                errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
        elif stype == 'inclusive':
            if s not in linfo.errorf:
                linfo.errorf[s] = linfo.errorf.get("INITIAL",None)
            if s not in linfo.ignore:
                linfo.ignore[s] = linfo.ignore.get("INITIAL","")

    # Create global versions of the token() and input() functions
    token = lexobj.token
    input = lexobj.input
    lexer = lexobj

    # If in optimize mode, we write the lextab
    if lextab and optimize:
        lexobj.writetab(lextab,outputdir)

    return lexobj

1007

1008

# -----------------------------------------------------------------------------

1009

# runmain()

1010

1011

# This runs the lexer as a main program

1012

# -----------------------------------------------------------------------------

1013

1014

def runmain(lexer=None,data=None):
    """Tokenize some input and print one line per token to standard output.

    Parameters:
        lexer -- object providing input() and token(); when not given, the
                 module-level names ``input`` and ``token`` are used.
        data  -- text to tokenize.  When false, the text is read from the
                 file named by sys.argv[1], or from standard input if no
                 such argument exists.

    Each token is written as "(type,value,lineno,lexpos)"; the loop ends at
    the first false value returned by token().
    """
    if not data:
        try:
            filename = sys.argv[1]
        except IndexError:
            sys.stdout.write("Reading from standard input (type EOF to end):\n")
            data = sys.stdin.read()
        else:
            f = open(filename)
            try:
                data = f.read()
            finally:
                # Release the handle even if read() fails
                f.close()

    if lexer:
        _input = lexer.input
    else:
        _input = input
    _input(data)
    if lexer:
        _token = lexer.token
    else:
        _token = token

    while 1:
        tok = _token()
        if not tok:
            break
        sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos))

1039

1040

# -----------------------------------------------------------------------------

1041

# @TOKEN(regex)

1042

1043

# This decorator function can be used to set the regex expression on a function

1044

# when its docstring might need to be set in an alternative way

1045

# -----------------------------------------------------------------------------

1046

1047

def TOKEN(r):
    """Return a decorator that installs *r* as the decorated function's docstring.

    If *r* has a ``__call__`` attribute, its own ``__doc__`` is copied;
    otherwise *r* itself becomes the docstring.  The decorated function is
    returned unchanged apart from its ``__doc__``.
    """
    def set_doc(f):
        # Default to using r verbatim; a callable contributes its docstring
        pattern = r
        if hasattr(r,"__call__"):
            pattern = r.__doc__
        f.__doc__ = pattern
        return f
    return set_doc

# Alternative spelling of the TOKEN decorator
Token = TOKEN

1058

Older »