~ubuntu-branches/ubuntu/vivid/emscripten/vivid

debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)

390

else:

391

debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)

392

393

# --! DEBUG

394

395

if plen:

396

targ = symstack[-plen-1:]

397

targ[0] = sym

398

399

# --! TRACKING

400

if tracking:

401

t1 = targ[1]

402

sym.lineno = t1.lineno

403

sym.lexpos = t1.lexpos

404

t1 = targ[-1]

405

sym.endlineno = getattr(t1,"endlineno",t1.lineno)

406

sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

407

408

# --! TRACKING

409

410

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

411

# The code enclosed in this section is duplicated

412

# below as a performance optimization. Make sure

413

# changes get made in both locations.

414

415

pslice.slice = targ

416

417

try:

418

# Call the grammar rule with our special slice object

419

del symstack[-plen:]

420

del statestack[-plen:]

421

p.callable(pslice)

422

# --! DEBUG

423

debug.info("Result : %s", format_result(pslice[0]))

424

# --! DEBUG

425

symstack.append(sym)

426

state = goto[statestack[-1]][pname]

427

statestack.append(state)

428

except SyntaxError:

429

# If an error was set. Enter error recovery state

430

lookaheadstack.append(lookahead)

431

symstack.pop()

432

statestack.pop()

433

state = statestack[-1]

434

sym.type = 'error'

435

lookahead = sym

436

errorcount = error_count

437

self.errorok = 0

438

continue

439

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

440

441

else:

442

443

# --! TRACKING

444

if tracking:

445

sym.lineno = lexer.lineno

446

sym.lexpos = lexer.lexpos

447

# --! TRACKING

448

449

targ = [ sym ]

450

451

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

452

# The code enclosed in this section is duplicated

453

# above as a performance optimization. Make sure

454

# changes get made in both locations.

455

456

pslice.slice = targ

457

458

try:

459

# Call the grammar rule with our special slice object

460

p.callable(pslice)

461

# --! DEBUG

462

debug.info("Result : %s", format_result(pslice[0]))

463

# --! DEBUG

464

symstack.append(sym)

465

state = goto[statestack[-1]][pname]

466

statestack.append(state)

467

except SyntaxError:

468

# If an error was set. Enter error recovery state

469

lookaheadstack.append(lookahead)

470

symstack.pop()

471

statestack.pop()

472

state = statestack[-1]

473

sym.type = 'error'

474

lookahead = sym

475

errorcount = error_count

476

self.errorok = 0

477

continue

478

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

479

480

if t == 0:

481

n = symstack[-1]

482

result = getattr(n,"value",None)

483

# --! DEBUG

484

debug.info("Done : Returning %s", format_result(result))

485

debug.info("PLY: PARSE DEBUG END")

486

# --! DEBUG

487

return result

488

489

if t == None:

490

491

# --! DEBUG

492

debug.error('Error : %s',

493

("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())

494

# --! DEBUG

495

496

# We have some kind of parsing error here. To handle

497

# this, we are going to push the current token onto

498

# the tokenstack and replace it with an 'error' token.

499

# If there are any synchronization rules, they may

500

# catch it.

501

502

# In addition to pushing the error token, we call

503

# the user defined p_error() function if this is the

504

# first syntax error. This function is only called if

505

# errorcount == 0.

506

if errorcount == 0 or self.errorok:

507

errorcount = error_count

508

self.errorok = 0

509

errtoken = lookahead

510

if errtoken.type == "$end":

511

errtoken = None # End of file!

512

if self.errorfunc:

513

global errok,token,restart

514

errok = self.errok # Set some special functions available in error recovery

515

token = get_token

516

restart = self.restart

517

if errtoken and not hasattr(errtoken,'lexer'):

518

errtoken.lexer = lexer

519

tok = self.errorfunc(errtoken)

520

del errok, token, restart # Delete special functions

521

522

if self.errorok:

523

# User must have done some kind of panic

524

# mode recovery on their own. The

525

# returned token is the next lookahead

526

lookahead = tok

527

errtoken = None

528

continue

529

else:

530

if errtoken:

531

if hasattr(errtoken,"lineno"): lineno = lookahead.lineno

532

else: lineno = 0

533

if lineno:

534

sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))

535

else:

536

sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)

537

else:

538

sys.stderr.write("yacc: Parse error in input. EOF\n")

539

return

540

541

else:

542

errorcount = error_count

543

544

# case 1: the statestack only has 1 entry on it. If we're in this state, the

545

# entire parse has been rolled back and we're completely hosed. The token is

546

# discarded and we just keep going.

547

548

if len(statestack) <= 1 and lookahead.type != "$end":

549

lookahead = None

550

errtoken = None

551

state = 0

552

# Nuke the pushback stack

553

del lookaheadstack[:]

554

continue

555

556

# case 2: the statestack has a couple of entries on it, but we're

557

# at the end of the file. nuke the top entry and generate an error token

558

559

# Start nuking entries on the stack

560

if lookahead.type == "$end":

561

# Whoa. We're really hosed here. Bail out

562

return

563

564

if lookahead.type != 'error':

565

sym = symstack[-1]

566

if sym.type == 'error':

567

# Hmmm. Error is on top of stack, we'll just nuke input

568

# symbol and continue

569

lookahead = None

570

continue

571

t = YaccSymbol()

572

t.type = 'error'

573

if hasattr(lookahead,"lineno"):

574

t.lineno = lookahead.lineno

575

t.value = lookahead

576

lookaheadstack.append(lookahead)

577

lookahead = t

578

else:

579

symstack.pop()

580

statestack.pop()

581

state = statestack[-1] # Potential bug fix

582

583

continue

584

585

# Call an error function here

586

raise RuntimeError("yacc: internal parser error!!!\n")

587

588

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

589

# parseopt().

590

591

# Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.

592

# Edit the debug version above, then copy any modifications to the method

593

# below while removing #--! DEBUG sections.

594

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

595

596

597

def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
    """Run the LR parsing loop without debug logging.

    Parameters:
        input     - Optional input; when not None it is handed to
                    ``lexer.input()`` before parsing starts.
        lexer     - Lexer object.  When falsy, ``load_ply_lex().lexer`` is
                    used instead.
        debug     - Accepted for signature compatibility; not read by this
                    variant.
        tracking  - When true, each reduced symbol receives ``lineno`` and
                    ``lexpos`` (and, for non-empty productions,
                    ``endlineno``/``endlexpos``) copied from its operands
                    or from the lexer.
        tokenfunc - Optional callable used to fetch tokens instead of
                    ``lexer.token``.

    Returns:
        The ``value`` attribute (or None if absent) of the symbol on top of
        the symbol stack when the accept action (0) is reached.  Returns
        None when error recovery gives up.

    Raises:
        RuntimeError - if an action is neither handled as shift, reduce,
                       accept nor error.
    """
    lookahead = None                 # Current lookahead symbol
    lookaheadstack = [ ]             # Stack of lookahead symbols
    actions = self.action            # Local reference to action table (to avoid lookup on self.)
    goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
    prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
    pslice  = YaccProduction(None)   # Production object passed to grammar rules
    errorcount = 0                   # Used during error recovery

    # If no lexer was given, we will try to use the lex module
    if not lexer:
        lex = load_ply_lex()
        lexer = lex.lexer

    # Set up the lexer and parser objects on pslice
    pslice.lexer = lexer
    pslice.parser = self

    # If input was supplied, pass to lexer
    if input is not None:
        lexer.input(input)

    if tokenfunc is None:
        # Tokenize function
        get_token = lexer.token
    else:
        get_token = tokenfunc

    # Set up the state and symbol stacks

    statestack = [ ]                # Stack of parsing states
    self.statestack = statestack
    symstack   = [ ]                # Stack of grammar symbols
    self.symstack = symstack

    pslice.stack = symstack         # Put in the production
    errtoken   = None               # Err token

    # The start state is assumed to be (0,$end)

    statestack.append(0)
    sym = YaccSymbol()
    sym.type = '$end'
    symstack.append(sym)
    state = 0
    while 1:
        # Get the next symbol on the input.  If a lookahead symbol
        # is already set, we just use that. Otherwise, we'll pull
        # the next token off of the lookaheadstack or from the lexer

        if not lookahead:
            if not lookaheadstack:
                lookahead = get_token()     # Get the next token
            else:
                lookahead = lookaheadstack.pop()
            if not lookahead:
                lookahead = YaccSymbol()
                lookahead.type = '$end'

        # Check the action table
        ltype = lookahead.type
        t = actions[state].get(ltype)

        if t is not None:
            if t > 0:
                # shift a symbol on the stack
                statestack.append(t)
                state = t

                symstack.append(lookahead)
                lookahead = None

                # Decrease error count on successful shift
                if errorcount: errorcount -=1
                continue

            if t < 0:
                # reduce a symbol on the stack, emit a production
                p = prod[-t]
                pname = p.name
                plen  = p.len

                # Get production function
                sym = YaccSymbol()
                sym.type = pname       # Production name
                sym.value = None

                if plen:
                    # Slot 0 holds the result symbol; slots 1.. are the
                    # right-hand-side symbols taken from the stack.
                    targ = symstack[-plen-1:]
                    targ[0] = sym

                    # --! TRACKING
                    if tracking:
                        t1 = targ[1]
                        sym.lineno = t1.lineno
                        sym.lexpos = t1.lexpos
                        t1 = targ[-1]
                        sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                        sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                    # --! TRACKING

                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    # The code enclosed in this section is duplicated
                    # below as a performance optimization.  Make sure
                    # changes get made in both locations.

                    pslice.slice = targ

                    try:
                        # Call the grammar rule with our special slice object
                        del symstack[-plen:]
                        del statestack[-plen:]
                        p.callable(pslice)
                        symstack.append(sym)
                        state = goto[statestack[-1]][pname]
                        statestack.append(state)
                    except SyntaxError:
                        # If an error was set. Enter error recovery state
                        lookaheadstack.append(lookahead)
                        symstack.pop()
                        statestack.pop()
                        state = statestack[-1]
                        sym.type = 'error'
                        lookahead = sym
                        errorcount = error_count
                        self.errorok = 0
                    # Resume the main loop after both a successful
                    # reduction and a SyntaxError raised by the rule.
                    continue
                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                else:

                    # --! TRACKING
                    if tracking:
                        sym.lineno = lexer.lineno
                        sym.lexpos = lexer.lexpos
                    # --! TRACKING

                    targ = [ sym ]

                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    # The code enclosed in this section is duplicated
                    # above as a performance optimization.  Make sure
                    # changes get made in both locations.

                    pslice.slice = targ

                    try:
                        # Call the grammar rule with our special slice object
                        p.callable(pslice)
                        symstack.append(sym)
                        state = goto[statestack[-1]][pname]
                        statestack.append(state)
                    except SyntaxError:
                        # If an error was set. Enter error recovery state
                        lookaheadstack.append(lookahead)
                        symstack.pop()
                        statestack.pop()
                        state = statestack[-1]
                        sym.type = 'error'
                        lookahead = sym
                        errorcount = error_count
                        self.errorok = 0
                    # Resume the main loop after both a successful
                    # reduction and a SyntaxError raised by the rule.
                    continue
                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            if t == 0:
                n = symstack[-1]
                return getattr(n,"value",None)

        if t is None:

            # We have some kind of parsing error here.  To handle
            # this, we are going to push the current token onto
            # the tokenstack and replace it with an 'error' token.
            # If there are any synchronization rules, they may
            # catch it.
            #
            # In addition to pushing the error token, we call
            # the user defined p_error() function if this is the
            # first syntax error.  This function is only called if
            # errorcount == 0.
            if errorcount == 0 or self.errorok:
                errorcount = error_count
                self.errorok = 0
                errtoken = lookahead
                if errtoken.type == '$end':
                    errtoken = None               # End of file!
                if self.errorfunc:
                    global errok,token,restart
                    errok = self.errok        # Set some special functions available in error recovery
                    token = get_token
                    restart = self.restart
                    if errtoken and not hasattr(errtoken,'lexer'):
                        errtoken.lexer = lexer
                    tok = self.errorfunc(errtoken)
                    del errok, token, restart   # Delete special functions

                    if self.errorok:
                        # User must have done some kind of panic
                        # mode recovery on their own.  The
                        # returned token is the next lookahead
                        lookahead = tok
                        errtoken = None
                        continue
                else:
                    if errtoken:
                        if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                        else: lineno = 0
                        if lineno:
                            sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                        else:
                            # Newline-terminated like the other diagnostics.
                            sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                    else:
                        sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

            else:
                errorcount = error_count

            # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
            # entire parse has been rolled back and we're completely hosed.   The token is
            # discarded and we just keep going.

            if len(statestack) <= 1 and lookahead.type != '$end':
                lookahead = None
                errtoken = None
                state = 0
                # Nuke the pushback stack
                del lookaheadstack[:]
                continue

            # case 2: the statestack has a couple of entries on it, but we're
            # at the end of the file. nuke the top entry and generate an error token

            # Start nuking entries on the stack
            if lookahead.type == '$end':
                # Whoa. We're really hosed here. Bail out
                return

            if lookahead.type != 'error':
                sym = symstack[-1]
                if sym.type == 'error':
                    # Hmmm. Error is on top of stack, we'll just nuke input
                    # symbol and continue
                    lookahead = None
                    continue
                # Wrap the offending token in a synthetic 'error' symbol and
                # push the real token back so it is seen again afterwards.
                t = YaccSymbol()
                t.type = 'error'
                if hasattr(lookahead,"lineno"):
                    t.lineno = lookahead.lineno
                t.value = lookahead
                lookaheadstack.append(lookahead)
                lookahead = t
            else:
                symstack.pop()
                statestack.pop()
                state = statestack[-1]       # Potential bug fix

            continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")

860

861

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

862

# parseopt_notrack().

863

864

# Optimized version of parseopt() with line number tracking removed.

865

# DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove

866

# code in the #--! TRACKING sections

867

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

868

869

def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
    """Run the LR parsing loop without debug logging or position tracking.

    Parameters:
        input     - Optional input; when not None it is handed to
                    ``lexer.input()`` before parsing starts.
        lexer     - Lexer object.  When falsy, ``load_ply_lex().lexer`` is
                    used instead.
        debug     - Accepted for signature compatibility; not read by this
                    variant.
        tracking  - Accepted for signature compatibility; not read by this
                    variant.
        tokenfunc - Optional callable used to fetch tokens instead of
                    ``lexer.token``.

    Returns:
        The ``value`` attribute (or None if absent) of the symbol on top of
        the symbol stack when the accept action (0) is reached.  Returns
        None when error recovery gives up.

    Raises:
        RuntimeError - if an action is neither handled as shift, reduce,
                       accept nor error.
    """
    lookahead = None                 # Current lookahead symbol
    lookaheadstack = [ ]             # Stack of lookahead symbols
    actions = self.action            # Local reference to action table (to avoid lookup on self.)
    goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
    prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
    pslice  = YaccProduction(None)   # Production object passed to grammar rules
    errorcount = 0                   # Used during error recovery

    # If no lexer was given, we will try to use the lex module
    if not lexer:
        lex = load_ply_lex()
        lexer = lex.lexer

    # Set up the lexer and parser objects on pslice
    pslice.lexer = lexer
    pslice.parser = self

    # If input was supplied, pass to lexer
    if input is not None:
        lexer.input(input)

    if tokenfunc is None:
        # Tokenize function
        get_token = lexer.token
    else:
        get_token = tokenfunc

    # Set up the state and symbol stacks

    statestack = [ ]                # Stack of parsing states
    self.statestack = statestack
    symstack   = [ ]                # Stack of grammar symbols
    self.symstack = symstack

    pslice.stack = symstack         # Put in the production
    errtoken   = None               # Err token

    # The start state is assumed to be (0,$end)

    statestack.append(0)
    sym = YaccSymbol()
    sym.type = '$end'
    symstack.append(sym)
    state = 0
    while 1:
        # Get the next symbol on the input.  If a lookahead symbol
        # is already set, we just use that. Otherwise, we'll pull
        # the next token off of the lookaheadstack or from the lexer

        if not lookahead:
            if not lookaheadstack:
                lookahead = get_token()     # Get the next token
            else:
                lookahead = lookaheadstack.pop()
            if not lookahead:
                lookahead = YaccSymbol()
                lookahead.type = '$end'

        # Check the action table
        ltype = lookahead.type
        t = actions[state].get(ltype)

        if t is not None:
            if t > 0:
                # shift a symbol on the stack
                statestack.append(t)
                state = t

                symstack.append(lookahead)
                lookahead = None

                # Decrease error count on successful shift
                if errorcount: errorcount -=1
                continue

            if t < 0:
                # reduce a symbol on the stack, emit a production
                p = prod[-t]
                pname = p.name
                plen  = p.len

                # Get production function
                sym = YaccSymbol()
                sym.type = pname       # Production name
                sym.value = None

                if plen:
                    # Slot 0 holds the result symbol; slots 1.. are the
                    # right-hand-side symbols taken from the stack.
                    targ = symstack[-plen-1:]
                    targ[0] = sym

                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    # The code enclosed in this section is duplicated
                    # below as a performance optimization.  Make sure
                    # changes get made in both locations.

                    pslice.slice = targ

                    try:
                        # Call the grammar rule with our special slice object
                        del symstack[-plen:]
                        del statestack[-plen:]
                        p.callable(pslice)
                        symstack.append(sym)
                        state = goto[statestack[-1]][pname]
                        statestack.append(state)
                    except SyntaxError:
                        # If an error was set. Enter error recovery state
                        lookaheadstack.append(lookahead)
                        symstack.pop()
                        statestack.pop()
                        state = statestack[-1]
                        sym.type = 'error'
                        lookahead = sym
                        errorcount = error_count
                        self.errorok = 0
                    # Resume the main loop after both a successful
                    # reduction and a SyntaxError raised by the rule.
                    continue
                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                else:

                    targ = [ sym ]

                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    # The code enclosed in this section is duplicated
                    # above as a performance optimization.  Make sure
                    # changes get made in both locations.

                    pslice.slice = targ

                    try:
                        # Call the grammar rule with our special slice object
                        p.callable(pslice)
                        symstack.append(sym)
                        state = goto[statestack[-1]][pname]
                        statestack.append(state)
                    except SyntaxError:
                        # If an error was set. Enter error recovery state
                        lookaheadstack.append(lookahead)
                        symstack.pop()
                        statestack.pop()
                        state = statestack[-1]
                        sym.type = 'error'
                        lookahead = sym
                        errorcount = error_count
                        self.errorok = 0
                    # Resume the main loop after both a successful
                    # reduction and a SyntaxError raised by the rule.
                    continue
                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            if t == 0:
                n = symstack[-1]
                return getattr(n,"value",None)

        if t is None:

            # We have some kind of parsing error here.  To handle
            # this, we are going to push the current token onto
            # the tokenstack and replace it with an 'error' token.
            # If there are any synchronization rules, they may
            # catch it.
            #
            # In addition to pushing the error token, we call
            # the user defined p_error() function if this is the
            # first syntax error.  This function is only called if
            # errorcount == 0.
            if errorcount == 0 or self.errorok:
                errorcount = error_count
                self.errorok = 0
                errtoken = lookahead
                if errtoken.type == '$end':
                    errtoken = None               # End of file!
                if self.errorfunc:
                    global errok,token,restart
                    errok = self.errok        # Set some special functions available in error recovery
                    token = get_token
                    restart = self.restart
                    if errtoken and not hasattr(errtoken,'lexer'):
                        errtoken.lexer = lexer
                    tok = self.errorfunc(errtoken)
                    del errok, token, restart   # Delete special functions

                    if self.errorok:
                        # User must have done some kind of panic
                        # mode recovery on their own.  The
                        # returned token is the next lookahead
                        lookahead = tok
                        errtoken = None
                        continue
                else:
                    if errtoken:
                        if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                        else: lineno = 0
                        if lineno:
                            sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                        else:
                            # Newline-terminated like the other diagnostics.
                            sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                    else:
                        sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

            else:
                errorcount = error_count

            # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
            # entire parse has been rolled back and we're completely hosed.   The token is
            # discarded and we just keep going.

            if len(statestack) <= 1 and lookahead.type != '$end':
                lookahead = None
                errtoken = None
                state = 0
                # Nuke the pushback stack
                del lookaheadstack[:]
                continue

            # case 2: the statestack has a couple of entries on it, but we're
            # at the end of the file. nuke the top entry and generate an error token

            # Start nuking entries on the stack
            if lookahead.type == '$end':
                # Whoa. We're really hosed here. Bail out
                return

            if lookahead.type != 'error':
                sym = symstack[-1]
                if sym.type == 'error':
                    # Hmmm. Error is on top of stack, we'll just nuke input
                    # symbol and continue
                    lookahead = None
                    continue
                # Wrap the offending token in a synthetic 'error' symbol and
                # push the real token back so it is seen again afterwards.
                t = YaccSymbol()
                t.type = 'error'
                if hasattr(lookahead,"lineno"):
                    t.lineno = lookahead.lineno
                t.value = lookahead
                lookaheadstack.append(lookahead)
                lookahead = t
            else:
                symstack.pop()
                statestack.pop()
                state = statestack[-1]       # Potential bug fix

            continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")

1115

1116

# -----------------------------------------------------------------------------

1117

# === Grammar Representation ===

1118

1119

# The following functions, classes, and variables are used to represent and

1120

# manipulate the rules that make up a grammar.

1121

# -----------------------------------------------------------------------------

1122

1123

import re

1124

1125

# regex matching identifiers

1126

# Matches strings made up solely of ASCII letters, digits, '_' and '-'
# (note that the character class admits '-' as well).
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

1127

1128

# -----------------------------------------------------------------------------

1129

# class Production:

1130

1131

# This class stores the raw information about a single production or grammar rule.

1132

# A grammar rule refers to a specification such as this:

1133

1134

# expr : expr PLUS term

1135

1136

# Here are the basic attributes defined on all productions

1137

1138

# name - Name of the production. For example 'expr'

1139

# prod - A list of symbols on the right side ['expr','PLUS','term']

1140

# prec - Production precedence level

1141

# number - Production number.

1142

# func - Function that executes on reduce

1143

# file - File where production function is defined

1144

# lineno - Line number where production function is defined

1145

1146

# The following attributes are defined or optional.

1147

1148

# len - Length of the production (number of symbols on right hand side)

1149

# usyms - Set of unique symbols found in the production

1150

# -----------------------------------------------------------------------------

1151

1152

class Production(object):
    """Raw information about a single production (grammar rule).

    Instances always evaluate as true, even for an empty right-hand side
    (``len(p) == 0``), on both Python 2 (``__nonzero__``) and Python 3
    (``__bool__``).
    """

    # Class-level default; this class only initializes it to 0.
    reduced = 0

    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
        """Store the rule description.

        number     - Production number.
        name       - Name of the production (left-hand side).
        prod       - Iterable of right-hand-side symbol names; stored as a tuple.
        precedence - Precedence tuple, stored as ``self.prec``.
        func       - Name of the function executed on reduce (resolved by bind()).
        file, line - Location where the production function is defined.
        """
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Internal settings used during table construction

        self.len  = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        # (first-occurrence order is preserved)
        self.usyms = [ ]
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production("+str(self)+")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Python 2 truth hook: a production is always true.
        return 1

    def __bool__(self):
        # Python 3 truth hook.  Without it, truth testing falls back to
        # __len__ and an empty production would evaluate as false.
        return True

    def __getitem__(self,index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self,n):
        if n > len(self.prod): return None
        p = LRItem(self,n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        # NOTE(review): Prodnames is not defined in this class; it is
        # presumably expected to exist at module level -- confirm.  A
        # NameError would not be caught by the handler below.
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]

1220

1221

# This class serves as a minimal standin for Production objects when

1222

# reading table data from files. It only contains information

1223

# actually used by the LR parsing engine, plus some additional

1224

# debugging information.

1225

class MiniProduction(object):
    """Minimal stand-in for a Production, holding only plain attributes.

    The constructor parameters named ``str`` and ``len`` shadow builtins;
    the names are part of the existing signature and are kept as-is.
    """

    def __init__(self,str,name,len,func,file,line):
        # Descriptive text returned by __str__ / embedded in __repr__
        self.str = str
        # Rule identity and size
        self.name = name
        self.len = len
        # Function name, resolved to a callable later by bind()
        self.func = func
        self.callable = None
        # Definition location
        self.file = file
        self.line = line

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Bind the production function name to a callable
    def bind(self,pdict):
        if not self.func:
            # No function name recorded: leave self.callable untouched
            return
        self.callable = pdict[self.func]

1243

1244

1245

# -----------------------------------------------------------------------------

1246

# class LRItem

1247

1248

# This class represents a specific stage of parsing a production rule. For

1249

# example:

1250

1251

# expr : expr . PLUS term

1252

1253

# In the above, the "." represents the current location of the parse. Here

1254

# are the basic attributes:

1255

1256

# name - Name of the production. For example 'expr'

1257

# prod - A list of symbols on the right side ['expr','.', 'PLUS','term']

1258

# number - Production number.

1259

1260

# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term'

1261

# then lr_next refers to 'expr -> expr PLUS . term'

1262

# lr_index - LR item index (location of the ".") in the prod list.

1263

# lookaheads - LALR lookahead symbols for this item

1264

# len - Length of the production (number of symbols on right hand side)

1265

# lr_after - List of all productions that immediately follow

1266

# lr_before - Grammar symbol immediately before

1267

# -----------------------------------------------------------------------------

1268

1269

class LRItem(object):
    """A production with a "." marker inserted at a given position."""

    def __init__(self,p,n):
        """Build the item for production ``p`` with the marker at index ``n``."""
        self.name = p.name
        dotted = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = { }
        # Place the parse-position marker, then freeze the symbol sequence
        dotted.insert(n,".")
        self.prod = tuple(dotted)
        # Note: this length counts the "." marker as well
        self.len = len(self.prod)
        # Shares (does not copy) the production's unique-symbol list
        self.usyms = p.usyms

    def __str__(self):
        if not self.prod:
            return "%s -> <empty>" % self.name
        return "%s -> %s" % (self.name," ".join(self.prod))

    def __repr__(self):
        return "".join(["LRItem(", str(self), ")"])

1290

1291

# -----------------------------------------------------------------------------

1292

# rightmost_terminal()

1293

1294

# Return the rightmost terminal from a list of symbols. Used in add_production()

1295

# -----------------------------------------------------------------------------

1296

def rightmost_terminal(symbols, terminals):
    """Return the last entry of ``symbols`` that is contained in ``terminals``.

    Returns None when no entry of ``symbols`` is found in ``terminals``
    (including when ``symbols`` is empty).
    """
    # Walk from the right so the first hit is the rightmost terminal
    for candidate in reversed(symbols):
        if candidate in terminals:
            return candidate
    return None

1303

1304

# -----------------------------------------------------------------------------

1305

# === GRAMMAR CLASS ===

1306

1307

# The following class represents the contents of the specified grammar along

1308

# with various computed properties such as first sets, follow sets, LR items, etc.

1309

# This data is used for critical parts of the table generation process later.

1310

# -----------------------------------------------------------------------------

1311

1312

# Raised by Grammar methods (e.g. set_precedence(), add_production()) when a
# grammar specification is rejected.
class GrammarError(YaccError): pass

1313

1314

class Grammar(object):

1315

def __init__(self,terminals):
    """Create the empty grammar tables and register the terminal names.

    terminals - Iterable of terminal symbol names.  Each one, plus the
                special name 'error', gets its own empty list in
                ``self.Terminals``.
    """
    # A list of all of the productions.  The first entry is always
    # reserved for the purpose of building an augmented grammar.
    self.Productions  = [None]

    # A dictionary mapping the names of nonterminals to a list of all
    # productions of that nonterminal.
    self.Prodnames    = { }

    # A dictionary that is only used to detect duplicate productions.
    self.Prodmap      = { }

    # A dictionary mapping the names of terminal symbols to a list of the
    # rules where they are used.  Every name gets a distinct list object.
    self.Terminals    = dict([(term, []) for term in terminals])
    self.Terminals['error'] = []

    # A dictionary mapping names of nonterminals to a list of rule
    # numbers where they are used.
    self.Nonterminals = { }

    # A dictionary of precomputed FIRST(x) symbols
    self.First        = { }

    # A dictionary of precomputed FOLLOW(x) symbols
    self.Follow       = { }

    # Precedence rules for each terminal. Contains tuples of the form
    # ('right',level) or ('nonassoc', level) or ('left',level)
    self.Precedence   = { }

    # Precedence rules that were actually used by the grammar.  This is
    # only used to provide error checking and to generate a warning
    # about unused precedence rules.
    self.UsedPrecedence = { }

    # Starting symbol for the grammar
    self.Start = None

1349

1350

1351

def __len__(self):
    """Return the number of entries in ``self.Productions``."""
    productions = self.Productions
    return len(productions)

1353

1354

def __getitem__(self,index):
    """Return ``self.Productions[index]``."""
    productions = self.Productions
    return productions[index]

1356

1357

# -----------------------------------------------------------------------------

1358

# set_precedence()

1359

1360

# Sets the precedence for a given terminal. assoc is the associativity such as

1361

# 'left','right', or 'nonassoc'. level is a numeric level.

1362

1363

# -----------------------------------------------------------------------------

1364

1365

def set_precedence(self,term,assoc,level):

1366

assert self.Productions == [None],"Must call set_precedence() before add_production()"

1367

if term in self.Precedence:

1368

raise GrammarError("Precedence already specified for terminal '%s'" % term)

1369

if assoc not in ['left','right','nonassoc']:

1370

raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")

1371

self.Precedence[term] = (assoc,level)

1372

1373

# -----------------------------------------------------------------------------

1374

# add_production()

1375

1376

# Given an action function, this function assembles a production rule and

1377

# computes its precedence level.

1378

1379

# The production rule is supplied as a list of symbols. For example,

1380

# a rule such as 'expr : expr PLUS term' has a production name of 'expr' and

1381

# symbols ['expr','PLUS','term'].

1382

1383

# Precedence is determined by the precedence of the right-most non-terminal

1384

# or the precedence of a terminal specified by %prec.

1385

1386

# A variety of error checks are performed to make sure production symbols

1387

# are valid and that %prec is used correctly.

1388

# -----------------------------------------------------------------------------

1389

1390

def add_production(self,prodname,syms,func=None,file='',line=0):

1391

1392

if prodname in self.Terminals:

1393

raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))

1394

if prodname == 'error':

1395

raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))

1396

if not _is_identifier.match(prodname):

1397

raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))

1398

1399

# Look for literal tokens

1400

for n,s in enumerate(syms):

1401

if s[0] in "'\"":

1402

try:

1403

c = eval(s)

1404

if (len(c) > 1):

1405

raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))

1406

if not c in self.Terminals:

1407

self.Terminals[c] = []

1408

syms[n] = c

1409

continue

1410

except SyntaxError:

1411

pass

1412

if not _is_identifier.match(s) and s != '%prec':

1413

raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))

1414

1415

# Determine the precedence level

1416

if '%prec' in syms:

1417

if syms[-1] == '%prec':

1418

raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))

1419

if syms[-2] != '%prec':

1420

raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))

1421

precname = syms[-1]

1422

prodprec = self.Precedence.get(precname,None)

1423

if not prodprec:

1424

raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))

1425

else:

1426

self.UsedPrecedence[precname] = 1

1427

del syms[-2:] # Drop %prec from the rule

1428

else:

1429

# If no %prec, precedence is determined by the rightmost terminal symbol

1430

precname = rightmost_terminal(syms,self.Terminals)

1431

prodprec = self.Precedence.get(precname,('right',0))

1432

1433

# See if the rule is already in the rulemap

1434

map = "%s -> %s" % (prodname,syms)

1435

if map in self.Prodmap:

1436

m = self.Prodmap[map]

1437

raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +

1438

"Previous definition at %s:%d" % (m.file, m.line))

1439

1440

# From this point on, everything is valid. Create a new Production instance

1441

pnumber = len(self.Productions)

1442

if not prodname in self.Nonterminals:

1443

self.Nonterminals[prodname] = [ ]

1444

1445

# Add the production number to Terminals and Nonterminals

1446

for t in syms:

1447

if t in self.Terminals:

1448

self.Terminals[t].append(pnumber)

1449

else:

1450

if not t in self.Nonterminals:

1451

self.Nonterminals[t] = [ ]

1452

self.Nonterminals[t].append(pnumber)

1453

1454

# Create a production and add it to the list of productions

1455

p = Production(pnumber,prodname,syms,prodprec,func,file,line)

1456

self.Productions.append(p)

1457

self.Prodmap[map] = p

1458

1459

# Add to the global productions list

1460

try:

1461

self.Prodnames[prodname].append(p)

1462

except KeyError:

1463

self.Prodnames[prodname] = [ p ]

1464

return 0

1465

1466

# -----------------------------------------------------------------------------

1467

# set_start()

1468

1469

# Sets the starting symbol and creates the augmented grammar. Production

1470

# rule 0 is S' -> start where start is the start symbol.

1471

# -----------------------------------------------------------------------------

1472

1473

def set_start(self,start=None):

1474

if not start:

1475

start = self.Productions[1].name

1476

if start not in self.Nonterminals:

1477

raise GrammarError("start symbol %s undefined" % start)

1478

self.Productions[0] = Production(0,"S'",[start])

1479

self.Nonterminals[start].append(0)

1480

self.Start = start

1481

1482

# -----------------------------------------------------------------------------

1483

# find_unreachable()

1484

1485

# Find all of the nonterminal symbols that can't be reached from the starting

1486

# symbol. Returns a list of nonterminals that can't be reached.

1487

# -----------------------------------------------------------------------------

1488

1489

def find_unreachable(self):

1490

1491

# Mark all symbols that are reachable from a symbol s

1492

def mark_reachable_from(s):

1493

if reachable[s]:

1494

# We've already reached symbol s.

1495

return

1496

reachable[s] = 1

1497

for p in self.Prodnames.get(s,[]):

1498

for r in p.prod:

1499

mark_reachable_from(r)

1500

1501

reachable = { }

1502

for s in list(self.Terminals) + list(self.Nonterminals):

1503

reachable[s] = 0

1504

1505

mark_reachable_from( self.Productions[0].prod[0] )

1506

1507

return [s for s in list(self.Nonterminals)

1508

if not reachable[s]]

1509

1510

# -----------------------------------------------------------------------------

1511

# infinite_cycles()

1512

1513

# This function looks at the various parsing rules and tries to detect

1514

# infinite recursion cycles (grammar rules where there is no possible way

1515

# to derive a string of only terminals).

1516

# -----------------------------------------------------------------------------

1517

1518

def infinite_cycles(self):

1519

terminates = {}

1520

1521

# Terminals:

1522

for t in self.Terminals:

1523

terminates[t] = 1

1524

1525

terminates['$end'] = 1

1526

1527

# Nonterminals:

1528

1529

# Initialize to false:

1530

for n in self.Nonterminals:

1531

terminates[n] = 0

1532

1533

# Then propagate termination until no change:

1534

while 1:

1535

some_change = 0

1536

for (n,pl) in self.Prodnames.items():

1537

# Nonterminal n terminates iff any of its productions terminates.

1538

for p in pl:

1539

# Production p terminates iff all of its rhs symbols terminate.

1540

for s in p.prod:

1541

if not terminates[s]:

1542

# The symbol s does not terminate,

1543

# so production p does not terminate.

1544

p_terminates = 0

1545

break

1546

else:

1547

# didn't break from the loop,

1548

# so every symbol s terminates

1549

# so production p terminates.

1550

p_terminates = 1

1551

1552

if p_terminates:

1553

# symbol n terminates!

1554

if not terminates[n]:

1555

terminates[n] = 1

1556

some_change = 1

1557

# Don't need to consider any more productions for this n.

1558

break

1559

1560

if not some_change:

1561

break

1562

1563

infinite = []

1564

for (s,term) in terminates.items():

1565

if not term:

1566

if not s in self.Prodnames and not s in self.Terminals and s != 'error':

1567

# s is used-but-not-defined, and we've already warned of that,

1568

# so it would be overkill to say that it's also non-terminating.

1569

pass

1570

else:

1571

infinite.append(s)

1572

1573

return infinite

1574

1575

1576

# -----------------------------------------------------------------------------

1577

# undefined_symbols()

1578

1579

# Find all symbols that were used the grammar, but not defined as tokens or

1580

# grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol

1581

# and prod is the production where the symbol was used.

1582

# -----------------------------------------------------------------------------

1583

def undefined_symbols(self):

1584

result = []

1585

for p in self.Productions:

1586

if not p: continue

1587

1588

for s in p.prod:

1589

if not s in self.Prodnames and not s in self.Terminals and s != 'error':

1590

result.append((s,p))

1591

return result

1592

1593

# -----------------------------------------------------------------------------

1594

# unused_terminals()

1595

1596

# Find all terminals that were defined, but not used by the grammar. Returns

1597

# a list of all symbols.

1598

# -----------------------------------------------------------------------------

1599

def unused_terminals(self):

1600

unused_tok = []

1601

for s,v in self.Terminals.items():

1602

if s != 'error' and not v:

1603

unused_tok.append(s)

1604

1605

return unused_tok

1606

1607

# ------------------------------------------------------------------------------

1608

# unused_rules()

1609

1610

# Find all grammar rules that were defined, but not used (maybe not reachable)

1611

# Returns a list of productions.

1612

# ------------------------------------------------------------------------------

1613

1614

def unused_rules(self):

1615

unused_prod = []

1616

for s,v in self.Nonterminals.items():

1617

if not v:

1618

p = self.Prodnames[s][0]

1619

unused_prod.append(p)

1620

return unused_prod

1621

1622

# -----------------------------------------------------------------------------

1623

# unused_precedence()

1624

1625

# Returns a list of tuples (term,precedence) corresponding to precedence

1626

# rules that were never used by the grammar. term is the name of the terminal

1627

# on which precedence was applied and precedence is a string such as 'left' or

1628

# 'right' corresponding to the type of precedence.

1629

# -----------------------------------------------------------------------------

1630

1631

def unused_precedence(self):

1632

unused = []

1633

for termname in self.Precedence:

1634

if not (termname in self.Terminals or termname in self.UsedPrecedence):

1635

unused.append((termname,self.Precedence[termname][0]))

1636

1637

return unused

1638

1639

# -------------------------------------------------------------------------

1640

# _first()

1641

1642

# Compute the value of FIRST1(beta) where beta is a tuple of symbols.

1643

1644

# During execution of compute_first1, the result may be incomplete.

1645

# Afterward (e.g., when called from compute_follow()), it will be complete.

1646

# -------------------------------------------------------------------------

1647

def _first(self,beta):

1648

1649

# We are computing First(x1,x2,x3,...,xn)

1650

result = [ ]

1651

for x in beta:

1652

x_produces_empty = 0

1653

1654

# Add all the non-<empty> symbols of First[x] to the result.

1655

for f in self.First[x]:

1656

if f == '<empty>':

1657

x_produces_empty = 1

1658

else:

1659

if f not in result: result.append(f)

1660

1661

if x_produces_empty:

1662

# We have to consider the next x in beta,

1663

# i.e. stay in the loop.

1664

pass

1665

else:

1666

# We don't have to consider any further symbols in beta.

1667

break

1668

else:

1669

# There was no 'break' from the loop,

1670

# so x_produces_empty was true for all x in beta,

1671

# so beta produces empty as well.

1672

result.append('<empty>')

1673

1674

return result

1675

1676

# -------------------------------------------------------------------------

1677

# compute_first()

1678

1679

# Compute the value of FIRST1(X) for all symbols

1680

# -------------------------------------------------------------------------

1681

def compute_first(self):

1682

if self.First:

1683

return self.First

1684

1685

# Terminals:

1686

for t in self.Terminals:

1687

self.First[t] = [t]

1688

1689

self.First['$end'] = ['$end']

1690

1691

# Nonterminals:

1692

1693

# Initialize to the empty set:

1694

for n in self.Nonterminals:

1695

self.First[n] = []

1696

1697

# Then propagate symbols until no change:

1698

while 1:

1699

some_change = 0

1700

for n in self.Nonterminals:

1701

for p in self.Prodnames[n]:

1702

for f in self._first(p.prod):

1703

if f not in self.First[n]:

1704

self.First[n].append( f )

1705

some_change = 1

1706

if not some_change:

1707

break

1708

1709

return self.First

1710

1711

# ---------------------------------------------------------------------

1712

# compute_follow()

1713

1714

# Computes all of the follow sets for every non-terminal symbol. The

1715

# follow set is the set of all symbols that might follow a given

1716

# non-terminal. See the Dragon book, 2nd Ed. p. 189.

1717

# ---------------------------------------------------------------------

1718

def compute_follow(self,start=None):

1719

# If already computed, return the result

1720

if self.Follow:

1721

return self.Follow

1722

1723

# If first sets not computed yet, do that first.

1724

if not self.First:

1725

self.compute_first()

1726

1727

# Add '$end' to the follow list of the start symbol

1728

for k in self.Nonterminals:

1729

self.Follow[k] = [ ]

1730

1731

if not start:

1732

start = self.Productions[1].name

1733

1734

self.Follow[start] = [ '$end' ]

1735

1736

while 1:

1737

didadd = 0

1738

for p in self.Productions[1:]:

1739

# Here is the production set

1740

for i in range(len(p.prod)):

1741

B = p.prod[i]

1742

if B in self.Nonterminals:

1743

# Okay. We got a non-terminal in a production

1744

fst = self._first(p.prod[i+1:])

1745

hasempty = 0

1746

for f in fst:

1747

if f != '<empty>' and f not in self.Follow[B]:

1748

self.Follow[B].append(f)

1749

didadd = 1

1750

if f == '<empty>':

1751

hasempty = 1

1752

if hasempty or i == (len(p.prod)-1):

1753

# Add elements of follow(a) to follow(b)

1754

for f in self.Follow[p.name]:

1755

if f not in self.Follow[B]:

1756

self.Follow[B].append(f)

1757

didadd = 1

1758

if not didadd: break

1759

return self.Follow

1760

1761

1762

# -----------------------------------------------------------------------------

1763

# build_lritems()

1764

1765

# This function walks the list of productions and builds a complete set of the

1766

# LR items. The LR items are stored in two ways: First, they are uniquely

1767

# numbered and placed in the list _lritems. Second, a linked list of LR items

1768

# is built for each production. For example:

1769

1770

# E -> E PLUS E

1771

1772

# Creates the list

1773

1774

# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]

1775

# -----------------------------------------------------------------------------

1776

1777

def build_lritems(self):

1778

for p in self.Productions:

1779

lastlri = p

1780

i = 0

1781

lr_items = []

1782

while 1:

1783

if i > len(p):

1784

lri = None

1785

else:

1786

lri = LRItem(p,i)

1787

# Precompute the list of productions immediately following

1788

try:

1789

lri.lr_after = self.Prodnames[lri.prod[i+1]]

1790

except (IndexError,KeyError):

1791

lri.lr_after = []

1792

try:

1793

lri.lr_before = lri.prod[i-1]

1794

except IndexError:

1795

lri.lr_before = None

1796

1797

lastlri.lr_next = lri

1798

if not lri: break

1799

lr_items.append(lri)

1800

lastlri = lri

1801

i += 1

1802

p.lr_items = lr_items

1803

1804

# -----------------------------------------------------------------------------

1805

# == Class LRTable ==

1806

1807

# This basic class represents a basic table of LR parsing information.

1808

# Methods for generating the tables are not defined here. They are defined

1809

# in the derived class LRGeneratedTable.

1810

# -----------------------------------------------------------------------------

1811

1812

class VersionError(YaccError):
    """Raised when a stored parsing table has a different table version."""

1813

1814

class LRTable(object):
    """Holder for LR parsing tables loaded from a module or a pickle file.

    The table-generation algorithms are not defined here; they live in the
    derived class LRGeneratedTable.
    """

    def __init__(self):
        self.lr_action = None        # Action table
        self.lr_goto = None          # Goto table
        self.lr_productions = None   # List of MiniProduction objects
        self.lr_method = None        # Table construction method name

    def read_table(self, module):
        """Load the tables from a parsetab module.

        `module` is either a module object or an importable module name.
        Raises VersionError if the stored table version differs from
        __tabversion__.  Returns the signature stored in the module.
        """
        if isinstance(module, types.ModuleType):
            parsetab = module
        else:
            # NOTE(review): the module name is interpolated into source text
            # that is then executed, so it must come from a trusted caller.
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                env = {}
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self, filename):
        """Load the tables from a pickle file written by the table generator.

        Raises VersionError if the stored table version differs from
        __tabversion__.  Returns the signature stored in the file.
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        # NOTE(review): unpickling executes code chosen by the file's author;
        # only load table files from a trusted location.
        in_f = open(filename, "rb")
        try:
            # The records are read back in the order they appear in the file.
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError("yacc table file version is out of date")
            self.lr_method = pickle.load(in_f)
            signature = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto = pickle.load(in_f)
            productions = pickle.load(in_f)
        finally:
            # Close the file even when the version check or an unpickle fails.
            in_f.close()

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        """Call bind(pdict) on every production in self.lr_productions."""
        for p in self.lr_productions:
            p.bind(pdict)

1873

1874

# -----------------------------------------------------------------------------

1875

# === LR Generator ===

1876

1877

# The following classes and functions are used to generate LR parsing tables on

1878

# a grammar.

1879

# -----------------------------------------------------------------------------

1880

1881

# -----------------------------------------------------------------------------

1882

# digraph()

1883

# traverse()

1884

1885

# The following two functions are used to compute set valued functions

1886

# of the form:

1887

1888

# F(x) = F'(x) U U{F(y) | x R y}

1889

1890

# This is used to compute the values of Read() sets as well as FOLLOW sets

1891

# in LALR(1) generation.

1892

1893

# Inputs: X - An input set

1894

# R - A relation

1895

# FP - Set-valued function

1896

# ------------------------------------------------------------------------------

1897

1898

def digraph(X, R, FP):
    """Compute F(x) = F'(x) U U{F(y) | x R y} for every x in X.

    X is the input set, R(x) returns the elements related to x, and FP(x)
    returns the initial set F'(x).  Returns the dictionary F built by
    traverse().
    """
    # N[x] == 0 marks an element that has not been visited yet.
    N = dict.fromkeys(X, 0)
    stack = []
    F = {}
    for x in X:
        if N[x] == 0:
            traverse(x, N, stack, F, X, R, FP)
    return F

1907

1908

def traverse(x, N, stack, F, X, R, FP):
    """Depth-first helper of digraph(): visit x and everything related to it.

    N holds the visit number of each element (0 = unvisited, MAXINT =
    finished), stack holds the elements currently being visited, and F
    receives the computed sets.
    """
    stack.append(x)
    depth = len(stack)
    N[x] = depth
    F[x] = FP(x)             # F(X) <- F'(x)

    for y in R(x):           # Visit the y's related to x
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        N[x] = min(N[x], N[y])
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)

    if N[x] != depth:
        return

    # Pop the stack down to and including x; every popped element is marked
    # finished and is given the same set object as x.
    while True:
        member = stack[-1]
        N[member] = MAXINT
        F[member] = F[x]
        stack.pop()
        if member == x:
            break

1929

1930

class LALRError(YaccError):
    """Raised by LRGeneratedTable, e.g. for an unsupported table method."""

1931

1932

# -----------------------------------------------------------------------------

1933

# == LRGeneratedTable ==

1934

1935

# This class implements the LR table generation algorithm. There are no

1936

# public methods except for write()

1937

# -----------------------------------------------------------------------------

1938

1939

class LRGeneratedTable(LRTable):

1940

def __init__(self, grammar, method='LALR', log=None):
    """Generate the parsing tables for `grammar`.

    `method` must be 'SLR' or 'LALR' (LALRError otherwise).  `log` receives
    diagnostic output; a NullLogger is used when none is supplied.
    """
    if method not in ('SLR', 'LALR'):
        raise LALRError("Unsupported method %s" % method)

    self.grammar = grammar
    self.lr_method = method

    # Set up the logger
    self.log = log or NullLogger()

    # Internal attributes
    self.lr_action = {}                         # Action table
    self.lr_goto = {}                           # Goto table
    self.lr_productions = grammar.Productions   # Copy of grammar Production array
    self.lr_goto_cache = {}                     # Cache of computed gotos
    self.lr0_cidhash = {}                       # Cache of closures

    self._add_count = 0                         # Internal counter used to detect cycles

    # Diagnostic information filled in by the table generator
    self.sr_conflict = 0
    self.rr_conflict = 0
    self.conflicts = []                         # List of conflicts

    self.sr_conflicts = []
    self.rr_conflicts = []

    # Build the tables
    self.grammar.build_lritems()
    self.grammar.compute_first()
    self.grammar.compute_follow()
    self.lr_parse_table()

1974

1975

# Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

1976

1977

def lr0_closure(self, I):
    """Return the LR(0) closure of the item list I as a new list.

    Each production reached through an item's lr_after list contributes its
    lr_next item once; productions already added during this call are
    recognised by the lr0_added stamp written on them.
    """
    self._add_count += 1
    stamp = self._add_count

    # Add everything in I to J; J grows while it is being scanned, so items
    # appended below are themselves expanded later in the same loop.
    J = I[:]
    for item in J:
        for production in item.lr_after:
            if getattr(production, "lr0_added", 0) == stamp:
                continue
            # Add B --> .G to J
            J.append(production.lr_next)
            production.lr0_added = stamp

    return J

1994

1995

# Compute the LR(0) goto function goto(I,X) where I is a set

1996

# of LR(0) items and X is a grammar symbol. This function is written

1997

# in a way that guarantees uniqueness of the generated goto sets

1998

# (i.e. the same goto set will never be returned as two different Python

1999

# objects). With uniqueness, we can later do fast set comparisons using

2000

# id(obj) instead of element-wise comparison.

2001

2002

def lr0_goto(self, I, x):
    """Return goto(I, x) for the LR(0) item list I and grammar symbol x.

    Results are memoised twice in self.lr_goto_cache: under (id(I), x) for
    repeated calls with the same list, and in a nested dictionary keyed by x
    and then by the id() of each successor item, so that the same sequence
    of successor items always yields the same list object.  Returns None
    when no item of I can advance over x.
    """
    memo = self.lr_goto_cache

    # First we look for a previously cached entry
    cached = memo.get((id(I), x))
    if cached:
        return cached

    # Now we generate the goto set in a way that guarantees uniqueness
    # of the result
    node = memo.get(x)
    if not node:
        node = {}
        memo[x] = node

    kernel = []
    for item in I:
        successor = item.lr_next
        if successor and successor.lr_before == x:
            child = node.get(id(successor))
            if not child:
                child = {}
                node[id(successor)] = child
            kernel.append(successor)
            node = child

    # The entry under '$end' holds the closure for the path walked above.
    result = node.get('$end')
    if not result:
        if kernel:
            result = self.lr0_closure(kernel)
            node['$end'] = result
        else:
            node['$end'] = kernel
    memo[(id(I), x)] = result
    return result

2034

2035

# Compute the LR(0) sets of item function

2036

def lr0_items(self):
    """Return the list C of LR(0) item sets, starting from production 0.

    self.lr0_cidhash is filled with id(item set) -> position in C.
    """
    start_item = self.grammar.Productions[0].lr_next
    C = [self.lr0_closure([start_item])]
    for number, itemset in enumerate(C):
        self.lr0_cidhash[id(itemset)] = number

    # Loop over the item sets in C and each grammar symbol.  C grows while
    # it is being scanned, so newly found sets are processed in turn.
    for I in C:
        # Collect all of the symbols that could possibly be in the goto(I,X) sets
        asyms = {}
        for item in I:
            for s in item.usyms:
                asyms[s] = None

        for x in asyms:
            g = self.lr0_goto(I, x)
            if not g or id(g) in self.lr0_cidhash:
                continue
            self.lr0_cidhash[id(g)] = len(C)
            C.append(g)

    return C

2064

2065

# -----------------------------------------------------------------------------

2066

# ==== LALR(1) Parsing ====

2067

2068

# LALR(1) parsing is almost exactly the same as SLR except that instead of

2069

# relying upon Follow() sets when performing reductions, a more selective

2070

# lookahead set that incorporates the state of the LR(0) machine is utilized.

2071

# Thus, we mainly just have to focus on calculating the lookahead sets.

2072

2073

# The method used here is due to DeRemer and Pennello (1982).

2074

2075

# DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)

2076

# Lookahead Sets", ACM Transactions on Programming Languages and Systems,

2077

# Vol. 4, No. 4, Oct. 1982, pp. 615-649

2078

2079

# Further details can also be found in:

2080

2081

# J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",

2082

# McGraw-Hill Book Company, (1985).

2083

2084

# -----------------------------------------------------------------------------

2085

2086

# -----------------------------------------------------------------------------

2087

# compute_nullable_nonterminals()

2088

2089

# Creates a dictionary containing all of the non-terminals that might produce

2090

# an empty production.

2091

# -----------------------------------------------------------------------------

2092

2093

def compute_nullable_nonterminals(self):
    """Return a dict whose keys are the nonterminals that can derive empty.

    A production makes its nonterminal nullable when it has length 0 or when
    every symbol on its right-hand side is already nullable; the scan is
    repeated until a pass adds nothing new.
    """
    nullable = {}
    rules = self.grammar.Productions[1:]
    previous = -1
    while previous != len(nullable):
        previous = len(nullable)
        for p in rules:
            if p.len == 0:
                nullable[p.name] = 1
                continue
            for t in p.prod:
                if t not in nullable:
                    break
            else:
                nullable[p.name] = 1
    return nullable

2108

2109

# -----------------------------------------------------------------------------

2110

# find_nonterminal_transitions(C)

2111

2112

# Given a set of LR(0) items, this function finds all of the non-terminal

2113

# transitions. These are transitions in which a dot appears immediately before

2114

# a non-terminal. Returns a list of tuples of the form (state,N) where state

2115

# is the state number and N is the nonterminal symbol.

2116

2117

# The input C is the set of LR(0) items.

2118

# -----------------------------------------------------------------------------

2119

2120

def find_nonterminal_transitions(self, C):
    """Return the (state, N) pairs where state has a dot just before N.

    C is the list of LR(0) item sets; `state` is an index into C and N is a
    nonterminal of the grammar.  Each pair appears once, in the order in
    which it is first found.
    """
    nonterminals = self.grammar.Nonterminals
    trans = []
    seen = {}       # Pairs already in trans (dict used as a set)
    for state, items in enumerate(C):
        for p in items:
            if p.lr_index < p.len - 1:
                t = (state, p.prod[p.lr_index + 1])
                if t[1] in nonterminals and t not in seen:
                    seen[t] = 1
                    trans.append(t)
    return trans

2130

2131

# -----------------------------------------------------------------------------

2132

# dr_relation()

2133

2134

# Computes the DR(p,A) relationships for non-terminal transitions. The input

2135

# is a tuple (state,N) where state is a number and N is a nonterminal symbol.

2136

2137

# Returns a list of terminals.

2138

# -----------------------------------------------------------------------------

2139

2140

def dr_relation(self, C, trans, nullable):
    """Return the list of terminals directly read after transition `trans`.

    `trans` is a tuple (state, N).  The result lists, without duplicates,
    every terminal that follows the dot in an item of goto(C[state], N).
    '$end' is appended for the transition on the start symbol out of
    state 0.  `nullable` is accepted but not used here.
    """
    state, N = trans
    terms = []

    g = self.lr0_goto(C[state], N)
    for p in g:
        if p.lr_index < p.len - 1:
            a = p.prod[p.lr_index + 1]
            if a in self.grammar.Terminals:
                if a not in terms:
                    terms.append(a)

    # This extra bit is to handle the start state
    if state == 0 and N == self.grammar.Productions[0].prod[0]:
        terms.append('$end')

    return terms

2157

2158

# -----------------------------------------------------------------------------

2159

# reads_relation()

2160

2161

# Computes the READS() relation (p,A) READS (t,C).

2162

# -----------------------------------------------------------------------------

2163

2164

def reads_relation(self, C, trans, empty):
    """Return the transitions (j, a) that `trans` READS.

    `trans` is a tuple (state, N).  j is the number recorded in
    self.lr0_cidhash for goto(C[state], N), or -1 when none is recorded,
    and a is any symbol found in `empty` that follows the dot in an item of
    that goto set.
    """
    state, N = trans
    target = self.lr0_goto(C[state], N)
    j = self.lr0_cidhash.get(id(target), -1)

    # Look for empty transitions
    return [(j, item.prod[item.lr_index + 1])
            for item in target
            if item.lr_index < item.len - 1
            and item.prod[item.lr_index + 1] in empty]

2178

2179

# -----------------------------------------------------------------------------

2180

# compute_lookback_includes()

2181

2182

# Determines the lookback and includes relations

2183

2184

# LOOKBACK:

2185

2186

# This relation is determined by running the LR(0) state machine forward.

2187

# For example, starting with a production "N : . A B C", we run it forward

2188

# to obtain "N : A B C ." We then build a relationship between this final

2189

# state and the starting state. These relationships are stored in a dictionary

2190

# lookdict.

2191

2192

# INCLUDES:

2193

2194

# Computes the INCLUDE() relation (p,A) INCLUDES (p',B).

2195

2196

# This relation is used to determine non-terminal transitions that occur

2197

# inside of other non-terminal transition states. (p,A) INCLUDES (p', B)

2198

# if the following holds:

2199

2200

# B -> LAT, where T -> epsilon and p' -L-> p

2201

2202

# L is essentially a prefix (which may be empty), T is a suffix that must be

2203

# able to derive an empty string. State p' must lead to state p with the string L.

2204

2205

# -----------------------------------------------------------------------------

2206

2207

def compute_lookback_includes(self, C, trans, nullable):
    """Compute the LOOKBACK and INCLUDES relations for the transitions.

    C is the list of LR(0) item sets, `trans` the list of nonterminal
    transitions (state, N) and `nullable` the nullable nonterminals.

    Returns (lookdict, includedict):
      lookdict[(state, N)]  - list of (final state, item) pairs reached by
                              running each N-production of `state` forward.
      includedict[(j, t)]   - list of the transitions (state, N) recorded
                              for the transition (j, t).
    """
    lookdict = {}          # Dictionary of lookback relations
    includedict = {}       # Dictionary of include relations

    # Make a dictionary of non-terminal transitions
    dtrans = dict.fromkeys(trans, 1)
    terminals = self.grammar.Terminals

    # Loop over all transitions and compute lookbacks and includes
    for state, N in trans:
        lookb = []
        includes = []
        for p in C[state]:
            if p.name != N:
                continue

            # Okay, we have a name match.  We now follow the production all
            # the way through the state machine until we get the . on the
            # right hand side
            j = state
            for lr_index in range(p.lr_index + 1, p.len):
                t = p.prod[lr_index]

                # Check to see if this symbol and state are a non-terminal transition
                if (j, t) in dtrans:
                    # Yes.  Okay, there is some chance that this is an
                    # includes relation; the only way to know for certain is
                    # whether the rest of the production derives empty
                    for li in range(lr_index + 1, p.len):
                        rest = p.prod[li]
                        if rest in terminals:
                            break      # No forget it
                        if rest not in nullable:
                            break
                    else:
                        # Appears to be a relation between (j,t) and (state,N)
                        includes.append((j, t))

                g = self.lr0_goto(C[j], t)               # Go to next set
                j = self.lr0_cidhash.get(id(g), -1)      # Go to next state

            # When we get here, j is the final state, now we have to locate
            # the production
            for r in C[j]:
                if r.name != p.name:
                    continue
                if r.len != p.len:
                    continue
                # This loop is comparing a production ". A B C" with "A B C ."
                for k in range(r.lr_index):
                    if r.prod[k] != p.prod[k + 1]:
                        break
                else:
                    lookb.append((j, r))

        for inc in includes:
            includedict.setdefault(inc, []).append((state, N))
        lookdict[(state, N)] = lookb

    return lookdict, includedict

2268

2269

# -----------------------------------------------------------------------------

2270

# compute_read_sets()

2271

2272

# Given a set of LR(0) items, this function computes the read sets.

2273

2274

# Inputs: C = Set of LR(0) items

2275

# ntrans = Set of nonterminal transitions

2276

# nullable = Set of empty transitions

2277

2278

# Returns a set containing the read sets

2279

# -----------------------------------------------------------------------------

2280

2281

def compute_read_sets(self, C, ntrans, nullable):
    """Compute the read sets for a collection of nonterminal transitions.

    Args:
        C: Collection of LR(0) item sets.
        ntrans: The nonterminal transitions to compute read sets for.
        nullable: The nullable (empty-deriving) symbols.

    Returns:
        Whatever ``digraph()`` produces when run over ``ntrans`` with the
        reads relation and the direct-read function defined below.
    """
    def direct_read(x):
        # Base set for transition x, delegated to dr_relation().
        return self.dr_relation(C, x, nullable)

    def reads(x):
        # Transitions related to x, delegated to reads_relation().
        return self.reads_relation(C, x, nullable)

    return digraph(ntrans, reads, direct_read)

2286

2287

# -----------------------------------------------------------------------------

2288

# compute_follow_sets()

2289

2290

# Given a set of LR(0) items, a set of non-terminal transitions, a readset,

2291

# and an include set, this function computes the follow sets

2292

2293

# Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}

2294

2295

# Inputs:

2296

# ntrans = Set of nonterminal transitions

2297

# readsets = Readset (previously computed)

2298

# inclsets = Include sets (previously computed)

2299

2300

# Returns a set containing the follow sets

2301

# -----------------------------------------------------------------------------

2302

2303

def compute_follow_sets(self, ntrans, readsets, inclsets):
    """Compute the follow sets from read sets and include sets.

    Implements the formula from the section header:
    Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}.

    Args:
        ntrans: The nonterminal transitions.
        readsets: Mapping from transition to its previously computed read set.
        inclsets: Mapping from transition to the transitions it includes;
            transitions missing from the mapping include nothing.

    Returns:
        Whatever ``digraph()`` produces when run over ``ntrans``.
    """
    def read_set(x):
        # Every transition is expected to have a read set (KeyError otherwise).
        return readsets[x]

    def includes(x):
        return inclsets.get(x, [])

    return digraph(ntrans, includes, read_set)

2308

2309

# -----------------------------------------------------------------------------

2310

# add_lookaheads()

2311

2312

# Attaches the lookahead symbols to grammar rules.

2313

2314

# Inputs: lookbacks - Set of lookback relations

2315

# followset - Computed follow set

2316

2317

# This function directly attaches the lookaheads to productions contained

2318

# in the lookbacks set

2319

# -----------------------------------------------------------------------------

2320

2321

def add_lookaheads(self, lookbacks, followset):
    """Attach lookahead symbols to the productions named in ``lookbacks``.

    Args:
        lookbacks: Mapping from a transition to a list of
            ``(state, production)`` pairs.
        followset: Mapping from a transition to its follow symbols; a
            transition without an entry contributes no symbols.

    Each production's ``lookaheads[state]`` list is created if missing and
    extended, in order and without duplicates, with the follow symbols of
    the transition.
    """
    for trans, pairs in lookbacks.items():
        follow = followset.get(trans, [])
        for state, prod in pairs:
            known = prod.lookaheads.setdefault(state, [])
            for symbol in follow:
                if symbol not in known:
                    known.append(symbol)

2330

2331

# -----------------------------------------------------------------------------

2332

# add_lalr_lookaheads()

2333

2334

# This function does all of the work of adding lookahead information for use

2335

# with LALR parsing

2336

# -----------------------------------------------------------------------------

2337

2338

def add_lalr_lookaheads(self, C):
    """Run the full LALR lookahead computation for the item sets ``C``.

    The steps run in a fixed order, each feeding the next: nullable
    nonterminals, nonterminal transitions, read sets, lookback/includes
    relations, follow sets, and finally attaching the lookaheads.
    """
    nullable = self.compute_nullable_nonterminals()
    transitions = self.find_nonterminal_transitions(C)
    read_sets = self.compute_read_sets(C, transitions, nullable)
    lookbacks, includes = self.compute_lookback_includes(C, transitions, nullable)
    follow_sets = self.compute_follow_sets(transitions, read_sets, includes)
    self.add_lookaheads(lookbacks, follow_sets)

2356

2357

# -----------------------------------------------------------------------------

2358

# lr_parse_table()

2359

2360

# This function constructs the parse tables for SLR or LALR

2361

# -----------------------------------------------------------------------------

2362

def lr_parse_table(self):
    """Build the LR action and goto tables, one state at a time.

    The LR(0) item sets come from ``self.lr0_items()``.  When
    ``self.lr_method`` is ``'LALR'`` the lookaheads are attached first via
    ``self.add_lalr_lookaheads()``; otherwise reductions use
    ``self.grammar.Follow``.  The per-state tables are stored in
    ``self.lr_action`` and ``self.lr_goto`` keyed by state number.

    Action encoding used below: a positive entry is a shift to that state,
    a negative entry is a reduction by production ``-entry``, ``0`` is
    accept (stored for ``"$end"``) and ``None`` marks a nonassoc error.

    Conflicts are resolved with the precedence tables and recorded in
    ``self.sr_conflicts`` / ``self.rr_conflicts``; every decision is
    written to ``self.log``.

    Raises:
        LALRError: on a shift/shift conflict, or when a conflicting entry
            is ``0`` (neither shift nor reduce).
    """
    productions = self.grammar.Productions
    precedence = self.grammar.Precedence
    goto_table = self.lr_goto
    action_table = self.lr_action
    log = self.log

    # Per-state production map; filled in but not read again in this method.
    actionp = {}

    log.info("Parsing method: %s", self.lr_method)

    # Step 1: the collection of LR(0) item sets determines the states.
    item_sets = self.lr0_items()

    if self.lr_method == 'LALR':
        self.add_lalr_lookaheads(item_sets)

    # Step 2: build the tables state by state.
    for st, I in enumerate(item_sets):
        actlist = []        # every candidate (symbol, item, message)
        st_action = {}      # symbol -> chosen action
        st_actionp = {}     # symbol -> item responsible for that action
        st_goto = {}        # nonterminal -> next state

        log.info("")
        log.info("state %d", st)
        log.info("")
        for p in I:
            log.info(" (%d) %s", p.number, str(p))
        log.info("")

        for p in I:
            if p.len == p.lr_index + 1:
                # The dot is at the end of the production.
                if p.name == "S'":
                    # Augmented start production: accept.
                    st_action["$end"] = 0
                    st_actionp["$end"] = p
                    continue

                if self.lr_method == 'LALR':
                    laheads = p.lookaheads[st]
                else:
                    laheads = self.grammar.Follow[p.name]

                for a in laheads:
                    actlist.append((a, p, "reduce using rule %d (%s)" % (p.number, p)))
                    r = st_action.get(a)
                    if r is None:
                        # No competing action: plain reduce.
                        st_action[a] = -p.number
                        st_actionp[a] = p
                        productions[p.number].reduced += 1
                    elif r > 0:
                        # Shift/reduce conflict with an existing shift.
                        # The levels compared are the precedence of the
                        # production currently holding the entry and the
                        # precedence registered for the token.
                        _held_assoc, held_level = productions[st_actionp[a].number].prec
                        tok_assoc, tok_level = precedence.get(a, ('right', 0))
                        if (held_level < tok_level) or \
                                (held_level == tok_level and tok_assoc == 'left'):
                            # Resolve as reduce.
                            st_action[a] = -p.number
                            st_actionp[a] = p
                            if not held_level and not tok_level:
                                log.info(" ! shift/reduce conflict for %s resolved as reduce", a)
                                self.sr_conflicts.append((st, a, 'reduce'))
                            productions[p.number].reduced += 1
                        elif held_level == tok_level and tok_assoc == 'nonassoc':
                            st_action[a] = None
                        else:
                            # Keep the shift.
                            if not tok_level:
                                log.info(" ! shift/reduce conflict for %s resolved as shift", a)
                                self.sr_conflicts.append((st, a, 'shift'))
                    elif r < 0:
                        # Reduce/reduce conflict: prefer the production with
                        # the smaller ``line`` attribute.
                        oldp = productions[-r]
                        pp = productions[p.number]
                        if oldp.line > pp.line:
                            st_action[a] = -p.number
                            st_actionp[a] = p
                            chosenp, rejectp = pp, oldp
                            productions[p.number].reduced += 1
                            productions[oldp.number].reduced -= 1
                        else:
                            chosenp, rejectp = oldp, pp
                        self.rr_conflicts.append((st, chosenp, rejectp))
                        log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)",
                                 a, st_actionp[a].number, st_actionp[a])
                    else:
                        raise LALRError("Unknown conflict in state %d" % st)
                continue

            # The dot is inside the production: look at the next symbol.
            a = p.prod[p.lr_index + 1]
            if a not in self.grammar.Terminals:
                continue
            g = self.lr0_goto(I, a)
            j = self.lr0_cidhash.get(id(g), -1)
            if j < 0:
                continue

            # Candidate shift to state j.
            actlist.append((a, p, "shift and go to state %d" % j))
            r = st_action.get(a)
            if r is None:
                st_action[a] = j
                st_actionp[a] = p
            elif r > 0:
                if r != j:
                    raise LALRError("Shift/shift conflict in state %d" % st)
            elif r < 0:
                # Shift/reduce conflict with an existing reduce:
                #   - shift when the token level is higher, or equal with a
                #     'right' production precedence;
                #   - error entry for equal levels with 'nonassoc';
                #   - otherwise keep the reduce.
                held_assoc, held_level = productions[st_actionp[a].number].prec
                _tok_assoc, tok_level = precedence.get(a, ('right', 0))
                if (tok_level > held_level) or \
                        (tok_level == held_level and held_assoc == 'right'):
                    productions[st_actionp[a].number].reduced -= 1
                    st_action[a] = j
                    st_actionp[a] = p
                    if not held_level:
                        log.info(" ! shift/reduce conflict for %s resolved as shift", a)
                        self.sr_conflicts.append((st, a, 'shift'))
                elif tok_level == held_level and held_assoc == 'nonassoc':
                    st_action[a] = None
                else:
                    if not tok_level and not held_level:
                        log.info(" ! shift/reduce conflict for %s resolved as reduce", a)
                        self.sr_conflicts.append((st, a, 'reduce'))
            else:
                raise LALRError("Unknown conflict in state %d" % st)

        # Log the actions that were selected for each symbol.
        printed = {}
        for a, p, m in actlist:
            if a in st_action and p is st_actionp[a]:
                log.info(" %-15s %s", a, m)
                printed[(a, m)] = 1
        log.info("")

        # Log the candidate actions that lost (debugging aid).
        not_used = 0
        for a, p, m in actlist:
            if a in st_action and p is not st_actionp[a] and (a, m) not in printed:
                log.debug(" ! %-15s [ %s ]", a, m)
                not_used = 1
                printed[(a, m)] = 1
        if not_used:
            log.debug("")

        # Goto table: one entry per nonterminal used by an item in I.
        nkeys = {}
        for item in I:
            for sym in item.usyms:
                if sym in self.grammar.Nonterminals:
                    nkeys[sym] = None
        for n in nkeys:
            g = self.lr0_goto(I, n)
            j = self.lr0_cidhash.get(id(g), -1)
            if j >= 0:
                st_goto[n] = j
                log.info(" %-30s shift and go to state %d", n, j)

        action_table[st] = st_action
        actionp[st] = st_actionp
        goto_table[st] = st_goto

2537

2538

2539

# -----------------------------------------------------------------------------

2540

# write_table()

2541

2542

# This function writes the LR parsing tables to a file

2543

# -----------------------------------------------------------------------------

2544

2545

def write_table(self, modulename, outputdir='', signature=""):
    """Write the LR parsing tables to a generated Python module.

    Args:
        modulename: Dotted module name; only the last component is used to
            form the output file name (``<name>.py``).
        outputdir: Directory the file is created in.
        signature: Grammar signature stored as ``_lr_signature``.

    The generated file defines ``_tabversion``, ``_lr_method``,
    ``_lr_signature``, ``_lr_action``, ``_lr_goto`` and ``_lr_productions``.
    An ``IOError`` is reported on ``sys.stderr`` and swallowed; the file is
    always closed, even when a write fails part-way.
    """
    basemodulename = modulename.split(".")[-1]
    filename = os.path.join(outputdir, basemodulename) + ".py"
    try:
        f = open(filename, "w")
        try:
            f.write("""
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
""" % (filename, __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            # NOTE(review): the smaller == 0 branches below index k[0]/k[1]
            # on the table keys; confirm the key shape before enabling them.
            smaller = 1

            # Factor out names to try and make smaller: group the entries
            # by symbol as (list of states, list of values).
            if smaller:
                items = {}

                for s, nd in self.lr_action.items():
                    for name, v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([], [])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write("\n_lr_action_items = {")
                for k, v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                # Code emitted into the table file to rebuild the
                # state -> {symbol: action} mapping at import time.
                f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
      if not _x in _lr_action: _lr_action[_x] = { }
      _lr_action[_x][_k] = _y
del _lr_action_items
""")

            else:
                f.write("\n_lr_action = { ")
                for k, v in self.lr_action.items():
                    f.write("(%r,%r):%r," % (k[0], k[1], v))
                f.write("}\n")

            if smaller:
                # Factor out names to try and make smaller
                items = {}

                for s, nd in self.lr_goto.items():
                    for name, v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([], [])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write("\n_lr_goto_items = {")
                for k, v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       if not _x in _lr_goto: _lr_goto[_x] = { }
       _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
            else:
                f.write("\n_lr_goto = { ")
                for k, v in self.lr_goto.items():
                    f.write("(%r,%r):%r," % (k[0], k[1], v))
                f.write("}\n")

            # Write production table
            f.write("_lr_productions = [\n")
            for p in self.lr_productions:
                if p.func:
                    f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str, p.name, p.len, p.func, p.file, p.line))
                else:
                    f.write(" (%r,%r,%d,None,None,None),\n" % (str(p), p.name, p.len))
            f.write("]\n")
        finally:
            # Close the handle even when one of the writes above raises.
            f.close()

    except IOError:
        e = sys.exc_info()[1]
        sys.stderr.write("Unable to create '%s'\n" % filename)
        sys.stderr.write(str(e)+"\n")
        return

2658

2659

2660

# -----------------------------------------------------------------------------

2661

# pickle_table()

2662

2663

# This function pickles the LR parsing tables to the file named by filename

2664

# -----------------------------------------------------------------------------

2665

2666

def pickle_table(self, filename, signature=""):
    """Pickle the LR parsing tables into the file ``filename``.

    Args:
        filename: Path of the pickle file; opened in binary write mode.
        signature: Grammar signature stored alongside the tables.

    Objects are dumped in this order: table version, LR method, signature,
    action table, goto table, production list.  The file is closed even if
    one of the dumps raises.
    """
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    # Build the production records first so the file is only opened once
    # everything that has to be written is known.
    outp = []
    for p in self.lr_productions:
        if p.func:
            outp.append((p.str, p.name, p.len, p.func, p.file, p.line))
        else:
            outp.append((str(p), p.name, p.len, None, None, None))

    outf = open(filename, "wb")
    try:
        pickle.dump(__tabversion__, outf, pickle_protocol)
        pickle.dump(self.lr_method, outf, pickle_protocol)
        pickle.dump(signature, outf, pickle_protocol)
        pickle.dump(self.lr_action, outf, pickle_protocol)
        pickle.dump(self.lr_goto, outf, pickle_protocol)
        pickle.dump(outp, outf, pickle_protocol)
    finally:
        outf.close()

2686

2687

# -----------------------------------------------------------------------------

2688

# === INTROSPECTION ===

2689

2690

# The following functions and classes are used to implement the PLY

2691

# introspection features followed by the yacc() function itself.

2692

# -----------------------------------------------------------------------------

2693

2694

# -----------------------------------------------------------------------------

2695

# get_caller_module_dict()

2696

2697

# This function returns a dictionary containing all of the symbols defined within

2698

# a caller further down the call stack. This is used to get the environment

2699

# associated with the yacc() call if none was provided.

2700

# -----------------------------------------------------------------------------

2701

2702

def get_caller_module_dict(levels):
    """Return the symbols visible in a frame ``levels`` steps up the stack.

    ``levels == 0`` refers to this function's own frame, ``1`` to its
    caller, and so on.  The result is a copy of that frame's globals,
    overlaid with its locals when the two differ.
    """
    # Raise and catch an exception to obtain the current frame from the
    # traceback.
    try:
        raise RuntimeError
    except RuntimeError:
        frame = sys.exc_info()[2].tb_frame

    while levels > 0:
        frame = frame.f_back
        levels -= 1

    namespace = frame.f_globals.copy()
    if frame.f_globals != frame.f_locals:
        namespace.update(frame.f_locals)
    return namespace

2716

2717

# -----------------------------------------------------------------------------

2718

# parse_grammar()

2719

2720

# This takes a raw grammar rule string and parses it into production data

2721

# -----------------------------------------------------------------------------

2722

def parse_grammar(doc, file, line):
    """Parse a raw grammar rule string into production tuples.

    Args:
        doc: The rule text; each non-blank line is either
            ``name : sym ...`` (``::=`` is accepted too) or ``| sym ...``
            continuing the previous rule.
        file: File name used in error messages and in the result.
        line: Line number preceding the first line of ``doc``.

    Returns:
        A list of ``(file, lineno, prodname, syms)`` tuples, one per
        non-blank line, in order.

    Raises:
        SyntaxError: for a leading ``|`` with no previous rule, a missing
            ``:``/``::=`` separator, or any other malformed line.
    """
    rules = []
    current = None
    for index, raw in enumerate(doc.splitlines()):
        lineno = line + index + 1
        tokens = raw.split()
        if not tokens:
            continue
        try:
            if tokens[0] == '|':
                # Alternative for the most recently named rule.
                if not current:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file, lineno))
                prodname, syms = current, tokens[1:]
            else:
                prodname = current = tokens[0]
                syms = tokens[2:]
                # A one-token line raises IndexError here; it is turned
                # into a SyntaxError by the generic handler below.
                separator = tokens[1]
                if separator not in (':', '::='):
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, lineno))

            rules.append((file, lineno, prodname, syms))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file, lineno, raw.strip()))

    return rules

2754

2755

# -----------------------------------------------------------------------------

2756

# ParserReflect()

2757

2758

# This class represents information extracted for building a parser including

2759

# start symbol, error function, tokens, precedence list, action functions,

2760

# etc.

2761

# -----------------------------------------------------------------------------

2762

class ParserReflect(object):
    """Collect and validate the parser specification found in a namespace.

    ``pdict`` is a dictionary of names (typically a module's symbols).  The
    ``get_*`` methods extract the start symbol, error function, token list,
    precedence table and ``p_`` rule functions; the ``validate_*`` methods
    check them, reporting problems through ``self.log`` and setting
    ``self.error`` to 1 on fatal problems.
    """

    def __init__(self, pdict, log=None):
        self.pdict = pdict
        self.start = None
        self.error_func = None
        self.tokens = None
        self.files = {}        # source files that contributed rules
        self.grammar = []
        self.error = 0

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    def get_all(self):
        """Extract all of the basic information from ``pdict``."""
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    def validate_all(self):
        """Run every validation step and return the error flag."""
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_files()
        return self.error

    def signature(self):
        """Return an MD5 digest computed over the grammar specification.

        The digest covers the start symbol, the precedence table, the token
        names and the docstrings of the rule functions.  Values that cannot
        be joined or encoded (TypeError/ValueError) end the computation
        early; the digest of whatever was hashed so far is returned.
        """
        try:
            from hashlib import md5
        except ImportError:
            from md5 import md5
        try:
            sig = md5()
            if self.start:
                sig.update(self.start.encode('latin-1'))
            if self.prec:
                sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
            if self.tokens:
                sig.update(" ".join(self.tokens).encode('latin-1'))
            for f in self.pfuncs:
                if f[3]:
                    sig.update(f[3].encode('latin-1'))
        except (TypeError, ValueError):
            pass
        return sig.digest()

    # -----------------------------------------------------------------------------
    # validate_files()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_files(self):
        """Warn about ``p_`` functions defined more than once in a file.

        Every ``.py`` file recorded in ``self.files`` is scanned.  Files
        with another extension, or files that cannot be read, are skipped
        individually so the remaining files are still checked.
        """
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for filename in self.files:
            base, ext = os.path.splitext(filename)
            if ext != '.py':
                # No idea what this is.  Assume it's okay and move on to
                # the next file instead of abandoning the whole scan.
                continue

            try:
                f = open(filename)
                try:
                    lines = f.readlines()
                finally:
                    f.close()
            except IOError:
                continue

            first_seen = {}
            for index, text in enumerate(lines):
                linen = index + 1
                m = fre.match(text)
                if not m:
                    continue
                name = m.group(1)
                prev = first_seen.get(name)
                if not prev:
                    first_seen[name] = linen
                else:
                    self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename, linen, name, prev)

    def get_start(self):
        """Read the optional ``start`` symbol."""
        self.start = self.pdict.get('start')

    def validate_start(self):
        """Report an error if ``start`` is defined but is not a string."""
        if self.start is not None:
            if not isinstance(self.start, str):
                self.log.error("'start' must be a string")

    def get_error_func(self):
        """Look up the optional ``p_error`` handler."""
        self.error_func = self.pdict.get('p_error')

    def validate_error_func(self):
        """Check that ``p_error`` is a function or method taking one argument."""
        if self.error_func:
            if isinstance(self.error_func, types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = 1
                return

            eline = func_code(self.error_func).co_firstlineno
            efile = func_code(self.error_func).co_filename
            self.files[efile] = 1

            # A bound method carries one extra (self) argument.
            if (func_code(self.error_func).co_argcount != 1+ismethod):
                self.log.error("%s:%d: p_error() requires 1 argument", efile, eline)
                self.error = 1

    def get_tokens(self):
        """Read the ``tokens`` list, flagging a missing, mistyped or empty one."""
        tokens = self.pdict.get("tokens", None)
        if tokens is None:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    def validate_tokens(self):
        """Reject the reserved token name 'error' and warn about duplicates."""
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = 1
            return

        terminals = {}
        for n in self.tokens:
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    def get_precedence(self):
        """Read the optional ``precedence`` table."""
        self.prec = self.pdict.get("precedence", None)

    def validate_precedence(self):
        """Validate the precedence table and flatten it into ``self.preclist``.

        ``self.preclist`` receives ``(term, assoc, level)`` tuples with
        levels starting at 1.  On a malformed table an error is logged,
        ``self.error`` is set and ``self.preclist`` is left unassigned.
        """
        preclist = []
        if self.prec:
            if not isinstance(self.prec, (list, tuple)):
                self.log.error("precedence must be a list or tuple")
                self.error = 1
                return
            for level, p in enumerate(self.prec):
                if not isinstance(p, (list, tuple)):
                    self.log.error("Bad precedence table")
                    self.error = 1
                    return

                if len(p) < 2:
                    self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)", p)
                    self.error = 1
                    return
                assoc = p[0]
                if not isinstance(assoc, str):
                    self.log.error("precedence associativity must be a string")
                    self.error = 1
                    return
                for term in p[1:]:
                    if not isinstance(term, str):
                        self.log.error("precedence items must be strings")
                        self.error = 1
                        return
                    preclist.append((term, assoc, level+1))
        self.preclist = preclist

    def get_pfunctions(self):
        """Collect the ``p_`` rule functions (except ``p_error``).

        ``self.pfuncs`` becomes a list of ``(line, file, name, doc)``
        tuples sorted in tuple order, i.e. primarily by line number.
        """
        p_functions = []
        for name, item in self.pdict.items():
            if name[:2] != 'p_':
                continue
            if name == 'p_error':
                continue
            if isinstance(item, (types.FunctionType, types.MethodType)):
                line = func_code(item).co_firstlineno
                filename = func_code(item).co_filename
                p_functions.append((line, filename, name, item.__doc__))

        # Sort all of the actions by line number
        p_functions.sort()
        self.pfuncs = p_functions

    def validate_pfunctions(self):
        """Check the rule functions and parse their docstrings into ``self.grammar``.

        ``self.grammar`` receives ``(funcname, production)`` pairs where
        ``production`` is a tuple produced by ``parse_grammar()``.
        """
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error("no rules of the form p_rulename are defined")
            self.error = 1
            return

        for line, filename, name, doc in self.pfuncs:
            func = self.pdict[name]
            # A bound method carries one extra (self) argument.
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func_code(func).co_argcount > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments", filename, line, func.__name__)
                self.error = 1
            elif func_code(func).co_argcount < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument", filename, line, func.__name__)
                self.error = 1
            elif not func.__doc__:
                self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)", filename, line, func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc, filename, line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError:
                    e = sys.exc_info()[1]
                    self.log.error(str(e))
                    self.error = 1

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.files[filename] = 1

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n, v in self.pdict.items():
            if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)):
                continue
            if n[0:2] == 't_':
                continue
            if n[0:2] == 'p_' and n != 'p_error':
                self.log.warning("'%s' not defined as a function", n)
            if ((isinstance(v, types.FunctionType) and func_code(v).co_argcount == 1) or
                    (isinstance(v, types.MethodType) and func_code(v).co_argcount == 2)):
                # Best-effort heuristic: a docstring whose second
                # space-separated word is ':' looks like a grammar rule.
                # Anything that goes wrong here (e.g. no docstring) is
                # deliberately ignored.
                try:
                    doc = v.__doc__.split(" ")
                    if doc[1] == ':':
                        self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
                                         func_code(v).co_filename, func_code(v).co_firstlineno, n)
                except Exception:
                    pass

        self.grammar = grammar

3029

3030

# -----------------------------------------------------------------------------

3031

# yacc(module)

3032

3033

# Build a parser

3034

# -----------------------------------------------------------------------------

3035

3036

def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file, outputdir='',
         debuglog=None, errorlog=None, picklefile=None):
    """Build a parser from the grammar rules found in a module or the caller.

    Args:
        method: Table construction method handed to ``LRGeneratedTable``.
        debug: When true, write grammar/conflict details to the debug log.
        module: Object whose attributes supply the parser specification;
            when omitted the caller's namespace is used.
        tabmodule: Name of the cached table module to read and write.
        start: Start symbol; overrides the ``start`` found in the
            specification when not None.
        check_recursion: When true, check for unreachable symbols and
            infinite recursion.
        optimize: When true, use cached tables without comparing signatures.
        write_tables: When true, write the generated tables with
            ``write_table()``.  Forced off when ``picklefile`` is given.
        debugfile: File the debug log is written to when ``debug`` is set
            and no ``debuglog`` is supplied.
        outputdir: Directory for the generated table module.
        debuglog: Logger for debugging output.
        errorlog: Logger for warnings and errors (defaults to stderr).
        picklefile: When given, tables are read from / pickled to this file.

    Returns:
        The ``LRParser`` instance.  The module-level ``parse`` name is also
        rebound to its ``parse`` method.

    Raises:
        YaccError: if the specification or grammar contains errors.
    """
    global parse                 # Reference to the parsing method of the last built parser

    # If pickling is enabled, table files are not created
    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k, getattr(module, k)) for k in dir(module)]
        pdict = dict(_items)
    else:
        # Must be called directly from this function: the level count
        # depends on the exact call depth.
        pdict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict, log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError("Unable to build parser")

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables.  Any failure here simply falls through to
    # regenerating the tables below.
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr, pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception:
                e = sys.exc_info()[1]
                errorlog.warning("There was a problem loading the table file: %s", repr(e))
    except VersionError:
        # Log the exception itself, not the whole exc_info() tuple.
        e = sys.exc_info()[1]
        errorlog.warning(str(e))
    except Exception:
        pass

    if debuglog is None:
        if debug:
            debuglog = PlyLogger(open(debugfile, "w"))
        else:
            debuglog = NullLogger()

    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)

    errors = 0

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError("Unable to build parser")

    if not pinfo.error_func:
        errorlog.warning("no p_error() function is defined")

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term, assoc, level)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.warning("%s", str(e))

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname, syms, funcname, file, line)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.error("%s", str(e))
            errors = 1

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError:
        e = sys.exc_info()[1]
        errorlog.error(str(e))
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule", prod.file, prod.line, sym)
        errors = 1

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info("")
        debuglog.info("Unused terminals:")
        debuglog.info("")
        for term in unused_terminals:
            errorlog.warning("Token '%s' defined, but not used", term)
            debuglog.info(" %s", term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info("")
        debuglog.info("Grammar")
        debuglog.info("")
        for n, p in enumerate(grammar.Productions):
            debuglog.info("Rule %-5d %s", n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning("There is 1 unused token")
    if len(unused_terminals) > 1:
        errorlog.warning("There are %d unused tokens", len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning("There is 1 unused rule")
    if len(unused_rules) > 1:
        errorlog.warning("There are %d unused rules", len(unused_rules))

    if debug:
        debuglog.info("")
        debuglog.info("Terminals, with rules where they appear")
        debuglog.info("")
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info("")
        debuglog.info("Nonterminals, with rules where they appear")
        debuglog.info("")
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info("")

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning("Symbol '%s' is unreachable", u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error("Infinite recursion detected for symbol '%s'", inf)
            errors = 1

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug("Generating %s tables", method)

    lr = LRGeneratedTable(grammar, method, debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning("1 shift/reduce conflict")
        elif num_sr > 1:
            errorlog.warning("%d shift/reduce conflicts", num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning("1 reduce/reduce conflict")
        elif num_rr > 1:
            errorlog.warning("%d reduce/reduce conflicts", num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning("")
        debuglog.warning("Conflicts:")
        debuglog.warning("")

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)

        # The same (state, rule, rejected) triple can be recorded more than
        # once; report each only once.
        already_reported = {}
        for state, rule, rejected in lr.rr_conflicts:
            if (state, id(rule), id(rejected)) in already_reported:
                continue
            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            debuglog.warning("rejected rule (%s) in state %d", rejected, state)
            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
            already_reported[state, id(rule), id(rejected)] = 1

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning("Rule (%s) is never reduced", rejected)
                errorlog.warning("Rule (%s) is never reduced", rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        lr.write_table(tabmodule, outputdir, signature)

    # Write a pickled version of the tables
    if picklefile:
        lr.pickle_table(picklefile, signature)

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr, pinfo.error_func)

    parse = parser.parse
    return parser

Older »