5
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool
6
* Set (PCCTS) -- PCCTS is in the public domain. An individual or
7
* company may do whatever they wish with source code distributed with
8
* PCCTS or the code generated by PCCTS, including the incorporation of
9
* PCCTS, or its output, into commerical software.
11
* We encourage users to develop software with PCCTS. However, we do ask
12
* that credit is given to us for developing PCCTS. By "credit",
13
* we mean that if you incorporate our source code into one of your
14
* programs (commercial product, research project, or otherwise) that you
15
* acknowledge this fact somewhere in the documentation, research report,
16
* etc... If you like PCCTS and have developed a nice tool with the
17
* output, please mention that you developed it using PCCTS. In
18
* addition, we ask that this header remain intact in our source code.
19
* As long as these guidelines are kept, we expect to continue enhancing
20
* this system and expect to make other tools available as they are
25
* Parr Research Corporation
26
* with Purdue University and AHPCRC, University of Minnesota
32
#include "pccts_stdlib.h"
33
#include "pccts_stdarg.h"
34
#include "pccts_string.h"
35
#include "pccts_stdio.h"
39
/* I have to put this here due to C++ limitation
40
* that you can't have a 'forward' decl for enums.
41
* I hate C++!!!!!!!!!!!!!!!
42
* Of course, if I could use real templates, this would go away.
45
// MR1 10-Apr-97 133MR1 Prevent use of varying sizes for the
46
// MR1 ANTLRTokenType enum
49
// Stand-in definition of ANTLRTokenType with two dummy enumerators, 0 and 9999.
// NOTE(review): going by the MR1 remark in this file, the 9999 value is presumably
// there so the enum does not end up with varying sizes - confirm.
enum ANTLRTokenType { TER_HATES_CPP=0, ITS_TOO_COMPLICATED=9999}; // MR1
51
#define ANTLR_SUPPORT_CODE
54
#include ATOKENBUFFER_H
57
// File-local size constants tagged MR14.
// NOTE(review): neither name is referenced in the visible part of this file - confirm they are still needed.
static const int zzINF_DEF_TOKEN_BUFFER_SIZE = 2000; /* MR14 */
58
static const int zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000; /* MR14 */
60
/* L o o k a h e a d M a c r o s */
62
/* maximum of 32 bits/unsigned int and must be 8 bits/byte;
63
* we only use 8 bits of it.
65
// Table of single-bit masks: the visible entries are 1<<0 through 1<<7.
// set_el() indexes it with MODWORD(b); set_deg() and edecode() walk it from
// bitmask[0] up to bitmask[sizeof(SetWordType)*8].
// NOTE(review): only eight initializers are visible, so the declared size matches
// them only if sizeof(SetWordType) is 1 - confirm.
SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = {
66
0x00000001, 0x00000002, 0x00000004, 0x00000008,
67
0x00000010, 0x00000020, 0x00000040, 0x00000080
70
// Scratch buffer of 500 chars, initially empty, that eMsgd(), eMsg() and eMsg2() format into with sprintf.
char ANTLRParser::eMsgBuffer[500] = "";
76
delete [] zzFAILtext; // MR16 Manfred Kogler
80
// Constructor. Only the first parameter line (the token buffer) is visible in this listing.
// The visible body stores the buffer, allocates the token type cache and resets
// the resynch, error-count and trace state.
ANTLRParser(ANTLRTokenBuffer *_inputTokens,
87
can_use_inf_look = use_inf_look;
88
// Demand lookahead is refused: any non-zero dlook goes straight to panic().
/* MR14 */ if (dlook != 0) {
89
/* MR14 */ panic("ANTLRParser::ANTLRParser - Demand lookahead not supported in C++ mode");
92
demand_look = 0; /* demand_look = dlook; */
96
// Token type 1 is the default end-of-file type; consumeUntil() and
// consumeUntilToken() hard-code the same value.
eofToken = (ANTLRTokenType)1;
98
// allocate lookahead buffer
99
// LLk entries; restoreState() refills all of them from the token buffer.
// NOTE(review): no matching delete [] for token_type is visible in this listing - confirm it is released.
token_type = new ANTLRTokenType[LLk];
103
stillToFetch = 0; // MR19
106
inf_labase = 0; // MR7
108
/* prime lookahead buffer, point to inputTokens */
109
// Link parser and buffer: the buffer is told the minimum token count k and
// is handed a pointer back to this parser.
this->inputTokens = _inputTokens;
110
this->inputTokens->setMinTokens(k);
111
_inputTokens->setParser(this); // MR1
112
resynchConsumed=1; // MR8
113
// FAIL() allocates this buffer on first use.
zzFAILtext=NULL; // MR9
114
// traceReset() copies traceOptionValueDefault, so the default is assigned first.
traceOptionValueDefault=0; // MR10
115
traceReset(); // MR10
116
zzGuessSeq=0; // MR10
117
syntaxErrCount=0; // MR11
120
// Initialization hook. The statements visible here set resynchConsumed to 1 and
// reset the trace state through traceReset().
// NOTE(review): further statements may be missing from this listing - confirm against the full file.
void ANTLRParser::init()
123
resynchConsumed=1; // MR8
124
traceReset(); // MR10
127
// Puts the trace settings back to their starting values: traceOptionValue is
// copied from traceOptionValueDefault, traceGuessOptionValue becomes 1 (a value
// above zero keeps tracing active while guessing, see tracein), and the current
// rule name is cleared.
void ANTLRParser::traceReset()
129
traceOptionValue=traceOptionValueDefault;
130
traceGuessOptionValue=1;
131
traceCurrentRuleName=NULL;
136
// guess(): the visible body calls setjmp() on guess_start.state and returns its
// result, which is 0 on the direct call and non-zero when control arrives again
// through longjmp() on the same buffer.
// How the st argument is used is not visible in this listing.
// Under MSVC, warning 4611 is switched off around this function and set back to
// its default afterwards.
#ifdef _MSC_VER // MR23
138
//interaction between '_setjmp' and C++ object destruction is non-portable
139
#pragma warning(disable : 4611)
142
guess(ANTLRParserState *st)
146
return setjmp(guess_start.state);
148
#ifdef _MSC_VER // MR23
153
// Copies guess and trace bookkeeping from this parser into *buf: guess_start,
// guessing, inf_labase, inf_last and the four trace fields. restoreState()
// reads the same fields back.
// buf is dereferenced without a null check.
saveState(ANTLRParserState *buf)
155
buf->guess_start = guess_start;
156
buf->guessing = guessing;
157
buf->inf_labase = inf_labase;
158
buf->inf_last = inf_last;
160
buf->traceOptionValue=traceOptionValue; /* MR10 */
161
buf->traceGuessOptionValue=traceGuessOptionValue; /* MR10 */
162
buf->traceCurrentRuleName=traceCurrentRuleName; /* MR10 */
163
buf->traceDepth=traceDepth; /* MR10 */
167
// Inverse of saveState(): copies guess and trace bookkeeping from *buf back into
// this parser, rebuilds the token type cache from the token buffer, and reports
// on stderr when the restore switches tracing on or off.
restoreState(ANTLRParserState *buf)
170
int prevTraceOptionValue;
172
guess_start = buf->guess_start;
173
guessing = buf->guessing;
174
inf_labase = buf->inf_labase;
175
inf_last = buf->inf_last;
178
// restore lookahead buffer from k tokens before restored TokenBuffer position
179
// if demand_look, then I guess we don't look backwards for these tokens.
180
// The offset i-LLk runs from 1-LLk up to 0, filling token_type[0] through token_type[LLk-1].
for (i=1; i<=LLk; i++) token_type[i-1] =
181
inputTokens->bufferedToken(i-LLk)->getType();
187
// A message is printed only when the restore flips tracing between enabled
// (value above zero) and disabled, and only while a rule name is set.
prevTraceOptionValue=traceOptionValue;
188
traceOptionValue=buf->traceOptionValue;
189
if ( (prevTraceOptionValue > 0) !=
190
(traceOptionValue > 0)) {
191
if (traceCurrentRuleName != NULL) { /* MR21 */
192
if (traceOptionValue > 0) {
193
/* MR23 */ printMessage(stderr,
194
"trace enable restored in rule %s depth %d\n",
195
traceCurrentRuleName,
198
if (traceOptionValue <= 0) {
199
/* MR23 */ printMessage(stderr,
200
"trace disable restored in rule %s depth %d\n",
201
traceCurrentRuleName, /* MR21 */
206
// The rule name used in the messages above is the one from before the restore;
// it is overwritten only here.
traceGuessOptionValue=buf->traceGuessOptionValue;
207
traceCurrentRuleName=buf->traceCurrentRuleName;
208
traceDepth=buf->traceDepth;
212
/* Get the next symbol from the input stream; put it into lookahead buffer;
213
* fill token_type[] fast reference cache also. NLA is the next place where
214
* a lookahead ANTLRAbstractToken should go.
220
#ifdef ZZDEBUG_CONSUME_ACTION
221
zzdebug_consume_action();
225
// Defer Fetch feature
226
// Moves action of consume() into LA() function
231
NLA = inputTokens->getToken()->getType();
233
lap = (lap+1)&(LLk-1);
238
_ANTLRTokenPtr ANTLRParser::
243
// Defer Fetch feature
244
// Moves action of consume() into LA() function
250
#ifdef DEBUG_TOKENBUFFER
251
if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() < LLk ) /* MR20 Was "<=" */
253
char buf[2000]; /* MR20 Was "static" */
254
sprintf(buf, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i);
258
return inputTokens->bufferedToken(i-LLk);
265
int i, c = k - (LLk-dirty);
266
for (i=1; i<=c; i++) consume();
269
/* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK);
276
for(i=1;i<=LLk; i++) consume();
278
// lap = 0; // MR14 Sinan Karasu (sinan.karasu@boeing.com)
279
// labase = 0; // MR14
283
/* check to see if the current input symbol matches '_t'.
284
* During NON demand lookahead mode, dirty will always be 0 and
285
* hence the extra code for consuming tokens in _match is never
286
* executed; the same routine can be used for both modes.
289
// _match(): only one body statement is visible in this listing; it advances
// labase by one slot. The four pointer parameters have the same names and types
// as those of _setmatch(), which stores mismatch details through them.
// NOTE(review): (labase+1)&(LLk-1) wraps like a modulo only when LLk is a power
// of two - confirm that is guaranteed.
_match(ANTLRTokenType _t, ANTLRChar **MissText,
290
ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
291
SetWordType **MissSet)
304
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
308
/* check to see if the current input symbol matches '_t'.
309
* Used during exception handling.
312
// Match variant that signals failure through its return value: returns 0
// when LA(1) differs from _t, before labase is touched. On a match the visible
// code advances labase by one slot.
_match_wsig(ANTLRTokenType _t)
317
if ( LA(1)!=_t ) return 0;
319
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
323
/* check to see if the current input symbol matches any token in a set.
324
* During NON demand lookahead mode, dirty will always be 0 and
325
* hence the extra code for consuming tokens in _match is never
326
* executed; the same routine can be used for both modes.
329
// Set version of _match(): tests LA(1) for membership in tset with set_el().
// On a miss the out parameters are filled as follows: *MissText is NULL,
// *MissTok is 0, *BadTok is the current token LT(1) and *MissSet is
// tokclassErrset. The statement that leaves the function after a miss is not
// visible in this listing.
// On a hit the visible code advances labase by one slot.
_setmatch(SetWordType *tset, ANTLRChar **MissText,
330
ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
331
SetWordType **MissSet, SetWordType *tokclassErrset)
336
if ( !set_el(LA(1), tset) ) {
337
*MissText=NULL; /* MR23 */
338
*MissTok=(ANTLRTokenType) 0; /* MR23 */
339
*BadTok=LT(1); /* MR23 */
340
*MissSet=tokclassErrset; /* MR23 */
344
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
349
// Set match that signals failure through its return value: returns 0 when LA(1)
// is not a member of tset, before labase is touched. On a hit the visible code
// advances labase by one slot.
_setmatch_wsig(SetWordType *tset)
354
if ( !set_el(LA(1), tset) ) return 0;
356
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
360
/* Exception handling routines */
363
// Change suggested by Eli Sternheim (eli@interhdl.com)
366
// Discards tokens until LA(1) is a member of set st or has token type 1 (Eof).
// The token that stops the loop is left unconsumed.
// NOTE(review): Eof is hard-coded to 1 here while resynch() compares against the
// eofToken member - confirm eofToken always stays 1, otherwise the two disagree.
consumeUntil(SetWordType *st)
368
ANTLRTokenType tmp; // MR1
369
const int Eof=1; // MR1
370
while ( !set_el( (tmp=LA(1)), st) && tmp!=Eof) { consume(); } // MR1
375
// Change suggested by Eli Sternheim (eli@interhdl.com)
378
// Discards tokens until LA(1) equals t or has token type 1 (Eof). The token that
// stops the loop is left unconsumed. The declaration of tmp is not visible in
// this listing.
// NOTE(review): Eof is hard-coded to 1 rather than read from eofToken - confirm
// the two always agree.
consumeUntilToken(int t)
381
const int Eof=1; // MR1
382
while ( (tmp=LA(1)) !=t && tmp!=Eof) { consume(); } // MR1
386
/* Old error stuff */
389
// Error recovery driven by the resynchConsumed flag. Three cases, in order:
//   1. flag is 0: consume one token, set the flag to 1 and return;
//   2. LA(1) passes the wd/mask test or is eofToken: clear the flag and return
//      without consuming;
//   3. otherwise consume tokens until case 2 would hold, then set the flag to 1.
// Membership is tested as wd[LA(1)]&mask, so wd is indexed directly by token
// type here (set_el uses DIVWORD and MODWORD instead).
resynch(SetWordType *wd,SetWordType mask)
392
/* MR8 S.Bochnak@microtool.com.pl */
393
/* MR8 Change file scope static "consumed" to instance var */
395
/* if you enter here without having consumed a token from last resynch
396
* force a token consumption.
398
/* MR8 */ if ( !resynchConsumed ) {consume(); resynchConsumed=1; return;}
400
/* if current token is in resynch set, we've got what we wanted */
402
/* MR8 */ if ( wd[LA(1)]&mask || LA(1) == eofToken ) {resynchConsumed=0; return;}
404
/* scan until we find something in the resynch set */
406
while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();}
408
/* MR8 */ resynchConsumed=1;
411
/* standard error reporting function that assumes DLG-based scanners;
412
* you should redefine in subclass to change it or if you use your
416
/* MR23 THM There appears to be a parameter "badText" passed to syn()
417
which is not present in the parameter list. This may be
418
because in C mode there is no attribute function which
419
returns the text, so the text representation of the token
420
must be passed explicitly. I think.
424
// Syntax error reporter. Increments syntaxErrCount and writes one message to
// stderr through printMessage(). The first parameter is unnamed and unused; the
// offending token is taken from LT(1) instead.
// Message layout as far as the visible lines show:
//   - the text of LT(1), or for an end-of-file token its name from
//     parserTokenName(), replaced by <eof> when that name starts with @;
//   - nothing more when both etok and eset are zero/null;
//   - for k equal to 1 the word missing; the other visible branch prints the
//     text of LT(k) followed by not;
//   - the expected set via edecode() when eset has members, else token_tbl[etok];
//   - the rule group egroup when it is non-empty.
// NOTE(review): egroup goes to strlen() without a null check - confirm callers never pass NULL.
syn(_ANTLRTokenPtr /*tok MR23*/, ANTLRChar *egroup, SetWordType *eset,
425
ANTLRTokenType etok, int k)
429
line = LT(1)->getLine();
431
syntaxErrCount++; /* MR11 */
433
/* MR23 If the token is not an EOF token, then use the ->getText() value.
435
If the token is the EOF token the text returned by ->getText()
436
may be garbage. If the text from the token table is "@" use
437
"<eof>" instead, because end-users don't know what "@" means.
438
If the text is not "@" then use that text, which must have been
439
supplied by the grammar writer.
441
const char * errorAt = LT(1)->getText();
442
if (LA(1) == eofToken) {
443
errorAt = parserTokenName(LA(1));
444
if (errorAt[0] == '@') errorAt = "<eof>";
446
/* MR23 */ printMessage(stderr, "line %d: syntax error at \"%s\"",
448
if ( !etok && !eset ) {/* MR23 */ printMessage(stderr, "\n"); return;}
449
if ( k==1 ) /* MR23 */ printMessage(stderr, " missing");
452
/* MR23 */ printMessage(stderr, "; \"%s\" not", LT(k)->getText()); // MR23 use LT(k) since k>1
453
if ( set_deg(eset)>1 ) /* MR23 */ printMessage(stderr, " in");
455
if ( set_deg(eset)>0 ) edecode(eset);
456
else /* MR23 */ printMessage(stderr, " %s", token_tbl[etok]);
457
if ( strlen(egroup) > 0 ) /* MR23 */ printMessage(stderr, " in %s", egroup);
458
/* MR23 */ printMessage(stderr, "\n");
461
/* is b an element of set p? */
463
// Membership test: selects word DIVWORD(b) of set p and masks it with
// bitmask[MODWORD(b)]. The result is non-zero when the bit for b is set and
// zero otherwise. p is dereferenced without a null check.
set_el(ANTLRTokenType b, SetWordType *p)
465
return( p[DIVWORD(b)] & bitmask[MODWORD(b)] );
469
// Counts the members of set a by testing every word against every entry of
// bitmask. Returns 0 for a null set. The outer loop over the bsetsize words and
// the final return are not visible in this listing.
// NOTE(review): endp is computed from a before the null check below, so a null
// argument is used in pointer arithmetic first - confirm this is acceptable.
// NOTE(review): the register storage class was removed in C++17 - confirm the
// language level this file is built with.
set_deg(SetWordType *a)
471
/* Fast compute degree of a set... the number
472
of elements present in the set. Assumes
473
that all word bits are used in the set
475
register SetWordType *p = a;
476
register SetWordType *endp = &(a[bsetsize]);
477
register int degree = 0;
479
if ( a == NULL ) return 0;
482
register SetWordType t = *p;
483
register SetWordType *b = &(bitmask[0]);
485
if (t & *b) ++degree;
486
} while (++b < &(bitmask[sizeof(SetWordType)*8]));
494
// Prints the members of set a to stderr: for every set bit the name
// token_tbl[e] is written with a leading space. The list is wrapped in braces
// when the set has more than one member.
// NOTE(review): the statement that advances e is not visible in this listing -
// presumably it is incremented once per bit tested; confirm.
edecode(SetWordType *a)
496
register SetWordType *p = a;
497
register SetWordType *endp = &(p[bsetsize]);
498
register unsigned e = 0;
500
if ( set_deg(a)>1 ) /* MR23 */ printMessage(stderr, " {");
502
register SetWordType t = *p;
503
register SetWordType *b = &(bitmask[0]);
505
if ( t & *b ) /* MR23 */ printMessage(stderr, " %s", token_tbl[e]);
507
} while (++b < &(bitmask[sizeof(SetWordType)*8]));
508
} while (++p < endp);
509
if ( set_deg(a)>1 ) /* MR23 */ printMessage(stderr, " }");
513
* zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk)
514
* where the zzMiss stuff is set here to the token that did not match
515
* (and which set wasn't it a member of).
518
// MR9 29-Sep-97 Stan Bochnak (S.Bochnak@microTool.com.pl)
519
// MR9 Original fix to static allocated text didn't
520
// MR9 work because a pointer to it was passed back
521
// MR9 to caller. Replace with instance variable.
523
// Upper bound on the number of lookahead sets FAIL() accepts.
const int SETWORDCOUNT=20;
526
// FAIL(k, set_1 .. set_k, miss_set, miss_text, bad_tok, bad_text, err_k)
// Variadic failure analysis. After k, the argument list is read in this order:
// k lookahead sets, then the five out pointers named above.
// It scans LT(1)..LT(k), appending each token text to zzFAILtext separated by
// single spaces, and stops at the first position whose token type is not in the
// set for that position.
// NOTE(review): the strcat calls append into the 1000-char zzFAILtext with no
// length check - confirm token texts cannot exceed it.
// NOTE(review): f is allocated with new on every call; the MR1 remarks mention
// releasing temporary storage but the release itself is not visible - confirm.
ANTLRParser::FAIL(int k, ...)
532
if (zzFAILtext == NULL) zzFAILtext=new char [1000]; // MR9
533
SetWordType **f=new SetWordType *[SETWORDCOUNT]; // MR1 // MR9
534
SetWordType **miss_set;
535
ANTLRChar **miss_text;
536
_ANTLRTokenPtr *bad_tok;
537
ANTLRChar **bad_text;
540
// err_k is passed as a "int *", not "unsigned *"
548
zzFAILtext[0] = '\0';
549
// More than SETWORDCOUNT sets would overrun f, so that case ends in panic().
if ( k > SETWORDCOUNT ) panic("FAIL: overflowed buffer");
550
for (i=1; i<=k; i++) /* collect all lookahead sets */
552
f[i-1] = va_arg(ap, SetWordType *);
554
for (i=1; i<=k; i++) /* look for offending token */
556
if ( i>1 ) strcat(zzFAILtext, " ");
557
strcat(zzFAILtext, LT(i)->getText());
558
if ( !set_el(LA(i), f[i-1]) ) break;
560
miss_set = va_arg(ap, SetWordType **);
561
miss_text = va_arg(ap, ANTLRChar **);
562
bad_tok = va_arg(ap, _ANTLRTokenPtr *);
563
bad_text = va_arg(ap, ANTLRChar **);
564
err_k = va_arg(ap, int *); // MR1
567
/* bad; lookahead is permutation that cannot be matched,
568
* but, the ith token of lookahead is valid at the ith position
569
* (The old LL sub 1 (k) versus LL(k) parsing technique)
572
*miss_text = LT(1)->getText();
574
*bad_text = (*bad_tok)->getText();
577
// MR4 20-May-97 erroneously deleted contents of f[]
578
// MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca)
579
// MR1 10-Apr-97 release temporary storage
584
/* MR23 printMessage(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/
586
*miss_text = zzFAILtext;
588
*bad_text = (*bad_tok)->getText();
589
if ( i==1 ) *err_k = 1;
592
// MR4 20-May-97 erroneously deleted contents of f[]
593
// MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca)
594
// MR1 10-Apr-97 release temporary storage
601
// Match with built-in default error handling. When LA(1) is not tokenWanted it
// increments syntaxErrCount, prints a syntax error line naming
// token_tbl[tokenWanted] as the missing token, and skips input with
// consumeUntil(whatFollows). In the message the current token text is shown as
// <eof> when the token is eofToken and its text starts with @.
// A pending consume() is performed first when dirty equals LLk.
// The statements between the recovery call and the labase update are not visible.
_match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows)
603
if ( dirty==LLk ) consume();
605
if ( LA(1)!=tokenWanted )
607
syntaxErrCount++; /* MR11 */
608
/* MR23 */ printMessage(stderr,
609
"line %d: syntax error at \"%s\" missing %s\n",
611
(LA(1)==eofToken && LT(1)->getText()[0] == '@')?"<eof>":LT(1)->getText(), /* MR21a */
612
token_tbl[tokenWanted]);
613
consumeUntil( whatFollows );
618
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
619
/* if ( !demand_look ) consume(); */
626
// Set version of _match_wdfltsig(). When LA(1) is not a member of tokensWanted
// it increments syntaxErrCount, prints a syntax error line naming
// token_tbl[tokenTypeOfSet] as the missing item, and skips input with
// consumeUntil(whatFollows). In the message the current token text is shown as
// <eof> when the token is eofToken and its text starts with @.
// A pending consume() is performed first when dirty equals LLk.
// The statements between the recovery call and the labase update are not visible.
_setmatch_wdfltsig(SetWordType *tokensWanted,
627
ANTLRTokenType tokenTypeOfSet,
628
SetWordType *whatFollows)
630
if ( dirty==LLk ) consume();
631
if ( !set_el(LA(1), tokensWanted) )
633
syntaxErrCount++; /* MR11 */
634
/* MR23 */ printMessage(stderr,
635
"line %d: syntax error at \"%s\" missing %s\n",
637
(LA(1)==eofToken && LT(1)->getText()[0] == '@')?"<eof>":LT(1)->getText(), /* MR21a */
638
token_tbl[tokenTypeOfSet]);
639
consumeUntil( whatFollows );
644
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
645
/* if ( !demand_look ) consume(); */
651
// Formats the integer d into eMsgBuffer, using err as the printf-style format string.
// NOTE(review): sprintf is unbounded and eMsgBuffer holds 500 chars; err is also
// used directly as the format - confirm callers only pass short, trusted formats.
eMsgd(char *err,int d)
653
sprintf(eMsgBuffer, err, d); // dangerous, but I don't care
658
// Formats the string s into eMsgBuffer, using err as the printf-style format string.
// NOTE(review): sprintf is unbounded and eMsgBuffer holds 500 chars - confirm the
// combined length of err and s cannot exceed it.
eMsg(char *err, char *s)
660
sprintf(eMsgBuffer, err, s);
665
// Formats the strings s and t into eMsgBuffer, using err as the printf-style format string.
// NOTE(review): sprintf is unbounded and eMsgBuffer holds 500 chars - confirm the
// combined length of err, s and t cannot exceed it.
eMsg2(char *err,char *s, char *t)
667
sprintf(eMsgBuffer, err, s, t);
672
// Fatal error exit: prints msg to stderr with the prefix ANTLR panic, then ends
// the process through exit(PCCTS_EXIT_FAILURE). Control does not return to the caller.
panic(const char *msg) // MR20 const
674
/* MR23 */ printMessage(stderr, "ANTLR panic: %s\n", msg);
675
exit(PCCTS_EXIT_FAILURE); // MR1
678
// Returns the entry of token_tbl for token type tok.
// tok is used as an array index with no range check in the visible code.
const ANTLRChar *ANTLRParser:: // MR1
679
parserTokenName(int tok) { // MR1
680
return token_tbl[tok]; // MR1
683
// Trace hook for the end of a guess. Does nothing while no rule name is set.
// Branches are taken on traceOptionValue and traceGuessOptionValue being at or
// below zero; the bodies of those two branches are not visible in this listing.
// The visible output names the rule stored in *state, shows the text of LT(1)
// (or @ for the end-of-file token), and adds whether an enclosing guess is still
// active according to state->guessing.
void ANTLRParser::traceGuessDone(const ANTLRParserState *state) {
687
if (traceCurrentRuleName == NULL) return;
689
if (traceOptionValue <= 0) {
691
} else if (traceGuessOptionValue <= 0) {
698
/* MR23 */ printMessage(stderr,"guess done - returning to rule %s {\"%s\"} at depth %d",
699
state->traceCurrentRuleName,
700
LT(1)->getType() == eofToken ? "@" : LT(1)->getText(),
702
if (state->guessing != 0) {
703
/* MR23 */ printMessage(stderr," (guess mode continues - an enclosing guess is still active)");
705
/* MR23 */ printMessage(stderr," (guess mode ends)");
707
/* MR23 */ printMessage(stderr,"\n");
711
// Trace hook for a failed guess. Does nothing while no rule name is set.
// Branches are taken on traceOptionValue being at or below zero, and on guessing
// combined with traceGuessOptionValue at or below zero; their bodies are not
// visible in this listing. The visible output is one stderr line naming the
// current rule.
void ANTLRParser::traceGuessFail() {
715
if (traceCurrentRuleName == NULL) return; /* MR21 */
717
if (traceOptionValue <= 0) {
719
} else if (guessing && traceGuessOptionValue <= 0) {
726
/* MR23 */ printMessage(stderr,"guess failed in %s\n",traceCurrentRuleName);
731
zero value turns off trace
734
// Rule entry trace hook. Records rule as the current rule name before any of
// the trace-level tests, so the name is updated on every call.
// Branches are taken on traceOptionValue being at or below zero, and on guessing
// combined with traceGuessOptionValue at or below zero; their bodies are not
// visible in this listing. The visible output shows the text of LT(1) (or @ for
// the end-of-file token) and appends the word guessing while a guess is active.
void ANTLRParser::tracein(const ANTLRChar * rule) {
739
traceCurrentRuleName=rule;
741
if (traceOptionValue <= 0) {
743
} else if (guessing && traceGuessOptionValue <= 0) {
750
/* MR23 */ printMessage(stderr,"enter rule %s {\"%s\"} depth %d",
752
LT(1)->getType() == eofToken ? "@" : LT(1)->getText(),
754
if (guessing) /* MR23 */ printMessage(stderr," guessing");
755
/* MR23 */ printMessage(stderr,"\n");
760
// Rule exit trace hook, the counterpart of tracein().
// Branches are taken on traceOptionValue being at or below zero, and on guessing
// combined with traceGuessOptionValue at or below zero; their bodies are not
// visible in this listing. The visible output shows the text of LT(1) (or @ for
// the end-of-file token) and appends the word guessing while a guess is active.
void ANTLRParser::traceout(const ANTLRChar * rule) {
766
if (traceOptionValue <= 0) {
768
} else if (guessing && traceGuessOptionValue <= 0) {
775
/* MR23 */ printMessage(stderr,"exit rule %s {\"%s\"} depth %d",
777
LT(1)->getType() == eofToken ? "@" : LT(1)->getText(),
779
if (guessing) /* MR23 */ printMessage(stderr," guessing");
780
/* MR23 */ printMessage(stderr,"\n");
784
// Adjusts traceOptionValue by delta. While a rule name is set, prints a stderr
// line when the value crosses from at-or-below zero to above zero (enabled) or
// the other way (disabled).
// NOTE(review): the return statement is not visible in this listing - presumably
// the previous value is returned; confirm.
int ANTLRParser::traceOption(int delta) {
786
int prevValue=traceOptionValue;
788
traceOptionValue=traceOptionValue+delta;
790
if (traceCurrentRuleName != NULL) {
791
if (prevValue <= 0 && traceOptionValue > 0) {
792
/* MR23 */ printMessage(stderr,"trace enabled in rule %s depth %d\n",traceCurrentRuleName,traceDepth);
794
if (prevValue > 0 && traceOptionValue <= 0) {
795
/* MR23 */ printMessage(stderr,"trace disabled in rule %s depth %d\n",traceCurrentRuleName,traceDepth);
802
// Adjusts traceGuessOptionValue by delta. While a rule name is set, prints a
// stderr line when the value crosses from at-or-below zero to above zero
// (guess trace enabled) or the other way (guess trace disabled).
// NOTE(review): the return statement is not visible in this listing - presumably
// the previous value is returned; confirm.
int ANTLRParser::traceGuessOption(int delta) {
804
int prevValue=traceGuessOptionValue;
806
traceGuessOptionValue=traceGuessOptionValue+delta;
808
if (traceCurrentRuleName != NULL) {
809
if (prevValue <= 0 && traceGuessOptionValue > 0) {
810
/* MR23 */ printMessage(stderr,"guess trace enabled in rule %s depth %d\n",traceCurrentRuleName,traceDepth);
812
if (prevValue > 0 && traceGuessOptionValue <= 0) {
813
/* MR23 */ printMessage(stderr,"guess trace disabled in rule %s depth %d\n",traceCurrentRuleName,traceDepth);
819
// MR19 V.H. Simonis Defer Fetch feature
821
// Performs the token fetches that were postponed: for each of the stillToFetch
// pending fetches it stores the type of the next buffer token in NLA and
// advances lap by one slot.
// NOTE(review): the statement that clears stillToFetch afterwards is not visible
// in this listing - confirm it exists.
// NOTE(review): (lap+1)&(LLk-1) wraps like a modulo only when LLk is a power of
// two - confirm that is guaranteed.
void ANTLRParser::undeferFetch()
826
for (int stillToFetch_x = 0; stillToFetch_x < stillToFetch; ++stillToFetch_x) {
827
NLA = inputTokens->getToken()->getType();
829
lap = (lap+1)&(LLk-1);
839
int ANTLRParser::isDeferFetchEnabled()
849
// printf-style output entry point used throughout this file. Packs the variable
// arguments into a va_list and forwards them to printMessageV(), whose result is
// kept in iRet.
// NOTE(review): the declaration of marker, the va_end call and the return are
// not visible in this listing - confirm they are present.
int ANTLRParser::printMessage(FILE* pFile, const char* pFormat, ...)
852
va_start( marker, pFormat );
853
int iRet = printMessageV(pFile, pFormat, marker);
858
// va_list counterpart of printMessage(): writes the formatted text to pFile with
// vfprintf and returns its result (characters written, or a negative value on error).
int ANTLRParser::printMessageV(FILE* pFile, const char* pFormat, va_list arglist) // MR23
860
return vfprintf(pFile, pFormat, arglist);
863
// MR23 Move semantic predicate error handling from macro to virtual function
865
// Called by the zzfailed_pred
867
// Reports a failed semantic predicate: prints the line number of LT(1) and the
// predicate text.
// NOTE(review): this message goes to stdout while the other diagnostics in this
// file go to stderr - confirm that is intended.
void ANTLRParser::failedSemanticPredicate(const char* predicate)
869
printMessage(stdout,"line %d: semantic error; failed predicate: '%s'\n",
870
LT(1)->getLine(), predicate);