1
/* This is the parser for the dlg
2
* This is a part of the Purdue Compiler Construction Tool Set
6
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool
7
* Set (PCCTS) -- PCCTS is in the public domain. An individual or
8
* company may do whatever they wish with source code distributed with
9
* PCCTS or the code generated by PCCTS, including the incorporation of
10
* PCCTS, or its output, into commerical software.
12
* We encourage users to develop software with PCCTS. However, we do ask
13
* that credit is given to us for developing PCCTS. By "credit",
14
* we mean that if you incorporate our source code into one of your
15
* programs (commercial product, research project, or otherwise) that you
16
* acknowledge this fact somewhere in the documentation, research report,
17
* etc... If you like PCCTS and have developed a nice tool with the
18
* output, please mention that you developed it using PCCTS. In
19
* addition, we ask that this header remain intact in our source code.
20
* As long as these guidelines are kept, we expect to continue enhancing
21
* this system and expect to make other tools available as they are
26
* With mods by Terence Parr; AHPCRC, University of Minnesota
38
Fix for Borland C++ 4.x & 5.x compiling with ALL warnings enabled
42
#pragma warn -aus /* unused assignment of 'xxx' */
45
int action_no = 0; /* number of actions output so far; also numbers the generated act<N>() */
46
int nfa_allocated = 0; /* number of nfa nodes allocated; also the next free index in nfa_array */
47
nfa_node **nfa_array = NULL;/* growable array of pointers to the nfa nodes, indexed by node_no */
48
nfa_node nfa_model_node; /* model to initialize new nodes */
49
set used_chars; /* union of the labels placed on trans. arcs */
50
set used_classes; /* classes or chars used to label trans. arcs */
51
set normal_chars; /* mask to get rid elements that aren't used
53
int flag_paren = FALSE;
54
int flag_brace = FALSE;
55
int mode_counter = 0; /* keep track of number of %%names; subscripts the per-mode dfa tables */
60
int func_action; /* should actions be turned into functions?*/
61
int lex_mode_counter = 0; /* number of %%names seen; incremented by the NAME_PER_PER token action */
63
/* MR1 11-Apr-97 Provide mechanism for inserting code into DLG class */
64
/* MR1 via <<%%lexmember...>> */
/* Each of the four flags below is set to 1 by the lexer action of the
 * matching <<%%... token and reset to 0 further down in the lexer actions.
 * xxputc()/xxprintf() test parserClass, lexMember and lexPrefix to decide
 * where the text of the current action is sent.
 */
66
int lexMember = 0; /* <<%%lexmember ...>> MR1 */
67
int lexAction = 0; /* <<%%lexaction ...>> MR1 */
68
int parserClass = 0; /* <<%%parserclass ...>> MR1 */
69
int lexPrefix = 0; /* <<%%lexprefix ...>> MR1 */
70
/* Buffer filled by xxputc() while parserClass is set.
 * NOTE(review): xxputc() stores through pClassName with no visible check
 * against the 100-byte size of theClassName -- confirm input is bounded.
 */
char theClassName[100]; /* MR11 */
71
char *pClassName=theClassName; /* MR11: write cursor into theClassName */
72
int firstLexMember=1; /* MR1: nonzero until p_class_def1() has been called */
75
/* Emit one character of action text, routed by the <<%%...>> flags:
 *   - parserClass set: append c to the class-name buffer via pClassName and
 *     keep the buffer NUL-terminated;
 *   - lexMember or lexPrefix set: write c to class_stream, if it is open;
 *   - the remaining visible path writes c to OUT.
 * Both the prototyped and the K&R form of the header appear below; the
 * preprocessor lines that select between them are not part of this excerpt.
 */
void xxputc(int c) { /* MR1 */
77
void xxputc(c) /* MR1 */
81
if (parserClass) { /* MR1 */
82
/* NOTE(review): no bound check against theClassName[100] is visible here */
*pClassName++=c; /* MR1 */
83
*pClassName=0; /* MR1 */
84
} else if (lexMember || lexPrefix) { /* MR1 */
85
if (class_stream != NULL) fputc(c,class_stream); /* MR1 */
87
fputc(c,OUT); /* MR1 */
92
/* Formatted counterpart of xxputc(): prints `string` through `format`.
 * When lexMember, lexPrefix or parserClass is set the text goes to
 * class_stream (only if that stream is non-NULL); the other visible call
 * prints to OUT.
 * `format` is passed straight to fprintf, so it must consume exactly one
 * char* argument; every call visible in this file passes "%s".
 * Both the prototyped and the K&R form of the header appear below; the
 * preprocessor lines that select between them are not part of this excerpt.
 */
void xxprintf(char *format,char *string) { /* MR1 */
94
void xxprintf(format,string) /* MR1 */
95
char *format; /* MR1 */
96
char *string; /* MR1 */
99
if (lexMember || lexPrefix || parserClass) { /* MR1 */
100
if (class_stream != NULL) /* MR1 */
101
fprintf(class_stream,format,string); /* MR1 */
103
fprintf(OUT,format,string); /* MR1 */
108
#token "[\r\t\ ]+" << zzskip(); >> /* Ignore white */
109
#token "\n" << zzline++; zzskip(); DAWDLE; >> /* Track Line # */
111
#token PER_PER "\%\%"
112
#token NAME_PER_PER "\%\%[a-zA-Z_][a-zA-Z0-9_]*"
113
<< p_mode_def(&zzlextext[2],lex_mode_counter++); >>
115
#token LEXMEMBER "\<\<\%\%lexmember" /* MR1 */
116
<<lexMember=1; /* MR1 */
117
if (firstLexMember != 0) { /* MR1 */
118
firstLexMember=0; /* MR1 */
119
p_class_def1(); /* MR1 */
121
zzmode(ACT); /* MR1 */
123
#token LEXACTION "\<\<\%\%lexaction" /* MR1 */
124
<<lexAction=1;zzmode(ACT);>> /* MR1 */
125
#token PARSERCLASS "\<\<\%\%parserclass" /* MR1 */
126
<<parserClass=1; /* MR1 */
127
zzmode(ACT); /* MR1 */
129
#token LEXPREFIX "\<\<\%\%lexprefix" /* MR1 */
130
<<lexPrefix=1;zzmode(ACT);>> /* MR1 */
134
fprintf(OUT,"\n%s %sact%d()\n{ ",
135
gen_cpp?"ANTLRTokenType":"static void",
136
gen_cpp?ClassName("::"):"", ++action_no);
137
zzmode(ACT); zzskip();
139
#token GREAT_GREAT "\>\>"
146
#token ZERO_MORE "\*"
151
/* Escape sequences: every action below overwrites zzlextext[0] with the
 * character value that the escape denotes.
 */
/* octal: conversion starts at zzlextext[1], i.e. just past the backslash */
#token OCTAL_VALUE "\\0[0-7]*"
152
<< {int t; sscanf(&zzlextext[1],"%o",&t); zzlextext[0] = t;}>>
153
/* hex: conversion starts at zzlextext[3], i.e. past the backslash, 0 and x/X */
#token HEX_VALUE "\\0[Xx][0-9a-fA-F]+"
154
<< {int t; sscanf(&zzlextext[3],"%x",&t); zzlextext[0] = t;}>>
155
/* decimal: conversion starts at zzlextext[1], i.e. just past the backslash */
#token DEC_VALUE "\\[1-9][0-9]*"
156
<< {int t; sscanf(&zzlextext[1],"%d",&t); zzlextext[0] = t;}>>
157
/* single-letter escapes map to the corresponding C character constant */
#token TAB "\\t" << zzlextext[0] = '\t';>>
158
#token NL "\\n" << zzlextext[0] = '\n';>>
159
#token CR "\\r" << zzlextext[0] = '\r';>>
160
#token BS "\\b" << zzlextext[0] = '\b';>>
163
/* MR1 10-Apr-97 MR1 Allow #token regular expressions to cross lines */
165
#token CONTINUATION "\\ \n" << zzline++; zzskip();>> /* MR1 */
167
/* NOTE: this takes ANYTHING after the \ */
168
#token LIT "\\~[tnrb]" << zzlextext[0] = zzlextext[1];>>
170
/* NOTE: this takes ANYTHING that doesn't match the other tokens */
171
#token REGCHAR "~[\\]"
174
grammar : << p_head(); p_class_hdr(); func_action = FALSE;>>
175
( {LEXACTION | LEXMEMBER | LEXPREFIX | PARSERCLASS } ACTION)* /* MR1 */
176
<<if ( gen_cpp ) p_includes();>>
178
<< func_action = FALSE; p_tables(); p_tail(); >>
180
<< if (firstLexMember != 0) p_class_def1(); >> /* MR1 */
183
start_states : ( PER_PER do_conversion
184
| NAME_PER_PER do_conversion (NAME_PER_PER do_conversion)*)
188
do_conversion : <<new_automaton_mode(); func_action = TRUE;>>
191
dfa_class_nop[mode_counter] =
192
relabel($1.l,comp_level);
194
p_shift_table(mode_counter);
195
dfa_basep[mode_counter] = dfa_allocated+1;
196
make_dfa_model_node(dfa_class_nop[mode_counter]);
201
fprint_hash_stats(stderr);
206
rule_list : rule <<$$.l=$1.l; $$.r=$1.r;>>
212
/* all accept nodes "dead ends" */
218
<<$$.l = new_nfa_node(); $$.r = NULL;
219
warning("no regular expressions", zzline);
223
rule : reg_expr ACTION
224
<<$$.l=$1.l; $$.r=$1.r; ($1.r)->accept=action_no;>>
226
<<$$.l = NULL; $$.r = NULL;
227
error("no expression for action ", zzline);
231
reg_expr : and_expr <<$$.l=$1.l; $$.r=$1.r;>>
233
<<{nfa_node *t1, *t2;
234
t1 = new_nfa_node(); t2 = new_nfa_node();
239
($2.r)->trans[1]=t2; /* MR20 */
247
and_expr : repeat_expr <<$$.l=$1.l; $$.r=$1.r;>>
248
(repeat_expr <<($$.r)->trans[1]=$1.l; $$.r=$1.r;>>)*
251
repeat_expr : expr <<$$.l=$1.l; $$.r=$1.r;>>
253
<<{ nfa_node *t1,*t2;
254
($$.r)->trans[0] = $$.l;
255
t1 = new_nfa_node(); t2 = new_nfa_node();
263
<<($$.r)->trans[0] = $$.l;>>
266
<< error("no expression for *", zzline);>>
268
<< error("no expression for +", zzline);>>
271
expr : << $$.l = new_nfa_node(); $$.r = new_nfa_node(); >>
272
L_BRACK atom_list R_BRACK
274
($$.l)->trans[0] = $$.r;
275
($$.l)->label = set_dup($2.label);
276
set_orin(&used_chars,($$.l)->label);
278
| NOT L_BRACK atom_list R_BRACK
280
($$.l)->trans[0] = $$.r;
281
($$.l)->label = set_dif(normal_chars,$3.label);
282
set_orin(&used_chars,($$.l)->label);
284
| L_PAR reg_expr R_PAR
286
($$.l)->trans[0] = $2.l;
288
($2.r)->trans[1] = $$.r; /* MR20 */
291
| L_BRACE reg_expr R_BRACE
293
($$.l)->trans[0] = $2.l;
294
($$.l)->trans[1] = $$.r;
296
($2.r)->trans[1] = $$.r; /* MR20 */
301
($$.l)->trans[0] = $$.r;
302
($$.l)->label = set_dup($1.label);
303
set_orin(&used_chars,($$.l)->label);
307
atom_list : << set_free($$.label); >>
308
(near_atom <<set_orin(&($$.label),$1.label);>>)*
311
near_atom : << register int i;
312
register int i_prime;
315
<<$$.letter=$1.letter; $$.label=set_of($1.letter);
316
i_prime = $1.letter + MIN_CHAR;
317
if (case_insensitive && islower(i_prime))
318
set_orel(toupper(i_prime)-MIN_CHAR,
320
if (case_insensitive && isupper(i_prime))
321
set_orel(tolower(i_prime)-MIN_CHAR,
325
<< if (case_insensitive){
326
i_prime = $$.letter+MIN_CHAR;
327
$$.letter = (islower(i_prime) ?
328
toupper(i_prime) : i_prime)-MIN_CHAR;
329
i_prime = $2.letter+MIN_CHAR;
330
$2.letter = (islower(i_prime) ?
331
toupper(i_prime) : i_prime)-MIN_CHAR;
333
/* check to see if range okay */
334
if ($$.letter > $2.letter
335
&& $2.letter != 0xff){ /* MR16 */
336
error("invalid range ", zzline);
338
for (i=$$.letter; i<= (int)$2.letter; ++i){
339
set_orel(i,&($$.label));
340
i_prime = i+MIN_CHAR;
341
if (case_insensitive && islower(i_prime))
342
set_orel(toupper(i_prime)-MIN_CHAR,
344
if (case_insensitive && isupper(i_prime))
345
set_orel(tolower(i_prime)-MIN_CHAR,
352
atom : << register int i_prime;>>
354
<<$$.label = set_of($1.letter);
355
i_prime = $1.letter + MIN_CHAR;
356
if (case_insensitive && islower(i_prime))
357
set_orel(toupper(i_prime)-MIN_CHAR,
359
if (case_insensitive && isupper(i_prime))
360
set_orel(tolower(i_prime)-MIN_CHAR,
365
anychar : REGCHAR <<$$.letter = $1.letter - MIN_CHAR;>>
366
| OCTAL_VALUE <<$$.letter = $1.letter - MIN_CHAR;>>
367
| HEX_VALUE <<$$.letter = $1.letter - MIN_CHAR;>>
368
| DEC_VALUE <<$$.letter = $1.letter - MIN_CHAR;>>
369
| TAB <<$$.letter = $1.letter - MIN_CHAR;>>
370
| NL <<$$.letter = $1.letter - MIN_CHAR;>>
371
| CR <<$$.letter = $1.letter - MIN_CHAR;>>
372
| BS <<$$.letter = $1.letter - MIN_CHAR;>>
373
| LIT <<$$.letter = $1.letter - MIN_CHAR;>>
374
/* NOTE: LEX_EOF is ALWAYS shifted to 0 = MIN_CHAR - MIN_CHAR*/
375
| L_EOF <<$$.letter = 0;>>
378
<</* empty action */>>
381
#token "@" << error("unterminated action", zzline); zzmode(START); >>
383
<< if (func_action) fprintf(OUT,"}\n\n");
386
/* MR1 11-Apr-97 Provide mechanism for inserting code into DLG class */
387
/* MR1 via <<%%lexmember ...>> */
388
/* MR1 This is a consequence of not saving actions */
390
/* MR1 */ parserClass=0;
391
/* MR1 */ lexPrefix=0;
392
/* MR1 */ lexAction=0;
393
/* MR1 */ lexMember=0;
395
#token "\>" << xxputc(zzlextext[0]); zzskip(); >> /* MR1 */
396
#token "\\\>" << xxputc('>'); zzskip(); >> /* MR1 */
397
#token "\\" << xxputc('\\'); zzskip(); >> /* MR1 */
398
#token "\n" << xxputc(zzlextext[0]); ++zzline; zzskip(); >> /* MR1 */
399
#token "/\*" << zzmode(ACTION_COMMENTS); /* MR1 */
400
xxprintf("%s", &(zzlextext[0])); zzskip(); /* MR1 */
402
#token "//" << zzmode(ACTION_CPP_COMMENTS); /* MR1 */
403
xxprintf("%s", &(zzlextext[0])); zzskip(); /* MR1 */
405
#token "~[]" << xxputc(zzlextext[0]); zzskip(); >> /* MR1 */
407
#lexclass ACTION_COMMENTS /* MR1 */
408
#token "\*/" << zzmode(ACT); /* MR1 */
409
xxprintf("%s", &(zzlextext[0])); zzskip(); /* MR1 */
411
#token "[\n\r]" << zzline++; xxputc(zzlextext[0]); zzskip();>> /* MR1 */
412
#token "~[]" << xxputc(zzlextext[0]); zzskip();>> /* MR1 */
414
#lexclass ACTION_CPP_COMMENTS /* MR1 */
415
#token "[\n\r]" << zzmode(ACT); zzline++; /* MR1 */
416
xxprintf("%s", &(zzlextext[0])); zzskip(); /* MR1 */
418
#token "~[]" << xxputc(zzlextext[0]); zzskip();>> /* MR1 */
421
/* adds a new nfa node to nfa_array and returns a pointer to it.
 * nfa_array is a heap array of nfa_node pointers that is grown on demand;
 * the new node is stored at index nfa_allocated and its node_no is set to
 * that same index.
 * NOTE(review): no NULL check on the malloc/realloc results is visible in
 * this excerpt -- confirm whether allocation failure is handled.
 */
429
register nfa_node *t;
430
static int nfa_size=0; /* elements nfa_array[] can hold */
433
if (nfa_size<=nfa_allocated){
434
/* need to redo array */
436
/* need to do the initial allocation */
437
nfa_size=nfa_allocated+NFA_MIN;
438
nfa_array=(nfa_node **) malloc(sizeof(nfa_node*)*
441
/* need more space: grow to twice the number of nodes needed */
442
nfa_size=2*(nfa_allocated+1);
443
/* NOTE(review): assigning realloc's result directly loses the old array if it fails */
nfa_array=(nfa_node **) realloc(nfa_array,
444
sizeof(nfa_node*)*nfa_size);
447
/* fill out entry in array */
448
t = (nfa_node*) malloc(sizeof(nfa_node));
449
nfa_array[nfa_allocated] = t;
451
t->node_no = nfa_allocated;
456
/* initialize the model node used to fill in newly made nfa_nodes:
 * sets every field of the global nfa_model_node to its default value.
 * Both the prototyped and the K&R form of the header appear below; the
 * preprocessor lines that select between them are not part of this excerpt.
 */
459
make_nfa_model_node(void)
461
make_nfa_model_node()
464
nfa_model_node.node_no = -1; /* impossible value for real nfa node */
465
nfa_model_node.nfa_set = 0;
466
nfa_model_node.accept = 0; /* error state default*/
467
nfa_model_node.trans[0] = NULL; /* no outgoing transitions yet */
468
nfa_model_node.trans[1] = NULL;
469
nfa_model_node.label = empty; /* empty label set */
474
#if defined(DEBUG) || defined(_DEBUG)
476
/* print out the pointer value and the node_number of nfa node p to stream f.
 * NOTE(review): "%x" is given a pointer argument; the C standard's
 * conversion for pointers is "%p" with a void* argument -- confirm and fix.
 * NOTE(review): p->node_no is read below; whether p is tested for NULL first
 * cannot be seen in this excerpt.
 */
479
fprint_dfa_pair(FILE *f, nfa_node *p)
481
fprint_dfa_pair(f, p)
487
fprintf(f, "%x (%d)", p, p->node_no);
493
/* print out interesting information on a set */
496
fprint_set(FILE *f, set s)
505
fprintf(f, "n = %d,", s.n);
507
fprintf(f, "setword = %x, ", s.setword);
508
/* print out all the elements in the set */
511
fprintf(f, "%d ", *x);
515
fprintf(f, "setword = (nil)");
519
/* code to be able to dump out the nfas
520
return 0 if okay dump
521
return 1 if screwed up
525
dump_nfas(int first_node, int last_node)
527
dump_nfas(first_node, last_node)
535
for (i=first_node; i<=last_node; ++i){
538
fprintf(stderr, "nfa_node %d {\n", t->node_no);
539
fprintf(stderr, "\n\tnfa_set = %d\n", t->nfa_set);
540
fprintf(stderr, "\taccept\t=\t%d\n", t->accept);
541
fprintf(stderr, "\ttrans\t=\t(");
542
fprint_dfa_pair(stderr, t->trans[0]);
543
fprintf(stderr, ",");
544
fprint_dfa_pair(stderr, t->trans[1]);
545
fprintf(stderr, ")\n");
546
fprintf(stderr, "\tlabel\t=\t{ ");
547
fprint_set(stderr, t->label);
548
fprintf(stderr, "\t}\n");
549
fprintf(stderr, "}\n\n");
557
/* DLG-specific syntax error message generator
558
* (define USER_ZZSYN when compiling so you don't get 2 definitions)
562
zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text)
564
zzsyn(text, tok, egroup, eset, etok, k, bad_text)
565
char *text, *egroup, *bad_text;
572
fprintf(stderr, ErrHdr, file_str[0]!=NULL?file_str[0]:"stdin", zzline);
573
fprintf(stderr, " syntax error at \"%s\"", (tok==zzEOF_TOKEN)?"EOF":text);
574
if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
575
if ( k==1 ) fprintf(stderr, " missing");
578
fprintf(stderr, "; \"%s\" not", bad_text);
579
if ( zzset_deg(eset)>1 ) fprintf(stderr, " in");
581
if ( zzset_deg(eset)>0 ) zzedecode(eset);
582
else fprintf(stderr, " %s", zztokens[etok]);
583
if ( strlen(egroup) > (size_t)0 ) fprintf(stderr, " in %s", egroup);
584
fprintf(stderr, "\n");