1
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
3
* Copyright (C) 1998-2004 Gerwin Klein <lsf@jflex.de> *
4
* All rights reserved. *
6
* This program is free software; you can redistribute it and/or modify *
7
* it under the terms of the GNU General Public License. See the file *
8
* COPYRIGHT for more information. *
10
* This program is distributed in the hope that it will be useful, *
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13
* GNU General Public License for more details. *
15
* You should have received a copy of the GNU General Public License along *
16
* with this program; if not, write to the Free Software Foundation, Inc., *
17
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
19
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
25
/* customizing code */
30
CharClasses charClasses = new CharClasses(127);
31
RegExps regExps = new RegExps();
32
Macros macros = new Macros();
34
Timer t = new Timer();
35
EOFActions eofActions = new EOFActions();
37
/**
 * Reports a syntax error at the given position and then aborts by throwing.
 *
 * @param message the error message constant to report
 * @param line    line value handed on to syntaxError
 * @param col     column value handed on to syntaxError
 * @throws GeneratorException always, once the error has been reported
 */
void fatalError(ErrorMessages message, int line, int col) {
38
syntaxError(message, line, col);
39
throw new GeneratorException();
42
/**
 * Reports a fatal error at the scanner's current line, passing -1 as the column.
 *
 * @param message the error message constant to report
 * @throws GeneratorException after the error has been reported
 */
void fatalError(ErrorMessages message) {
43
fatalError(message, scanner.currentLine(), -1);
44
// NOTE(review): the three-argument overload already ends in an unconditional
// throw, so this statement is only reached if that call returns normally.
throw new GeneratorException();
47
/**
 * Reports a (non-fatal) syntax error for the scanner's file at the scanner's
 * current line; -1 is passed as the column.
 *
 * @param message the error message constant to report
 */
void syntaxError(ErrorMessages message) {
48
Out.error(scanner.file, message, scanner.currentLine(), -1);
51
/**
 * Reports a (non-fatal) syntax error for the scanner's file at the given line;
 * -1 is passed as the column.
 *
 * @param message the error message constant to report
 * @param line    line value handed on to Out.error
 */
void syntaxError(ErrorMessages message, int line) {
52
Out.error(scanner.file, message, line, -1);
55
/**
 * Reports a (non-fatal) syntax error for the scanner's file at the given
 * line and column.
 *
 * @param message the error message constant to report
 * @param line    line value handed on to Out.error
 * @param col     column value handed on to Out.error
 */
void syntaxError(ErrorMessages message, int line, int col) {
56
Out.error(scanner.file, message, line, col);
60
/**
 * Tests whether character c belongs to the predefined character class
 * selected by type.
 *
 * Visible cases: sym.JLETTERCLASS uses Character.isJavaIdentifierStart and
 * sym.JLETTERDIGITCLASS uses Character.isJavaIdentifierPart. Four further
 * branches use Character.isLetter, isDigit, isUpperCase and isLowerCase;
 * their case labels are not visible in this view (presumably LETTERCLASS,
 * DIGITCLASS, UPPERCLASS and LOWERCLASS - confirm against the full file).
 *
 * @param type a sym constant naming the predefined class
 * @param c    the character to test
 * @return the result of the matching Character predicate; false for any
 *         type that has no case
 */
private boolean check(int type, char c) {
62
case sym.JLETTERCLASS:
63
return Character.isJavaIdentifierStart(c);
65
case sym.JLETTERDIGITCLASS:
66
return Character.isJavaIdentifierPart(c);
69
return Character.isLetter(c);
72
return Character.isDigit(c);
75
return Character.isUpperCase(c);
78
return Character.isLowerCase(c);
80
default: return false;
84
/**
 * Builds the interval list for a predefined character class.
 *
 * Walks the character codes starting at '\u0000' up to
 * charClasses.getMaxCharCode() and collects an Interval for each run of
 * consecutive characters for which check(type, c) is true.
 *
 * @param type a sym constant naming the predefined class (passed to check)
 * @return presumably the Vector of Interval objects built here; the return
 *         statement is not visible in this view
 */
private Vector makePreClass(int type) {
86
Vector result = new Vector();
90
char last = charClasses.getMaxCharCode();
92
// prev: membership of the previous character, current: of the one at c
boolean prev, current;
94
prev = check(type,'\u0000');
96
for (c = 1; c < last; c++) {
98
current = check(type,c);
100
// a run of member characters begins at c
if (!prev && current) start = c;
101
// the run that began at start ended on the previous character
if (prev && !current) {
102
result.addElement(new Interval(start, (char)(c-1)));
108
// the last iteration is moved out of the loop to
109
// avoid an endless loop if last == maxCharCode and
111
current = check(type,c);
113
// final character: single-element run, continued run, or run just ended
if (!prev && current) result.addElement(new Interval(c,c));
114
if (prev && current) result.addElement(new Interval(start, c));
115
if (prev && !current) result.addElement(new Interval(start, (char)(c-1)));
120
/**
 * Builds the expression tree for a bounded repetition of r.
 *
 * The result is assembled by concatenation (sym.CONCAT): n1 further copies
 * of r, followed by n2 copies of the optional form of r (sym.QUESTION),
 * using the counts as adjusted earlier in the method.
 *
 * Error reporting: REPEAT_ZERO is reported when both bounds are <= 0.
 * REPEAT_GREATER is reported in a branch whose condition is not visible in
 * this view (presumably n1 > n2 - confirm against the full file).
 *
 * @param r    the expression to repeat
 * @param n1   lower repetition bound
 * @param n2   upper repetition bound
 * @param line line used when reporting an error
 * @param col  column used when reporting an error
 * @return presumably the assembled expression held in result; the return
 *         statements are not visible in this view
 */
private RegExp makeRepeat(RegExp r, int n1, int n2, int line, int col) {
122
if (n1 <= 0 && n2 <= 0) {
123
syntaxError(ErrorMessages.REPEAT_ZERO, line, col);
128
syntaxError(ErrorMessages.REPEAT_GREATER, line, col);
137
n1--; n2--; // we need one concatenation less than the number of expressions to match
140
result = new RegExp1(sym.QUESTION,r);
144
// mandatory part: append r itself n1 times
for (i = 0; i < n1; i++)
145
result = new RegExp2(sym.CONCAT, result, r);
148
// optional part: append "r?" n2 times
for (i = 0; i < n2; i++)
149
result = new RegExp2(sym.CONCAT, result, new RegExp1(sym.QUESTION,r));
154
/**
 * Builds the expression that stands for a line terminator.
 *
 * Registers the needed characters with charClasses (always with the
 * caseless flag false) and returns a sym.BAR node. One operand of that
 * node is the two-character sequence '\r' '\n'; the other operand is not
 * visible in this view (presumably the character class node c).
 *
 * @return the alternative (BAR) expression described above
 */
private RegExp makeNL() {
155
Vector list = new Vector();
156
// '\n'..'\r' spans U+000A to U+000D, assuming Interval bounds are inclusive
list.addElement(new Interval('\n','\r'));
157
// U+0085 NEXT LINE
list.addElement(new Interval('\u0085','\u0085'));
158
// U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR
list.addElement(new Interval('\u2028','\u2029'));
160
// assumption: line feeds are caseless
161
charClasses.makeClass(list, false);
162
// '\n' and '\r' are also registered on their own because they are used
// as single CHAR nodes in the returned expression
charClasses.makeClass('\n', false);
163
charClasses.makeClass('\r', false);
165
RegExp1 c = new RegExp1(sym.CCLASS, list);
166
Character n = new Character('\n');
167
Character r = new Character('\r');
169
return new RegExp2(sym.BAR,
171
new RegExp2(sym.CONCAT,
172
new RegExp1(sym.CHAR, r),
173
new RegExp1(sym.CHAR, n)));
179
/** The scanner this parser works with; set by the constructor below. */
public LexScan scanner;
181
/**
 * Creates a parser bound to the given scanner.
 *
 * @param scanner stored in the public scanner field
 */
public LexParse(LexScan scanner) {
183
this.scanner = scanner;
186
/**
 * @return the charClasses object held by the parser's action object
 */
public CharClasses getCharClasses() {
187
return action_obj.charClasses;
190
/**
 * @return the eofActions object held by the parser's action object
 */
public EOFActions getEOFActions() {
191
return action_obj.eofActions;
194
/**
 * Reports a parse error through Out.error.
 *
 * When info is a java_cup.runtime.Symbol, an EOF symbol is reported as
 * UNEXPECTED_EOF; the SYNTAX_ERROR report uses the symbol's left and right
 * values as position (its guarding "else" is not visible in this view).
 * The UNKNOWN_SYNTAX report is presumably the branch for any other info
 * object - confirm against the full file.
 *
 * @param message not used in the visible lines
 * @param info    the object describing the error, usually the offending Symbol
 */
public void report_error(String message, Object info) {
195
if ( info instanceof java_cup.runtime.Symbol ) {
196
java_cup.runtime.Symbol s = (java_cup.runtime.Symbol) info;
198
if (s.sym == sym.EOF)
199
Out.error(ErrorMessages.UNEXPECTED_EOF);
201
Out.error(scanner.file, ErrorMessages.SYNTAX_ERROR, s.left, s.right);
204
Out.error(ErrorMessages.UNKNOWN_SYNTAX);
207
/**
 * Aborts parsing by throwing a GeneratorException. Nothing is reported
 * here: the call to report_error is commented out.
 *
 * @param message not used in the visible lines
 * @param info    not used in the visible lines
 * @throws GeneratorException always
 */
public void report_fatal_error(String message, Object info) {
208
// report_error(message, info);
209
throw new GeneratorException();
215
action_obj.scanner = this.scanner;
218
/* token declarations */
220
terminal OPENBRACKET, CLOSEBRACKET, HAT, DOLLAR, OPENCLASS,
221
CLOSECLASS, DASH, DELIMITER, EQUALS, COMMA, LESSTHAN,
222
MORETHAN, LBRACE, RBRACE, FULL, UNICODE, REGEXPEND;
224
terminal JLETTERCLASS, JLETTERDIGITCLASS, LETTERCLASS, DIGITCLASS,
225
UPPERCLASS, LOWERCLASS, EOFRULE, NOACTION, LOOKAHEAD;
227
terminal Action ACTION;
228
terminal String IDENT, USERCODE;
229
terminal Integer REPEAT;
231
/* tokens used in RegExp parse tree */
232
terminal STAR, PLUS, BAR, QUESTION, POINT, BANG, TILDE;
234
terminal Character CHAR;
235
terminal String STRING, MACROUSE;
237
/* symbols *only* used in the parse tree (not in the grammar) */
238
terminal CCLASS, CCLASSNOT, CONCAT;
239
terminal STRING_I, CHAR_I; /* case insensitive strings/chars */
242
non terminal macros, macro;
243
non terminal Integer rule;
244
non terminal NFA specification;
245
non terminal RegExp series, concs, nregexp, regexp, charclass, lookaheadOPT;
246
non terminal Interval classcontentelem;
247
non terminal Vector states, statesOPT, classcontent, preclass, rules;
248
non terminal Boolean hatOPT;
249
non terminal Action actions;
252
/* grammar specification */
253
start with specification;
255
/* Start symbol. Its action runs the semantic checks on the collected
   regular expressions and macros, creates the NFA object that becomes the
   parse result, and registers the EOF actions. */
specification ::= USERCODE
256
/* delimiter is checked in lexer */
265
Out.time(ErrorMessages.PARSING_TOOK, t);
268
// warn about every macro that was declared but never used
Enumeration unused = macros.unused();
269
while ( unused.hasMoreElements() ) {
270
Out.warning("Macro \""+unused.nextElement()+"\" has been declared but never used.");
273
SemCheck.check(regExps, macros, charClasses.getMaxCharCode(), scanner.file);
275
regExps.checkActions();
277
if (Options.dump) charClasses.dump();
279
Out.print("Constructing NFA : ");
282
int num = regExps.getNum();
284
RESULT = new NFA(charClasses.getNumClasses(),
285
scanner, regExps, macros, charClasses);
287
eofActions.setNumLexStates(scanner.states.number());
289
// hand the states and action of every EOF rule to eofActions
for (int i = 0; i < num; i++) {
290
if (regExps.isEOF(i))
291
eofActions.add( regExps.getStates(i), regExps.getAction(i) );
296
if (scanner.standalone) RESULT.addStandaloneRule();
300
Out.time(ErrorMessages.NFA_TOOK, t);
303
| /* empty spec. error */
305
fatalError(ErrorMessages.NO_LEX_SPEC);
309
macros ::= /* empty, most switches & state declarations are parsed in lexer */
314
{: charClasses.setMaxCharCode(255); :}
316
{: charClasses.setMaxCharCode(0xFFFF); :}
317
| IDENT:name EQUALS series:definition REGEXPEND
318
{: macros.insert(name, definition); :}
320
{: syntaxError(ErrorMessages.REGEXP_EXPECTED, eleft, eright); :}
324
/* A list of rules. The value is a Vector whose elements are the Integer
   values produced by the rule production. Rules enclosed in a
   "<states> { ... }" group additionally get those states added through
   regExps.addStates. */
rules ::= rules:rlist rule:r
325
{: rlist.addElement(r); RESULT = rlist; :}
326
/* state group following earlier rules: tag the grouped rules, then append
   them to the earlier list */
| rules:rlist1 LESSTHAN states:states MORETHAN LBRACE rules:rlist2 RBRACE
328
Enumeration rs = rlist2.elements();
329
while ( rs.hasMoreElements() ) {
330
Integer elem = (Integer) rs.nextElement();
331
regExps.addStates( elem.intValue(), states );
332
rlist1.addElement( elem );
336
/* state group with no earlier rules: only tag the grouped rules */
| LESSTHAN states:states MORETHAN LBRACE rules:rlist RBRACE
338
Enumeration rs = rlist.elements();
339
while ( rs.hasMoreElements() ) {
340
Integer elem = (Integer) rs.nextElement();
341
regExps.addStates( elem.intValue(), states );
346
{: RESULT = new Vector(); RESULT.addElement(r); :}
349
/* A single rule. Both alternatives store the rule through regExps.insert
   and yield the returned int wrapped in an Integer (presumably the rule's
   number - the rules production passes it to regExps.addStates).
   First alternative: regular expression rule with optional state list,
   optional HAT flag, optional lookahead and its actions.
   Second alternative: EOF rule with optional state list and an action. */
rule ::= statesOPT:s hatOPT:bol series:r lookaheadOPT:l actions:a
350
{: RESULT = new Integer(regExps.insert(rleft, s, r, a, bol, l)); :}
351
| statesOPT:s EOFRULE ACTION:a
352
{: RESULT = new Integer(regExps.insert(s, a)); :}
356
/* Optional lookahead part of a rule (further alternatives are not visible
   in this view).
   DOLLAR alone: the lookahead is the line terminator expression built by
   makeNL().
   LOOKAHEAD series DOLLAR: the given series followed by that line
   terminator expression. */
lookaheadOPT ::= DOLLAR
357
{: RESULT = makeNL(); :}
362
| LOOKAHEAD series:s DOLLAR
363
{: RESULT = new RegExp2(sym.CONCAT, s, makeNL()); :}
366
actions ::= REGEXPEND ACTION:a
373
statesOPT ::= LESSTHAN states:list MORETHAN
376
{: RESULT = new Vector(); :}
379
states ::= IDENT:id COMMA states:list
381
stateNumber = scanner.states.getNumber( id );
382
if ( stateNumber != null )
383
list.addElement( stateNumber );
385
throw new ScannerException(scanner.file, ErrorMessages.LEXSTATE_UNDECL,
392
Vector list = new Vector();
393
stateNumber = scanner.states.getNumber( id );
394
if ( stateNumber != null )
395
list.addElement( stateNumber );
397
throw new ScannerException(scanner.file, ErrorMessages.LEXSTATE_UNDECL,
403
{: syntaxError(ErrorMessages.REGEXP_EXPECTED, cleft, cright+1); :}
407
{: // assumption: there is no upper case for \n
408
charClasses.makeClass('\n', false);
409
// Boolean-valued result: use the shared constant rather than allocating
// a new wrapper through the deprecated Boolean(boolean) constructor
RESULT = Boolean.TRUE; :}
411
{: RESULT = Boolean.FALSE; :}
414
series ::= series:r1 BAR concs:r2
415
{: RESULT = new RegExp2(sym.BAR, r1, r2); :}
419
{: syntaxError(ErrorMessages.REGEXP_EXPECTED, bleft, bright); :}
422
concs ::= concs:r1 nregexp:r2
423
{: RESULT = new RegExp2(sym.CONCAT, r1, r2); :}
431
{: RESULT = new RegExp1(sym.BANG, r); :}
433
{: RESULT = new RegExp1(sym.TILDE, r); :}
436
regexp ::= regexp:r STAR
437
{: RESULT = new RegExp1(sym.STAR, r); :}
439
{: RESULT = new RegExp1(sym.PLUS, r); :}
441
{: RESULT = new RegExp1(sym.QUESTION, r); :}
442
| regexp:r REPEAT:n RBRACE:b
443
{: RESULT = makeRepeat(r, n.intValue(), n.intValue(), bleft, bright); :}
444
| regexp:r REPEAT:n1 REPEAT:n2 RBRACE
445
{: RESULT = makeRepeat(r, n1.intValue(), n2.intValue(), n1left, n2right); :}
446
| OPENBRACKET series:r CLOSEBRACKET
450
if ( !scanner.macroDefinition ) {
451
if ( ! macros.markUsed(ident) )
452
throw new ScannerException(scanner.file, ErrorMessages.MACRO_UNDECL,
453
identleft, identright);
455
RESULT = new RegExp1(sym.MACROUSE, ident);
462
// assumption [correct?]: preclasses are already closed under case
463
charClasses.makeClass(list, false);
465
catch (CharClassException e) {
466
syntaxError(ErrorMessages.CHARSET_2_SMALL, listleft);
468
RESULT = new RegExp1(sym.CCLASS, list);
473
if ( scanner.caseless ) {
474
charClasses.makeClass(str, true);
475
RESULT = new RegExp1(sym.STRING_I, str);
478
charClasses.makeClass(str, false);
479
RESULT = new RegExp1(sym.STRING, str);
482
catch (CharClassException e) {
483
syntaxError(ErrorMessages.CS2SMALL_STRING, strleft, strright);
489
Vector any = new Vector();
490
any.addElement(new Interval('\n','\n'));
491
// assumption: there is no upper case for \n
492
charClasses.makeClass('\n', false);
493
RESULT = new RegExp1(sym.CCLASSNOT, any);
498
if ( scanner.caseless ) {
499
charClasses.makeClass(c.charValue(), true);
500
RESULT = new RegExp1(sym.CHAR_I, c);
503
charClasses.makeClass(c.charValue(), false);
504
RESULT = new RegExp1(sym.CHAR, c);
507
catch (CharClassException e) {
508
syntaxError(ErrorMessages.CS2SMALL_CHAR, cleft, cright);
513
charclass ::= OPENCLASS CLOSECLASS
515
RESULT = new RegExp1(sym.CCLASS,null);
517
| OPENCLASS classcontent:list CLOSECLASS:close
520
charClasses.makeClass(list, Options.jlex && scanner.caseless);
522
catch (CharClassException e) {
523
syntaxError(ErrorMessages.CHARSET_2_SMALL, closeleft, closeright);
525
RESULT = new RegExp1(sym.CCLASS,list);
527
| OPENCLASS HAT CLOSECLASS:close
529
Vector list = new Vector();
530
list.addElement(new Interval((char)0,CharClasses.maxChar));
532
charClasses.makeClass(list, false);
534
catch (CharClassException e) {
535
syntaxError(ErrorMessages.CHARSET_2_SMALL, closeleft, closeright);
537
RESULT = new RegExp1(sym.CCLASS,list);
539
| OPENCLASS HAT classcontent:list CLOSECLASS:close
542
charClasses.makeClassNot(list, Options.jlex && scanner.caseless);
544
catch (CharClassException e) {
545
syntaxError(ErrorMessages.CHARSET_2_SMALL, closeleft, closeright);
547
RESULT = new RegExp1(sym.CCLASSNOT,list);
549
| OPENCLASS DASH classcontent:list CLOSECLASS:close
552
list.addElement(new Interval('-','-'));
553
charClasses.makeClass(list, Options.jlex && scanner.caseless);
555
catch (CharClassException e) {
556
syntaxError(ErrorMessages.CHARSET_2_SMALL, closeleft, closeright);
558
RESULT = new RegExp1(sym.CCLASS,list);
560
| OPENCLASS HAT DASH classcontent:list CLOSECLASS:close
563
list.addElement(new Interval('-','-'));
564
charClasses.makeClassNot(list, Options.jlex && scanner.caseless);
566
catch (CharClassException e) {
567
syntaxError(ErrorMessages.CHARSET_2_SMALL, closeleft, closeright);
569
RESULT = new RegExp1(sym.CCLASSNOT,list);
573
classcontent ::= classcontent:list classcontentelem:elem
575
list.addElement(elem);
578
| classcontentelem:elem
580
Vector list = new Vector();
581
list.addElement(elem);
584
| classcontent:list preclass:plist
586
for (Enumeration e = plist.elements(); e.hasMoreElements();)
587
list.addElement(e.nextElement());
592
| classcontent:list STRING:s
594
for (int i = 0; i < s.length(); i++)
595
list.addElement(new Interval(s.charAt(i),s.charAt(i)));
600
RESULT = new Vector();
601
for (int i = 0; i < s.length(); i++)
602
RESULT.addElement(new Interval(s.charAt(i),s.charAt(i)));
604
| classcontent:list MACROUSE:ident
606
syntaxError(ErrorMessages.CHARCLASS_MACRO, identleft, identright);
610
syntaxError(ErrorMessages.CHARCLASS_MACRO, identleft, identright);
614
/* One element of a character class, as an Interval.
   CHAR DASH CHAR: the interval from the first to the second character.
   The second action builds an interval whose two bounds are the same
   character c (its alternative header is not visible in this view). */
classcontentelem ::= CHAR:c1 DASH CHAR:c2
615
{: RESULT = new Interval(c1.charValue(), c2.charValue()); :}
617
{: RESULT = new Interval(c.charValue(), c.charValue()); :}
620
/* Predefined character classes. Each action calls makePreClass with the
   matching sym constant and yields the Vector it returns. Only the first
   alternative's header is visible in this view; the remaining actions
   presumably belong to the terminals of the same names. */
preclass ::= JLETTERCLASS
621
{: RESULT = makePreClass(sym.JLETTERCLASS); :}
623
{: RESULT = makePreClass(sym.JLETTERDIGITCLASS); :}
625
{: RESULT = makePreClass(sym.LETTERCLASS); :}
627
{: RESULT = makePreClass(sym.DIGITCLASS); :}
629
{: RESULT = makePreClass(sym.UPPERCLASS); :}
631
{: RESULT = makePreClass(sym.LOWERCLASS); :}