2
* STANDARD ML OF NEW JERSEY COPYRIGHT NOTICE, LICENSE AND DISCLAIMER.
4
* Copyright (c) 1989-1998 by Lucent Technologies
6
* Permission to use, copy, modify, and distribute this software and its
7
* documentation for any purpose and without fee is hereby granted, provided
8
* that the above copyright notice appear in all copies and that both the
9
* copyright notice and this permission notice and warranty disclaimer appear
10
* in supporting documentation, and that the name of Lucent Technologies, Bell
11
* Labs or any Lucent entity not be used in advertising or publicity pertaining
12
* to distribution of the software without specific, written prior permission.
14
* Lucent disclaims all warranties with regard to this software, including all
15
* implied warranties of merchantability and fitness. In no event shall Lucent
16
* be liable for any special, indirect or consequential damages or any damages
17
* whatsoever resulting from loss of use, data or profits, whether in an action
18
* of contract, negligence or other tortious action, arising out of or in
19
* connection with the use or performance of this software.
21
* Taken from this URL:
22
* http://www.smlnj.org/license.html
24
* This license is compatible with the GNU GPL (see section "Standard ML of New
25
* Jersey Copyright License"):
26
* http://www.gnu.org/licenses/license-list.html#StandardMLofNJ
30
* Copyright 1996-1999 by Scott Hudson, Frank Flannery, C. Scott Ananian
33
package weka.core.parser.java_cup;
35
import java.io.PrintWriter;
36
import java.util.Stack;
37
import java.util.Enumeration;
38
import java.util.Date;
41
* This class handles emitting generated code for the resulting parser.
42
* The various parse tables must be constructed, etc. before calling any
43
* routines in this class.<p>
45
* Three classes are produced by this code:
47
* <dt> symbol constant class
48
* <dd> this contains constant declarations for each terminal (and
49
* optionally each non-terminal).
51
* <dd> this non-public class contains code to invoke all the user actions
52
* that were embedded in the parser specification.
54
* <dd> the specialized parser class consisting primarily of some user
55
* supplied general and initialization code, and the parse tables.
58
* Three parse tables are created as part of the parser class:
60
* <dt> production table
61
* <dd> lists the LHS non terminal number, and the length of the RHS of
64
* <dd> for each state of the parse machine, gives the action to be taken
65
* (shift, reduce, or error) under each lookahead symbol.<br>
66
* <dt> reduce-goto table
67
* <dd> when a reduce on a given production is taken, the parse stack is
68
* popped back a number of elements corresponding to the RHS of the
69
* production. This reveals a prior state, which we transition out
70
* of under the LHS non terminal symbol for the production (as if we
71
* had seen the LHS symbol rather than all the symbols matching the
72
* RHS). This table is indexed by non terminal numbers and indicates
73
* how to make these transitions.
76
* In addition to the method interface, this class maintains a series of
77
* public global variables and flags indicating how misc. parts of the code
78
* and other output is to be produced, and counting things such as number of
79
* conflicts detected (see the source code and public variables below for
82
* This class is "static" (contains only static data and methods).<p>
84
* @see weka.core.parser.java_cup.main
85
* @version last update: 11/25/95
86
* @author Scott Hudson
89
/* Major externally callable routines here include:
90
symbols - emit the symbol constant class
91
parser - emit the parser class
93
In addition the following major internal routines are provided:
94
emit_package - emit a package declaration
95
emit_action_code - emit the class containing the user's actions
96
emit_production_table - emit declaration and init for the production table
97
do_action_table - emit declaration and init for the action table
98
do_reduce_table - emit declaration and init for the reduce-goto table
100
Finally, this class uses a number of public static variables to communicate
101
optional parameters and flags used to control how code is generated,
102
as well as to report counts of various things (such as number of conflicts
103
detected). These include:
105
prefix - a prefix string used to prefix names that would
106
otherwise "pollute" someone else's name space.
107
package_name - name of the package emitted code is placed in
108
(or null for an unnamed package).
109
symbol_const_class_name - name of the class containing symbol constants.
110
parser_class_name - name of the class for the resulting parser.
111
action_code - user supplied declarations and other code to be
112
placed in action class.
113
parser_code - user supplied declarations and other code to be
114
placed in parser class.
115
init_code - user supplied code to be executed as the parser
116
is being initialized.
117
scan_code - user supplied code to get the next Symbol.
118
start_production - the start production for the grammar.
119
import_list - list of imports for use with action class.
120
num_conflicts - number of conflicts detected.
121
nowarn - true if we are not to issue warning messages.
122
not_reduced - count of number of productions that never reduce.
123
unused_term - count of unused terminal symbols.
124
unused_non_term - count of unused non terminal symbols.
125
*_time - a series of symbols indicating how long various
126
sub-parts of code generation took (used to produce
127
optional time reports in main).
132
/*-----------------------------------------------------------*/
133
/*--- Constructor(s) ----------------------------------------*/
134
/*-----------------------------------------------------------*/
136
/** Only constructor is private so no instances can be created. */
139
/*-----------------------------------------------------------*/
140
/*--- Static (Class) Variables ------------------------------*/
141
/*-----------------------------------------------------------*/
143
/** The prefix placed on names that pollute someone else's name space. */
144
public static String prefix = "CUP$";
146
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
148
/** Package that the resulting code goes into (null is used for unnamed). */
149
public static String package_name = null;
151
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
153
/** Name of the generated class for symbol constants. */
154
public static String symbol_const_class_name = "sym";
156
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
158
/** Name of the generated parser class. */
159
public static String parser_class_name = "parser";
161
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
163
/** TUM changes; proposed by Henning Niss 20050628: Type arguments for class declaration */
164
public static String class_type_argument = null;
166
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
168
/** User declarations for direct inclusion in user action class. */
169
public static String action_code = null;
171
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
173
/** User declarations for direct inclusion in parser class. */
174
public static String parser_code = null;
176
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
178
/** User code for user_init() which is called during parser initialization. */
179
public static String init_code = null;
181
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
183
/** User code for scan() which is called to get the next Symbol. */
184
public static String scan_code = null;
186
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
188
/** The start production of the grammar. */
189
public static production start_production = null;
191
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
193
/** List of imports (Strings containing class names) to go with actions. */
194
public static Stack import_list = new Stack();
196
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
198
/** Number of conflicts found while building tables. */
199
public static int num_conflicts = 0;
201
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
203
/** Do we skip warnings? */
204
public static boolean nowarn = false;
206
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
208
/** Count of the number of non-reduced productions found. */
209
public static int not_reduced = 0;
211
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
213
/** Count of unused terminals. */
214
public static int unused_term = 0;
216
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
218
/** Count of unused non terminals. */
219
public static int unused_non_term = 0;
221
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
223
/* Timing values used to produce timing report in main.*/
225
/** Time to produce symbol constant class. */
226
public static long symbols_time = 0;
228
/** Time to produce parser class. */
229
public static long parser_time = 0;
231
/** Time to produce action code class. */
232
public static long action_code_time = 0;
234
/** Time to produce the production table. */
235
public static long production_table_time = 0;
237
/** Time to produce the action table. */
238
public static long action_table_time = 0;
240
/** Time to produce the reduce-goto table. */
241
public static long goto_table_time = 0;
244
protected static boolean _lr_values;
246
/** whether or not to emit code for left and right values */
247
public static boolean lr_values() {return _lr_values;}
248
/**
 * Stores a new value in the {@code _lr_values} flag.
 *
 * @param b the value to store
 */
protected static void set_lr_values(boolean b) {
  _lr_values = b;
}
250
//Hm Added clear to clear all static fields
251
public static void clear () {
254
import_list = new Stack();
259
parser_class_name = "parser";
262
start_production = null;
263
symbol_const_class_name = "sym";
268
/*-----------------------------------------------------------*/
269
/*--- General Methods ---------------------------------------*/
270
/*-----------------------------------------------------------*/
272
/** Build a string with the standard prefix.
273
* @param str string to prefix.
275
protected static String pre(String str) {
276
return prefix + parser_class_name + "$" + str;
280
* TUM changes; proposed by Henning Niss 20050628
281
* Build a string with the specified type arguments,
282
* if present, otherwise an empty string.
284
protected static String typeArgument() {
285
return class_type_argument == null ? "" : "<" + class_type_argument + ">";
288
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
290
/** Emit a package spec if the user wants one.
291
* @param out stream to produce output on.
293
protected static void emit_package(PrintWriter out)
295
// FracPete: adding license to output
297
out.println(" * STANDARD ML OF NEW JERSEY COPYRIGHT NOTICE, LICENSE AND DISCLAIMER.");
299
out.println(" * Copyright (c) 1989-1998 by Lucent Technologies");
301
out.println(" * Permission to use, copy, modify, and distribute this software and its");
302
out.println(" * documentation for any purpose and without fee is hereby granted, provided");
303
out.println(" * that the above copyright notice appear in all copies and that both the");
304
out.println(" * copyright notice and this permission notice and warranty disclaimer appear");
305
out.println(" * in supporting documentation, and that the name of Lucent Technologies, Bell");
306
out.println(" * Labs or any Lucent entity not be used in advertising or publicity pertaining");
307
out.println(" * to distribution of the software without specific, written prior permission.");
309
out.println(" * Lucent disclaims all warranties with regard to this software, including all");
310
out.println(" * implied warranties of merchantability and fitness. In no event shall Lucent");
311
out.println(" * be liable for any special, indirect or consequential damages or any damages");
312
out.println(" * whatsoever resulting from loss of use, data or profits, whether in an action");
313
out.println(" * of contract, negligence or other tortious action, arising out of or in");
314
out.println(" * connection with the use or performance of this software. ");
316
out.println(" * Taken from this URL:");
317
out.println(" * http://www.smlnj.org/license.html");
319
out.println(" * This license is compatible with the GNU GPL (see section \"Standard ML of New");
320
out.println(" * Jersey Copyright License\"):");
321
out.println(" * http://www.gnu.org/licenses/license-list.html#StandardMLofNJ");
325
out.println(" * Copyright 1996-1999 by Scott Hudson, Frank Flannery, C. Scott Ananian");
329
/* generate a package spec if we have a name for one */
330
if (package_name != null) {
331
out.println("package " + package_name + ";"); out.println();
335
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
337
/** Emit code for the symbol constant class, optionally including non terms,
338
* if they have been requested.
339
* @param out stream to produce output on.
340
* @param emit_non_terms do we emit constants for non terminals?
341
* @param sym_interface should we emit an interface, rather than a class?
343
public static void symbols(PrintWriter out,
344
boolean emit_non_terms, boolean sym_interface)
348
String class_or_interface = (sym_interface)?"interface":"class";
350
long start_time = System.currentTimeMillis();
354
out.println("//----------------------------------------------------");
355
out.println("// The following code was generated by " +
357
out.println("// " + new Date());
358
out.println("//----------------------------------------------------");
363
out.println("/** CUP generated " + class_or_interface +
364
" containing symbol constants. */");
365
out.println("public " + class_or_interface + " " +
366
symbol_const_class_name + " {");
368
out.println(" /* terminals */");
370
/* walk over the terminals */ /* later might sort these */
371
for (Enumeration e = terminal.all(); e.hasMoreElements(); )
373
term = (terminal)e.nextElement();
375
/* output a constant decl for the terminal */
376
out.println(" public static final int " + term.name() + " = " +
380
/* do the non terminals if they want them (parser doesn't need them) */
384
out.println(" /* non terminals */");
386
/* walk over the non terminals */ /* later might sort these */
387
for (Enumeration e = non_terminal.all(); e.hasMoreElements(); )
389
nt = (non_terminal)e.nextElement();
392
// TUM Comment: here we could add a typesafe enumeration
395
/* output a constant decl for the non terminal */
396
out.println(" static final int " + nt.name() + " = " +
405
symbols_time = System.currentTimeMillis() - start_time;
408
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
410
/** Emit code for the non-public class holding the actual action code.
411
* @param out stream to produce output on.
412
* @param start_prod the start production of the grammar.
414
protected static void emit_action_code(PrintWriter out, production start_prod)
415
throws internal_error
419
long start_time = System.currentTimeMillis();
424
"/** Cup generated class to encapsulate user supplied action code.*/"
426
/* TUM changes; proposed by Henning Niss 20050628: added type argument */
427
out.println("class " + pre("actions") + typeArgument() + " {");
428
/* user supplied code */
429
if (action_code != null)
432
out.println(action_code);
435
/* field for parser object */
436
/* TUM changes; proposed by Henning Niss 20050628: added typeArgument */
437
out.println(" private final "+parser_class_name + typeArgument() + " parser;");
441
out.println(" /** Constructor */");
442
/* TUM changes; proposed by Henning Niss 20050628: added typeArgument */
443
out.println(" " + pre("actions") + "("+parser_class_name+typeArgument()+" parser) {");
444
out.println(" this.parser = parser;");
447
/* action method head */
449
out.println(" /** Method with the actual generated action code. */");
450
out.println(" public final weka.core.parser.java_cup.runtime.Symbol " +
451
pre("do_action") + "(");
452
out.println(" int " + pre("act_num,"));
453
out.println(" weka.core.parser.java_cup.runtime.lr_parser " + pre("parser,"));
454
out.println(" java.util.Stack " + pre("stack,"));
455
out.println(" int " + pre("top)"));
456
out.println(" throws java.lang.Exception");
459
/* declaration of result symbol */
460
/* New declaration!! now return Symbol
462
out.println(" /* Symbol object for return from actions */");
463
out.println(" weka.core.parser.java_cup.runtime.Symbol " + pre("result") + ";");
467
out.println(" /* select the action based on the action number */");
468
out.println(" switch (" + pre("act_num") + ")");
471
/* emit action code for each production as a separate case */
472
for (Enumeration p = production.all(); p.hasMoreElements(); )
474
prod = (production)p.nextElement();
477
out.println(" /*. . . . . . . . . . . . . . . . . . . .*/");
478
out.println(" case " + prod.index() + ": // " +
479
prod.to_simple_string());
481
/* give them their own block to work in */
486
* TUM 20060608 intermediate result patch
488
String result = "null";
489
if (prod instanceof action_production) {
490
int lastResult = ((action_production)prod).getIndexOfIntermediateResult();
491
if (lastResult!=-1) {
492
result = "(" + prod.lhs().the_symbol().stack_type() + ") " +
493
"((weka.core.parser.java_cup.runtime.Symbol) " + emit.pre("stack") +
495
((lastResult==1)?".peek()":(".elementAt(" + emit.pre("top") + "-" + (lastResult-1) + ")"))+
500
/* create the result symbol */
501
/*make the variable RESULT which will point to the new Symbol (see below)
502
and be changed by action code
504
out.println(" " + prod.lhs().the_symbol().stack_type() +
505
" RESULT ="+result+";");
507
/* Add code to propagate RESULT assignments that occur in
508
* action code embedded in a production (ie, non-rightmost
509
* action code). 24-Mar-1998 CSA
511
for (int i=prod.rhs_length()-1; i>=0; i--) {
512
// only interested in non-terminal symbols.
513
if (!(prod.rhs(i) instanceof symbol_part)) continue;
514
symbol s = ((symbol_part)prod.rhs(i)).the_symbol();
515
if (!(s instanceof non_terminal)) continue;
516
// skip this non-terminal unless it corresponds to
517
// an embedded action production.
518
if (((non_terminal)s).is_embedded_action == false) continue;
519
// OK, it fits. Make a conditional assignment to RESULT.
520
int index = prod.rhs_length() - i - 1; // last rhs is on top.
521
// set comment to inform about where the intermediate result came from
522
out.println(" " + "// propagate RESULT from " +s.name());
523
// // look out, whether the intermediate result is null or not
524
// out.println(" " + "if ( " +
525
// "((weka.core.parser.java_cup.runtime.Symbol) " + emit.pre("stack") +
527
// ((index==0)?".peek()":(".elementAt(" + emit.pre("top") + "-" + index + ")"))+
528
// ").value != null )");
530
// TUM 20060608: even when its null: who cares?
532
// store the intermediate result into RESULT
533
out.println(" " + "RESULT = " +
534
"(" + prod.lhs().the_symbol().stack_type() + ") " +
535
"((weka.core.parser.java_cup.runtime.Symbol) " + emit.pre("stack") +
537
((index==0)?".peek()":(".elementAt(" + emit.pre("top") + "-" + index + ")"))+
542
/* if there is a non-empty action string, emit it */
/* The emptiness test is made on the code string itself. The previous test
 * compared the action object with "" instead of its code string, so an
 * empty code string was not filtered out (assuming the action class does
 * not define equality with a String -- its equals() is not visible here). */
if (prod.action() != null && prod.action().code_string() != null &&
    !prod.action().code_string().equals(""))
  out.println(prod.action().code_string());
547
/* here we have the left and right values being propagated.
548
must make this a command line option.
551
/* Create the code that assigns the left and right values of
552
the new Symbol that the production is reducing to */
553
if (emit.lr_values()) {
555
String leftstring, rightstring;
558
rightstring = "((weka.core.parser.java_cup.runtime.Symbol)" + emit.pre("stack") +
560
//".elementAt(" + emit.pre("top") + "-" + roffset + "))"+
562
// TUM 20060327 removed .right
564
if (prod.rhs_length() == 0)
565
leftstring = rightstring;
567
loffset = prod.rhs_length() - 1;
568
leftstring = "((weka.core.parser.java_cup.runtime.Symbol)" + emit.pre("stack") +
570
((loffset==0)?(".peek()"):(".elementAt(" + emit.pre("top") + "-" + loffset + ")")) +
571
// TUM 20060327 removed .left
574
// out.println(" " + pre("result") + " = new weka.core.parser.java_cup.runtime.Symbol(" +
575
out.println(" " + pre("result") + " = parser.getSymbolFactory().newSymbol(" +
576
"\""+ prod.lhs().the_symbol().name() +"\","+
577
prod.lhs().the_symbol().index() +
578
", " + leftstring + ", " + rightstring + ", RESULT);");
580
// out.println(" " + pre("result") + " = new weka.core.parser.java_cup.runtime.Symbol(" +
581
out.println(" " + pre("result") + " = parser.getSymbolFactory().newSymbol(" +
582
"\""+ prod.lhs().the_symbol().name() + "\","+
583
prod.lhs().the_symbol().index() +
587
/* end of their block */
590
/* if this was the start production, do action for accept */
591
if (prod == start_prod)
593
out.println(" /* ACCEPT */");
594
out.println(" " + pre("parser") + ".done_parsing();");
597
/* code to return lhs symbol */
598
out.println(" return " + pre("result") + ";");
603
out.println(" /* . . . . . .*/");
604
out.println(" default:");
605
out.println(" throw new Exception(");
606
out.println(" \"Invalid action number found in " +
607
"internal parse table\");");
618
action_code_time = System.currentTimeMillis() - start_time;
621
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
623
/** Emit the production table.
624
* @param out stream to produce output on.
626
protected static void emit_production_table(PrintWriter out)
628
production all_prods[];
631
long start_time = System.currentTimeMillis();
633
/* collect up the productions in order */
634
all_prods = new production[production.number()];
635
for (Enumeration p = production.all(); p.hasMoreElements(); )
637
prod = (production)p.nextElement();
638
all_prods[prod.index()] = prod;
642
short[][] prod_table = new short[production.number()][2];
643
for (int i = 0; i<production.number(); i++)
646
// { lhs symbol , rhs size }
647
prod_table[i][0] = (short) prod.lhs().the_symbol().index();
648
prod_table[i][1] = (short) prod.rhs_length();
650
/* do the top of the table */
652
out.println(" /** Production table. */");
653
out.println(" protected static final short _production_table[][] = ");
654
out.print (" unpackFromStrings(");
655
do_table_as_string(out, prod_table);
658
/* do the public accessor method */
660
out.println(" /** Access to production table. */");
661
out.println(" public short[][] production_table() " +
662
"{return _production_table;}");
664
production_table_time = System.currentTimeMillis() - start_time;
667
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
669
/** Emit the action table.
670
* @param out stream to produce output on.
671
* @param act_tab the internal representation of the action table.
672
* @param compact_reduces do we use the most frequent reduce as default?
674
protected static void do_action_table(
676
parse_action_table act_tab,
677
boolean compact_reduces)
678
throws internal_error
680
parse_action_row row;
684
long start_time = System.currentTimeMillis();
686
/* collect values for the action table */
687
short[][] action_table = new short[act_tab.num_states()][];
688
/* do each state (row) of the action table */
689
for (int i = 0; i < act_tab.num_states(); i++)
692
row = act_tab.under_state[i];
694
/* determine the default for the row */
696
row.compute_default();
698
row.default_reduce = -1;
700
/* make temporary table for the row. */
701
short[] temp_table = new short[2*parse_action_row.size()];
705
for (int j = 0; j < parse_action_row.size(); j++)
707
/* extract the action from the table */
708
act = row.under_term[j];
710
/* skip error entries; these are all defaulted out */
711
if (act.kind() != parse_action.ERROR)
713
/* first put in the symbol index, then the actual entry */
715
/* shifts get positive entries of state number + 1 */
716
if (act.kind() == parse_action.SHIFT)
719
temp_table[nentries++] = (short) j;
720
temp_table[nentries++] = (short)
721
(((shift_action)act).shift_to().index() + 1);
724
/* reduce actions get negated entries of production# + 1 */
725
else if (act.kind() == parse_action.REDUCE)
727
/* if it's the default entry let it get defaulted out */
728
red = ((reduce_action)act).reduce_with().index();
729
if (red != row.default_reduce) {
731
temp_table[nentries++] = (short) j;
732
temp_table[nentries++] = (short) (-(red+1));
734
} else if (act.kind() == parse_action.NONASSOC)
736
/* do nothing, since we just want a syntax error */
738
/* shouldn't be anything else */
740
throw new internal_error("Unrecognized action code " +
741
act.kind() + " found in parse table");
745
/* now we know how big to make the row */
746
action_table[i] = new short[nentries + 2];
747
System.arraycopy(temp_table, 0, action_table[i], 0, nentries);
749
/* finish off the row with a default entry */
750
action_table[i][nentries++] = -1;
751
if (row.default_reduce != -1)
752
action_table[i][nentries++] = (short) (-(row.default_reduce+1));
754
action_table[i][nentries++] = 0;
757
/* finish off the init of the table */
759
out.println(" /** Parse-action table. */");
760
out.println(" protected static final short[][] _action_table = ");
761
out.print (" unpackFromStrings(");
762
do_table_as_string(out, action_table);
765
/* do the public accessor method */
767
out.println(" /** Access to parse-action table. */");
768
out.println(" public short[][] action_table() {return _action_table;}");
770
action_table_time = System.currentTimeMillis() - start_time;
773
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
775
/** Emit the reduce-goto table.
776
* @param out stream to produce output on.
777
* @param red_tab the internal representation of the reduce-goto table.
779
protected static void do_reduce_table(
781
parse_reduce_table red_tab)
786
long start_time = System.currentTimeMillis();
788
/* collect values for reduce-goto table */
789
short[][] reduce_goto_table = new short[red_tab.num_states()][];
790
/* do each row of the reduce-goto table */
791
for (int i=0; i<red_tab.num_states(); i++)
793
/* make temporary table for the row. */
794
short[] temp_table = new short[2*parse_reduce_row.size()];
796
/* do each entry in the row */
797
for (int j=0; j<parse_reduce_row.size(); j++)
800
goto_st = red_tab.under_state[i].under_non_term[j];
802
/* if we have none, skip it */
805
/* make entries for the index and the value */
806
temp_table[nentries++] = (short) j;
807
temp_table[nentries++] = (short) goto_st.index();
810
/* now we know how big to make the row. */
811
reduce_goto_table[i] = new short[nentries+2];
812
System.arraycopy(temp_table, 0, reduce_goto_table[i], 0, nentries);
814
/* end row with default value */
815
reduce_goto_table[i][nentries++] = -1;
816
reduce_goto_table[i][nentries++] = -1;
819
/* emit the table. */
821
out.println(" /** <code>reduce_goto</code> table. */");
822
out.println(" protected static final short[][] _reduce_table = ");
823
out.print (" unpackFromStrings(");
824
do_table_as_string(out, reduce_goto_table);
827
/* do the public accessor method */
829
out.println(" /** Access to <code>reduce_goto</code> table. */");
830
out.println(" public short[][] reduce_table() {return _reduce_table;}");
833
goto_table_time = System.currentTimeMillis() - start_time;
836
// print a string array encoding the given short[][] array.
837
protected static void do_table_as_string(PrintWriter out, short[][] sa) {
838
out.println("new String[] {");
840
int nchar=0, nbytes=0;
841
nbytes+=do_escaped(out, (char)(sa.length>>16));
842
nchar =do_newline(out, nchar, nbytes);
843
nbytes+=do_escaped(out, (char)(sa.length&0xFFFF));
844
nchar =do_newline(out, nchar, nbytes);
845
for (int i=0; i<sa.length; i++) {
846
nbytes+=do_escaped(out, (char)(sa[i].length>>16));
847
nchar =do_newline(out, nchar, nbytes);
848
nbytes+=do_escaped(out, (char)(sa[i].length&0xFFFF));
849
nchar =do_newline(out, nchar, nbytes);
850
for (int j=0; j<sa[i].length; j++) {
851
// contents of string are (value+2) to allow for common -1, 0 cases
852
// (UTF-8 encoding is most efficient for 0<c<0x80)
853
nbytes+=do_escaped(out, (char)(2+sa[i][j]));
854
nchar =do_newline(out, nchar, nbytes);
859
// split string if it is very long; start new line occasionally for neatness
860
protected static int do_newline(PrintWriter out, int nchar, int nbytes) {
861
if (nbytes > 65500) { out.println("\", "); out.print(" \""); }
862
else if (nchar > 11) { out.println("\" +"); out.print(" \""); }
866
// output an escape sequence for the given character code.
867
protected static int do_escaped(PrintWriter out, char c) {
868
StringBuffer escape = new StringBuffer();
870
escape.append(Integer.toOctalString(c));
871
while(escape.length() < 3) escape.insert(0, '0');
873
escape.append(Integer.toHexString(c));
874
while(escape.length() < 4) escape.insert(0, '0');
875
escape.insert(0, 'u');
877
escape.insert(0, '\\');
878
out.print(escape.toString());
880
// return number of bytes this takes up in UTF-8 encoding.
881
if (c == 0) return 2;
882
if (c >= 0x01 && c <= 0x7F) return 1;
883
if (c >= 0x80 && c <= 0x7FF) return 2;
887
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
889
/** Emit the parser subclass with embedded tables.
890
* @param out stream to produce output on.
891
* @param action_table internal representation of the action table.
892
* @param reduce_table internal representation of the reduce-goto table.
893
* @param start_st start state of the parse machine.
894
* @param start_prod start production of the grammar.
895
* @param compact_reduces do we use most frequent reduce as default?
896
* @param suppress_scanner should scanner be suppressed for compatibility?
898
public static void parser(
900
parse_action_table action_table,
901
parse_reduce_table reduce_table,
903
production start_prod,
904
boolean compact_reduces,
905
boolean suppress_scanner)
906
throws internal_error
908
long start_time = System.currentTimeMillis();
912
out.println("//----------------------------------------------------");
913
out.println("// The following code was generated by " +
915
out.println("// " + new Date());
916
out.println("//----------------------------------------------------");
920
/* user supplied imports */
921
for (int i = 0; i < import_list.size(); i++)
922
out.println("import " + import_list.elementAt(i) + ";");
926
out.println("/** "+version.title_str+" generated parser.");
927
out.println(" * @version " + new Date());
929
/* TUM changes; proposed by Henning Niss 20050628: added typeArgument */
930
out.println("public class " + parser_class_name + typeArgument() +
931
" extends weka.core.parser.java_cup.runtime.lr_parser {");
933
/* constructors [CSA/davidm, 24-jul-99] */
935
out.println(" /** Default constructor. */");
936
out.println(" public " + parser_class_name + "() {super();}");
937
if (!suppress_scanner) {
939
out.println(" /** Constructor which sets the default scanner. */");
940
out.println(" public " + parser_class_name +
941
"(weka.core.parser.java_cup.runtime.Scanner s) {super(s);}");
942
// TUM 20060327 added SymbolFactory aware constructor
944
out.println(" /** Constructor which sets the default scanner. */");
945
out.println(" public " + parser_class_name +
946
"(weka.core.parser.java_cup.runtime.Scanner s, weka.core.parser.java_cup.runtime.SymbolFactory sf) {super(s,sf);}");
949
/* emit the various tables */
950
emit_production_table(out);
951
do_action_table(out, action_table, compact_reduces);
952
do_reduce_table(out, reduce_table);
954
/* instance of the action encapsulation class */
955
out.println(" /** Instance of action encapsulation class. */");
956
out.println(" protected " + pre("actions") + " action_obj;");
959
/* action object initializer */
960
out.println(" /** Action encapsulation object initializer. */");
961
out.println(" protected void init_actions()");
963
/* TUM changes; proposed by Henning Niss 20050628: added typeArgument */
964
out.println(" action_obj = new " + pre("actions") + typeArgument() +"(this);");
968
/* access to action code */
969
out.println(" /** Invoke a user supplied parse action. */");
970
out.println(" public weka.core.parser.java_cup.runtime.Symbol do_action(");
971
out.println(" int act_num,");
972
out.println(" weka.core.parser.java_cup.runtime.lr_parser parser,");
973
out.println(" java.util.Stack stack,");
974
out.println(" int top)");
975
out.println(" throws java.lang.Exception");
977
out.println(" /* call code in generated class */");
978
out.println(" return action_obj." + pre("do_action(") +
979
"act_num, parser, stack, top);");
984
/* method to tell the parser about the start state */
985
out.println(" /** Indicates start state. */");
986
out.println(" public int start_state() {return " + start_st + ";}");
988
/* method to indicate start production */
989
out.println(" /** Indicates start production. */");
990
out.println(" public int start_production() {return " +
991
start_production.index() + ";}");
994
/* methods to indicate EOF and error symbol indexes */
995
out.println(" /** <code>EOF</code> Symbol index. */");
996
out.println(" public int EOF_sym() {return " + terminal.EOF.index() +
999
out.println(" /** <code>error</code> Symbol index. */");
1000
out.println(" public int error_sym() {return " + terminal.error.index() +
1004
/* user supplied code for user_init() */
1005
if (init_code != null)
1008
out.println(" /** User initialization code. */");
1009
out.println(" public void user_init() throws java.lang.Exception");
1011
out.println(init_code);
1015
/* user supplied code for scan */
1016
if (scan_code != null)
1019
out.println(" /** Scan to get the next Symbol. */");
1020
out.println(" public weka.core.parser.java_cup.runtime.Symbol scan()");
1021
out.println(" throws java.lang.Exception");
1023
out.println(scan_code);
1027
/* user supplied code */
1028
if (parser_code != null)
1031
out.println(parser_code);
1037
/* put out the action code class */
1038
emit_action_code(out, start_prod);
1040
parser_time = System.currentTimeMillis() - start_time;
1043
/*-----------------------------------------------------------*/