2
* STANDARD ML OF NEW JERSEY COPYRIGHT NOTICE, LICENSE AND DISCLAIMER.
4
* Copyright (c) 1989-1998 by Lucent Technologies
6
* Permission to use, copy, modify, and distribute this software and its
7
* documentation for any purpose and without fee is hereby granted, provided
8
* that the above copyright notice appear in all copies and that both the
9
* copyright notice and this permission notice and warranty disclaimer appear
10
* in supporting documentation, and that the name of Lucent Technologies, Bell
11
* Labs or any Lucent entity not be used in advertising or publicity pertaining
12
* to distribution of the software without specific, written prior permission.
14
* Lucent disclaims all warranties with regard to this software, including all
15
* implied warranties of merchantability and fitness. In no event shall Lucent
16
* be liable for any special, indirect or consequential damages or any damages
17
* whatsoever resulting from loss of use, data or profits, whether in an action
18
* of contract, negligence or other tortious action, arising out of or in
19
* connection with the use or performance of this software.
21
* Taken from this URL:
22
* http://www.smlnj.org/license.html
24
* This license is compatible with the GNU GPL (see section "Standard ML of New
25
* Jersey Copyright License"):
26
* http://www.gnu.org/licenses/license-list.html#StandardMLofNJ
30
* Copyright 1996-1999 by Scott Hudson, Frank Flannery, C. Scott Ananian
33
package weka.core.parser.java_cup;
35
import java.util.Hashtable;
36
import java.util.Enumeration;
38
/** This class represents a production in the grammar. It contains
39
* a LHS non terminal, and an array of RHS symbols. As various
40
* transformations are done on the RHS of the production, it may shrink.
41
* As a result a separate length is always maintained to indicate how much
42
* of the RHS array is still valid.<p>
44
* I addition to construction and manipulation operations, productions provide
45
* methods for factoring out actions (see remove_embedded_actions()), for
46
* computing the nullability of the production (i.e., can it derive the empty
47
* string, see check_nullable()), and operations for computing its first
48
* set (i.e., the set of terminals that could appear at the beginning of some
49
* string derived from the production, see check_first_set()).
51
* @see weka.core.parser.java_cup.production_part
52
* @see weka.core.parser.java_cup.symbol_part
53
* @see weka.core.parser.java_cup.action_part
54
* @version last updated: 7/3/96
55
* @author Frank Flannery
58
public class production {
60
/*-----------------------------------------------------------*/
61
/*--- Constructor(s) ----------------------------------------*/
62
/*-----------------------------------------------------------*/
64
/** Full constructor. This constructor accepts a LHS non terminal,
65
* an array of RHS parts (including terminals, non terminals, and
66
* actions), and a string for a final reduce action. It does several
67
* manipulations in the process of creating a production object.
68
* After some validity checking it translates labels that appear in
69
* actions into code for accessing objects on the runtime parse stack.
70
* It them merges adjacent actions if they appear and moves any trailing
71
* action into the final reduce actions string. Next it removes any
72
* embedded actions by factoring them out with new action productions.
73
* Finally it assigns a unique index to the production.<p>
75
* Factoring out of actions is accomplished by creating new "hidden"
76
* non terminals. For example if the production was originally: <pre>
77
* A ::= B {action} C D
79
* then it is factored into two productions:<pre>
83
* (where X is a unique new non terminal). This has the effect of placing
84
* all actions at the end where they can be handled as part of a reduce by
89
production_part rhs_parts[],
95
action_part tail_action;
99
/* remember the length */
102
else if (rhs_parts != null)
103
_rhs_length = rhs_parts.length;
107
/* make sure we have a valid left-hand-side */
109
throw new internal_error(
110
"Attempt to construct a production with a null LHS");
112
/* I'm not translating labels anymore, I'm adding code to declare
113
labels as valid variables. This way, the users code string is
117
/* check if the last part of the right hand side is an action. If
118
it is, it won't be on the stack, so we don't want to count it
119
in the rightlen. Then when we search down the stack for a
120
Symbol, we don't try to search past action */
123
if (rhs_parts[rhs_l - 1].is_action()) {
124
rightlen = rhs_l - 1;
130
/* get the generated declaration code for the necessary labels. */
131
declare_str = declare_labels(
132
rhs_parts, rightlen, action_str);
134
if (action_str == null)
135
action_str = declare_str;
137
action_str = declare_str + action_str;
139
/* count use of lhs */
142
/* create the part for left-hand-side */
143
_lhs = new symbol_part(lhs_sym);
145
/* merge adjacent actions (if any) */
146
_rhs_length = merge_adjacent_actions(rhs_parts, _rhs_length);
148
/* strip off any trailing action */
149
tail_action = strip_trailing_action(rhs_parts, _rhs_length);
150
if (tail_action != null) _rhs_length--;
152
/* Why does this run through the right hand side happen
153
over and over? here a quick combination of two
154
prior runs plus one I wanted of my own
156
/* allocate and copy over the right-hand-side */
157
/* count use of each rhs symbol */
158
_rhs = new production_part[_rhs_length];
159
for (i=0; i<_rhs_length; i++) {
160
_rhs[i] = rhs_parts[i];
161
if (!_rhs[i].is_action()) {
162
((symbol_part)_rhs[i]).the_symbol().note_use();
163
if (((symbol_part)_rhs[i]).the_symbol() instanceof terminal) {
165
((terminal)((symbol_part)_rhs[i]).the_symbol()).precedence_num();
167
((terminal)((symbol_part)_rhs[i]).the_symbol()).precedence_side();
172
/*now action string is really declaration string, so put it in front!
174
if (action_str == null) action_str = "";
175
if (tail_action != null && tail_action.code_string() != null)
176
action_str = action_str + "\t\t" + tail_action.code_string();
178
/* stash the action */
179
_action = new action_part(action_str);
181
/* rewrite production to remove any embedded actions */
182
remove_embedded_actions();
184
/* assign an index */
185
_index = next_index++;
187
/* put us in the global collection of productions */
188
_all.put(new Integer(_index),this);
190
/* put us in the production list of the lhs non terminal */
191
lhs_sym.add_production(this);
194
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
196
/** Constructor with no action string. */
198
non_terminal lhs_sym,
199
production_part rhs_parts[],
201
throws internal_error
203
this(lhs_sym,rhs_parts,rhs_l,null);
206
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
208
/* Constructor with precedence and associativity of production
209
contextually define */
211
non_terminal lhs_sym,
212
production_part rhs_parts[],
217
throws internal_error
219
this(lhs_sym,rhs_parts,rhs_l,action_str);
221
/* set the precedence */
222
set_precedence_num(prec_num);
223
set_precedence_side(prec_side);
226
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
228
/* Constructor w/ no action string and contextual precedence
231
non_terminal lhs_sym,
232
production_part rhs_parts[],
236
throws internal_error
238
this(lhs_sym,rhs_parts,rhs_l,null);
239
/* set the precedence */
240
set_precedence_num(prec_num);
241
set_precedence_side(prec_side);
244
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
246
/*-----------------------------------------------------------*/
247
/*--- (Access to) Static (Class) Variables ------------------*/
248
/*-----------------------------------------------------------*/
251
/** Table of all productions. Elements are stored using their index as
254
protected static Hashtable _all = new Hashtable();
256
/** Access to all productions. */
257
public static Enumeration all() {return _all.elements();}
259
/** Lookup a production by index. */
260
public static production find(int indx) {
261
return (production) _all.get(new Integer(indx));
264
//Hm Added clear to clear all static fields
265
public static void clear() {
270
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
272
/** Total number of productions. */
273
public static int number() {return _all.size();}
275
/** Static counter for assigning unique index numbers. */
276
protected static int next_index;
278
/*-----------------------------------------------------------*/
279
/*--- (Access to) Instance Variables ------------------------*/
280
/*-----------------------------------------------------------*/
282
/** The left hand side non-terminal. */
283
protected symbol_part _lhs;
285
/** The left hand side non-terminal. */
286
public symbol_part lhs() {return _lhs;}
289
/** The precedence of the rule */
290
protected int _rhs_prec = -1;
291
protected int _rhs_assoc = -1;
293
/** Access to the precedence of the rule */
294
public int precedence_num() { return _rhs_prec; }
295
public int precedence_side() { return _rhs_assoc; }
297
/** Setting the precedence of a rule */
298
public void set_precedence_num(int prec_num) {
299
_rhs_prec = prec_num;
301
public void set_precedence_side(int prec_side) {
302
_rhs_assoc = prec_side;
305
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
307
/** A collection of parts for the right hand side. */
308
protected production_part _rhs[];
310
/** Access to the collection of parts for the right hand side. */
311
public production_part rhs(int indx) throws internal_error
313
if (indx >= 0 && indx < _rhs_length)
316
throw new internal_error(
317
"Index out of range for right hand side of production");
320
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
322
/** How much of the right hand side array we are presently using. */
323
protected int _rhs_length;
325
/** How much of the right hand side array we are presently using. */
326
public int rhs_length() {return _rhs_length;}
328
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
330
/** An action_part containing code for the action to be performed when we
331
* reduce with this production.
333
protected action_part _action;
335
/** An action_part containing code for the action to be performed when we
336
* reduce with this production.
338
public action_part action() {return _action;}
340
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
342
/** Index number of the production. */
343
protected int _index;
345
/** Index number of the production. */
346
public int index() {return _index;}
348
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
350
/** Count of number of reductions using this production. */
351
protected int _num_reductions = 0;
353
/** Count of number of reductions using this production. */
354
public int num_reductions() {return _num_reductions;}
356
/** Increment the count of reductions with this non-terminal */
357
public void note_reduction_use() {_num_reductions++;}
359
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
361
/** Is the nullability of the production known or unknown? */
362
protected boolean _nullable_known = false;
364
/** Is the nullability of the production known or unknown? */
365
public boolean nullable_known() {return _nullable_known;}
367
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
369
/** Nullability of the production (can it derive the empty string). */
370
protected boolean _nullable = false;
372
/** Nullability of the production (can it derive the empty string). */
373
public boolean nullable() {return _nullable;}
375
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
377
/** First set of the production. This is the set of terminals that
378
* could appear at the front of some string derived from this production.
380
protected terminal_set _first_set = new terminal_set();
382
/** First set of the production. This is the set of terminals that
383
* could appear at the front of some string derived from this production.
385
public terminal_set first_set() {return _first_set;}
387
/*-----------------------------------------------------------*/
388
/*--- Static Methods ----------------------------------------*/
389
/*-----------------------------------------------------------*/
391
/** Determine if a given character can be a label id starter.
392
* @param c the character in question.
394
protected static boolean is_id_start(char c)
396
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_');
398
//later need to handle non-8-bit chars here
401
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
403
/** Determine if a character can be in a label id.
404
* @param c the character in question.
406
protected static boolean is_id_char(char c)
408
return is_id_start(c) || (c >= '0' && c <= '9');
411
/*-----------------------------------------------------------*/
412
/*--- General Methods ---------------------------------------*/
413
/*-----------------------------------------------------------*/
416
/** Return label declaration code
417
* @param labelname the label name
418
* @param stack_type the stack type of label?
421
protected String make_declaration(
428
/* Put in the left/right value labels */
429
if (emit.lr_values())
430
ret = "\t\tint " + labelname + "left = ((weka.core.parser.java_cup.runtime.Symbol)" +
433
((offset==0)?".peek()":(".elementAt(" + emit.pre("top") + "-" + offset + ")"))+
435
"\t\tint " + labelname + "right = ((weka.core.parser.java_cup.runtime.Symbol)" +
438
((offset==0)?".peek()":(".elementAt(" + emit.pre("top") + "-" + offset + ")"))+
442
/* otherwise, just declare label. */
443
return ret + "\t\t" + stack_type + " " + labelname + " = (" + stack_type +
444
")((" + "weka.core.parser.java_cup.runtime.Symbol) " + emit.pre("stack") +
446
((offset==0)?".peek()":(".elementAt(" + emit.pre("top") + "-" + offset + ")"))+
450
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
452
/** Declare label names as valid variables within the action string
453
* @param rhs array of RHS parts.
454
* @param rhs_len how much of rhs to consider valid.
455
* @param final_action the final action string of the production.
456
* @param lhs_type the object type associated with the LHS symbol.
458
protected String declare_labels(
459
production_part rhs[],
463
String declaration = "";
466
action_part act_part;
469
/* walk down the parts and extract the labels */
470
for (pos = 0; pos < rhs_len; pos++)
472
if (!rhs[pos].is_action())
474
part = (symbol_part)rhs[pos];
476
/* if it has a label, make declaration! */
477
if (part.label() != null)
479
declaration = declaration +
480
make_declaration(part.label(), part.the_symbol().stack_type(),
490
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
492
/** Helper routine to merge adjacent actions in a set of RHS parts
493
* @param rhs_parts array of RHS parts.
494
* @param len amount of that array that is valid.
495
* @return remaining valid length.
497
protected int merge_adjacent_actions(production_part rhs_parts[], int len)
499
int from_loc, to_loc, merge_cnt;
501
/* bail out early if we have no work to do */
502
if (rhs_parts == null || len == 0) return 0;
506
for (from_loc=0; from_loc<len; from_loc++)
508
/* do we go in the current position or one further */
509
if (to_loc < 0 || !rhs_parts[to_loc].is_action()
510
|| !rhs_parts[from_loc].is_action())
515
/* clear the way for it */
516
if (to_loc != from_loc) rhs_parts[to_loc] = null;
519
/* if this is not trivial? */
520
if (to_loc != from_loc)
522
/* do we merge or copy? */
523
if (rhs_parts[to_loc] != null && rhs_parts[to_loc].is_action() &&
524
rhs_parts[from_loc].is_action())
527
rhs_parts[to_loc] = new action_part(
528
((action_part)rhs_parts[to_loc]).code_string() +
529
((action_part)rhs_parts[from_loc]).code_string());
535
rhs_parts[to_loc] = rhs_parts[from_loc];
540
/* return the used length */
541
return len - merge_cnt;
544
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
546
/** Helper routine to strip a trailing action off rhs and return it
547
* @param rhs_parts array of RHS parts.
548
* @param len how many of those are valid.
549
* @return the removed action part.
551
protected action_part strip_trailing_action(
552
production_part rhs_parts[],
557
/* bail out early if we have nothing to do */
558
if (rhs_parts == null || len == 0) return null;
560
/* see if we have a trailing action */
561
if (rhs_parts[len-1].is_action())
563
/* snip it out and return it */
564
result = (action_part)rhs_parts[len-1];
565
rhs_parts[len-1] = null;
572
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
574
/** Remove all embedded actions from a production by factoring them
575
* out into individual action production using new non terminals.
576
* if the original production was: <pre>
577
* A ::= B {action1} C {action2} D
579
* then it will be factored into: <pre>
580
* A ::= B NT$1 C NT$2 D
584
* where NT$1 and NT$2 are new system created non terminals.
587
/* the declarations added to the parent production are also passed along,
588
as they should be perfectly valid in this code string, since it
589
was originally a code string in the parent, not on its own.
591
protected void remove_embedded_actions(
593
) throws internal_error
598
int lastLocation = -1;
599
/* walk over the production and process each action */
600
for (int act_loc = 0; act_loc < rhs_length(); act_loc++)
601
if (rhs(act_loc).is_action())
605
declare_str = declare_labels(
607
/* create a new non terminal for the action production */
608
new_nt = non_terminal.create_new(null, lhs().the_symbol().stack_type()); // TUM 20060608 embedded actions patch
609
new_nt.is_embedded_action = true; /* 24-Mar-1998, CSA */
611
/* create a new production with just the action */
612
new_prod = new action_production(this, new_nt, null, 0,
613
declare_str + ((action_part)rhs(act_loc)).code_string(), (lastLocation==-1)?-1:(act_loc-lastLocation));
615
/* replace the action with the generated non terminal */
616
_rhs[act_loc] = new symbol_part(new_nt);
617
lastLocation = act_loc;
621
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
623
/** Check to see if the production (now) appears to be nullable.
624
* A production is nullable if its RHS could derive the empty string.
625
* This results when the RHS is empty or contains only non terminals
626
* which themselves are nullable.
628
public boolean check_nullable() throws internal_error
630
production_part part;
634
/* if we already know bail out early */
635
if (nullable_known()) return nullable();
637
/* if we have a zero size RHS we are directly nullable */
638
if (rhs_length() == 0)
640
/* stash and return the result */
641
return set_nullable(true);
644
/* otherwise we need to test all of our parts */
645
for (pos=0; pos<rhs_length(); pos++)
649
/* only look at non-actions */
650
if (!part.is_action())
652
sym = ((symbol_part)part).the_symbol();
654
/* if its a terminal we are definitely not nullable */
655
if (!sym.is_non_term())
656
return set_nullable(false);
657
/* its a non-term, is it marked nullable */
658
else if (!((non_terminal)sym).nullable())
659
/* this one not (yet) nullable, so we aren't */
664
/* if we make it here all parts are nullable */
665
return set_nullable(true);
668
/** set (and return) nullability */
669
boolean set_nullable(boolean v)
671
_nullable_known = true;
676
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
678
/** Update (and return) the first set based on current NT firsts.
679
* This assumes that nullability has already been computed for all non
680
* terminals and productions.
682
public terminal_set check_first_set() throws internal_error
687
/* walk down the right hand side till we get past all nullables */
688
for (part=0; part<rhs_length(); part++)
690
/* only look at non-actions */
691
if (!rhs(part).is_action())
693
sym = ((symbol_part)rhs(part)).the_symbol();
695
/* is it a non-terminal?*/
696
if (sym.is_non_term())
698
/* add in current firsts from that NT */
699
_first_set.add(((non_terminal)sym).first_set());
701
/* if its not nullable, we are done */
702
if (!((non_terminal)sym).nullable())
707
/* its a terminal -- add that to the set */
708
_first_set.add((terminal)sym);
716
/* return our updated first set */
720
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
722
/** Equality comparison. */
723
public boolean equals(production other)
725
if (other == null) return false;
726
return other._index == _index;
729
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
731
/** Generic equality comparison. */
732
public boolean equals(Object other)
734
if (!(other instanceof production))
737
return equals((production)other);
740
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
742
/** Produce a hash code. */
743
public int hashCode()
745
/* just use a simple function of the index */
749
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
751
/** Convert to a string. */
752
public String toString()
756
/* catch any internal errors */
758
result = "production [" + index() + "]: ";
759
result += ((lhs() != null) ? lhs().toString() : "$$NULL-LHS$$");
761
for (int i = 0; i<rhs_length(); i++)
762
result += rhs(i) + " ";
764
if (action() != null && action().code_string() != null)
765
result += " {" + action().code_string() + "}";
767
if (nullable_known())
769
result += "[NULLABLE]";
771
result += "[NOT NULLABLE]";
772
} catch (internal_error e) {
773
/* crash on internal error since we can't throw it from here (because
774
superclass does not throw anything. */
782
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
784
/** Convert to a simpler string. */
785
public String to_simple_string() throws internal_error
789
result = ((lhs() != null) ? lhs().the_symbol().name() : "NULL_LHS");
791
for (int i = 0; i < rhs_length(); i++)
792
if (!rhs(i).is_action())
793
result += ((symbol_part)rhs(i)).the_symbol().name() + " ";
798
/*-----------------------------------------------------------*/