2
* STANDARD ML OF NEW JERSEY COPYRIGHT NOTICE, LICENSE AND DISCLAIMER.
4
* Copyright (c) 1989-1998 by Lucent Technologies
6
* Permission to use, copy, modify, and distribute this software and its
7
* documentation for any purpose and without fee is hereby granted, provided
8
* that the above copyright notice appear in all copies and that both the
9
* copyright notice and this permission notice and warranty disclaimer appear
10
* in supporting documentation, and that the name of Lucent Technologies, Bell
11
* Labs or any Lucent entity not be used in advertising or publicity pertaining
12
* to distribution of the software without specific, written prior permission.
14
* Lucent disclaims all warranties with regard to this software, including all
15
* implied warranties of merchantability and fitness. In no event shall Lucent
16
* be liable for any special, indirect or consequential damages or any damages
17
* whatsoever resulting from loss of use, data or profits, whether in an action
18
* of contract, negligence or other tortious action, arising out of or in
19
* connection with the use or performance of this software.
21
* Taken from this URL:
22
* http://www.smlnj.org/license.html
24
* This license is compatible with the GNU GPL (see section "Standard ML of New
25
* Jersey Copyright License"):
26
* http://www.gnu.org/licenses/license-list.html#StandardMLofNJ
30
* Copyright 1996-1999 by Scott Hudson, Frank Flannery, C. Scott Ananian
33
package weka.filters.unsupervised.instance.subsetbyexpression;
36
import weka.core.parser.java_cup.runtime.*;
42
* A parser for evaluating whether an Instance complies with a boolean expression
45
* @author FracPete (fracpete at waikato dot ac dot nz)
46
* @version $Revision: 1.2 $
50
/** variable - value relation. */
51
protected HashMap m_Symbols = new HashMap();
53
/** attribute - attribute-type (constants from weka.core.Attribute) relation. */
54
protected Hashtable<String,Integer> m_AttributeTypes = new Hashtable<String,Integer>();
56
/** for storing the result of the expression. */
57
protected Boolean m_Result = null;
60
* Sets the variable - value relation to use.
62
* @param value the variable-value relation
64
public void setSymbols(HashMap value) {
69
* Returns the current variable - value relation in use.
71
* @return the variable-value relation
73
public HashMap getSymbols() {
78
* Sets the attribute - attribute-type relation to use.
80
* @param value the att - att-type relation
82
public void setAttributeTypes(Hashtable value) {
83
m_AttributeTypes = value;
87
* Returns the current attribute - attribute-type relation in use.
89
* @return the att - att-type relation
91
public Hashtable getAttributeTypes() {
92
return m_AttributeTypes;
96
* Sets the result of the evaluation.
98
* @param value the result
100
public void setResult(Boolean value) {
105
* Returns the result of the evaluation.
109
public Boolean getResult() {
114
* Returns either a String object for nominal attributes or a Double for numeric
115
* ones. For all other attribute types this method throws an Exception.
116
* It also returns a Double object in case of a missing value (for all
119
* @param instance the instance to work on
120
* @param index the index of the attribute to return
121
* @return the converted value
123
public static Object getValue(Instance instance, int index) {
124
if (instance.isMissing(index))
125
return new Double(Instance.missingValue());
126
else if (instance.attribute(index).isNominal())
127
return new String(instance.stringValue(index));
128
else if (instance.attribute(index).isNumeric())
129
return new Double(instance.value(index));
131
throw new IllegalArgumentException(
132
"Unhandled attribute type '" + instance.attribute(index).type() + "'!");
136
* Filters the input dataset against the provided expression.
138
* @param expression the expression used for filtering
139
* @param input the input data
140
* @return the filtered data
141
* @throws Exception if parsing fails
143
public static Instances filter(String expression, Instances input) throws Exception {
145
Instances output = new Instances(input, 0);
147
// setup attribute - attribute-type relation
148
Hashtable<String,Integer> attTypes = new Hashtable<String,Integer>();
149
for (int i = 0; i < input.numAttributes(); i++)
150
attTypes.put("ATT" + (i+1), input.attribute(i).type());
151
if (input.classIndex() > -1)
152
attTypes.put("CLASS", input.classAttribute().type());
155
SymbolFactory sf = new DefaultSymbolFactory();
156
HashMap symbols = new HashMap();
157
ByteArrayInputStream parserInput = new ByteArrayInputStream(expression.getBytes());
158
for (int i = 0; i < input.numInstances(); i++) {
159
Instance instance = input.instance(i);
162
for (int n = 0; n < instance.numAttributes(); n++) {
163
if (n == instance.classIndex())
164
symbols.put("CLASS", getValue(instance, n));
165
symbols.put("ATT" + (n+1), getValue(instance, n));
168
// evaluate expression
170
Parser parser = new Parser(new Scanner(parserInput,sf), sf);
171
parser.setSymbols(symbols);
173
if (parser.getResult())
174
output.add((Instance) instance.copy());
181
* Runs the parser from commandline. Takes the following arguments:
183
* <li>expression</li>
184
* <li>input file</li>
185
* <li>class index (first|last|num), use 0 to ignore</li>
186
* <li>output file</li>
189
* @param args the commandline arguments
190
* @throws Exception if something goes wrong
192
public static void main(String args[]) throws Exception {
194
String expression = args[0];
197
BufferedReader reader = new BufferedReader(new FileReader(args[1]));
198
Instances input = new Instances(reader);
200
if (args[2].equals("first"))
201
input.setClassIndex(0);
202
else if (args[2].equals("last"))
203
input.setClassIndex(input.numAttributes() - 1);
205
input.setClassIndex(Integer.parseInt(args[2]) - 1);
208
Instances output = filter(expression, input);
211
BufferedWriter writer = new BufferedWriter(new FileWriter(args[3]));
212
writer.write(new Instances(output, 0).toString());
214
for (int i = 0; i < output.numInstances(); i++) {
215
writer.write(output.instance(i).toString());
// ---- terminals that carry no value ----
// punctuation and the missing-value test
terminal COMMA,
         LPAREN,
         RPAREN,
         ISMISSING;
// arithmetic operators
terminal MINUS,
         PLUS,
         TIMES,
         DIVISION;
// mathematical functions
terminal ABS,
         SQRT,
         LOG,
         EXP,
         SIN,
         COS,
         TAN,
         RINT,
         FLOOR,
         POW,
         CEIL;
// boolean literals, comparisons and logical operators
terminal TRUE,
         FALSE,
         LT,
         LE,
         GT,
         GE,
         EQ,
         NOT,
         AND,
         OR,
         IS;

// ---- terminals that carry a value ----
terminal Double  NUMBER;
terminal Boolean BOOLEAN;
terminal String  ATTRIBUTE, STRING;

// ---- non-terminals ----
non terminal         boolexpr_list, boolexpr_part;
non terminal Double  expr;
non terminal Double  opexpr;
non terminal Double  funcexpr;
non terminal Boolean boolexpr;

// ---- precedences ----
// CUP reads this list from lowest (first) to highest (last) precedence;
// terminals named on the same line share one level.
precedence left PLUS, MINUS;
precedence left TIMES, DIVISION;
precedence left LPAREN, RPAREN;
precedence left ABS, SQRT, LOG, EXP, SIN, COS, TAN, RINT, FLOOR, POW, CEIL;
precedence left AND, OR;
// first production of the grammar: one or more boolean expressions in a row
boolexpr_list ::= boolexpr_list boolexpr_part
                | boolexpr_part
                ;

// each evaluated boolean expression is handed to the parser via setResult
boolexpr_part ::= boolexpr:e
                  {: parser.setResult(e); :}
                ;
246
boolexpr ::= BOOLEAN:b
249
{: RESULT = new Boolean(true); :}
251
{: RESULT = new Boolean(false); :}
253
{: RESULT = new Boolean(l.doubleValue() < r.doubleValue()); :}
255
{: RESULT = new Boolean(l.doubleValue() <= r.doubleValue()); :}
257
{: RESULT = new Boolean(l.doubleValue() > r.doubleValue()); :}
259
{: RESULT = new Boolean(l.doubleValue() >= r.doubleValue()); :}
261
{: RESULT = new Boolean(l.doubleValue() == r.doubleValue()); :}
262
| LPAREN boolexpr:b RPAREN
266
| boolexpr:l AND boolexpr:r
267
{: RESULT = l && r; :}
268
| boolexpr:l OR boolexpr:r
269
{: RESULT = l || r; :}
270
| ATTRIBUTE:a IS STRING:s
271
{: if (parser.getSymbols().containsKey(a))
272
RESULT = (parser.getSymbols().get(a) instanceof String) && ((String) parser.getSymbols().get(a)).equals(s);
274
throw new IllegalStateException("Unknown symbol '" + a + "'!");
276
| ISMISSING LPAREN ATTRIBUTE:a RPAREN
277
{: if (parser.getSymbols().containsKey(a))
278
RESULT = (parser.getSymbols().get(a) instanceof Double) && Instance.isMissingValue((Double) parser.getSymbols().get(a));
280
throw new IllegalStateException("Unknown symbol '" + a + "'!");
287
{: if (parser.getSymbols().containsKey(a))
288
RESULT = (Double) parser.getSymbols().get(a);
290
throw new IllegalStateException("Unknown symbol '" + a + "'!");
292
| LPAREN expr:e RPAREN
// binary arithmetic on two numeric sub-expressions; division follows plain
// double semantics (no special handling of a zero divisor)
opexpr ::= expr:l PLUS expr:r
           {: RESULT = Double.valueOf(l.doubleValue() + r.doubleValue()); :}
         | expr:l MINUS expr:r
           {: RESULT = Double.valueOf(l.doubleValue() - r.doubleValue()); :}
         | expr:l TIMES expr:r
           {: RESULT = Double.valueOf(l.doubleValue() * r.doubleValue()); :}
         | expr:l DIVISION expr:r
           {: RESULT = Double.valueOf(l.doubleValue() / r.doubleValue()); :}
         ;
// mathematical functions; each one delegates to the java.lang.Math method of
// the same name and boxes the result
funcexpr ::= ABS LPAREN expr:e RPAREN
             {: RESULT = Double.valueOf(Math.abs(e.doubleValue())); :}
           | SQRT LPAREN expr:e RPAREN
             {: RESULT = Double.valueOf(Math.sqrt(e.doubleValue())); :}
           | LOG LPAREN expr:e RPAREN
             {: RESULT = Double.valueOf(Math.log(e.doubleValue())); :}
           | EXP LPAREN expr:e RPAREN
             {: RESULT = Double.valueOf(Math.exp(e.doubleValue())); :}
           | SIN LPAREN expr:e RPAREN
             {: RESULT = Double.valueOf(Math.sin(e.doubleValue())); :}
           | COS LPAREN expr:e RPAREN
             {: RESULT = Double.valueOf(Math.cos(e.doubleValue())); :}
           | TAN LPAREN expr:e RPAREN
             {: RESULT = Double.valueOf(Math.tan(e.doubleValue())); :}
           | RINT LPAREN expr:e RPAREN
             {: RESULT = Double.valueOf(Math.rint(e.doubleValue())); :}
           | FLOOR LPAREN expr:e RPAREN
             {: RESULT = Double.valueOf(Math.floor(e.doubleValue())); :}
           | POW LPAREN expr:base COMMA expr:exponent RPAREN
             {: RESULT = Double.valueOf(Math.pow(base.doubleValue(), exponent.doubleValue())); :}
           | CEIL LPAREN expr:e RPAREN
             {: RESULT = Double.valueOf(Math.ceil(e.doubleValue())); :}
           ;