2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 2004 Prados Julien
20
* Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
23
package weka.filters.unsupervised.attribute;
25
import weka.core.AttributeStats;
26
import weka.core.Capabilities;
27
import weka.core.Instance;
28
import weka.core.Instances;
29
import weka.core.MathematicalExpression;
30
import weka.core.Option;
31
import weka.core.Range;
32
import weka.core.SparseInstance;
33
import weka.core.Utils;
34
import weka.core.Capabilities.Capability;
35
import weka.filters.UnsupervisedFilter;
37
import java.util.Enumeration;
38
import java.util.HashMap;
39
import java.util.Vector;
42
<!-- globalinfo-start -->
43
* Modify numeric attributes according to a given expression
45
<!-- globalinfo-end -->
47
<!-- options-start -->
48
* Valid options are: <p/>
50
* <pre> -unset-class-temporarily
51
* Unsets the class index temporarily before the filter is
52
* applied to the data.
55
* <pre> -E <expression>
56
* Specify the expression to apply. Eg. pow(A,6)/(MEAN+MAX)
57
* Supported operators are +, -, *, /, pow, log,
58
* abs, cos, exp, sqrt, tan, sin, ceil, floor, rint, (, ),
59
* MEAN, MAX, MIN, SD, COUNT, SUM, SUMSQUARED, ifelse</pre>
61
* <pre> -R <index1,index2-index4,...>
62
* Specify list of columns to ignore. First and last are valid
63
* indexes. (default none)</pre>
66
* Invert matching sense (i.e. only modify specified columns)</pre>
70
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
71
* @author Prados Julien (julien.prados@cui.unige.ch)
72
* @version $Revision: 1.8 $
73
* @see MathematicalExpression
75
public class MathExpression
76
extends PotentialClassIgnorer
77
implements UnsupervisedFilter {
79
/** Version identifier used by Java serialization. */
80
static final long serialVersionUID = -3713222714671997901L;
82
/**
 * User-supplied attribute range. convertInstance() applies the expression
 * to the attributes for which isInRange() is true; the range's inversion
 * flag is written by setInvertSelection().
 */
83
protected Range m_SelectCols = new Range();
85
/** Expression used when no -E option is given: (A-MIN)/(MAX-MIN). */
86
public static final String m_defaultExpression = "(A-MIN)/(MAX-MIN)";
88
/** The expression currently configured, as text. */
89
private String m_expression = m_defaultExpression;
91
/** Parsed form of the expression; assigned in batchFinished(), null before that. */
92
private MathematicalExpression.TreeNode m_expTree = null;
94
/**
 * Statistics per attribute position, filled in batchFinished() for numeric
 * non-class attributes. input() treats a null array as "statistics not yet
 * computed".
 */
95
private AttributeStats[] m_attStats;
100
/**
 * Creates the filter with invert-selection switched off, i.e. the
 * attributes named in the range are the ones left unmodified.
 */
public MathExpression() {
  super();
  setInvertSelection(false);
}
106
* Returns a string describing this filter
108
* @return a description of the filter suitable for
109
* displaying in the explorer/experimenter gui
111
public String globalInfo() {
113
return "Modify numeric attributes according to a given expression ";
117
* Returns the Capabilities of this filter.
119
* @return the capabilities of this object
122
public Capabilities getCapabilities() {
123
Capabilities result = super.getCapabilities();
126
result.enableAllAttributes();
127
result.enable(Capability.MISSING_VALUES);
130
result.enableAllClasses();
131
result.enable(Capability.MISSING_CLASS_VALUES);
132
result.enable(Capability.NO_CLASS);
138
* Sets the format of the input instances.
140
* @param instanceInfo an Instances object containing the input
141
* instance structure (any instances contained in the object are
142
* ignored - only the structure is required).
143
* @return true if the outputFormat may be collected immediately
144
* @throws Exception if the input format can't be set
147
// Registers the structure of the incoming data and declares the output
// structure to be the same.
// NOTE(review): no throws clause, opening brace, return statement or closing
// brace is visible for this method here, and several lines after
// setOutputFormat() appear to be missing - restore them from the complete file.
public boolean setInputFormat(Instances instanceInfo)
149
// Give the column range the highest valid attribute index.
m_SelectCols.setUpper(instanceInfo.numAttributes() - 1);
150
super.setInputFormat(instanceInfo);
151
// The filter changes values only, so output format == input format.
setOutputFormat(instanceInfo);
158
* Input an instance for filtering. Filter requires all
159
* training instances be read before producing output.
161
* @param instance the input instance
162
* @return true if the filtered instance may now be
163
* collected with output().
164
* @throws IllegalStateException if no input format has been set.
166
// Accepts one instance. While no attribute statistics exist the instance is
// buffered; otherwise it is handed to convertInstance().
// NOTE(review): closing braces, the return statements and several lines
// between the null check and the m_attStats test are not visible here -
// confirm against the complete file.
public boolean input(Instance instance) throws Exception {
168
// An input format must have been set before any instance is accepted.
if (getInputFormat() == null) {
169
throw new IllegalStateException("No input instance format defined");
175
// m_attStats is assigned in batchFinished(); null means the statistics the
// expression needs have not been computed yet.
if (m_attStats == null) {
176
bufferInput(instance);
179
// presumably the else-branch: statistics are available, convert right away
convertInstance(instance);
185
* Signify that this batch of input to the filter is finished.
186
* If the filter requires all instances prior to filtering,
187
* output() may now be called to retrieve the filtered instances.
189
* @return true if there are instances pending output
190
* @throws IllegalStateException if no input structure has been defined
192
// Ends the current batch. If no statistics exist yet, parses the expression,
// computes statistics for every numeric non-class attribute of the input
// data and converts the instances held there. Returns whether output is
// pending.
// NOTE(review): closing braces and several lines before the final return
// are not visible here - confirm against the complete file.
public boolean batchFinished() throws Exception {
194
if (getInputFormat() == null) {
195
throw new IllegalStateException("No input instance format defined");
197
// Statistics are computed only when they do not exist yet.
if (m_attStats == null) {
198
Instances input = getInputFormat();
200
// Parse the textual expression once; convertInstance() evaluates the tree.
m_expTree = MathematicalExpression.parse(getExpression());
201
// One slot per attribute; slots of non-numeric and class attributes stay null.
m_attStats = new AttributeStats [input.numAttributes()];
203
for (int i = 0; i < input.numAttributes(); i++) {
204
if (input.attribute(i).isNumeric() &&
205
(input.classIndex() != i)) {
206
m_attStats[i] = input.attributeStats(i);
210
// Convert pending input instances
211
for(int i = 0; i < input.numInstances(); i++) {
212
convertInstance(input.instance(i));
219
return (numPendingOutput() != 0);
223
* Convert a single instance over. The converted instance is
224
* added to the end of the output queue.
226
* @param instance the instance to convert
227
* @throws Exception if instance cannot be converted
229
// Builds a transformed copy of one instance by evaluating the parsed
// expression for each eligible attribute value. Sparse and non-sparse
// instances are handled by two parallel code paths.
// NOTE(review): this excerpt is missing lines - the declarations of `ind` and
// `value`, the else-branches, closing braces, and whatever follows
// setDataset() (the description says the result is queued for output, but no
// such call is visible). Restore from the complete file before editing logic.
private void convertInstance(Instance instance) throws Exception {
231
Instance inst = null;
232
// Variable bindings handed to the expression tree; refilled per attribute.
HashMap symbols = new HashMap(5);
233
if (instance instanceof SparseInstance) {
234
// Work arrays sized for the worst case; trimmed to `ind` entries below.
double[] newVals = new double[instance.numAttributes()];
235
int[] newIndices = new int[instance.numAttributes()];
236
double[] vals = instance.toDoubleArray();
238
for (int j = 0; j < instance.numAttributes(); j++) {
239
if (m_SelectCols.isInRange(j)) {
241
// Only numeric, non-missing, non-class values are transformed.
if (instance.attribute(j).isNumeric() &&
242
(!Instance.isMissingValue(vals[j])) &&
243
(getInputFormat().classIndex() != j)) {
244
// A = current value; the rest are the attribute's statistics.
symbols.put("A", new Double(vals[j]));
245
symbols.put("MAX", new Double(m_attStats[j].numericStats.max));
246
symbols.put("MIN", new Double(m_attStats[j].numericStats.min));
247
symbols.put("MEAN", new Double(m_attStats[j].numericStats.mean));
248
symbols.put("SD", new Double(m_attStats[j].numericStats.stdDev));
249
symbols.put("COUNT", new Double(m_attStats[j].numericStats.count));
250
symbols.put("SUM", new Double(m_attStats[j].numericStats.sum));
251
symbols.put("SUMSQUARED", new Double(m_attStats[j].numericStats.sumSq));
252
value = m_expTree.eval(symbols);
253
// A NaN or infinite result is reported and replaced by a missing value.
if (Double.isNaN(value) || Double.isInfinite(value)) {
254
System.err.println("WARNING:Error in evaluating the expression: missing value set");
255
value = Instance.missingValue();
258
// NOTE(review): the guard and index bookkeeping around the next two stores
// are not visible in this excerpt.
newVals[ind] = value;
265
newVals[ind] = value;
272
// Shrink the work arrays to the entries actually stored.
double[] tempVals = new double[ind];
273
int[] tempInd = new int[ind];
274
System.arraycopy(newVals, 0, tempVals, 0, ind);
275
System.arraycopy(newIndices, 0, tempInd, 0, ind);
276
inst = new SparseInstance(instance.weight(), tempVals, tempInd,
277
instance.numAttributes());
279
// presumably the non-sparse branch: values are rewritten in place in `vals`
double[] vals = instance.toDoubleArray();
280
for (int j = 0; j < getInputFormat().numAttributes(); j++) {
281
if (m_SelectCols.isInRange(j)) {
282
if (instance.attribute(j).isNumeric() &&
283
(!Instance.isMissingValue(vals[j])) &&
284
(getInputFormat().classIndex() != j)) {
285
symbols.put("A", new Double(vals[j]));
286
symbols.put("MAX", new Double(m_attStats[j].numericStats.max));
287
symbols.put("MIN", new Double(m_attStats[j].numericStats.min));
288
symbols.put("MEAN", new Double(m_attStats[j].numericStats.mean));
289
symbols.put("SD", new Double(m_attStats[j].numericStats.stdDev));
290
symbols.put("COUNT", new Double(m_attStats[j].numericStats.count));
291
symbols.put("SUM", new Double(m_attStats[j].numericStats.sum));
292
symbols.put("SUMSQUARED", new Double(m_attStats[j].numericStats.sumSq));
293
vals[j] = m_expTree.eval(symbols);
294
// Same NaN/infinite handling as the sparse path (message text differs).
if (Double.isNaN(vals[j]) || Double.isInfinite(vals[j])) {
295
System.err.println("WARNING:Error in Evaluation the Expression: missing value set");
296
vals[j] = Instance.missingValue();
301
inst = new Instance(instance.weight(), vals);
303
// The copy refers to the same dataset as the original instance.
inst.setDataset(instance.dataset());
308
* Parses a given list of options. <p/>
310
<!-- options-start -->
311
* Valid options are: <p/>
313
* <pre> -unset-class-temporarily
314
* Unsets the class index temporarily before the filter is
315
* applied to the data.
316
* (default: no)</pre>
318
* <pre> -E <expression>
319
* Specify the expression to apply. Eg. pow(A,6)/(MEAN+MAX)
320
* Supported operators are +, -, *, /, pow, log,
321
* abs, cos, exp, sqrt, tan, sin, ceil, floor, rint, (, ),
322
* MEAN, MAX, MIN, SD, COUNT, SUM, SUMSQUARED, ifelse</pre>
324
* <pre> -R <index1,index2-index4,...>
325
* Specify list of columns to ignore. First and last are valid
326
* indexes. (default none)</pre>
329
* Invert matching sense (i.e. only modify specified columns)</pre>
333
* @param options the list of options as an array of strings
334
* @throws Exception if an option is not supported
336
public void setOptions(String[] options) throws Exception {
337
super.setOptions(options);
339
String expString = Utils.getOption('E', options);
340
if (expString.length() != 0) {
341
setExpression(expString);
343
setExpression(m_defaultExpression);
346
String ignoreList = Utils.getOption('R', options);
347
if (ignoreList.length() != 0) {
348
setIgnoreRange(ignoreList);
351
setInvertSelection(Utils.getFlag('V', options));
355
* Gets the current settings of the filter.
357
* @return an array of strings suitable for passing to setOptions
359
public String [] getOptions() {
364
result = new Vector();
366
options = super.getOptions();
367
for (i = 0; i < options.length; i++)
368
result.add(options[i]);
371
result.add(getExpression());
373
if (getInvertSelection())
376
if (!getIgnoreRange().equals("")) {
378
result.add(getIgnoreRange());
381
return (String[]) result.toArray(new String[result.size()]);
385
* Returns an enumeration describing the available options.
387
* @return an enumeration of all the available options.
389
public Enumeration listOptions() {
390
Vector result = new Vector();
391
Enumeration enm = super.listOptions();
392
while (enm.hasMoreElements())
393
result.add(enm.nextElement());
395
result.addElement(new Option(
396
"\tSpecify the expression to apply. Eg. pow(A,6)/(MEAN+MAX)"
397
+"\n\tSupported operators are +, -, *, /, pow, log,"
398
+"\n\tabs, cos, exp, sqrt, tan, sin, ceil, floor, rint, (, ), "
399
+"\n\tMEAN, MAX, MIN, SD, COUNT, SUM, SUMSQUARED, ifelse",
400
"E",1,"-E <expression>"));
402
result.addElement(new Option(
403
"\tSpecify list of columns to ignore. First and last are valid\n"
404
+"\tindexes. (default none)",
405
"R", 1, "-R <index1,index2-index4,...>"));
407
result.addElement(new Option(
408
"\tInvert matching sense (i.e. only modify specified columns)",
411
return result.elements();
415
* Returns the tip text for this property
417
* @return tip text for this property suitable for
418
* displaying in the explorer/experimenter gui
420
public String expressionTipText() {
421
return "Specify the expression to apply. The 'A' letter"
422
+ "refers to the attribute value. MIN,MAX,MEAN,SD"
423
+ "refer respectively to minimum, maximum, mean and"
424
+ "standard deviation of the attribute."
425
+"\n\tSupported operators are +, -, *, /, pow, log,"
426
+"abs, cos, exp, sqrt, tan, sin, ceil, floor, rint, (, ),"
427
+"A,MEAN, MAX, MIN, SD, COUNT, SUM, SUMSQUARED, ifelse"
428
+"\n\tEg. pow(A,6)/(MEAN+MAX)*ifelse(A<0,0,sqrt(A))+ifelse(![A>9 && A<15])";
432
* Set the expression to apply
433
* @param expr a mathematical expression to apply
435
public void setExpression(String expr) {
441
* @return the expression
443
public String getExpression() {
448
* Returns the tip text for this property
450
* @return tip text for this property suitable for
451
* displaying in the explorer/experimenter gui
453
public String invertSelectionTipText() {
455
return "Determines whether action is to select or unselect."
456
+ " If set to true, only the specified attributes will be modified;"
457
+ " If set to false, specified attributes will not be modified.";
461
* Get whether the supplied columns are the ones to modify (true) or the ones to leave unmodified (false)
463
* @return true if only the supplied columns will be modified
465
public boolean getInvertSelection() {
467
return !m_SelectCols.getInvert();
471
* Set whether selected columns should be select or unselect. If true the
472
* selected columns are modified. If false the selected columns are not modified.
475
* @param invert the new invert setting
477
public void setInvertSelection(boolean invert) {
479
m_SelectCols.setInvert(!invert);
483
* Returns the tip text for this property
485
* @return tip text for this property suitable for
486
* displaying in the explorer/experimenter gui
488
public String ignoreRangeTipText() {
490
return "Specify range of attributes to act on."
491
+ " This is a comma separated list of attribute indices, with"
492
+ " \"first\" and \"last\" valid values. Specify an inclusive"
493
+ " range with \"-\". E.g: \"first-3,5,6-10,last\".";
497
* Get the current range selection.
499
* @return a string containing a comma separated list of ranges
501
public String getIgnoreRange() {
503
return m_SelectCols.getRanges();
507
* Set which attributes are to be ignored
509
* @param rangeList a string representing the list of attributes. Since
510
* the string will typically come from a user, attributes are indexed from 1.
512
* eg: first-3,5,6-last
514
public void setIgnoreRange(String rangeList) {
516
m_SelectCols.setRanges(rangeList);
520
* Main method for testing this class.
522
* @param argv should contain arguments to the filter:
525
public static void main(String [] argv) {
526
runFilter(new MathExpression(), argv);