22
22
package weka.filters.unsupervised.instance;
24
24
import weka.core.Capabilities;
25
import weka.core.Instance;
25
26
import weka.core.Instances;
26
27
import weka.core.Option;
27
28
import weka.core.RevisionUtils;
108
109
* not ismissing(ATT3)<br/>
110
111
<!-- globalinfo-end -->
112
113
<!-- options-start -->
113
114
* Valid options are: <p/>
115
116
* <pre> -E &lt;expr&gt;
116
117
* The expression to use for filtering
117
118
* (default: true).</pre>
121
* Apply the filter to instances that arrive after the first
122
* (training) batch. The default is to not apply the filter (i.e.
123
* always return the instance)</pre>
119
125
<!-- options-end -->
121
127
* @author fracpete (fracpete at waikato dot ac dot nz)
122
* @version $Revision: 6113 $
128
* @version $Revision: 7599 $
124
130
public class SubsetByExpression
125
131
extends SimpleBatchFilter {
127
133
/** for serialization. */
128
134
private static final long serialVersionUID = 5628686110979589602L;
130
136
/** the expression to use for filtering. */
131
137
protected String m_Expression = "true";
139
/** Whether to filter instances after the first batch has been processed */
140
protected boolean m_filterAfterFirstBatch = false;
134
143
* Returns a string describing this filter.
208
217
+ " from the 'labor' UCI dataset:\n"
209
218
+ " not ismissing(ATT3)\n"
223
* Input an instance for filtering. Filter requires all
224
* training instances be read before producing output (calling the method
225
* batchFinished() makes the data available). If this instance is part of
226
* a new batch, m_NewBatch is set to false.
228
* @param instance the input instance
229
* @return true if the filtered instance may now be
230
* collected with output().
231
* @throws IllegalStateException if no input structure has been defined
232
* @throws Exception if something goes wrong
233
* @see #batchFinished()
235
public boolean input(Instance instance) throws Exception {
236
if (getInputFormat() == null)
237
throw new IllegalStateException("No input instance format defined");
244
bufferInput(instance);
246
int numReturnedFromParser = 0;
247
if (isFirstBatchDone()) {
248
Instances inst = new Instances(getInputFormat());
249
inst = process(inst);
250
numReturnedFromParser = inst.numInstances();
251
for (int i = 0; i < inst.numInstances(); i++)
252
push(inst.instance(i));
256
return (numReturnedFromParser > 0);
218
264
public Enumeration listOptions() {
221
267
result = new Vector();
223
269
result.addElement(new Option(
224
270
"\tThe expression to use for filtering\n"
225
271
+ "\t(default: true).",
226
272
"E", 1, "-E <expr>"));
274
result.addElement(new Option(
275
"\tApply the filter to instances that arrive after the first\n" +
276
"\t(training) batch. The default is to not apply the filter (i.e.\n" +
277
"\talways return the instance)",
228
280
return result.elements();
233
285
* Parses a given list of options. <p/>
235
287
<!-- options-start -->
236
288
* Valid options are: <p/>
238
290
* <pre> -E &lt;expr&gt;
239
291
* The expression to use for filtering
240
292
* (default: true).</pre>
295
* Apply the filter to instances that arrive after the first
296
* (training) batch. The default is to not apply the filter (i.e.
297
* always return the instance)</pre>
242
299
<!-- options-end -->
244
301
* @param options the list of options as an array of strings
247
304
public void setOptions(String[] options) throws Exception {
250
307
tmpStr = Utils.getOption('E', options);
251
308
if (tmpStr.length() != 0)
252
309
setExpression(tmpStr);
254
311
setExpression("true");
313
m_filterAfterFirstBatch = Utils.getFlag('F', options);
256
315
if (getInputFormat() != null)
257
316
setInputFormat(getInputFormat());
265
324
public String[] getOptions() {
266
325
Vector<String> result;
268
327
result = new Vector();
270
329
result.add("-E");
271
330
result.add("" + getExpression());
332
if (m_filterAfterFirstBatch) {
273
336
return result.toArray(new String[result.size()]);
277
340
* Returns the Capabilities of this filter.
279
342
* @return the capabilities of this object
288
351
result.enable(Capability.NUMERIC_ATTRIBUTES);
289
352
result.enable(Capability.DATE_ATTRIBUTES);
290
353
result.enable(Capability.MISSING_VALUES);
293
356
result.enable(Capability.NOMINAL_CLASS);
294
357
result.enable(Capability.NUMERIC_CLASS);
295
358
result.enable(Capability.DATE_CLASS);
296
359
result.enable(Capability.MISSING_CLASS_VALUES);
297
360
result.enable(Capability.NO_CLASS);
321
384
* Returns the tip text for this property.
323
386
* @return tip text for this property suitable for
324
387
* displaying in the explorer/experimenter gui
326
389
public String expressionTipText() {
327
390
return "The expression to used for filtering the dataset.";
394
* Set whether to apply the filter to instances that arrive once
395
* the first (training) batch has been seen. The default is to
396
* not apply the filter and just return each instance input. This
397
* is so that, when used in the FilteredClassifier, a test instance
398
* does not get "consumed" by the filter and a prediction is always
401
* @param b true if the filter should be applied to instances that
402
* arrive after the first (training) batch has been processed.
404
public void setFilterAfterFirstBatch(boolean b) {
405
m_filterAfterFirstBatch = b;
409
* Get whether to apply the filter to instances that arrive once
410
* the first (training) batch has been seen. The default is to
411
* not apply the filter and just return each instance input. This
412
* is so that, when used in the FilteredClassifier, a test instance
413
* does not get "consumed" by the filter and a prediction is always
416
* @return true if the filter should be applied to instances that
417
* arrive after the first (training) batch has been processed.
419
public boolean getFilterAfterFirstBatch() {
420
return m_filterAfterFirstBatch;
424
* Returns the tip text for this property.
426
* @return tip text for this property suitable for
427
* displaying in the explorer/experimenter gui
429
public String filterAfterFirstBatchTipText() {
430
return "Whether to apply the filtering process to instances that " +
431
"are input after the first (training) batch. The default " +
432
"is false so that, when used in a FilteredClassifier, test" +
433
" instances do not potentially get 'consumed' by the filter " +
434
"an a prediction is always made.";
331
* Determines the output format based on the input format and returns
438
* Determines the output format based on the input format and returns
334
441
* @param inputFormat the input format to base the output format on
351
458
* @see #batchFinished()
353
460
protected Instances process(Instances instances) throws Exception {
354
if (!isFirstBatchDone())
461
if (!isFirstBatchDone() || m_filterAfterFirstBatch) {
355
462
return Parser.filter(m_Expression, instances);
357
464
return instances;
361
469
* Returns the revision string.
363
471
* @return the revision
365
473
public String getRevision() {
366
return RevisionUtils.extract("$Revision: 6113 $");
474
return RevisionUtils.extract("$Revision: 7599 $");