2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
* SimpleStreamFilter.java
19
* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
25
import weka.core.Instance;
26
import weka.core.Instances;
29
* This filter is a superclass for simple stream filters. <p/>
32
* <b>General notes:</b><br/>
34
* <li>After the first call of batchFinished() the field m_FirstBatchDone is
35
* set to <code>true</code>. </li>
39
* <b>Example:</b><br/>
40
* The following code snippet uses the filter <code>SomeFilter</code> on a
41
* dataset that is loaded from <code>filename</code>.
44
* import weka.filters.*;
47
* SomeFilter filter = new SomeFilter();
48
* // set necessary options for the filter
49
* Instances data = new Instances(
51
* new FileReader(filename)));
52
* Instances filteredData = Filter.useFilter(data, filter);
55
* <b>Implementation:</b><br/>
56
* Only the following abstract methods need to be implemented:
58
* <li>globalInfo()</li>
59
* <li>determineOutputFormat(Instances)</li>
60
* <li>process(Instance)</li>
63
* In addition, the <b>getCapabilities()</b> method must return the kinds of
64
* attributes and classes the filter can handle.
67
* If more options are necessary, then the following methods need to be
70
* <li>listOptions()</li>
71
* <li>setOptions(String[])</li>
72
* <li>getOptions()</li>
76
* To make the filter available from the command line, one must add the following
77
* main method for correct execution (&lt;Filtername&gt; must be replaced
78
* with the actual filter class name):
80
* public static void main(String[] args) {
81
* runFilter(new &lt;Filtername&gt;(), args);
86
* <b>Example implementation:</b><br/>
89
* import weka.core.Capabilities.*;
90
* import weka.filters.*;
92
* import java.util.Random;
94
* public class SimpleStream
95
* extends SimpleStreamFilter {
97
* public String globalInfo() {
98
* return "A simple stream filter that adds an attribute 'bla' at the end containing a random number.";
101
* public Capabilities getCapabilities() {
102
* Capabilities result = super.getCapabilities();
103
* result.enableAllAttributes();
104
* result.enableAllClasses();
105
* result.enable(Capability.NO_CLASS); // filter doesn't need class to be set
109
* protected Instances determineOutputFormat(Instances inputFormat) {
110
* Instances result = new Instances(inputFormat, 0);
111
* result.insertAttributeAt(new Attribute("bla"), result.numAttributes());
115
* protected Instance process(Instance inst) {
116
* double[] values = new double[inst.numAttributes() + 1];
117
* for (int n = 0; n < inst.numAttributes(); n++)
118
* values[n] = inst.value(n);
119
* values[values.length - 1] = new Random().nextInt();
120
* Instance result = new Instance(1, values);
124
* public static void main(String[] args) {
125
* runFilter(new SimpleStream(), args);
131
* <b>Options:</b><br/>
132
* Valid filter-specific options are:<p/>
135
* Turns on output of debugging information.<p/>
137
* @author FracPete (fracpete at waikato dot ac dot nz)
138
* @version $Revision: 1.8 $
139
* @see SimpleBatchFilter
140
* @see #input(Instance)
141
* @see #batchFinished()
142
* @see #m_FirstBatchDone
144
public abstract class SimpleStreamFilter
146
implements StreamableFilter {
148
/** for serialization */
149
private static final long serialVersionUID = 2754882676192747091L;
152
* Returns true if the output format is immediately available after the
153
* input format has been set and not only after all the data has been
154
* seen (see batchFinished()). This method should normally return true
155
* for a stream filter, since otherwise the first batch is buffered and
156
* processed in a batch manner instead (only later batches are streamed, see
159
* @return true if the output format is immediately available
160
* @see #batchFinished()
161
* @see #setInputFormat(Instances)
162
* @see #m_FirstBatchDone
164
protected boolean hasImmediateOutputFormat() {
169
* Determines the output format based on the input format and returns
170
* this. In case the output format cannot be returned immediately, i.e.,
171
* hasImmediateOutputFormat() returns false, then this method will be called
172
* from batchFinished() after the call of preprocess(Instances), in which,
173
* e.g., statistics for the actual processing step can be gathered.
175
* @param inputFormat the input format to base the output format on
176
* @return the output format
177
* @throws Exception in case the determination goes wrong
178
* @see #hasImmediateOutputFormat()
179
* @see #batchFinished()
180
* @see #preprocess(Instances)
182
protected abstract Instances determineOutputFormat(Instances inputFormat) throws Exception;
185
* Processes the given instance (may change the provided instance) and
186
* returns the modified version.
188
* @param instance the instance to process
189
* @return the modified data
190
* @throws Exception in case the processing goes wrong
192
protected abstract Instance process(Instance instance) throws Exception;
195
* Processes the given data (may change the provided dataset) and returns
196
* the modified version. This method is called in batchFinished().
197
* This implementation only calls process(Instance) for each instance
198
* in the given dataset.
200
* @param instances the data to process
201
* @return the modified data
202
* @throws Exception in case the processing goes wrong
203
* @see #batchFinished()
204
* @see #process(Instance)
206
protected Instances process(Instances instances) throws Exception {
210
result = new Instances(getOutputFormat(), 0);
212
for (i = 0; i < instances.numInstances(); i++)
213
result.add(process(instances.instance(i)));
219
* In case the output format cannot be returned immediately, this method
220
* is called before the actual processing of the instances. Derived classes
221
* can implement specific behavior here.
223
* @param instances the instances to work on
224
* @see #hasImmediateOutputFormat()
225
* @see #determineOutputFormat(Instances)
227
protected void preprocess(Instances instances) {
231
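* Input an instance for filtering. If the output format is immediately
232
* available or the first batch is already done, the instance is processed
* and pushed to the output right away; otherwise it is only buffered.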
234
* @param instance the input instance
235
* @return true if the filtered instance may now be
236
* collected with output().
237
* @throws IllegalStateException if no input structure has been defined
239
public boolean input(Instance instance) {
240
if (getInputFormat() == null)
241
throw new IllegalStateException("No input instance format defined");
249
if (hasImmediateOutputFormat() || isFirstBatchDone()) {
250
push(process((Instance) instance.copy()));
254
bufferInput(instance);
258
catch (Exception e) {
264
* Signify that this batch of input to the filter is finished. If
265
* the filter requires all instances prior to filtering, output()
266
* may now be called to retrieve the filtered instances. Any
267
* subsequent instances should be filtered based on the settings
268
* obtained from the first batch (unless the input format has been
269
* re-assigned or new options have been set).
271
* @return true if there are instances pending output
272
* @throws IllegalStateException if no input format has been set.
274
public boolean batchFinished() {
278
if (getInputFormat() == null)
279
throw new IllegalStateException("No input instance format defined");
282
inst = new Instances(getInputFormat());
285
if (!hasImmediateOutputFormat())
289
inst = process(inst);
291
// if output format hasn't been set yet, do it now
292
if (!hasImmediateOutputFormat() && !isFirstBatchDone())
293
setOutputFormat(inst);
295
// move data to the output
296
for (i = 0; i < inst.numInstances(); i++)
297
push(inst.instance(i));
299
catch (Exception e) {
304
m_FirstBatchDone = true;
306
return (numPendingOutput() != 0);