/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters.unsupervised.attribute;
26
import weka.core.Attribute;
27
import weka.core.Capabilities;
28
import weka.core.FastVector;
29
import weka.core.Instance;
30
import weka.core.Instances;
31
import weka.core.Option;
32
import weka.core.OptionHandler;
33
import weka.core.Range;
34
import weka.core.SparseInstance;
35
import weka.core.UnsupportedAttributeTypeException;
36
import weka.core.Utils;
37
import weka.core.Capabilities.Capability;
38
import weka.filters.Filter;
39
import weka.filters.StreamableFilter;
40
import weka.filters.UnsupervisedFilter;
42
import java.util.Enumeration;
43
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * This instance filter takes a range of N numeric attributes and replaces them with N-1 numeric attributes, the values of which are the difference between consecutive attribute values from the original instance. eg: <br/>
 * <br/>
 * Original attribute values<br/>
 * <br/>
 *    0.1, 0.2, 0.3, 0.1, 0.3<br/>
 * <br/>
 * New attribute values<br/>
 * <br/>
 *    0.1, 0.1, -0.2, 0.2<br/>
 * <br/>
 * The range of attributes used is taken in numeric order. That is, a range spec of 7-11,3-5 will use the attribute ordering 3,4,5,7,8,9,10,11 for the differences, NOT 7,8,9,10,11,3,4,5.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -R &lt;index1,index2-index4,...&gt;
 *  Specify list of columns to take the differences between.
 *  First and last are valid indexes.
 *  (default none)</pre>
 *
 <!-- options-end -->
 *
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision: 1.8 $
 */
public class FirstOrder
76
implements UnsupervisedFilter, StreamableFilter, OptionHandler {
78
/** for serialization */
79
static final long serialVersionUID = -7500464545400454179L;
81
/** Stores which columns to take differences between */
82
protected Range m_DeltaCols = new Range();
85
* Returns a string describing this filter
87
* @return a description of the filter suitable for
88
* displaying in the explorer/experimenter gui
90
public String globalInfo() {
92
return "This instance filter takes a range of N numeric attributes and replaces "
93
+ "them with N-1 numeric attributes, the values of which are the difference "
94
+ "between consecutive attribute values from the original instance. eg: \n\n"
95
+ "Original attribute values\n\n"
96
+ " 0.1, 0.2, 0.3, 0.1, 0.3\n\n"
97
+ "New attribute values\n\n"
98
+ " 0.1, 0.1, -0.2, 0.2\n\n"
99
+ "The range of attributes used is taken in numeric order. That is, a range "
100
+ "spec of 7-11,3-5 will use the attribute ordering 3,4,5,7,8,9,10,11 for the "
101
+ "differences, NOT 7,8,9,10,11,3,4,5.";
105
* Returns an enumeration describing the available options.
107
* @return an enumeration of all the available options.
109
public Enumeration listOptions() {
111
Vector newVector = new Vector(1);
113
newVector.addElement(new Option(
114
"\tSpecify list of columns to take the differences between.\n"
115
+ "\tFirst and last are valid indexes.\n"
116
+ "\t(default none)",
117
"R", 1, "-R <index1,index2-index4,...>"));
119
return newVector.elements();
124
* Parses a given list of options. <p/>
126
<!-- options-start -->
127
* Valid options are: <p/>
129
* <pre> -R <index1,index2-index4,...>
130
* Specify list of columns to take the differences between.
131
* First and last are valid indexes.
132
* (default none)</pre>
136
* @param options the list of options as an array of strings
137
* @throws Exception if an option is not supported
139
public void setOptions(String[] options) throws Exception {
141
String deltaList = Utils.getOption('R', options);
142
if (deltaList.length() != 0) {
143
setAttributeIndices(deltaList);
145
setAttributeIndices("");
148
if (getInputFormat() != null)
149
setInputFormat(getInputFormat());
154
* Gets the current settings of the filter.
156
* @return an array of strings suitable for passing to setOptions
158
public String [] getOptions() {
160
String [] options = new String [2];
163
if (!getAttributeIndices().equals("")) {
164
options[current++] = "-R"; options[current++] = getAttributeIndices();
167
while (current < options.length) {
168
options[current++] = "";
174
* Returns the Capabilities of this filter.
176
* @return the capabilities of this object
179
public Capabilities getCapabilities() {
180
Capabilities result = super.getCapabilities();
183
result.enableAllAttributes();
184
result.enable(Capability.MISSING_VALUES);
187
result.enableAllClasses();
188
result.enable(Capability.MISSING_CLASS_VALUES);
189
result.enable(Capability.NO_CLASS);
195
* Sets the format of the input instances.
197
* @param instanceInfo an Instances object containing the input instance
198
* structure (any instances contained in the object are ignored - only the
199
* structure is required).
200
* @return true if the outputFormat may be collected immediately
201
* @throws UnsupportedAttributeTypeException if any of the
202
* selected attributes are not numeric
203
* @throws Exception if only one attribute has been selected.
205
public boolean setInputFormat(Instances instanceInfo) throws Exception {
207
super.setInputFormat(instanceInfo);
209
m_DeltaCols.setUpper(getInputFormat().numAttributes() - 1);
210
int selectedCount = 0;
211
for (int i = getInputFormat().numAttributes() - 1; i >= 0; i--) {
212
if (m_DeltaCols.isInRange(i)) {
214
if (!getInputFormat().attribute(i).isNumeric()) {
215
throw new UnsupportedAttributeTypeException("Selected attributes must be all numeric");
219
if (selectedCount == 1) {
220
throw new Exception("Cannot select only one attribute.");
223
// Create the output buffer
224
FastVector newAtts = new FastVector();
225
boolean inRange = false;
226
String foName = null;
228
for(int i = 0; i < instanceInfo.numAttributes(); i++) {
229
if (m_DeltaCols.isInRange(i) && (i != instanceInfo.classIndex())) {
231
Attribute newAttrib = new Attribute(foName);
232
newAtts.addElement(newAttrib);
234
foName = instanceInfo.attribute(i).name();
235
foName = "'FO " + foName.replace('\'', ' ').trim() + '\'';
238
newAtts.addElement((Attribute)instanceInfo.attribute(i).copy());
239
if ((i == instanceInfo.classIndex()))
240
clsIndex = newAtts.size() - 1;
243
Instances data = new Instances(instanceInfo.relationName(), newAtts, 0);
244
data.setClassIndex(clsIndex);
245
setOutputFormat(data);
251
* Input an instance for filtering. Ordinarily the instance is processed
252
* and made available for output immediately. Some filters require all
253
* instances be read before producing output.
255
* @param instance the input instance
256
* @return true if the filtered instance may now be
257
* collected with output().
258
* @throws IllegalStateException if no input format has been defined.
260
public boolean input(Instance instance) {
262
if (getInputFormat() == null) {
263
throw new IllegalStateException("No input instance format defined");
270
Instances outputFormat = outputFormatPeek();
271
double[] vals = new double[outputFormat.numAttributes()];
272
boolean inRange = false;
273
double lastVal = Instance.missingValue();
275
for(i = 0, j = 0; j < outputFormat.numAttributes(); i++) {
276
if (m_DeltaCols.isInRange(i) && (i != instance.classIndex())) {
278
if (Instance.isMissingValue(lastVal) || instance.isMissing(i)) {
279
vals[j++] = Instance.missingValue();
281
vals[j++] = instance.value(i) - lastVal;
286
lastVal = instance.value(i);
288
vals[j++] = instance.value(i);
292
Instance inst = null;
293
if (instance instanceof SparseInstance) {
294
inst = new SparseInstance(instance.weight(), vals);
296
inst = new Instance(instance.weight(), vals);
298
inst.setDataset(getOutputFormat());
299
copyValues(inst, false, instance.dataset(), getOutputFormat());
300
inst.setDataset(getOutputFormat());
307
* Returns the tip text for this property
309
* @return tip text for this property suitable for
310
* displaying in the explorer/experimenter gui
312
public String attributeIndicesTipText() {
313
return "Specify range of attributes to act on."
314
+ " This is a comma separated list of attribute indices, with"
315
+ " \"first\" and \"last\" valid values. Specify an inclusive"
316
+ " range with \"-\". E.g: \"first-3,5,6-10,last\".";
320
* Get the current range selection
322
* @return a string containing a comma separated list of ranges
324
public String getAttributeIndices() {
326
return m_DeltaCols.getRanges();
330
* Set which attributes are to be deleted (or kept if invert is true)
332
* @param rangeList a string representing the list of attributes. Since
333
* the string will typically come from a user, attributes are indexed from
335
* eg: first-3,5,6-last
336
* @throws Exception if an invalid range list is supplied
338
public void setAttributeIndices(String rangeList) throws Exception {
340
m_DeltaCols.setRanges(rangeList);
344
* Set which attributes are to be deleted (or kept if invert is true)
346
* @param attributes an array containing indexes of attributes to select.
347
* Since the array will typically come from a program, attributes are indexed
349
* @throws Exception if an invalid set of ranges is supplied
351
public void setAttributeIndicesArray(int [] attributes) throws Exception {
353
setAttributeIndices(Range.indicesToRangeList(attributes));
357
* Main method for testing this class.
359
* @param argv should contain arguments to the filter: use -h for help
361
public static void main(String [] argv) {
362
runFilter(new FirstOrder(), argv);