/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 */

package weka.filters.unsupervised.attribute;
26
import weka.core.Attribute;
27
import weka.core.Capabilities;
28
import weka.core.Instance;
29
import weka.core.Instances;
30
import weka.core.Option;
31
import weka.core.OptionHandler;
32
import weka.core.Range;
33
import weka.core.SparseInstance;
34
import weka.core.Utils;
35
import weka.core.Capabilities.Capability;
36
import weka.filters.Filter;
37
import weka.filters.StreamableFilter;
38
import weka.filters.UnsupervisedFilter;
40
import java.util.Enumeration;
41
import java.util.Vector;
/**
 <!-- globalinfo-start -->
 * An instance filter that copies a range of attributes in the dataset. This is used in conjunction with other filters that overwrite attribute values during the course of their operation -- this filter allows the original attributes to be kept as well as the new attributes.
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -R &lt;index1,index2-index4,...&gt;
 *  Specify list of columns to copy. First and last are valid
 *  indexes. (default none)</pre>
 *
 * <pre> -V
 *  Invert matching sense (i.e. copy all non-specified columns)</pre>
 *
 <!-- options-end -->
 *
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision: 1.6 $
 */
66
implements UnsupervisedFilter, StreamableFilter, OptionHandler {
68
/** for serialization */
69
static final long serialVersionUID = -8543707493627441566L;
71
/** Stores which columns to copy */
72
protected Range m_CopyCols = new Range();
75
* Stores the indexes of the selected attributes in order, once the
78
protected int [] m_SelectedAttributes;
81
* Returns an enumeration describing the available options.
83
* @return an enumeration of all the available options.
85
public Enumeration listOptions() {
87
Vector newVector = new Vector(2);
89
newVector.addElement(new Option(
90
"\tSpecify list of columns to copy. First and last are valid\n"
91
+"\tindexes. (default none)",
92
"R", 1, "-R <index1,index2-index4,...>"));
93
newVector.addElement(new Option(
94
"\tInvert matching sense (i.e. copy all non-specified columns)",
97
return newVector.elements();
101
* Parses a given list of options. <p/>
103
<!-- options-start -->
104
* Valid options are: <p/>
106
* <pre> -R <index1,index2-index4,...>
107
* Specify list of columns to copy. First and last are valid
108
* indexes. (default none)</pre>
111
* Invert matching sense (i.e. copy all non-specified columns)</pre>
115
* @param options the list of options as an array of strings
116
* @throws Exception if an option is not supported
118
public void setOptions(String[] options) throws Exception {
120
String copyList = Utils.getOption('R', options);
121
if (copyList.length() != 0) {
122
setAttributeIndices(copyList);
124
setInvertSelection(Utils.getFlag('V', options));
126
if (getInputFormat() != null) {
127
setInputFormat(getInputFormat());
132
* Gets the current settings of the filter.
134
* @return an array of strings suitable for passing to setOptions
136
public String [] getOptions() {
138
String [] options = new String [3];
141
if (getInvertSelection()) {
142
options[current++] = "-V";
144
if (!getAttributeIndices().equals("")) {
145
options[current++] = "-R"; options[current++] = getAttributeIndices();
148
while (current < options.length) {
149
options[current++] = "";
155
* Returns the Capabilities of this filter.
157
* @return the capabilities of this object
160
public Capabilities getCapabilities() {
161
Capabilities result = super.getCapabilities();
164
result.enableAllAttributes();
165
result.enable(Capability.MISSING_VALUES);
168
result.enableAllClasses();
169
result.enable(Capability.MISSING_CLASS_VALUES);
170
result.enable(Capability.NO_CLASS);
176
* Sets the format of the input instances.
178
* @param instanceInfo an Instances object containing the input instance
179
* structure (any instances contained in the object are ignored - only the
180
* structure is required).
181
* @return true if the outputFormat may be collected immediately
182
* @throws Exception if a problem occurs setting the input format
184
public boolean setInputFormat(Instances instanceInfo) throws Exception {
186
super.setInputFormat(instanceInfo);
188
m_CopyCols.setUpper(instanceInfo.numAttributes() - 1);
190
// Create the output buffer
191
Instances outputFormat = new Instances(instanceInfo, 0);
192
m_SelectedAttributes = m_CopyCols.getSelection();
193
for (int i = 0; i < m_SelectedAttributes.length; i++) {
194
int current = m_SelectedAttributes[i];
195
// Create a copy of the attribute with a different name
196
Attribute origAttribute = instanceInfo.attribute(current);
197
outputFormat.insertAttributeAt((Attribute)origAttribute.copy(),
198
outputFormat.numAttributes());
199
outputFormat.renameAttribute(outputFormat.numAttributes() - 1,
200
"Copy of " + origAttribute.name());
205
int[] newIndices = new int[instanceInfo.numAttributes() + m_SelectedAttributes.length];
206
for (int i = 0; i < instanceInfo.numAttributes(); i++)
208
for (int i = 0; i < m_SelectedAttributes.length; i++)
209
newIndices[instanceInfo.numAttributes() + i] = m_SelectedAttributes[i];
210
initInputLocators(instanceInfo, newIndices);
212
setOutputFormat(outputFormat);
219
* Input an instance for filtering. Ordinarily the instance is processed
220
* and made available for output immediately. Some filters require all
221
* instances be read before producing output.
223
* @param instance the input instance
224
* @return true if the filtered instance may now be
225
* collected with output().
226
* @throws IllegalStateException if no input format has been defined.
228
public boolean input(Instance instance) {
230
if (getInputFormat() == null) {
231
throw new IllegalStateException("No input instance format defined");
238
double[] vals = new double[outputFormatPeek().numAttributes()];
239
for(int i = 0; i < getInputFormat().numAttributes(); i++) {
240
vals[i] = instance.value(i);
242
int j = getInputFormat().numAttributes();
243
for (int i = 0; i < m_SelectedAttributes.length; i++) {
244
int current = m_SelectedAttributes[i];
245
vals[i + j] = instance.value(current);
247
Instance inst = null;
248
if (instance instanceof SparseInstance) {
249
inst = new SparseInstance(instance.weight(), vals);
251
inst = new Instance(instance.weight(), vals);
254
inst.setDataset(getOutputFormat());
255
copyValues(inst, false, instance.dataset(), getOutputFormat());
256
inst.setDataset(getOutputFormat());
262
* Returns a string describing this filter
264
* @return a description of the filter suitable for
265
* displaying in the explorer/experimenter gui
267
public String globalInfo() {
269
return "An instance filter that copies a range of attributes in the"
270
+ " dataset. This is used in conjunction with other filters that"
271
+ " overwrite attribute values during the course of their operation --"
272
+ " this filter allows the original attributes to be kept as well"
273
+ " as the new attributes.";
277
* Returns the tip text for this property
279
* @return tip text for this property suitable for
280
* displaying in the explorer/experimenter gui
282
public String invertSelectionTipText() {
283
return "Sets copy selected vs unselected action."
284
+ " If set to false, only the specified attributes will be copied;"
285
+ " If set to true, non-specified attributes will be copied.";
289
* Get whether the supplied columns are to be removed or kept
291
* @return true if the supplied columns will be kept
293
public boolean getInvertSelection() {
295
return m_CopyCols.getInvert();
299
* Set whether selected columns should be removed or kept. If true the
300
* selected columns are kept and unselected columns are copied. If false
301
* selected columns are copied and unselected columns are kept. <br>
302
* Note: use this method before you call
303
* <code>setInputFormat(Instances)</code>, since the output format is
304
* determined in that method.
306
* @param invert the new invert setting
308
public void setInvertSelection(boolean invert) {
310
m_CopyCols.setInvert(invert);
314
* Get the current range selection
316
* @return a string containing a comma separated list of ranges
318
public String getAttributeIndices() {
320
return m_CopyCols.getRanges();
324
* Returns the tip text for this property
326
* @return tip text for this property suitable for
327
* displaying in the explorer/experimenter gui
329
public String attributeIndicesTipText() {
330
return "Specify range of attributes to act on."
331
+ " This is a comma separated list of attribute indices, with"
332
+ " \"first\" and \"last\" valid values. Specify an inclusive"
333
+ " range with \"-\". E.g: \"first-3,5,6-10,last\".";
337
* Set which attributes are to be copied (or kept if invert is true)
339
* @param rangeList a string representing the list of attributes. Since
340
* the string will typically come from a user, attributes are indexed from
342
* eg: first-3,5,6-last<br>
343
* Note: use this method before you call
344
* <code>setInputFormat(Instances)</code>, since the output format is
345
* determined in that method.
346
* @throws Exception if an invalid range list is supplied
348
public void setAttributeIndices(String rangeList) throws Exception {
350
m_CopyCols.setRanges(rangeList);
354
* Set which attributes are to be copied (or kept if invert is true)
356
* @param attributes an array containing indexes of attributes to select.
357
* Since the array will typically come from a program, attributes are indexed
359
* Note: use this method before you call
360
* <code>setInputFormat(Instances)</code>, since the output format is
361
* determined in that method.
362
* @throws Exception if an invalid set of ranges is supplied
364
public void setAttributeIndicesArray(int [] attributes) throws Exception {
366
setAttributeIndices(Range.indicesToRangeList(attributes));
370
* Main method for testing this class.
372
* @param argv should contain arguments to the filter: use -h for help
374
public static void main(String [] argv) {
375
runFilter(new Copy(), argv);