2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
24
package weka.filters.unsupervised.attribute;
26
import weka.core.Attribute;
27
import weka.core.Capabilities;
28
import weka.core.FastVector;
29
import weka.core.Instance;
30
import weka.core.Instances;
31
import weka.core.Option;
32
import weka.core.OptionHandler;
33
import weka.core.SingleIndex;
34
import weka.core.UnsupportedAttributeTypeException;
35
import weka.core.Utils;
36
import weka.core.Capabilities.Capability;
37
import weka.filters.Filter;
38
import weka.filters.StreamableFilter;
39
import weka.filters.UnsupervisedFilter;
41
import java.util.Enumeration;
42
import java.util.Vector;
/**
 <!-- globalinfo-start -->
 * Merges two values of a nominal attribute into one value.
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -C &lt;col&gt;
 *  Sets the attribute index (default last).</pre>
 *
 * <pre> -F &lt;value index&gt;
 *  Sets the first value's index (default first).</pre>
 *
 * <pre> -S &lt;value index&gt;
 *  Sets the second value's index (default last).</pre>
 *
 <!-- options-end -->
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision: 1.9 $
 */
67
public class MergeTwoValues
69
implements UnsupervisedFilter, StreamableFilter, OptionHandler {
71
/** for serialization */
72
static final long serialVersionUID = 2925048980504034018L;
74
/** The attribute's index setting. */
75
private SingleIndex m_AttIndex = new SingleIndex("last");
77
/** The first value's index setting. */
78
private SingleIndex m_FirstIndex = new SingleIndex("first");
80
/** The second value's index setting. */
81
private SingleIndex m_SecondIndex = new SingleIndex("last");
84
* Returns a string describing this filter
86
* @return a description of the filter suitable for
87
* displaying in the explorer/experimenter gui
89
public String globalInfo() {
91
return "Merges two values of a nominal attribute into one value.";
95
* Returns the Capabilities of this filter.
97
* @return the capabilities of this object
100
public Capabilities getCapabilities() {
101
Capabilities result = super.getCapabilities();
104
result.enableAllAttributes();
105
result.enable(Capability.MISSING_VALUES);
108
result.enableAllClasses();
109
result.enable(Capability.MISSING_CLASS_VALUES);
110
result.enable(Capability.NO_CLASS);
/**
 * Sets the format of the input instances.
 *
 * @param instanceInfo an Instances object containing the input
 * instance structure (any instances contained in the object are
 * ignored - only the structure is required).
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if the input format can't be set
 */
125
// Registers the input header with the superclass, then checks that the chosen
// attribute and the two value indices are usable for merging.
// NOTE(review): the throws clause/opening brace, the continuations of two
// exception messages, several closing braces and the end of this method are
// not visible in this copy of the file - restore them from the original
// before editing the logic.
public boolean setInputFormat(Instances instanceInfo)
128
super.setInputFormat(instanceInfo);
129
// Give each index setting its upper bound: the attribute index is bounded by
// the number of attributes, both value indices by the number of values of the
// chosen attribute. Presumably this is what lets SingleIndex resolve settings
// such as "last" - confirm against SingleIndex.
m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
130
m_FirstIndex.setUpper(instanceInfo.
131
attribute(m_AttIndex.getIndex()).numValues() - 1);
132
m_SecondIndex.setUpper(instanceInfo.
133
attribute(m_AttIndex.getIndex()).numValues() - 1);
134
// Only nominal attributes have values that can be merged.
if (!instanceInfo.attribute(m_AttIndex.getIndex()).isNominal()) {
135
throw new UnsupportedAttributeTypeException("Chosen attribute not nominal.");
137
// Merging needs at least two distinct values.
if (instanceInfo.attribute(m_AttIndex.getIndex()).numValues() < 2) {
138
throw new UnsupportedAttributeTypeException("Chosen attribute has less than " +
141
// The second index must be strictly greater than the first; equal or
// reversed indices are rejected rather than swapped.
if (m_SecondIndex.getIndex() <= m_FirstIndex.getIndex()) {
142
// XXX Maybe we should just swap the values??
143
throw new Exception("The second index has to be greater "+
/**
 * Input an instance for filtering. The instance is processed
 * and made available for output immediately.
 *
 * @param instance the input instance
 * @return true if the filtered instance may now be
 * collected with output().
 * @throws IllegalStateException if no input format has been set.
 */
159
// Remaps the chosen attribute's value on a copy of the incoming instance.
// NOTE(review): the lines between the format check and the copy, and the
// lines after the last setValue call (including the return), are not visible
// in this copy of the file.
public boolean input(Instance instance) {
161
// Filtering is impossible before setInputFormat has supplied a header.
if (getInputFormat() == null) {
162
throw new IllegalStateException("No input instance format defined");
168
// Work on a copy so the instance passed in by the caller is not modified.
Instance newInstance = (Instance)instance.copy();
169
// An instance carrying the second value is given the first value's index.
if ((int)newInstance.value(m_AttIndex.getIndex()) == m_SecondIndex.getIndex()) {
170
newInstance.setValue(m_AttIndex.getIndex(), (double)m_FirstIndex.getIndex());
172
// Values positioned after the second value move down by one, matching the
// output attribute built in setOutputFormat(), which omits the second value.
else if ((int)newInstance.value(m_AttIndex.getIndex()) > m_SecondIndex.getIndex()) {
173
newInstance.setValue(m_AttIndex.getIndex(),
174
newInstance.value(m_AttIndex.getIndex()) - 1);
181
* Returns an enumeration describing the available options.
183
* @return an enumeration of all the available options.
185
public Enumeration listOptions() {
187
Vector newVector = new Vector(3);
189
newVector.addElement(new Option(
190
"\tSets the attribute index (default last).",
191
"C", 1, "-C <col>"));
193
newVector.addElement(new Option(
194
"\tSets the first value's index (default first).",
195
"F", 1, "-F <value index>"));
197
newVector.addElement(new Option(
198
"\tSets the second value's index (default last).",
199
"S", 1, "-S <value index>"));
201
return newVector.elements();
206
* Parses a given list of options. <p/>
208
<!-- options-start -->
209
* Valid options are: <p/>
211
* <pre> -C <col>
212
* Sets the attribute index (default last).</pre>
214
* <pre> -F <value index>
215
* Sets the first value's index (default first).</pre>
217
* <pre> -S <value index>
218
* Sets the second value's index (default last).</pre>
222
* @param options the list of options as an array of strings
223
* @throws Exception if an option is not supported
225
public void setOptions(String[] options) throws Exception {
227
String attIndex = Utils.getOption('C', options);
228
if (attIndex.length() != 0) {
229
setAttributeIndex(attIndex);
231
setAttributeIndex("last");
234
String firstValIndex = Utils.getOption('F', options);
235
if (firstValIndex.length() != 0) {
236
setFirstValueIndex(firstValIndex);
238
setFirstValueIndex("first");
241
String secondValIndex = Utils.getOption('S', options);
242
if (secondValIndex.length() != 0) {
243
setSecondValueIndex(secondValIndex);
245
setSecondValueIndex("last");
248
if (getInputFormat() != null) {
249
setInputFormat(getInputFormat());
254
* Gets the current settings of the filter.
256
* @return an array of strings suitable for passing to setOptions
258
public String [] getOptions() {
260
String [] options = new String [6];
263
options[current++] = "-C";
264
options[current++] = "" + getAttributeIndex();
265
options[current++] = "-F";
266
options[current++] = "" + getFirstValueIndex();
267
options[current++] = "-S";
268
options[current++] = "" + getSecondValueIndex();
269
while (current < options.length) {
270
options[current++] = "";
276
* @return tip text for this property suitable for
277
* displaying in the explorer/experimenter gui
279
public String attributeIndexTipText() {
281
return "Sets which attribute to process. This "
282
+ "attribute must be nominal (\"first\" and \"last\" are valid values)";
286
* Get the index of the attribute used.
288
* @return the index of the attribute
290
public String getAttributeIndex() {
292
return m_AttIndex.getSingleIndex();
296
* Sets index of the attribute used.
298
* @param attIndex the index of the attribute
300
public void setAttributeIndex(String attIndex) {
302
m_AttIndex.setSingleIndex(attIndex);
306
* @return tip text for this property suitable for
307
* displaying in the explorer/experimenter gui
309
public String firstValueIndexTipText() {
311
return "Sets the first value to be merged. "
312
+ "(\"first\" and \"last\" are valid values)";
316
* Get the index of the first value used.
318
* @return the index of the first value
320
public String getFirstValueIndex() {
322
return m_FirstIndex.getSingleIndex();
326
* Sets index of the first value used.
328
* @param firstIndex the index of the first value
330
public void setFirstValueIndex(String firstIndex) {
332
m_FirstIndex.setSingleIndex(firstIndex);
336
* @return tip text for this property suitable for
337
* displaying in the explorer/experimenter gui
339
public String secondValueIndexTipText() {
341
return "Sets the second value to be merged. "
342
+ "(\"first\" and \"last\" are valid values)";
346
* Get the index of the second value used.
348
* @return the index of the second value
350
public String getSecondValueIndex() {
352
return m_SecondIndex.getSingleIndex();
356
* Sets index of the second value used.
358
* @param secondIndex the index of the second value
360
public void setSecondValueIndex(String secondIndex) {
362
m_SecondIndex.setSingleIndex(secondIndex);
/**
 * Set the output format. Builds a header from the input format in which
 * the two selected values of the chosen attribute are replaced by a single
 * merged value, and calls setOutputFormat(Instances) with it.
 */
370
// Builds the output header: every attribute is copied unchanged except the
// chosen one, which is rebuilt with the two selected values merged into one.
// NOTE(review): several lines of this method are not visible in this copy of
// the file (the declaration of newData, the bodies of the two
// "firstEndsWithPrime || secondEndsWithPrime" checks, the end arguments of the
// substring calls, whatever is appended between the two value names, the last
// constructor argument of Instances, and the else/closing-brace lines).
// Restore them from the original before changing any logic here.
private void setOutputFormat() {
373
FastVector newAtts, newVals;
374
// Record whether each selected value's label ends with a single quote.
boolean firstEndsWithPrime = false,
375
secondEndsWithPrime = false;
376
// Accumulates the label of the merged value.
StringBuffer text = new StringBuffer();
378
// Compute new attributes
380
newAtts = new FastVector(getInputFormat().numAttributes());
381
for (int j = 0; j < getInputFormat().numAttributes(); j++) {
382
Attribute att = getInputFormat().attribute(j);
383
// Attributes other than the chosen one are carried over as copies.
if (j != m_AttIndex.getIndex()) {
384
newAtts.addElement(att.copy());
389
// From here on the chosen attribute is handled: first detect labels that
// end with a single quote.
if (att.value(m_FirstIndex.getIndex()).endsWith("'")) {
390
firstEndsWithPrime = true;
392
if (att.value(m_SecondIndex.getIndex()).endsWith("'")) {
393
secondEndsWithPrime = true;
395
if (firstEndsWithPrime || secondEndsWithPrime) {
398
// A label ending with a quote is appended starting at its second character.
// NOTE(review): substring(1, ...) assumes the label also STARTS with a quote
// and is longer than one character; only endsWith("'") is checked - confirm
// whether unbalanced or single-character labels can reach this point.
if (firstEndsWithPrime) {
399
text.append(((String)att.value(m_FirstIndex.getIndex())).
400
substring(1, ((String)att.value(m_FirstIndex.getIndex())).
403
text.append((String)att.value(m_FirstIndex.getIndex()));
406
// Same treatment for the second selected label.
if (secondEndsWithPrime) {
407
text.append(((String)att.value(m_SecondIndex.getIndex())).
408
substring(1, ((String)att.value(m_SecondIndex.getIndex())).
411
text.append((String)att.value(m_SecondIndex.getIndex()));
413
if (firstEndsWithPrime || secondEndsWithPrime) {
417
// Compute list of attribute values
419
// One value fewer than before: the merged label takes the first value's
// position and the second value is left out.
newVals = new FastVector(att.numValues() - 1);
420
for (int i = 0; i < att.numValues(); i++) {
421
if (i == m_FirstIndex.getIndex()) {
422
newVals.addElement(text.toString());
423
} else if (i != m_SecondIndex.getIndex()) {
424
newVals.addElement(att.value(i));
427
// The rebuilt attribute keeps the original attribute's name.
newAtts.addElement(new Attribute(att.name(), newVals));
431
// Construct new header
433
newData = new Instances(getInputFormat().relationName(), newAtts,
435
// The class index is taken over unchanged from the input format.
newData.setClassIndex(getInputFormat().classIndex());
436
setOutputFormat(newData);
440
* Main method for testing this class.
442
* @param argv should contain arguments to the filter:
445
public static void main(String [] argv) {
446
runFilter(new MergeTwoValues(), argv);