/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/*
 *    StringToNominal.java
 *    Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
 */
24
package weka.filters.unsupervised.attribute;
26
import weka.core.Attribute;
27
import weka.core.Capabilities;
28
import weka.core.FastVector;
29
import weka.core.Instance;
30
import weka.core.Instances;
31
import weka.core.Option;
32
import weka.core.OptionHandler;
33
import weka.core.SingleIndex;
34
import weka.core.UnsupportedAttributeTypeException;
35
import weka.core.Utils;
36
import weka.core.Capabilities.Capability;
37
import weka.filters.Filter;
38
import weka.filters.UnsupervisedFilter;
40
import java.util.Enumeration;
41
import java.util.Vector;
/**
 <!-- globalinfo-start -->
 * Converts a string attribute (i.e. unspecified number of values) to nominal (i.e. set number of values). You should ensure that all string values that will appear are represented in the first batch of the data.
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -C &lt;col&gt;
 *  Sets the attribute index (default last).</pre>
 *
 <!-- options-end -->
 *
 * @author Len Trigg (len@reeltwo.com)
 * @version $Revision: 1.8 $
 */
60
public class StringToNominal
62
implements UnsupervisedFilter, OptionHandler {
64
/** for serialization */
65
static final long serialVersionUID = 8655492378380068939L;
67
/** The attribute's index setting. */
68
private SingleIndex m_AttIndex = new SingleIndex("last");
71
* Returns a string describing this filter
73
* @return a description of the filter suitable for
74
* displaying in the explorer/experimenter gui
76
public String globalInfo() {
78
return "Converts a string attribute (i.e. unspecified number of values) to nominal "
79
+ "(i.e. set number of values). You should ensure that all string values that "
80
+ "will appear are represented in the first batch of the data.";
84
* Returns the Capabilities of this filter.
86
* @return the capabilities of this object
89
public Capabilities getCapabilities() {
90
Capabilities result = super.getCapabilities();
93
result.enableAllAttributes();
94
result.enable(Capability.MISSING_VALUES);
97
result.enableAllClasses();
98
result.enable(Capability.MISSING_CLASS_VALUES);
99
result.enable(Capability.NO_CLASS);
105
* Sets the format of the input instances.
107
* @param instanceInfo an Instances object containing the input
108
* instance structure (any instances contained in the object are
109
* ignored - only the structure is required).
110
* @return true if the outputFormat may be collected immediately.
111
* @throws UnsupportedAttributeTypeException if the selected attribute
112
* a string attribute.
113
* @throws Exception if the input format can't be set
116
public boolean setInputFormat(Instances instanceInfo)
119
super.setInputFormat(instanceInfo);
120
m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
121
if (!instanceInfo.attribute(m_AttIndex.getIndex()).isString()) {
122
throw new UnsupportedAttributeTypeException("Chosen attribute is not of "
129
* Input an instance for filtering. The instance is processed
130
* and made available for output immediately.
132
* @param instance the input instance.
133
* @return true if the filtered instance may now be
134
* collected with output().
135
* @throws IllegalStateException if no input structure has been defined.
137
public boolean input(Instance instance) {
139
if (getInputFormat() == null) {
140
throw new IllegalStateException("No input instance format defined");
147
if (isOutputFormatDefined()) {
148
Instance newInstance = (Instance)instance.copy();
153
bufferInput(instance);
159
* Signifies that this batch of input to the filter is finished. If the
160
* filter requires all instances prior to filtering, output() may now
161
* be called to retrieve the filtered instances.
163
* @return true if there are instances pending output.
164
* @throws IllegalStateException if no input structure has been defined.
166
public boolean batchFinished() {
168
if (getInputFormat() == null) {
169
throw new IllegalStateException("No input instance format defined");
171
if (!isOutputFormatDefined()) {
175
// Convert pending input instances
176
for(int i = 0; i < getInputFormat().numInstances(); i++) {
177
push((Instance) getInputFormat().instance(i).copy());
183
return (numPendingOutput() != 0);
188
* Returns an enumeration describing the available options.
190
* @return an enumeration of all the available options.
192
public Enumeration listOptions() {
194
Vector newVector = new Vector(1);
196
newVector.addElement(new Option(
197
"\tSets the attribute index (default last).",
198
"C", 1, "-C <col>"));
200
return newVector.elements();
205
* Parses a given list of options. <p/>
207
<!-- options-start -->
208
* Valid options are: <p/>
210
* <pre> -C <col>
211
* Sets the attribute index (default last).</pre>
215
* @param options the list of options as an array of strings
216
* @throws Exception if an option is not supported
218
public void setOptions(String[] options) throws Exception {
220
String attIndex = Utils.getOption('C', options);
221
if (attIndex.length() != 0) {
222
setAttributeIndex(attIndex);
224
setAttributeIndex("last");
227
if (getInputFormat() != null) {
228
setInputFormat(getInputFormat());
233
* Gets the current settings of the filter.
235
* @return an array of strings suitable for passing to setOptions
237
public String [] getOptions() {
239
String [] options = new String [6];
242
options[current++] = "-C";
243
options[current++] = "" + (getAttributeIndex());
245
while (current < options.length) {
246
options[current++] = "";
252
* @return tip text for this property suitable for
253
* displaying in the explorer/experimenter gui
255
public String attributeIndexTipText() {
257
return "Sets which attribute to process. This attribute "
258
+ "must be a string attribute (\"first\" and \"last\" are valid values)";
262
* Get the index of the attribute used.
264
* @return the index of the attribute
266
public String getAttributeIndex() {
268
return m_AttIndex.getSingleIndex();
272
* Sets index of the attribute used.
274
* @param attIndex the index of the attribute
276
public void setAttributeIndex(String attIndex) {
278
m_AttIndex.setSingleIndex(attIndex);
282
* Set the output format. Takes the current average class values
283
* and m_InputFormat and calls setOutputFormat(Instances)
286
private void setOutputFormat() {
289
FastVector newAtts, newVals;
291
// Compute new attributes
293
newAtts = new FastVector(getInputFormat().numAttributes());
294
for (int j = 0; j < getInputFormat().numAttributes(); j++) {
295
Attribute att = getInputFormat().attribute(j);
296
if (j != m_AttIndex.getIndex()) {
298
// We don't have to copy the attribute because the
299
// attribute index remains unchanged.
300
newAtts.addElement(att);
303
// Compute list of attribute values
304
newVals = new FastVector(att.numValues());
305
for (int i = 0; i < att.numValues(); i++) {
306
newVals.addElement(att.value(i));
308
newAtts.addElement(new Attribute(att.name(), newVals));
312
// Construct new header
313
newData = new Instances(getInputFormat().relationName(), newAtts, 0);
314
newData.setClassIndex(getInputFormat().classIndex());
315
setOutputFormat(newData);
319
* Main method for testing this class.
321
* @param argv should contain arguments to the filter:
324
public static void main(String [] argv) {
325
runFilter(new StringToNominal(), argv);