/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * MultiInstanceToPropositional.java
 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters.unsupervised.attribute;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RelationalLocator;
import weka.core.SelectedTag;
import weka.core.StringLocator;
import weka.core.Tag;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

import java.util.Enumeration;
import java.util.Vector;

<!-- globalinfo-start -->
46
* Converts the multi-instance dataset into single instance dataset so that the Nominalize, Standardize and other type of filters or transformation can be applied to these data for the further preprocessing.<br/>
47
* Note: the first attribute of the converted dataset is a nominal attribute and refers to the bagId.
49
<!-- globalinfo-end -->
51
<!-- options-start -->
52
* Valid options are: <p/>
54
* <pre> -A <num>
55
* The type of weight setting for each prop. instance:
56
* 0.weight = original single bag weight /Total number of
57
* prop. instance in the corresponding bag;
59
* 2.weight = 1.0/Total number of prop. instance in the
61
* 3. weight = Total number of prop. instance / (Total number
62
* of bags * Total number of prop. instance in the
68
* @author Lin Dong (ld21@cs.waikato.ac.nz)
69
* @version $Revision: 1.6 $
70
* @see PropositionalToMultiInstance
72
public class MultiInstanceToPropositional
74
implements OptionHandler, UnsupervisedFilter, MultiInstanceCapabilitiesHandler {
76
/** for serialization */
77
private static final long serialVersionUID = -4102847628883002530L;
79
/** the total number of bags */
80
protected int m_NumBags;
82
/** Indices of string attributes in the bag */
83
protected StringLocator m_BagStringAtts = null;
85
/** Indices of relational attributes in the bag */
86
protected RelationalLocator m_BagRelAtts = null;
88
/** the total number of the propositional instance in the dataset */
89
protected int m_NumInstances;
91
/** weight method: keep the weight to be the same as the original value */
92
public static final int WEIGHTMETHOD_ORIGINAL = 0;
93
/** weight method: 1.0 */
94
public static final int WEIGHTMETHOD_1 = 1;
95
/** weight method: 1.0 / Total # of prop. instance in the corresp. bag */
96
public static final int WEIGHTMETHOD_INVERSE1 = 2;
97
/** weight method: Total # of prop. instance / (Total # of bags * Total # of prop. instance in the corresp. bag) */
98
public static final int WEIGHTMETHOD_INVERSE2 = 3;
100
public static final Tag[] TAGS_WEIGHTMETHOD = {
101
new Tag(WEIGHTMETHOD_ORIGINAL,
102
"keep the weight to be the same as the original value"),
103
new Tag(WEIGHTMETHOD_1,
105
new Tag(WEIGHTMETHOD_INVERSE1,
106
"1.0 / Total # of prop. instance in the corresp. bag"),
107
new Tag(WEIGHTMETHOD_INVERSE2,
108
"Total # of prop. instance / (Total # of bags * Total # of prop. instance in the corresp. bag)")
111
/** the propositional instance weight setting method */
112
protected int m_WeightMethod = WEIGHTMETHOD_INVERSE2;
115
* Returns an enumeration describing the available options
117
* @return an enumeration of all the available options
119
public Enumeration listOptions() {
120
Vector result = new Vector();
122
result.addElement(new Option(
123
"\tThe type of weight setting for each prop. instance:\n"
124
+ "\t0.weight = original single bag weight /Total number of\n"
125
+ "\tprop. instance in the corresponding bag;\n"
126
+ "\t1.weight = 1.0;\n"
127
+ "\t2.weight = 1.0/Total number of prop. instance in the \n"
128
+ "\t\tcorresponding bag; \n"
129
+ "\t3. weight = Total number of prop. instance / (Total number \n"
130
+ "\t\tof bags * Total number of prop. instance in the \n"
131
+ "\t\tcorresponding bag). \n"
133
"A", 1, "-A <num>"));
135
return result.elements();
140
* Parses a given list of options. <p/>
142
<!-- options-start -->
143
* Valid options are: <p/>
145
* <pre> -A <num>
146
* The type of weight setting for each prop. instance:
147
* 0.weight = original single bag weight /Total number of
148
* prop. instance in the corresponding bag;
150
* 2.weight = 1.0/Total number of prop. instance in the
152
* 3. weight = Total number of prop. instance / (Total number
153
* of bags * Total number of prop. instance in the
154
* corresponding bag).
159
* @param options the list of options as an array of strings
160
* @throws Exception if an option is not supported
162
public void setOptions(String[] options) throws Exception {
163
String weightString = Utils.getOption('A', options);
164
if (weightString.length() != 0) {
166
new SelectedTag(Integer.parseInt(weightString), TAGS_WEIGHTMETHOD));
169
new SelectedTag(WEIGHTMETHOD_INVERSE2, TAGS_WEIGHTMETHOD));
174
* Gets the current settings of the classifier.
176
* @return an array of strings suitable for passing to setOptions
178
public String [] getOptions() {
181
result = new Vector();
184
result.add("" + m_WeightMethod);
186
return (String[]) result.toArray(new String[result.size()]);
190
* Returns the tip text for this property
192
* @return tip text for this property suitable for
193
* displaying in the explorer/experimenter gui
195
public String weightMethodTipText() {
196
return "The method used for weighting the instances.";
200
* The new method for weighting the instances.
202
* @param method the new method
204
public void setWeightMethod(SelectedTag method){
205
if (method.getTags() == TAGS_WEIGHTMETHOD)
206
m_WeightMethod = method.getSelectedTag().getID();
210
* Returns the current weighting method for instances.
212
* @return the current weight method
214
public SelectedTag getWeightMethod(){
215
return new SelectedTag(m_WeightMethod, TAGS_WEIGHTMETHOD);
219
* Returns a string describing this filter
221
* @return a description of the filter suitable for
222
* displaying in the explorer/experimenter gui
224
public String globalInfo() {
227
"Converts the multi-instance dataset into single instance dataset "
228
+ "so that the Nominalize, Standardize and other type of filters or transformation "
229
+ " can be applied to these data for the further preprocessing.\n"
230
+ "Note: the first attribute of the converted dataset is a nominal "
231
+ "attribute and refers to the bagId.";
235
* Returns the Capabilities of this filter.
237
* @return the capabilities of this object
240
public Capabilities getCapabilities() {
241
Capabilities result = super.getCapabilities();
244
result.disableAllAttributes();
245
result.enable(Capability.NOMINAL_ATTRIBUTES);
246
result.enable(Capability.RELATIONAL_ATTRIBUTES);
247
result.enable(Capability.MISSING_VALUES);
250
result.enableAllClasses();
251
result.enable(Capability.MISSING_CLASS_VALUES);
254
result.enable(Capability.ONLY_MULTIINSTANCE);
260
* Returns the capabilities of this multi-instance filter for the
261
* relational data (i.e., the bags).
263
* @return the capabilities of this object
266
public Capabilities getMultiInstanceCapabilities() {
267
Capabilities result = new Capabilities(this);
270
result.enableAllAttributes();
271
result.disable(Capability.RELATIONAL_ATTRIBUTES);
272
result.enable(Capability.MISSING_VALUES);
275
result.enableAllClasses();
276
result.enable(Capability.MISSING_CLASS_VALUES);
277
result.enable(Capability.NO_CLASS);
280
result.setMinimumNumberInstances(0);
286
* Sets the format of the input instances.
288
* @param instanceInfo an Instances object containing the input
289
* instance structure (any instances contained in the object are
290
* ignored - only the structure is required).
291
* @return true if the outputFormat may be collected immediately
292
* @throws Exception if the input format can't be set
295
public boolean setInputFormat(Instances instanceInfo)
298
if (instanceInfo.attribute(1).type()!=Attribute.RELATIONAL) {
299
throw new Exception("Can only handle relational-valued attribute!");
301
super.setInputFormat(instanceInfo);
303
m_NumBags = instanceInfo.numInstances();
305
for (int i=0; i<m_NumBags; i++)
306
m_NumInstances += instanceInfo.instance(i).relationalValue(1).numInstances();
308
Attribute classAttribute = (Attribute) instanceInfo.classAttribute().copy();
309
Attribute bagIndex = (Attribute) instanceInfo.attribute(0).copy();
311
/* create a new output format (propositional instance format) */
312
Instances newData = instanceInfo.attribute(1).relation().stringFreeStructure();
313
newData.insertAttributeAt(bagIndex, 0);
314
newData.insertAttributeAt(classAttribute, newData.numAttributes());
315
newData.setClassIndex(newData.numAttributes() - 1);
317
super.setOutputFormat(newData.stringFreeStructure());
319
m_BagStringAtts = new StringLocator(instanceInfo.attribute(1).relation().stringFreeStructure());
320
m_BagRelAtts = new RelationalLocator(instanceInfo.attribute(1).relation().stringFreeStructure());
327
* Input an instance for filtering. Filter requires all
328
* training instances be read before producing output.
330
* @param instance the input instance
331
* @return true if the filtered instance may now be
332
* collected with output().
333
* @throws IllegalStateException if no input format has been set.
335
public boolean input(Instance instance) {
337
if (getInputFormat() == null) {
338
throw new IllegalStateException("No input instance format defined");
345
convertInstance(instance);
351
* Signify that this batch of input to the filter is finished.
352
* If the filter requires all instances prior to filtering,
353
* output() may now be called to retrieve the filtered instances.
355
* @return true if there are instances pending output
356
* @throws IllegalStateException if no input structure has been defined
358
public boolean batchFinished() {
360
if (getInputFormat() == null) {
361
throw new IllegalStateException("No input instance format defined");
364
Instances input = getInputFormat();
366
// Convert pending input instances
367
for(int i = 0; i < input.numInstances(); i++) {
368
convertInstance(input.instance(i));
375
return (numPendingOutput() != 0);
379
* Convert a single bag over. The converted instances is
380
* added to the end of the output queue.
382
* @param bag the bag to convert
384
private void convertInstance(Instance bag) {
386
Instances data = bag.relationalValue(1);
387
int bagSize = data.numInstances();
388
double bagIndex = bag.value(0);
389
double classValue = bag.classValue();
391
//the proper weight for each instance in a bag
392
if (m_WeightMethod == WEIGHTMETHOD_1)
394
else if (m_WeightMethod == WEIGHTMETHOD_INVERSE1)
395
weight = (double) 1.0 / bagSize;
396
else if (m_WeightMethod == WEIGHTMETHOD_INVERSE2)
397
weight=(double) m_NumInstances / (m_NumBags * bagSize);
399
weight = (double) bag.weight() / bagSize;
402
Instances outputFormat = getOutputFormat().stringFreeStructure();
404
for (int i = 0; i < bagSize; i++) {
405
newInst = new Instance (outputFormat.numAttributes());
406
newInst.setDataset(outputFormat);
407
newInst.setValue(0,bagIndex);
408
if (!bag.classIsMissing())
409
newInst.setClassValue(classValue);
410
// copy the attribute values to new instance
411
for (int j = 1; j < outputFormat.numAttributes() - 1; j++){
412
newInst.setValue(j,data.instance(i).value(j - 1));
415
newInst.setWeight(weight);
417
// copy strings/relational values
418
StringLocator.copyStringValues(
420
data, m_BagStringAtts,
421
outputFormat, m_OutputStringAtts);
423
RelationalLocator.copyRelationalValues(
426
outputFormat, m_OutputRelAtts);
433
* Main method for running this filter.
435
* @param args should contain arguments to the filter:
438
public static void main(String[] args) {
439
runFilter(new MultiInstanceToPropositional(), args);