2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
* PropositionalToMultiInstance.java
19
* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
23
package weka.filters.unsupervised.attribute;
25
import weka.core.Attribute;
26
import weka.core.Capabilities;
27
import weka.core.FastVector;
28
import weka.core.Instance;
29
import weka.core.Instances;
30
import weka.core.Option;
31
import weka.core.OptionHandler;
32
import weka.core.RelationalLocator;
33
import weka.core.StringLocator;
34
import weka.core.Utils;
35
import weka.core.Capabilities.Capability;
36
import weka.filters.Filter;
37
import weka.filters.UnsupervisedFilter;
39
import java.util.Enumeration;
40
import java.util.Random;
41
import java.util.Vector;
44
<!-- globalinfo-start -->
45
* Converts the propositional instance dataset into multi-instance dataset (with relational attribute). When normalize or standardize a multi-instance dataset, a MIToSingleInstance filter can be applied first to convert the multi-instance dataset into propositional instance dataset. After normalization or standardization, may use this PropositionalToMultiInstance filter to convert the data back to multi-instance format.<br/>
47
* Note: the first attribute of the original propositional instance dataset must be a nominal attribute which is expected to be bagId attribute.
49
<!-- globalinfo-end -->
51
<!-- options-start -->
52
* Valid options are: <p/>
54
* <pre> -S <num>
55
* The seed for the randomization of the order of bags. (default 1)</pre>
58
* Randomizes the order of the produced bags after the generation. (default off)</pre>
62
* @author Lin Dong (ld21@cs.waikato.ac.nz)
63
* @version $Revision: 1.6 $
64
* @see MultiInstanceToPropositional
66
public class PropositionalToMultiInstance
68
implements OptionHandler, UnsupervisedFilter {
70
/** for serialization */
71
private static final long serialVersionUID = 5825873573912102482L;
73
/** the seed for randomizing, default is 1 */
74
protected int m_Seed = 1;
76
/** whether to randomize the output data */
77
protected boolean m_Randomize = false;
79
/** Indices of string attributes in the bag */
80
protected StringLocator m_BagStringAtts = null;
82
/** Indices of relational attributes in the bag */
83
protected RelationalLocator m_BagRelAtts = null;
86
* Returns a string describing this filter
88
* @return a description of the filter suitable for
89
* displaying in the explorer/experimenter gui
91
// Produces the description text for this filter, assembled from the string
// literals below; the wording matches the globalinfo section of the class
// comment. The literal fragments are joined with '+', and "\n\n" separates
// the main description from the note about the bagId attribute.
public String globalInfo() {
93
"Converts the propositional instance dataset into multi-instance "
94
+ "dataset (with relational attribute). When normalize or standardize a "
95
+ "multi-instance dataset, a MIToSingleInstance filter can be applied "
96
+ "first to convert the multi-instance dataset into propositional "
97
+ "instance dataset. After normalization or standardization, may use "
98
+ "this PropositionalToMultiInstance filter to convert the data back to "
99
+ "multi-instance format.\n\n"
100
+ "Note: the first attribute of the original propositional instance "
101
+ "dataset must be a nominal attribute which is expected to be bagId "
107
* Returns an enumeration describing the available options
109
* @return an enumeration of all the available options
111
// Builds the list of command-line options as a raw Vector of Option objects
// and hands back its Enumeration.
public Enumeration listOptions() {
112
Vector result = new Vector();
114
// Seed option: the arguments after the description are the option name "S",
// the value 1 (presumably the number of arguments the option takes - confirm
// against weka.core.Option) and the synopsis "-S <num>".
result.addElement(new Option(
115
"\tThe seed for the randomization of the order of bags."
117
"S", 1, "-S <num>"));
119
// Second option: its description concerns randomizing the order of the
// produced bags; its name/synopsis arguments are not visible in this view.
result.addElement(new Option(
120
"\tRandomizes the order of the produced bags after the generation."
124
return result.elements();
129
* Parses a given list of options. <p/>
131
<!-- options-start -->
132
* Valid options are: <p/>
134
* <pre> -S <num>
135
* The seed for the randomization of the order of bags. (default 1)</pre>
138
* Randomizes the order of the produced bags after the generation. (default off)</pre>
142
* @param options the list of options as an array of strings
143
* @throws Exception if an option is not supported
145
// Applies command-line style options to this filter: the 'R' flag controls
// randomization and the 'S' option carries the seed.
public void setOptions(String[] options) throws Exception {
148
// 'R' is read as a boolean flag and passed straight to setRandomize.
setRandomize(Utils.getFlag('R', options));
150
// 'S' is read as a string; when it is non-empty it is parsed with
// Integer.parseInt (which throws NumberFormatException for non-numeric text)
// and passed to setSeed.
// NOTE(review): what happens when -S is absent is not visible in this view.
tmpStr = Utils.getOption('S', options);
151
if (tmpStr.length() != 0)
152
setSeed(Integer.parseInt(tmpStr));
158
* Gets the current settings of the filter.
160
* @return an array of strings suitable for passing to setOptions
162
// Collects the current option settings in a Vector and returns them as a
// String array.
public String [] getOptions() {
165
result = new Vector();
168
// The seed is converted to text by concatenating it with an empty string.
result.add("" + getSeed());
173
// toArray is given an array sized to the vector, so the returned array holds
// exactly the collected entries; the cast is needed because Vector is raw.
return (String[]) result.toArray(new String[result.size()]);
177
* Returns the tip text for this property
179
* @return tip text for this property suitable for
180
* displaying in the explorer/experimenter gui
182
// Returns a fixed tooltip string describing the seed property.
public String seedTipText() {
183
return "The random seed used by the random number generator";
187
* Sets the new seed for randomizing the order of the generated data
189
* @param value the new seed value
191
public void setSeed(int value) {
196
* Returns the current seed value for randomizing the order of the generated
199
* @return the current seed value
201
public int getSeed() {
206
* Sets whether the order of the generated data is randomized
208
* @param value whether to randomize or not
210
public void setRandomize(boolean value) {
215
* Gets whether the order of the generated data is randomized
217
* @return true if the order is randomized
219
public boolean getRandomize() {
224
* Returns the tip text for this property
226
* @return tip text for this property suitable for
227
* displaying in the explorer/experimenter gui
229
// Returns a fixed tooltip string describing the randomize property.
public String randomizeTipText() {
230
return "Whether the order of the generated data is randomized.";
234
* Returns the Capabilities of this filter.
236
* @return the capabilities of this object
239
// Declares what kind of data this filter accepts. Starts from the
// capabilities reported by the superclass and enables further ones on top.
public Capabilities getCapabilities() {
240
Capabilities result = super.getCapabilities();
243
// Attribute-related capabilities: nominal, numeric, date and string
// attributes, plus missing attribute values.
result.enable(Capability.NOMINAL_ATTRIBUTES);
244
result.enable(Capability.NUMERIC_ATTRIBUTES);
245
result.enable(Capability.DATE_ATTRIBUTES);
246
result.enable(Capability.STRING_ATTRIBUTES);
247
result.enable(Capability.MISSING_VALUES);
250
// Class-related capabilities: every class type, missing class values, and
// data without a class attribute.
result.enableAllClasses();
251
result.enable(Capability.MISSING_CLASS_VALUES);
252
result.enable(Capability.NO_CLASS);
258
* Sets the format of the input instances.
260
* @param instanceInfo an Instances object containing the input
261
* instance structure (any instances contained in the object are
262
* ignored - only the structure is required).
263
* @return true if the outputFormat may be collected immediately
264
* @throws Exception if the input format can't be set
267
// Validates the incoming header and derives the multi-instance output header
// from it. The output has exactly three attributes, in this order: a copy of
// the bag-id attribute, a relation-valued attribute named "bag" holding the
// remaining attributes, and a copy of the class attribute.
public boolean setInputFormat(Instances instanceInfo)
270
// The first attribute is used as the bag id and must be nominal; otherwise a
// plain Exception is thrown.
if (instanceInfo.attribute(0).type()!= Attribute.NOMINAL) {
271
throw new Exception("The first attribute type of the original propositional instance dataset must be Nominal!");
273
super.setInputFormat(instanceInfo);
275
/* create a new output format (multi-instance format) */
// newData starts as a copy of the input header and is trimmed below until it
// only describes the attributes that go inside a bag.
Instances newData = instanceInfo.stringFreeStructure();
277
Attribute attBagIndex = (Attribute) newData.attribute(0).copy();
278
// NOTE(review): classAttribute() is called unconditionally although
// getCapabilities() enables NO_CLASS - confirm what happens when the input
// has no class index set.
Attribute attClass = (Attribute) newData.classAttribute().copy();
279
// remove the bagIndex attribute
280
newData.deleteAttributeAt(0);
281
// remove the class attribute
// The class index is cleared before the deletion (presumably because the
// current class attribute cannot be deleted - verify against
// Instances.deleteAttributeAt). The attribute removed is the LAST one, so
// this assumes the class attribute is the last attribute - TODO confirm.
282
newData.setClassIndex(-1);
283
newData.deleteAttributeAt(newData.numAttributes() - 1);
285
FastVector attInfo = new FastVector(3);
286
attInfo.addElement(attBagIndex);
287
attInfo.addElement(new Attribute("bag", newData)); // relation-valued attribute
288
attInfo.addElement(attClass);
289
Instances data = new Instances("Multi-Instance-Dataset", attInfo, 0);
290
// The class is the last of the three output attributes.
data.setClassIndex(data.numAttributes() - 1);
292
super.setOutputFormat(data.stringFreeStructure());
294
// Locators over the header of the relational "bag" attribute (index 1); they
// are passed to the copy helpers in addBag as the destination locators.
m_BagStringAtts = new StringLocator(data.attribute(1).relation());
295
m_BagRelAtts = new RelationalLocator(data.attribute(1).relation());
301
* adds a new bag out of the given data and adds it to the output
303
* @param input the input dataset
304
* @param output the dataset this bag is added to
305
* @param bagInsts the instances in this bag
306
* @param bagIndex the bagIndex of this bag
307
* @param classValue the associated class value
308
* @param bagWeight the weight of the bag
310
// Turns the instances collected in bagInsts into one bag instance of the
// output format. The body below reads the names input, output, bagInsts,
// bagIndex, classValue and bagWeight (see the Javadoc parameters above).
protected void addBag(
318
// copy strings/relational values
// Done per instance of the bag: values are copied from the input header
// (located through m_InputRelAtts / m_InputStringAtts) into the bag header
// (located through m_BagRelAtts / m_BagStringAtts).
319
for (int i = 0; i < bagInsts.numInstances(); i++) {
320
RelationalLocator.copyRelationalValues(
321
bagInsts.instance(i), false,
322
input, m_InputRelAtts,
323
bagInsts, m_BagRelAtts);
325
StringLocator.copyStringValues(
326
bagInsts.instance(i), false,
327
input, m_InputStringAtts,
328
bagInsts, m_BagStringAtts);
331
// The bag's instances are registered with the relational attribute (index 1
// of the output); the int it returns is what gets stored as that attribute's
// value in the new instance.
int value = output.attribute(1).addRelation(bagInsts);
332
Instance newBag = new Instance(output.numAttributes());
// Attribute layout matches the header built in setInputFormat:
// 0 = bag id, 1 = relational bag, 2 = class.
333
newBag.setValue(0, bagIndex);
334
newBag.setValue(2, classValue);
335
newBag.setValue(1, value);
336
newBag.setWeight(bagWeight);
337
newBag.setDataset(output);
342
* Adds an output instance to the queue. The derived class should use this
343
* method for each output instance it makes available.
345
* @param instance the instance to be added to the queue.
347
// Forwards an output instance to the superclass queue; the call is skipped
// when the instance is null.
protected void push(Instance instance) {
348
if (instance != null) {
349
super.push(instance);
350
// set correct references
355
* Signify that this batch of input to the filter is finished.
356
* If the filter requires all instances prior to filtering,
357
* output() may now be called to retrieve the filtered instances.
359
* @return true if there are instances pending output
360
* @throws IllegalStateException if no input structure has been defined
362
// Converts the buffered propositional instances into bags. The input is
// sorted by bag id, walked once, and a bag is emitted through addBag each
// time the bag id changes (plus once after the loop for the final bag). The
// resulting bags are then pushed to the output queue.
public boolean batchFinished() {
364
if (getInputFormat() == null) {
365
throw new IllegalStateException("No input instance format defined");
368
Instances input = getInputFormat();
// Sorting on attribute 0 puts instances with the same bag id next to each
// other, which the change-detection in the loop below depends on.
369
input.sort(0); // make sure that bagID is sorted
370
Instances output = getOutputFormat();
371
Instances bagInsts = output.attribute(1).relation();
// A single Instance object is created here and refilled for every input row.
// NOTE(review): the statements that add it to bagInsts are not visible in
// this view - confirm that adding stores a copy rather than this reference.
372
Instance inst = new Instance(bagInsts.numAttributes());
373
inst.setDataset(bagInsts);
375
// State of the bag currently being collected, seeded from the first row.
// NOTE(review): input.instance(0) is read with no visible check that the
// input holds any instances - confirm the behaviour for an empty batch.
double bagIndex = input.instance(0).value(0);
376
double classValue = input.instance(0).classValue();
377
double bagWeight = 0.0;
379
// Convert pending input instances
380
for(int i = 0; i < input.numInstances(); i++) {
381
double currentBagIndex = input.instance(i).value(0);
383
// copy the propositional instance value, except the bagIndex and the class value
// Input attribute j + 1 maps to bag attribute j for j in
// [0, numAttributes - 2), i.e. the first and the last input attribute are
// skipped; this assumes the class is the last attribute - TODO confirm.
384
for (int j = 0; j < input.numAttributes() - 2; j++)
385
inst.setValue(j, input.instance(i).value(j + 1));
386
inst.setWeight(input.instance(i).weight());
388
// Same bag id as the bag being collected: the bag weight accumulates the
// instance weight.
if (currentBagIndex == bagIndex){
390
bagWeight += inst.weight();
393
// Bag id changed: the finished bag is emitted, then collection restarts with
// an empty copy of the bag header and the current row's id, class value and
// weight. The bag id is passed to addBag truncated to int.
addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);
395
bagInsts = bagInsts.stringFreeStructure();
397
bagIndex = currentBagIndex;
398
classValue = input.instance(i).classValue();
399
bagWeight = inst.weight();
403
// reach the last instance, create and add the last bag
404
addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);
407
// Shuffles the bags with a Random seeded from getSeed(), so the order is
// reproducible for a given seed.
// NOTE(review): the condition guarding this call (if any) is not visible in
// this view.
output.randomize(new Random(getSeed()));
409
for (int i = 0; i < output.numInstances(); i++)
410
push(output.instance(i));
416
m_FirstBatchDone = true;
418
// True when at least one instance is waiting in the output queue.
return (numPendingOutput() != 0);
422
* Main method for running this filter.
424
* @param args should contain arguments to the filter:
427
// Command-line entry point: hands a new PropositionalToMultiInstance and the
// raw arguments to runFilter.
public static void main(String[] args) {
428
runFilter(new PropositionalToMultiInstance(), args);