/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * MDD.java
 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
 */
package weka.classifiers.mi;

import weka.classifiers.Classifier;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Optimization;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Normalize;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.attribute.Standardize;

import java.util.Enumeration;
import java.util.Vector;
/**
 <!-- globalinfo-start -->
 * Modified Diverse Density algorithm, with collective assumption.<br/>
 * <br/>
 * More information about DD:<br/>
 * <br/>
 * Oded Maron (1998). Learning from ambiguity.<br/>
 * <br/>
 * O. Maron, T. Lozano-Perez (1998). A Framework for Multiple Instance Learning. Neural Information Processing Systems. 10.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;phdthesis{Maron1998,
 *    author = {Oded Maron},
 *    school = {Massachusetts Institute of Technology},
 *    title = {Learning from ambiguity},
 *    year = {1998}
 * }
 *
 * &#64;article{Maron1998,
 *    author = {O. Maron and T. Lozano-Perez},
 *    journal = {Neural Information Processing Systems},
 *    title = {A Framework for Multiple Instance Learning},
 *    volume = {10},
 *    year = {1998}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 *  Turn on debugging output.</pre>
 *
 * <pre> -N &lt;num&gt;
 *  Whether to 0=normalize/1=standardize/2=neither.
 *  (default 1=standardize)</pre>
 *
 <!-- options-end -->
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Xin Xu (xx5@cs.waikato.ac.nz)
 * @version $Revision: 1.3 $
 */
public class MDD
  extends Classifier
  implements OptionHandler, MultiInstanceCapabilitiesHandler,
             TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -7273119490545290581L;
  /** The index of the class attribute */
  protected int m_ClassIndex;

  /** The parameters of the model (per attribute: a point and a scale) */
  protected double[] m_Par;

  /** The number of the class labels */
  protected int m_NumClasses;

  /** Class labels for each bag */
  protected int[] m_Classes;

  /** The data in double array format: [bag][attribute][instance] */
  protected double[][][] m_Data;

  /** All attribute names */
  protected Instances m_Attributes;

  /** The filter used to standardize/normalize all values. */
  protected Filter m_Filter = null;

  /** Whether to normalize/standardize/neither, default: standardize */
  protected int m_filterType = FILTER_STANDARDIZE;

  /** Normalize training data */
  public static final int FILTER_NORMALIZE = 0;
  /** Standardize training data */
  public static final int FILTER_STANDARDIZE = 1;
  /** No normalization/standardization */
  public static final int FILTER_NONE = 2;
  /** The filter to apply to the training data */
  public static final Tag[] TAGS_FILTER = {
    new Tag(FILTER_NORMALIZE, "Normalize training data"),
    new Tag(FILTER_STANDARDIZE, "Standardize training data"),
    new Tag(FILTER_NONE, "No normalization/standardization"),
  };

  /** The filter used to get rid of missing values. */
  protected ReplaceMissingValues m_Missing = new ReplaceMissingValues();
  /**
   * Returns a string describing this classifier
   *
   * @return a description of the classifier suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
         "Modified Diverse Density algorithm, with collective assumption.\n\n"
       + "More information about DD:\n\n"
       + getTechnicalInformation().toString();
  }
  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;
    TechnicalInformation additional;

    result = new TechnicalInformation(Type.PHDTHESIS);
    result.setValue(Field.AUTHOR, "Oded Maron");
    result.setValue(Field.YEAR, "1998");
    result.setValue(Field.TITLE, "Learning from ambiguity");
    result.setValue(Field.SCHOOL, "Massachusetts Institute of Technology");

    additional = result.add(Type.ARTICLE);
    additional.setValue(Field.AUTHOR, "O. Maron and T. Lozano-Perez");
    additional.setValue(Field.YEAR, "1998");
    additional.setValue(Field.TITLE, "A Framework for Multiple Instance Learning");
    additional.setValue(Field.JOURNAL, "Neural Information Processing Systems");
    additional.setValue(Field.VOLUME, "10");

    return result;
  }
  /**
   * Returns an enumeration describing the available options
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {
    Vector result = new Vector();

    result.addElement(new Option(
          "\tTurn on debugging output.",
          "D", 0, "-D"));

    result.addElement(new Option(
          "\tWhether to 0=normalize/1=standardize/2=neither.\n"
          + "\t(default 1=standardize)",
          "N", 1, "-N <num>"));

    return result.elements();
  }
  /**
   * Parses a given list of options.
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    setDebug(Utils.getFlag('D', options));

    String nString = Utils.getOption('N', options);
    if (nString.length() != 0) {
      setFilterType(new SelectedTag(Integer.parseInt(nString), TAGS_FILTER));
    } else {
      setFilterType(new SelectedTag(FILTER_STANDARDIZE, TAGS_FILTER));
    }
  }
  /**
   * Gets the current settings of the classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    Vector result;

    result = new Vector();

    if (getDebug())
      result.add("-D");

    result.add("-N");
    result.add("" + m_filterType);

    return (String[]) result.toArray(new String[result.size()]);
  }
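  // Options sketch (added for illustration): the same settings can be applied
  // through the string interface mirrored by setOptions/getOptions above;
  // "mdd" is a hypothetical variable name.
  //
  //   MDD mdd = new MDD();
  //   mdd.setOptions(new String[]{"-N", "0", "-D"});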
  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String filterTypeTipText() {
    return "The filter type for transforming the training data.";
  }
  /**
   * Gets how the training data will be transformed. Will be one of
   * FILTER_NORMALIZE, FILTER_STANDARDIZE, FILTER_NONE.
   *
   * @return the filtering mode
   */
  public SelectedTag getFilterType() {
    return new SelectedTag(m_filterType, TAGS_FILTER);
  }
  /**
   * Sets how the training data will be transformed. Should be one of
   * FILTER_NORMALIZE, FILTER_STANDARDIZE, FILTER_NONE.
   *
   * @param newType the new filtering mode
   */
  public void setFilterType(SelectedTag newType) {
    if (newType.getTags() == TAGS_FILTER) {
      m_filterType = newType.getSelectedTag().getID();
    }
  }
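  // Usage sketch (added for illustration; "mdd" is a hypothetical variable,
  // while setFilterType, FILTER_NORMALIZE and TAGS_FILTER are the members
  // defined in this class):
  //
  //   MDD mdd = new MDD();
  //   mdd.setFilterType(new SelectedTag(MDD.FILTER_NORMALIZE, MDD.TAGS_FILTER));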
  private class OptEng
    extends Optimization {

    /**
     * Evaluate objective function
     * @param x the current values of variables
     * @return the value of the objective function
     */
    protected double objectiveFunction(double[] x){
      double nll = 0; // -LogLikelihood
      for(int i=0; i<m_Classes.length; i++){ // ith bag
        int nI = m_Data[i][0].length; // numInstances in ith bag
        double bag = 0; // NLL of each bag

        for(int j=0; j<nI; j++){
          double ins = 0.0;
          for(int k=0; k<m_Data[i].length; k++) {
            ins += (m_Data[i][k][j]-x[k*2])*(m_Data[i][k][j]-x[k*2])/
              (x[k*2+1]*x[k*2+1]);
          }
          ins = Math.exp(-ins);

          if(m_Classes[i] == 1)
            bag += ins/(double)nI;
          else
            bag += (1.0-ins)/(double)nI;
        }

        if(bag<=m_Zero) bag=m_Zero;
        nll -= Math.log(bag);
      }

      return nll;
    }
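    // Added note: with x[2k] the concept point and x[2k+1] the scale for
    // attribute k, each instance a contributes
    //   Pr(positive | a) = exp( -sum_k (a_k - x[2k])^2 / x[2k+1]^2 ),
    // and under the collective assumption a bag's class probability is the
    // average of these instance probabilities. The objective returned above
    // is the negative log-likelihood of all bags under this model.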
    /**
     * Evaluate Jacobian vector
     * @param x the current values of variables
     * @return the gradient vector
     */
    protected double[] evaluateGradient(double[] x){
      double[] grad = new double[x.length];
      for(int i=0; i<m_Classes.length; i++){ // ith bag
        int nI = m_Data[i][0].length; // numInstances in ith bag

        double denom = 0.0;
        double[] numrt = new double[x.length];

        for(int j=0; j<nI; j++){
          double exp = 0.0;
          for(int k=0; k<m_Data[i].length; k++)
            exp += (m_Data[i][k][j]-x[k*2])*(m_Data[i][k][j]-x[k*2])/
              (x[k*2+1]*x[k*2+1]);
          exp = Math.exp(-exp);

          if(m_Classes[i] == 1)
            denom += exp;
          else
            denom += (1.0-exp);

          // Instance-wise update
          for(int p=0; p<m_Data[i].length; p++){ // pth variable
            numrt[2*p] += exp*2.0*(x[2*p]-m_Data[i][p][j])/
              (x[2*p+1]*x[2*p+1]);
            numrt[2*p+1] += 2.0*
              exp*(x[2*p]-m_Data[i][p][j])*(x[2*p]-m_Data[i][p][j])/
              (x[2*p+1]*x[2*p+1]*x[2*p+1]);
          }
        }

        if(denom <= m_Zero) denom = m_Zero;

        // Bag-wise update
        for(int q=0; q<m_Data[i].length; q++){
          if(m_Classes[i] == 1){
            grad[2*q] += numrt[2*q]/denom;
            grad[2*q+1] -= numrt[2*q+1]/denom;
          } else {
            grad[2*q] -= numrt[2*q]/denom;
            grad[2*q+1] += numrt[2*q+1]/denom;
          }
        }
      }

      return grad;
    }
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.RELATIONAL_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.BINARY_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    // other
    result.enable(Capability.ONLY_MULTIINSTANCE);

    return result;
  }
  /**
   * Returns the capabilities of this multi-instance classifier for the
   * relational data.
   *
   * @return the capabilities of this object
   */
  public Capabilities getMultiInstanceCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.disableAllClasses();
    result.enable(Capability.NO_CLASS);

    return result;
  }
  /**
   * Builds the classifier
   *
   * @param train the training data to be used for generating the classifier
   * @throws Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);

    // remove instances with missing class
    train = new Instances(train);
    train.deleteWithMissingClass();

    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();

    int nR = train.attribute(1).relation().numAttributes();
    int nC = train.numInstances();
    int[] bagSize = new int[nC];
    Instances datasets = new Instances(train.attribute(1).relation(), 0);

    m_Data = new double[nC][nR][];      // Data values
    m_Classes = new int[nC];            // Class values
    m_Attributes = datasets.stringFreeStructure();
    double sY1 = 0, sY0 = 0;            // Number of bags in each class
    if (m_Debug) {
      System.out.println("Extracting data...");
    }

    FastVector maxSzIdx = new FastVector();
    int maxSz = -1;

    for(int h=0; h<nC; h++){
      Instance current = train.instance(h);
      m_Classes[h] = (int)current.classValue(); // Class value starts from 0
      Instances currInsts = current.relationalValue(1);
      int nI = currInsts.numInstances();
      bagSize[h] = nI;

      for (int i=0; i<nI; i++){
        Instance inst = currInsts.instance(i);
        datasets.add(inst);
      }

      // Keep track of the largest positive bag(s)
      if(m_Classes[h] == 1){
        if(nI > maxSz){
          maxSz = nI;
          maxSzIdx = new FastVector(1);
          maxSzIdx.addElement(new Integer(h));
        }
        else if(nI == maxSz)
          maxSzIdx.addElement(new Integer(h));
      }
    }
    /* filter the training data */
    if (m_filterType == FILTER_STANDARDIZE)
      m_Filter = new Standardize();
    else if (m_filterType == FILTER_NORMALIZE)
      m_Filter = new Normalize();
    else
      m_Filter = null;

    if (m_Filter != null) {
      m_Filter.setInputFormat(datasets);
      datasets = Filter.useFilter(datasets, m_Filter);
    }

    m_Missing.setInputFormat(datasets);
    datasets = Filter.useFilter(datasets, m_Missing);
    // copy the filtered values back into m_Data, bag by bag
    int instIndex = 0;
    int start = 0;
    for(int h=0; h<nC; h++) {
      for (int i = 0; i < datasets.numAttributes(); i++) {
        // initialize m_data[][][]
        m_Data[h][i] = new double[bagSize[h]];
        instIndex = start;
        for (int k=0; k<bagSize[h]; k++){
          m_Data[h][i][k] = datasets.instance(instIndex).value(i);
          instIndex++;
        }
      }
      start = instIndex;

      // Class count
      if (m_Classes[h] == 1)
        sY1++;
      else
        sY0++;
    }

    if (m_Debug) {
      System.out.println("\nIteration History..." );
    }
    double[] x = new double[nR*2], tmp = new double[x.length];
    double[][] b = new double[2][x.length];

    OptEng opt;
    double nll, bestnll = Double.MAX_VALUE;
    for (int t=0; t<x.length; t++){
      b[0][t] = Double.NaN; // no bounds on the search
      b[1][t] = Double.NaN;
    }

    // Largest positive exemplar
    for(int s=0; s<maxSzIdx.size(); s++){
      int exIdx = ((Integer)maxSzIdx.elementAt(s)).intValue();
      for(int p=0; p<m_Data[exIdx][0].length; p++){
        for (int q=0; q < nR; q++){
          x[2*q] = m_Data[exIdx][q][p]; // pick one instance
          x[2*q+1] = 1.0;
        }

        opt = new OptEng();
        tmp = opt.findArgmin(x, b);
        while(tmp == null){
          tmp = opt.getVarbValues();
          if (m_Debug)
            System.out.println("200 iterations finished, not enough!");
          tmp = opt.findArgmin(tmp, b);
        }
        nll = opt.getMinFunction();

        if(nll < bestnll){
          bestnll = nll;
          m_Par = tmp;
          tmp = new double[x.length]; // Save memory
          if (m_Debug)
            System.out.println("!!!!!!!!!!!!!!!!Smaller NLL found: "+nll);
        }
        if (m_Debug)
          System.out.println(exIdx+": -------------<Converged>--------------");
      }
    }
  }
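  // Added note on the training strategy above: following the diverse-density
  // approach, the optimisation is restarted once from every instance of each
  // largest positive bag (the concept point x[2q] is seeded with that
  // instance, every scale x[2q+1] with 1.0), and the parameter vector with
  // the lowest negative log-likelihood over all restarts is kept in m_Par.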
  /**
   * Computes the distribution for a given exemplar
   *
   * @param exmp the exemplar for which distribution is computed
   * @return the distribution
   * @throws Exception if the distribution can't be computed successfully
   */
  public double[] distributionForInstance(Instance exmp)
    throws Exception {

    // Extract the data
    Instances ins = exmp.relationalValue(1);
    if(m_Filter != null)
      ins = Filter.useFilter(ins, m_Filter);

    ins = Filter.useFilter(ins, m_Missing);

    int nI = ins.numInstances(), nA = ins.numAttributes();
    double[][] dat = new double[nI][nA];
    for(int j=0; j<nI; j++){
      for(int k=0; k<nA; k++){
        dat[j][k] = ins.instance(j).value(k);
      }
    }

    // Compute the probability of the bag
    double[] distribution = new double[2];
    distribution[1] = 0.0; // Prob. for class 1

    for(int i=0; i<nI; i++){
      double exp = 0.0;
      for(int r=0; r<nA; r++)
        exp += (m_Par[r*2]-dat[i][r])*(m_Par[r*2]-dat[i][r])/
          ((m_Par[r*2+1])*(m_Par[r*2+1]));
      exp = Math.exp(-exp);

      // Prob. updated for one instance
      distribution[1] += exp/(double)nI;
      distribution[0] += (1.0-exp)/(double)nI;
    }

    return distribution;
  }
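  // Scoring sketch (added for illustration; "mdd" is a trained MDD instance
  // and "bag" a hypothetical multi-instance Instance from a compatible
  // dataset):
  //
  //   double[] dist = mdd.distributionForInstance(bag);
  //   // dist[1] is the estimated probability that the bag is positive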
  /**
   * Gets a string describing the classifier.
   *
   * @return a string describing the classifier built.
   */
  public String toString() {

    String result = "Modified Diverse Density";
    if (m_Par == null)
      return result + ": No model built yet.";

    result += "\nCoefficients...\n"
      + "Variable       Point       Scale\n";
    for (int j = 0, idx=0; j < m_Par.length/2; j++, idx++) {
      result += m_Attributes.attribute(idx).name();
      result += " "+Utils.doubleToString(m_Par[j*2], 12, 4);
      result += " "+Utils.doubleToString(m_Par[j*2+1], 12, 4)+"\n";
    }

    return result;
  }
  /**
   * Main method for testing this class.
   *
   * @param argv should contain the command line arguments to the
   * scheme (see Evaluation)
   */
  public static void main(String[] argv) {
    runClassifier(new MDD(), argv);
  }
}
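// Command-line sketch (added for illustration; "train.arff" is a hypothetical
// multi-instance dataset with a bag identifier, a relational attribute holding
// the instances, and a binary class, as required by getCapabilities()):
//
//   java weka.classifiers.mi.MDD -t train.arff -N 1
//
// -N selects the filter type (0=normalize, 1=standardize, 2=neither) and -D
// turns on debugging output, as documented in listOptions().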