2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
22
package weka.classifiers.mi;
24
import weka.classifiers.Classifier;
25
import weka.core.Capabilities;
26
import weka.core.Instance;
27
import weka.core.Instances;
28
import weka.core.MultiInstanceCapabilitiesHandler;
29
import weka.core.Option;
30
import weka.core.OptionHandler;
31
import weka.core.TechnicalInformation;
32
import weka.core.TechnicalInformationHandler;
33
import weka.core.Utils;
34
import weka.core.Capabilities.Capability;
35
import weka.core.TechnicalInformation.Field;
36
import weka.core.TechnicalInformation.Type;
38
import java.io.Serializable;
39
import java.util.Enumeration;
40
import java.util.Vector;
42
<!-- globalinfo-start -->
43
* Modified version of the Citation kNN multi instance classifier.<br/>
45
* For more information see:<br/>
47
* Jun Wang, Zucker, Jean-Daniel: Solving Multiple-Instance Problem: A Lazy Learning Approach. In: 17th International Conference on Machine Learning, 1119-1125, 2000.
49
<!-- globalinfo-end -->
51
<!-- technical-bibtex-start -->
54
* @inproceedings{Wang2000,
55
* author = {Jun Wang and Zucker and Jean-Daniel},
56
* booktitle = {17th International Conference on Machine Learning},
57
* editor = {Pat Langley},
58
* pages = {1119-1125},
59
* title = {Solving Multiple-Instance Problem: A Lazy Learning Approach},
64
<!-- technical-bibtex-end -->
66
<!-- options-start -->
67
* Valid options are: <p/>
69
* <pre> -R <number of references>
70
* Number of Nearest References (default 1)</pre>
72
* <pre> -C <number of citers>
73
* Number of Nearest Citers (default 1)</pre>
75
* <pre> -H <rank>
76
* Rank of the Hausdorff Distance (default 1)</pre>
80
* @author Miguel Garcia Torres (mgarciat@ull.es)
81
* @version $Revision: 1.7 $
83
public class CitationKNN
85
implements OptionHandler, MultiInstanceCapabilitiesHandler,
86
TechnicalInformationHandler {
88
/** for serialization */
89
static final long serialVersionUID = -8435377743874094852L;
91
/** The index of the class attribute */
92
protected int m_ClassIndex;
94
/** The number of the class labels */
95
protected int m_NumClasses;
98
protected int m_IdIndex;
100
/** Debugging output */
101
protected boolean m_Debug;
103
/** Class labels for each bag */
104
protected int[] m_Classes;
106
/** attribute name structure of the relational attribute*/
107
protected Instances m_Attributes;
109
/** Number of references */
110
protected int m_NumReferences = 1;
112
/** Number of citers*/
113
protected int m_NumCiters = 1;
116
protected Instances m_TrainBags;
118
/** Different debugging output */
119
protected boolean m_CNNDebug = false;
121
protected boolean m_CitersDebug = false;
123
protected boolean m_ReferencesDebug = false;
125
protected boolean m_HDistanceDebug = false;
127
protected boolean m_NeighborListDebug = false;
129
/** C nearest neighbors considering all the bags*/
130
protected NeighborList[] m_CNN;
132
/** C nearest citers */
133
protected int[] m_Citers;
135
/** R nearest references */
136
protected int[] m_References;
138
/** Rank associated to the Hausdorff distance*/
139
protected int m_HDRank = 1;
141
/** Normalization of the euclidean distance */
142
private double[] m_Diffs;
144
private double[] m_Min;
146
private double m_MinNorm = 0.95;
148
private double[] m_Max;
150
private double m_MaxNorm = 1.05;
153
* Returns a string describing this filter
155
* @return a description of the filter suitable for
156
* displaying in the explorer/experimenter gui
158
public String globalInfo() {
160
"Modified version of the Citation kNN multi instance classifier.\n\n"
161
+ "For more information see:\n\n"
162
+ getTechnicalInformation().toString();
166
* Returns an instance of a TechnicalInformation object, containing
167
* detailed information about the technical background of this class,
168
* e.g., paper reference or book this class is based on.
170
* @return the technical information about this class
172
public TechnicalInformation getTechnicalInformation() {
173
TechnicalInformation result;
175
result = new TechnicalInformation(Type.INPROCEEDINGS);
176
result.setValue(Field.AUTHOR, "Jun Wang and Zucker and Jean-Daniel");
177
result.setValue(Field.TITLE, "Solving Multiple-Instance Problem: A Lazy Learning Approach");
178
result.setValue(Field.BOOKTITLE, "17th International Conference on Machine Learning");
179
result.setValue(Field.EDITOR, "Pat Langley");
180
result.setValue(Field.YEAR, "2000");
181
result.setValue(Field.PAGES, "1119-1125");
187
* Calculates the normalization of each attribute.
189
public void preprocessData(){
194
// compute the min/max of each feature
196
for (i=0;i<m_Attributes.numAttributes();i++) {
197
min=Double.POSITIVE_INFINITY ;
198
max=Double.NEGATIVE_INFINITY ;
199
for(j = 0; j < m_TrainBags.numInstances(); j++){
200
instances = m_TrainBags.instance(j).relationalValue(1);
201
for (k=0;k<instances.numInstances();k++) {
202
instance = instances.instance(k);
203
if(instance.value(i) < min)
204
min= instance.value(i);
205
if(instance.value(i) > max)
206
max= instance.value(i);
209
m_Min[i] = min * m_MinNorm;
210
m_Max[i] = max * m_MaxNorm;
211
m_Diffs[i]= max * m_MaxNorm - min * m_MinNorm;
217
* Returns the tip text for this property
219
* @return tip text for this property suitable for
220
* displaying in the explorer/experimenter gui
222
public String HDRankTipText() {
223
return "The rank associated to the Hausdorff distance.";
227
* Sets the rank associated to the Hausdorff distance
228
* @param hDRank the rank of the Hausdorff distance
230
public void setHDRank(int hDRank){
235
* Returns the rank associated to the Hausdorff distance
236
* @return the rank number
238
public int getHDRank(){
243
* Returns the tip text for this property
245
* @return tip text for this property suitable for
246
* displaying in the explorer/experimenter gui
248
public String numReferencesTipText() {
250
"The number of references considered to estimate the class "
251
+ "prediction of tests bags.";
255
* Sets the number of references considered to estimate
256
* the class prediction of tests bags
257
* @param numReferences the number of references
259
public void setNumReferences(int numReferences){
260
m_NumReferences = numReferences;
264
* Returns the number of references considered to estimate
265
* the class prediction of tests bags
266
* @return the number of references
268
public int getNumReferences(){
269
return m_NumReferences;
273
* Returns the tip text for this property
275
* @return tip text for this property suitable for
276
* displaying in the explorer/experimenter gui
278
public String numCitersTipText() {
280
"The number of citers considered to estimate the class "
281
+ "prediction of test bags.";
285
* Sets the number of citers considered to estimate
286
* the class prediction of tests bags
287
* @param numCiters the number of citers
289
public void setNumCiters(int numCiters){
290
m_NumCiters = numCiters;
294
* Returns the number of citers considered to estimate
295
* the class prediction of tests bags
296
* @return the number of citers
298
public int getNumCiters(){
303
* Returns default capabilities of the classifier.
305
* @return the capabilities of this classifier
307
public Capabilities getCapabilities() {
308
Capabilities result = super.getCapabilities();
311
result.enable(Capability.NOMINAL_ATTRIBUTES);
312
result.enable(Capability.NUMERIC_ATTRIBUTES);
313
result.enable(Capability.DATE_ATTRIBUTES);
314
result.enable(Capability.RELATIONAL_ATTRIBUTES);
315
result.enable(Capability.MISSING_VALUES);
318
result.enable(Capability.NOMINAL_CLASS);
319
result.enable(Capability.MISSING_CLASS_VALUES);
322
result.enable(Capability.ONLY_MULTIINSTANCE);
328
* Returns the capabilities of this multi-instance classifier for the
331
* @return the capabilities of this object
334
public Capabilities getMultiInstanceCapabilities() {
335
Capabilities result = super.getCapabilities();
338
result.enable(Capability.NOMINAL_ATTRIBUTES);
339
result.enable(Capability.NUMERIC_ATTRIBUTES);
340
result.enable(Capability.DATE_ATTRIBUTES);
341
result.enable(Capability.MISSING_VALUES);
344
result.disableAllClasses();
345
result.enable(Capability.NO_CLASS);
351
* Builds the classifier
353
* @param train the training data to be used for generating the
354
* boosted classifier.
355
* @throws Exception if the classifier could not be built successfully
357
public void buildClassifier(Instances train) throws Exception {
358
// can classifier handle the data?
359
getCapabilities().testWithFail(train);
361
// remove instances with missing class
362
train = new Instances(train);
363
train.deleteWithMissingClass();
366
m_ClassIndex = train.classIndex();
368
m_NumClasses = train.numClasses();
370
m_Classes = new int [train.numInstances()]; // Class values
371
m_Attributes = train.instance(0).relationalValue(1).stringFreeStructure();
373
m_Citers = new int[train.numClasses()];
374
m_References = new int[train.numClasses()];
376
m_Diffs = new double[m_Attributes.numAttributes()];
377
m_Min = new double[m_Attributes.numAttributes()];
378
m_Max = new double[m_Attributes.numAttributes()];
385
System.out.println("########################################### ");
386
System.out.println("###########CITATION######################## ");
387
System.out.println("########################################### ");
388
for(int i = 0; i < m_CNN.length; i++){
389
System.out.println("Bag: " + i);
390
m_CNN[i].printReducedList();
396
* generates all the variables associated to the citation
399
* @throws Exception if generation fails
401
public void buildCNN() throws Exception {
405
if((m_NumCiters >= m_TrainBags.numInstances()) ||
407
throw new Exception("Number of citers is out of the range [0, numInstances)");
409
numCiters = m_NumCiters;
411
m_CNN = new NeighborList[m_TrainBags.numInstances()];
414
for(int i = 0; i< m_TrainBags.numInstances(); i++){
415
bag = m_TrainBags.instance(i);
416
//first we find its neighbors
417
NeighborList neighborList = findNeighbors(bag, numCiters, m_TrainBags);
418
m_CNN[i] = neighborList;
423
* calculates the citers associated to a bag
424
* @param bag the bag cited
426
public void countBagCiters(Instance bag){
428
//Initialization of the vector
429
for(int i = 0; i < m_TrainBags.numClasses(); i++)
432
if(m_CitersDebug == true)
433
System.out.println("-------CITERS--------");
435
NeighborList neighborList;
436
NeighborNode current;
437
boolean stopSearch = false;
440
// compute the distance between the test bag and each training bag. Update
441
// the bagCiter count in case it be a neighbour
443
double bagDistance = 0;
444
for(int i = 0; i < m_TrainBags.numInstances(); i++){
445
//measure the distance
446
bagDistance = distanceSet(bag, m_TrainBags.instance(i));
447
if(m_CitersDebug == true){
448
System.out.print("bag - bag(" + i + "): " + bagDistance);
449
System.out.println(" <" + m_TrainBags.instance(i).classValue() + ">");
451
//compare the distance to see if it would belong to the
452
// neighborhood of each training exemplar
453
neighborList = m_CNN[i];
454
current = neighborList.mFirst;
456
while((current != null) && (!stopSearch)) {
457
if(m_CitersDebug == true)
458
System.out.println("\t\tciter Distance: " + current.mDistance);
459
if(current.mDistance < bagDistance){
460
current = current.mNext;
463
if(m_CitersDebug == true){
464
System.out.println("\t***");
469
if(stopSearch == true){
471
index = (int)(m_TrainBags.instance(i)).classValue();
472
m_Citers[index] += 1;
477
if(m_CitersDebug == true){
478
for(int i= 0; i < m_Citers.length; i++){
479
System.out.println("[" + i + "]: " + m_Citers[i]);
486
* Calculates the references of the exemplar bag
487
* @param bag the exemplar to which the nearest references
490
public void countBagReferences(Instance bag){
491
int index = 0, referencesIndex = 0;
493
if(m_TrainBags.numInstances() < m_NumReferences)
494
referencesIndex = m_TrainBags.numInstances() - 1;
496
referencesIndex = m_NumReferences;
498
if(m_CitersDebug == true){
499
System.out.println("-------References (" + referencesIndex+ ")--------");
501
//Initialization of the vector
502
for(int i = 0; i < m_References.length; i++)
505
if(referencesIndex > 0){
506
//first we find its neighbors
507
NeighborList neighborList = findNeighbors(bag, referencesIndex, m_TrainBags);
508
if(m_ReferencesDebug == true){
509
System.out.println("Bag: " + bag + " Neighbors: ");
510
neighborList.printReducedList();
512
NeighborNode current = neighborList.mFirst;
513
while(current != null){
514
index = (int) current.mBag.classValue();
515
m_References[index] += 1;
516
current = current.mNext;
519
if(m_ReferencesDebug == true){
520
System.out.println("References:");
521
for(int j = 0; j < m_References.length; j++)
522
System.out.println("[" + j + "]: " + m_References[j]);
527
* Build the list of nearest k neighbors to the given test instance.
528
* @param bag the bag to search for neighbors of
529
* @param kNN the number of nearest neighbors
530
* @param bags the data
531
* @return a list of neighbors
533
protected NeighborList findNeighbors(Instance bag, int kNN, Instances bags){
537
if(kNN > bags.numInstances())
538
kNN = bags.numInstances() - 1;
540
NeighborList neighborList = new NeighborList(kNN);
541
for(int i = 0; i < bags.numInstances(); i++){
542
if(bag != bags.instance(i)){ // for hold-one-out cross-validation
543
distance = distanceSet(bag, bags.instance(i)) ; //mDistanceSet.distance(bag, mInstances, bags.exemplar(i), mInstances);
544
if(m_NeighborListDebug)
545
System.out.println("distance(bag, " + i + "): " + distance);
546
if(neighborList.isEmpty() || (index < kNN) || (distance <= neighborList.mLast.mDistance))
547
neighborList.insertSorted(distance, bags.instance(i), i);
552
if(m_NeighborListDebug){
553
System.out.println("bag neighbors:");
554
neighborList.printReducedList();
561
* Calculates the distance between two instances
562
* @param first instance
563
* @param second instance
564
* @return the distance value
566
public double distanceSet(Instance first, Instance second){
567
double[] h_f = new double[first.relationalValue(1).numInstances()];
571
for(int i = 0; i < h_f.length; i++)
572
h_f[i] = Double.MAX_VALUE;
578
if(m_HDRank >= first.relationalValue(1).numInstances())
579
rank = first.relationalValue(1).numInstances();
580
else if(m_HDRank < 1)
585
if(m_HDistanceDebug){
586
System.out.println("-------HAUSDORFF DISTANCE--------");
587
System.out.println("rank: " + rank + "\nset of instances:");
588
System.out.println("\tset 1:");
589
for(int i = 0; i < first.relationalValue(1).numInstances(); i++)
590
System.out.println(first.relationalValue(1).instance(i));
592
System.out.println("\n\tset 2:");
593
for(int i = 0; i < second.relationalValue(1).numInstances(); i++)
594
System.out.println(second.relationalValue(1).instance(i));
596
System.out.println("\n");
599
//for each instance in bag first
600
for(int i = 0; i < first.relationalValue(1).numInstances(); i++){
601
// calculate the distance to each instance in
603
if(m_HDistanceDebug){
604
System.out.println("\nDistances:");
606
for(int j = 0; j < second.relationalValue(1).numInstances(); j++){
607
distance = distance(first.relationalValue(1).instance(i), second.relationalValue(1).instance(j));
608
if(distance < h_f[i])
610
if(m_HDistanceDebug){
611
System.out.println("\tdist(" + i + ", "+ j + "): " + distance + " --> h_f[" + i + "]: " + h_f[i]);
615
int[] index_f = Utils.stableSort(h_f);
617
if(m_HDistanceDebug){
618
System.out.println("\nRanks:\n");
619
for(int i = 0; i < index_f.length; i++)
620
System.out.println("\trank " + (i + 1) + ": " + h_f[index_f[i]]);
622
System.out.println("\n\t\t>>>>> rank " + rank + ": " + h_f[index_f[rank - 1]] + " <<<<<");
625
return h_f[index_f[rank - 1]];
629
* distance between two instances
630
* @param first the first instance
631
* @param second the other instance
632
* @return the distance in double precision
634
public double distance(Instance first, Instance second){
636
double sum = 0, diff;
637
for(int i = 0; i < m_Attributes.numAttributes(); i++){
638
diff = (first.value(i) - m_Min[i])/ m_Diffs[i] -
639
(second.value(i) - m_Min[i])/ m_Diffs[i];
642
return sum = Math.sqrt(sum);
646
* Computes the distribution for a given exemplar
648
* @param bag the exemplar for which distribution is computed
649
* @return the distribution
650
* @throws Exception if the distribution can't be computed successfully
652
public double[] distributionForInstance(Instance bag)
655
if(m_TrainBags.numInstances() == 0)
656
throw new Exception("No training bags!");
658
updateNormalization(bag);
660
//build references (R nearest neighbors)
661
countBagReferences(bag);
666
return makeDistribution();
670
* Updates the normalization of each attribute.
672
* @param bag the exemplar to update the normalization for
674
public void updateNormalization(Instance bag){
679
// compute the min/max of each feature
680
for (i = 0; i < m_TrainBags.attribute(1).relation().numAttributes(); i++) {
681
min = m_Min[i] / m_MinNorm;
682
max = m_Max[i] / m_MaxNorm;
684
instances = bag.relationalValue(1);
685
for (k=0;k<instances.numInstances();k++) {
686
instance = instances.instance(k);
687
if(instance.value(i) < min)
688
min = instance.value(i);
689
if(instance.value(i) > max)
690
max = instance.value(i);
692
m_Min[i] = min * m_MinNorm;
693
m_Max[i] = max * m_MaxNorm;
694
m_Diffs[i]= max * m_MaxNorm - min * m_MinNorm;
699
* Wether the instances of two exemplars are or are not equal
700
* @param exemplar1 first exemplar
701
* @param exemplar2 second exemplar
702
* @return if the instances of the exemplars are equal or not
704
public boolean equalExemplars(Instance exemplar1, Instance exemplar2){
705
if(exemplar1.relationalValue(1).numInstances() ==
706
exemplar2.relationalValue(1).numInstances()){
707
Instances instances1 = exemplar1.relationalValue(1);
708
Instances instances2 = exemplar2.relationalValue(1);
709
for(int i = 0; i < instances1.numInstances(); i++){
710
Instance instance1 = instances1.instance(i);
711
Instance instance2 = instances2.instance(i);
712
for(int j = 0; j < instance1.numAttributes(); j++){
713
if(instance1.value(j) != instance2.value(j)){
724
* Turn the references and citers list into a probability distribution
726
* @return the probability distribution
727
* @throws Exception if computation of distribution fails
729
protected double[] makeDistribution() throws Exception {
732
double[] distribution = new double[m_TrainBags.numClasses()];
733
boolean debug = false;
735
total = (double)m_TrainBags.numClasses() / Math.max(1, m_TrainBags.numInstances());
737
for(int i = 0; i < m_TrainBags.numClasses(); i++){
738
distribution[i] = 1.0 / Math.max(1, m_TrainBags.numInstances());
739
if(debug) System.out.println("distribution[" + i + "]: " + distribution[i]);
742
if(debug)System.out.println("total: " + total);
744
for(int i = 0; i < m_TrainBags.numClasses(); i++){
745
distribution[i] += m_References[i];
746
distribution[i] += m_Citers[i];
751
for(int i = 0; i < m_TrainBags.numClasses(); i++){
752
total += distribution[i];
753
if(debug)System.out.println("distribution[" + i + "]: " + distribution[i]);
756
for(int i = 0; i < m_TrainBags.numClasses(); i++){
757
distribution[i] = distribution[i] / total;
758
if(debug)System.out.println("distribution[" + i + "]: " + distribution[i]);
766
* Returns an enumeration of all the available options..
768
* @return an enumeration of all available options.
770
public Enumeration listOptions(){
771
Vector result = new Vector();
773
result.addElement(new Option(
774
"\tNumber of Nearest References (default 1)",
775
"R", 0, "-R <number of references>"));
777
result.addElement(new Option(
778
"\tNumber of Nearest Citers (default 1)",
779
"C", 0, "-C <number of citers>"));
781
result.addElement(new Option(
782
"\tRank of the Hausdorff Distance (default 1)",
783
"H", 0, "-H <rank>"));
785
return result.elements();
789
* Sets the OptionHandler's options using the given list. All options
790
* will be set (or reset) during this call (i.e. incremental setting
791
* of options is not possible). <p/>
793
<!-- options-start -->
794
* Valid options are: <p/>
796
* <pre> -R <number of references>
797
* Number of Nearest References (default 1)</pre>
799
* <pre> -C <number of citers>
800
* Number of Nearest Citers (default 1)</pre>
802
* <pre> -H <rank>
803
* Rank of the Hausdorff Distance (default 1)</pre>
807
* @param options the list of options as an array of strings
808
* @throws Exception if an option is not supported
810
public void setOptions(String[] options) throws Exception{
811
setDebug(Utils.getFlag('D', options));
813
String option = Utils.getOption('R', options);
814
if(option.length() != 0)
815
setNumReferences(Integer.parseInt(option));
819
option = Utils.getOption('C', options);
820
if(option.length() != 0)
821
setNumCiters(Integer.parseInt(option));
825
option = Utils.getOption('H', options);
826
if(option.length() != 0)
827
setHDRank(Integer.parseInt(option));
832
* Gets the current option settings for the OptionHandler.
834
* @return the list of current option settings as an array of strings
836
public String[] getOptions() {
839
result = new Vector();
845
result.add("" + getNumReferences());
848
result.add("" + getNumCiters());
851
result.add("" + getHDRank());
853
return (String[]) result.toArray(new String[result.size()]);
857
* returns a string representation of the classifier
859
* @return the string representation
861
public String toString() {
865
result = new StringBuffer();
868
result.append(this.getClass().getName().replaceAll(".*\\.", "") + "\n");
869
result.append(this.getClass().getName().replaceAll(".*\\.", "").replaceAll(".", "=") + "\n\n");
871
if (m_Citers == null) {
872
result.append("no model built yet!\n");
875
// internal representation
876
result.append("Citers....: " + Utils.arrayToString(m_Citers) + "\n");
878
result.append("References: " + Utils.arrayToString(m_References) + "\n");
880
result.append("Min.......: ");
881
for (i = 0; i < m_Min.length; i++) {
884
result.append(Utils.doubleToString(m_Min[i], 3));
888
result.append("Max.......: ");
889
for (i = 0; i < m_Max.length; i++) {
892
result.append(Utils.doubleToString(m_Max[i], 3));
896
result.append("Diffs.....: ");
897
for (i = 0; i < m_Diffs.length; i++) {
900
result.append(Utils.doubleToString(m_Diffs[i], 3));
905
return result.toString();
909
* Main method for testing this class.
911
* @param argv should contain the command line arguments to the
912
* scheme (see Evaluation)
914
public static void main(String[] argv) {
915
runClassifier(new CitationKNN(), argv);
918
//########################################################################
919
//########################################################################
920
//########################################################################
921
//########################################################################
922
//########################################################################
925
* A class for storing data about a neighboring instance
927
private class NeighborNode
928
implements Serializable {
930
/** for serialization */
931
static final long serialVersionUID = -3947320761906511289L;
933
/** The neighbor bag */
934
private Instance mBag;
936
/** The distance from the current instance to this neighbor */
937
private double mDistance;
939
/** A link to the next neighbor instance */
940
private NeighborNode mNext;
942
/** the position in the bag */
943
private int mBagPosition;
946
* Create a new neighbor node.
948
* @param distance the distance to the neighbor
949
* @param bag the bag instance
950
* @param position the position in the bag
951
* @param next the next neighbor node
953
public NeighborNode(double distance, Instance bag, int position, NeighborNode next){
954
mDistance = distance;
957
mBagPosition = position;
961
* Create a new neighbor node that doesn't link to any other nodes.
963
* @param distance the distance to the neighbor
964
* @param bag the neighbor instance
965
* @param position the position in the bag
967
public NeighborNode(double distance, Instance bag, int position) {
968
this(distance, bag, position, null);
972
//##################################################
974
* A class for a linked list to store the nearest k neighbours
975
* to an instance. We use a list so that we can take care of
976
* cases where multiple neighbours are the same distance away.
977
* i.e. the minimum length of the list is k.
979
private class NeighborList
980
implements Serializable {
982
/** for serialization */
983
static final long serialVersionUID = 3432555644456217394L;
985
/** The first node in the list */
986
private NeighborNode mFirst;
987
/** The last node in the list */
988
private NeighborNode mLast;
990
/** The number of nodes to attempt to maintain in the list */
991
private int mLength = 1;
994
* Creates the neighborlist with a desired length
996
* @param length the length of list to attempt to maintain
998
public NeighborList(int length) {
1002
* Gets whether the list is empty.
1004
* @return true if so
1006
public boolean isEmpty() {
1007
return (mFirst == null);
1010
* Gets the current length of the list.
1012
* @return the current length of the list
1014
public int currentLength() {
1017
NeighborNode current = mFirst;
1018
while (current != null) {
1020
current = current.mNext;
1026
* Inserts an instance neighbor into the list, maintaining the list
1027
* sorted by distance.
1029
* @param distance the distance to the instance
1030
* @param bag the neighboring instance
1031
* @param position the position in the bag
1033
public void insertSorted(double distance, Instance bag, int position) {
1036
mFirst = mLast = new NeighborNode(distance, bag, position);
1038
NeighborNode current = mFirst;
1039
if (distance < mFirst.mDistance) {// Insert at head
1040
mFirst = new NeighborNode(distance, bag, position, mFirst);
1041
} else { // Insert further down the list
1042
for( ;(current.mNext != null) &&
1043
(current.mNext.mDistance < distance);
1044
current = current.mNext);
1045
current.mNext = new NeighborNode(distance, bag, position, current.mNext);
1046
if (current.equals(mLast)) {
1047
mLast = current.mNext;
1051
// Trip down the list until we've got k list elements (or more if the
1052
// distance to the last elements is the same).
1054
for(current = mFirst; current.mNext != null;
1055
current = current.mNext) {
1057
if ((valcount >= mLength) && (current.mDistance !=
1058
current.mNext.mDistance)) {
1060
current.mNext = null;
1068
* Prunes the list to contain the k nearest neighbors. If there are
1069
* multiple neighbors at the k'th distance, all will be kept.
1071
* @param k the number of neighbors to keep in the list.
1073
public void pruneToK(int k) {
1080
double currentDist = mFirst.mDistance;
1081
NeighborNode current = mFirst;
1082
for(; current.mNext != null; current = current.mNext) {
1084
currentDist = current.mDistance;
1085
if ((currentK >= k) && (currentDist != current.mNext.mDistance)) {
1087
current.mNext = null;
1094
* Prints out the contents of the neighborlist
1096
public void printList() {
1099
System.out.println("Empty list");
1101
NeighborNode current = mFirst;
1102
while (current != null) {
1103
System.out.print("Node: instance " + current.mBagPosition + "\n");
1104
System.out.println(current.mBag);
1105
System.out.println(", distance " + current.mDistance);
1106
current = current.mNext;
1108
System.out.println();
1112
* Prints out the contents of the neighborlist
1114
public void printReducedList() {
1117
System.out.println("Empty list");
1119
NeighborNode current = mFirst;
1120
while (current != null) {
1121
System.out.print("Node: bag " + current.mBagPosition + " (" + current.mBag.relationalValue(1).numInstances() +"): ");
1122
//for(int i = 0; i < current.mBag.getInstances().numInstances(); i++){
1123
//System.out.print(" " + (current.mBag).getInstances().instance(i));
1125
System.out.print(" <" + current.mBag.classValue() + ">");
1126
System.out.println(" (d: " + current.mDistance + ")");
1127
current = current.mNext;
1129
System.out.println();