2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 2001 University of Waikato, Hamilton, New Zealand
23
package weka.clusterers;
25
import weka.core.AttributeStats;
26
import weka.core.Capabilities;
27
import weka.core.Drawable;
28
import weka.core.FastVector;
29
import weka.core.Instance;
30
import weka.core.Instances;
31
import weka.core.Option;
32
import weka.core.TechnicalInformation;
33
import weka.core.TechnicalInformationHandler;
34
import weka.core.Utils;
35
import weka.core.Capabilities.Capability;
36
import weka.core.TechnicalInformation.Field;
37
import weka.core.TechnicalInformation.Type;
38
import weka.experiment.Stats;
39
import weka.filters.Filter;
40
import weka.filters.unsupervised.attribute.Add;
42
import java.io.Serializable;
43
import java.util.Enumeration;
44
import java.util.Random;
45
import java.util.Vector;
48
<!-- globalinfo-start -->
49
* Class implementing the Cobweb and Classit clustering algorithms.<br/>
51
* Note: the application of node operators (merging, splitting etc.) in terms of ordering and priority differs (and is somewhat ambiguous) between the original Cobweb and Classit papers. This algorithm always compares the best host, adding a new leaf, merging the two best hosts, and splitting the best host when considering where to place a new instance.<br/>
53
* For more information see:<br/>
55
* D. Fisher (1987). Knowledge acquisition via incremental conceptual clustering. Machine Learning. 2(2):139-172.<br/>
57
* J. H. Gennari, P. Langley, D. Fisher (1990). Models of incremental concept formation. Artificial Intelligence. 40:11-61.
59
<!-- globalinfo-end -->
61
<!-- technical-bibtex-start -->
64
* @article{Fisher1987,
65
* author = {D. Fisher},
66
* journal = {Machine Learning},
69
* title = {Knowledge acquisition via incremental conceptual clustering},
74
* @article{Gennari1990,
75
* author = {J. H. Gennari and P. Langley and D. Fisher},
76
* journal = {Artificial Intelligence},
78
* title = {Models of incremental concept formation},
84
<!-- technical-bibtex-end -->
86
<!-- options-start -->
87
* Valid options are: <p/>
89
* <pre> -A <acuity>
93
* <pre> -C <cutoff>
95
* (default=0.002)</pre>
97
* <pre> -S <num>
103
* @author <a href="mailto:mhall@cs.waikato.ac.nz">Mark Hall</a>
104
* @version $Revision: 1.24 $
105
* @see RandomizableClusterer
109
extends RandomizableClusterer
110
implements Drawable, TechnicalInformationHandler, UpdateableClusterer {
112
/** for serialization */
113
static final long serialVersionUID = 928406656495092318L;
116
* Inner class handling node operations for Cobweb.
121
implements Serializable {
123
/** for serialization */
124
static final long serialVersionUID = 3452097436933325631L;
126
* Within cluster attribute statistics
128
private AttributeStats[] m_attStats;
131
* Number of attributes
133
private int m_numAttributes;
136
* Instances at this node
138
protected Instances m_clusterInstances = null;
141
* Children of this node
143
private FastVector m_children = null;
146
* Total instances at this node
148
private double m_totalInstances = 0.0;
151
* Cluster number of this node
153
private int m_clusterNum = -1;
156
* Creates an empty <code>CNode</code> instance.
158
* @param numAttributes the number of attributes in the data
160
public CNode(int numAttributes) {
161
m_numAttributes = numAttributes;
165
* Creates a new leaf <code>CNode</code> instance.
167
* @param numAttributes the number of attributes in the data
168
* @param leafInstance the instance to store at this leaf
170
public CNode(int numAttributes, Instance leafInstance) {
172
if (m_clusterInstances == null) {
173
m_clusterInstances = new Instances(leafInstance.dataset(), 1);
175
m_clusterInstances.add(leafInstance);
176
updateStats(leafInstance, false);
180
* Adds an instance to this cluster.
182
* @param newInstance the instance to add
183
* @throws Exception if an error occurs
185
protected void addInstance(Instance newInstance) throws Exception {
186
// Add the instance to this cluster
188
if (m_clusterInstances == null) {
189
m_clusterInstances = new Instances(newInstance.dataset(), 1);
190
m_clusterInstances.add(newInstance);
191
updateStats(newInstance, false);
193
} else if (m_children == null) {
194
/* we are a leaf, so make our existing instance(s) into a child
195
and then add the new instance as a child */
196
m_children = new FastVector();
197
CNode tempSubCluster = new CNode(m_numAttributes,
198
m_clusterInstances.instance(0));
200
// System.out.println("Dumping "+m_clusterInstances.numInstances());
201
for (int i = 1; i < m_clusterInstances.numInstances(); i++) {
202
tempSubCluster.m_clusterInstances.
203
add(m_clusterInstances.instance(i));
204
tempSubCluster.updateStats(m_clusterInstances.instance(i), false);
206
m_children = new FastVector();
207
m_children.addElement(tempSubCluster);
208
m_children.addElement(new CNode(m_numAttributes, newInstance));
210
m_clusterInstances.add(newInstance);
211
updateStats(newInstance, false);
213
// here is where we check against cutoff (also check cutoff
215
if (categoryUtility() < m_cutoff) {
216
// System.out.println("Cutting (leaf add) ");
222
// otherwise, find the best host for this instance
223
CNode bestHost = findHost(newInstance, false);
224
if (bestHost != null) {
225
// now add to the best host
226
bestHost.addInstance(newInstance);
231
* Temporarily adds a new instance to each of this nodes children
232
* in turn and computes the category utility.
234
* @param newInstance the new instance to evaluate
235
* @return an array of category utility values---the result of considering
236
* each child in turn as a host for the new instance
237
* @throws Exception if an error occurs
239
private double[] cuScoresForChildren(Instance newInstance)
241
// look for a host in existing children
242
double[] categoryUtils = new double [m_children.size()];
244
// look for a home for this instance in the existing children
245
for (int i = 0; i < m_children.size(); i++) {
246
CNode temp = (CNode) m_children.elementAt(i);
247
// tentitively add the new instance to this child
248
temp.updateStats(newInstance, false);
249
categoryUtils[i] = categoryUtility();
251
// remove the new instance from this child
252
temp.updateStats(newInstance, true);
254
return categoryUtils;
257
private double cuScoreForBestTwoMerged(CNode merged,
259
Instance newInstance)
262
double mergedCU = -Double.MAX_VALUE;
263
// consider merging the best and second
265
merged.m_clusterInstances = new Instances(m_clusterInstances, 1);
267
merged.addChildNode(a);
268
merged.addChildNode(b);
269
merged.updateStats(newInstance, false); // add new instance to stats
270
// remove the best and second best nodes
271
m_children.removeElementAt(m_children.indexOf(a));
272
m_children.removeElementAt(m_children.indexOf(b));
273
m_children.addElement(merged);
274
mergedCU = categoryUtility();
275
// restore the status quo
276
merged.updateStats(newInstance, true);
277
m_children.removeElementAt(m_children.indexOf(merged));
278
m_children.addElement(a);
279
m_children.addElement(b);
284
* Finds a host for the new instance in this nodes children. Also
285
* considers merging the two best hosts and splitting the best host.
287
* @param newInstance the instance to find a host for
288
* @param structureFrozen true if the instance is not to be added to
289
* the tree and instead the best potential host is to be returned
290
* @return the best host
291
* @throws Exception if an error occurs
293
private CNode findHost(Instance newInstance,
294
boolean structureFrozen) throws Exception {
296
if (!structureFrozen) {
297
updateStats(newInstance, false);
300
// look for a host in existing children and also consider as a new leaf
301
double[] categoryUtils = cuScoresForChildren(newInstance);
303
// make a temporary new leaf for this instance and get CU
304
CNode newLeaf = new CNode(m_numAttributes, newInstance);
305
m_children.addElement(newLeaf);
306
double bestHostCU = categoryUtility();
307
CNode finalBestHost = newLeaf;
309
// remove new leaf when seaching for best and second best nodes to
310
// consider for merging and splitting
311
m_children.removeElementAt(m_children.size()-1);
313
// now determine the best host (and the second best)
316
for (int i = 0; i < categoryUtils.length; i++) {
317
if (categoryUtils[i] > categoryUtils[secondBest]) {
318
if (categoryUtils[i] > categoryUtils[best]) {
327
CNode a = (CNode) m_children.elementAt(best);
328
CNode b = (CNode) m_children.elementAt(secondBest);
329
if (categoryUtils[best] > bestHostCU) {
330
bestHostCU = categoryUtils[best];
332
// System.out.println("Node is best");
335
if (structureFrozen) {
336
if (finalBestHost == newLeaf) {
337
return null; // *this* node is the best host
339
return finalBestHost;
343
double mergedCU = -Double.MAX_VALUE;
344
CNode merged = new CNode(m_numAttributes);
346
mergedCU = cuScoreForBestTwoMerged(merged, a, b, newInstance);
348
if (mergedCU > bestHostCU) {
349
bestHostCU = mergedCU;
350
finalBestHost = merged;
354
// Consider splitting the best
355
double splitCU = -Double.MAX_VALUE;
356
double splitBestChildCU = -Double.MAX_VALUE;
357
double splitPlusNewLeafCU = -Double.MAX_VALUE;
358
double splitPlusMergeBestTwoCU = -Double.MAX_VALUE;
359
if (a.m_children != null) {
360
FastVector tempChildren = new FastVector();
362
for (int i = 0; i < m_children.size(); i++) {
363
CNode existingChild = (CNode)m_children.elementAt(i);
364
if (existingChild != a) {
365
tempChildren.addElement(existingChild);
368
for (int i = 0; i < a.m_children.size(); i++) {
369
CNode promotedChild = (CNode)a.m_children.elementAt(i);
370
tempChildren.addElement(promotedChild);
372
// also add the new leaf
373
tempChildren.addElement(newLeaf);
375
FastVector saveStatusQuo = m_children;
376
m_children = tempChildren;
377
splitPlusNewLeafCU = categoryUtility(); // split + new leaf
378
// remove the new leaf
379
tempChildren.removeElementAt(tempChildren.size()-1);
380
// now look for best and second best
381
categoryUtils = cuScoresForChildren(newInstance);
383
// now determine the best host (and the second best)
386
for (int i = 0; i < categoryUtils.length; i++) {
387
if (categoryUtils[i] > categoryUtils[secondBest]) {
388
if (categoryUtils[i] > categoryUtils[best]) {
396
CNode sa = (CNode) m_children.elementAt(best);
397
CNode sb = (CNode) m_children.elementAt(secondBest);
398
splitBestChildCU = categoryUtils[best];
400
// now merge best and second best
401
CNode mergedSplitChildren = new CNode(m_numAttributes);
403
splitPlusMergeBestTwoCU =
404
cuScoreForBestTwoMerged(mergedSplitChildren, sa, sb, newInstance);
406
splitCU = (splitBestChildCU > splitPlusNewLeafCU) ?
407
splitBestChildCU : splitPlusNewLeafCU;
408
splitCU = (splitCU > splitPlusMergeBestTwoCU) ?
409
splitCU : splitPlusMergeBestTwoCU;
411
if (splitCU > bestHostCU) {
412
bestHostCU = splitCU;
413
finalBestHost = this;
414
// tempChildren.removeElementAt(tempChildren.size()-1);
416
// restore the status quo
417
m_children = saveStatusQuo;
421
if (finalBestHost != this) {
422
// can commit the instance to the set of instances at this node
423
m_clusterInstances.add(newInstance);
428
if (finalBestHost == merged) {
430
m_children.removeElementAt(m_children.indexOf(a));
431
m_children.removeElementAt(m_children.indexOf(b));
432
m_children.addElement(merged);
435
if (finalBestHost == newLeaf) {
436
finalBestHost = new CNode(m_numAttributes);
437
m_children.addElement(finalBestHost);
440
if (bestHostCU < m_cutoff) {
441
if (finalBestHost == this) {
442
// splitting was the best, but since we are cutting all children
443
// recursion is aborted and we still need to add the instance
444
// to the set of instances at this node
445
m_clusterInstances.add(newInstance);
448
finalBestHost = null;
451
if (finalBestHost == this) {
452
// splitting is still the best, so downdate the stats as
453
// we'll be recursively calling on this node
454
updateStats(newInstance, true);
457
return finalBestHost;
461
* Adds the supplied node as a child of this node. All of the child's
462
* instances are added to this nodes instances
464
* @param child the child to add
466
protected void addChildNode(CNode child) {
467
for (int i = 0; i < child.m_clusterInstances.numInstances(); i++) {
468
Instance temp = child.m_clusterInstances.instance(i);
469
m_clusterInstances.add(temp);
470
updateStats(temp, false);
473
if (m_children == null) {
474
m_children = new FastVector();
476
m_children.addElement(child);
480
* Computes the utility of all children with respect to this node
482
* @return the category utility of the children with respect to this node.
483
* @throws Exception if there are no children
485
protected double categoryUtility() throws Exception {
487
if (m_children == null) {
488
throw new Exception("categoryUtility: No children!");
493
for (int i = 0; i < m_children.size(); i++) {
494
CNode child = (CNode) m_children.elementAt(i);
495
totalCU += categoryUtilityChild(child);
498
totalCU /= (double)m_children.size();
503
* Computes the utility of a single child with respect to this node
505
* @param child the child for which to compute the utility
506
* @return the utility of the child with respect to this node
507
* @throws Exception if something goes wrong
509
protected double categoryUtilityChild(CNode child) throws Exception {
512
for (int i = 0; i < m_numAttributes; i++) {
513
if (m_clusterInstances.attribute(i).isNominal()) {
515
j < m_clusterInstances.attribute(i).numValues(); j++) {
516
double x = child.getProbability(i, j);
517
double y = getProbability(i, j);
518
sum += (x * x) - (y * y);
522
sum += ((m_normal / child.getStandardDev(i)) -
523
(m_normal / getStandardDev(i)));
527
return (child.m_totalInstances / m_totalInstances) * sum;
531
* Returns the probability of a value of a nominal attribute in this node
533
* @param attIndex the index of the attribute
534
* @param valueIndex the index of the value of the attribute
535
* @return the probability
536
* @throws Exception if the requested attribute is not nominal
538
protected double getProbability(int attIndex, int valueIndex)
541
if (!m_clusterInstances.attribute(attIndex).isNominal()) {
542
throw new Exception("getProbability: attribute is not nominal");
545
if (m_attStats[attIndex].totalCount <= 0) {
549
return (double) m_attStats[attIndex].nominalCounts[valueIndex] /
550
(double) m_attStats[attIndex].totalCount;
554
* Returns the standard deviation of a numeric attribute
556
* @param attIndex the index of the attribute
557
* @return the standard deviation
558
* @throws Exception if an error occurs
560
protected double getStandardDev(int attIndex) throws Exception {
561
if (!m_clusterInstances.attribute(attIndex).isNumeric()) {
562
throw new Exception("getStandardDev: attribute is not numeric");
565
m_attStats[attIndex].numericStats.calculateDerived();
566
double stdDev = m_attStats[attIndex].numericStats.stdDev;
567
if (Double.isNaN(stdDev) || Double.isInfinite(stdDev)) {
571
return Math.max(m_acuity, stdDev);
575
* Update attribute stats using the supplied instance.
577
* @param updateInstance the instance for updating
578
* @param delete true if the values of the supplied instance are
579
* to be removed from the statistics
581
protected void updateStats(Instance updateInstance,
584
if (m_attStats == null) {
585
m_attStats = new AttributeStats[m_numAttributes];
586
for (int i = 0; i < m_numAttributes; i++) {
587
m_attStats[i] = new AttributeStats();
588
if (m_clusterInstances.attribute(i).isNominal()) {
589
m_attStats[i].nominalCounts =
590
new int [m_clusterInstances.attribute(i).numValues()];
592
m_attStats[i].numericStats = new Stats();
596
for (int i = 0; i < m_numAttributes; i++) {
597
if (!updateInstance.isMissing(i)) {
598
double value = updateInstance.value(i);
599
if (m_clusterInstances.attribute(i).isNominal()) {
600
m_attStats[i].nominalCounts[(int)value] += (delete) ?
601
(-1.0 * updateInstance.weight()) :
602
updateInstance.weight();
603
m_attStats[i].totalCount += (delete) ?
604
(-1.0 * updateInstance.weight()) :
605
updateInstance.weight();
608
m_attStats[i].numericStats.subtract(value,
609
updateInstance.weight());
611
m_attStats[i].numericStats.add(value, updateInstance.weight());
616
m_totalInstances += (delete)
617
? (-1.0 * updateInstance.weight())
618
: (updateInstance.weight());
622
* Recursively assigns numbers to the nodes in the tree.
624
* @param cl_num an <code>int[]</code> value
625
* @throws Exception if an error occurs
627
private void assignClusterNums(int[] cl_num) throws Exception {
628
if (m_children != null && m_children.size() < 2) {
629
throw new Exception("assignClusterNums: tree not built correctly!");
632
m_clusterNum = cl_num[0];
634
if (m_children != null) {
635
for (int i = 0; i < m_children.size(); i++) {
636
CNode child = (CNode) m_children.elementAt(i);
637
child.assignClusterNums(cl_num);
643
* Recursively build a string representation of the Cobweb tree
645
* @param depth depth of this node in the tree
646
* @param text holds the string representation
648
protected void dumpTree(int depth, StringBuffer text) {
651
determineNumberOfClusters();
653
if (m_children == null) {
655
for (int j = 0; j < depth; j++) {
658
text.append("leaf "+m_clusterNum+" ["
659
+m_clusterInstances.numInstances()+"]");
661
for (int i = 0; i < m_children.size(); i++) {
663
for (int j = 0; j < depth; j++) {
666
text.append("node "+m_clusterNum+" ["
667
+m_clusterInstances.numInstances()
669
((CNode) m_children.elementAt(i)).dumpTree(depth+1, text);
675
* Returns the instances at this node as a string. Appends the cluster
676
* number of the child that each instance belongs to.
678
* @return a <code>String</code> value
679
* @throws Exception if an error occurs
681
protected String dumpData() throws Exception {
682
if (m_children == null) {
683
return m_clusterInstances.toString();
686
// construct instances string with cluster numbers attached
687
CNode tempNode = new CNode(m_numAttributes);
688
tempNode.m_clusterInstances = new Instances(m_clusterInstances, 1);
689
for (int i = 0; i < m_children.size(); i++) {
690
tempNode.addChildNode((CNode)m_children.elementAt(i));
692
Instances tempInst = tempNode.m_clusterInstances;
696
af.setAttributeName("Cluster");
698
for (int i = 0; i < m_children.size(); i++) {
699
CNode temp = (CNode)m_children.elementAt(i);
700
labels += ("C"+temp.m_clusterNum);
701
if (i < m_children.size()-1) {
705
af.setNominalLabels(labels);
706
af.setInputFormat(tempInst);
707
tempInst = Filter.useFilter(tempInst, af);
708
tempInst.setRelationName("Cluster "+m_clusterNum);
711
for (int i = 0; i < m_children.size(); i++) {
712
CNode temp = (CNode)m_children.elementAt(i);
713
for (int j = 0; j < temp.m_clusterInstances.numInstances(); j++) {
714
tempInst.instance(z).setValue(m_numAttributes, (double)i);
718
return tempInst.toString();
722
* Recursively generate the graph string for the Cobweb tree.
724
* @param text holds the graph string
725
* @throws Exception if generation fails
727
protected void graphTree(StringBuffer text) throws Exception {
729
text.append("N"+m_clusterNum
730
+ " [label=\""+((m_children == null)
733
+" ("+m_clusterInstances.numInstances()
735
+((m_children == null)
736
? "shape=box style=filled " : "")
738
? "data =\n"+dumpData() +"\n,\n"
741
if (m_children != null) {
742
for (int i = 0; i < m_children.size(); i++) {
743
CNode temp = (CNode)m_children.elementAt(i);
744
text.append("N"+m_clusterNum
746
+"N" + temp.m_clusterNum
750
for (int i = 0; i < m_children.size(); i++) {
751
CNode temp = (CNode)m_children.elementAt(i);
752
temp.graphTree(text);
761
protected static final double m_normal = 1.0/(2 * Math.sqrt(Math.PI));
764
* Acuity (minimum standard deviation).
766
protected double m_acuity = 1.0;
769
* Cutoff (minimum category utility).
771
protected double m_cutoff = 0.01 * Cobweb.m_normal;
774
* Holds the root of the Cobweb tree.
776
protected CNode m_cobwebTree = null;
779
* Number of clusters (nodes in the tree). Must never be queried directly,
780
* only via the method numberOfClusters(). Otherwise it's not guaranteed that
781
* it contains the correct value.
783
* @see #numberOfClusters()
784
* @see #m_numberOfClustersDetermined
786
protected int m_numberOfClusters = -1;
788
/** whether the number of clusters was already determined */
789
protected boolean m_numberOfClustersDetermined = false;
791
/** the number of splits that happened */
792
protected int m_numberSplits;
794
/** the number of merges that happened */
795
protected int m_numberMerges;
798
* Output instances in graph representation of Cobweb tree (Allows
799
* instances at nodes in the tree to be visualized in the Explorer).
801
protected boolean m_saveInstances = false;
804
* default constructor
810
setSeed(m_SeedDefault);
814
* Returns a string describing this clusterer
815
* @return a description of the evaluator suitable for
816
* displaying in the explorer/experimenter gui
818
public String globalInfo() {
820
"Class implementing the Cobweb and Classit clustering algorithms.\n\n"
821
+ "Note: the application of node operators (merging, splitting etc.) in "
822
+ "terms of ordering and priority differs (and is somewhat ambiguous) "
823
+ "between the original Cobweb and Classit papers. This algorithm always "
824
+ "compares the best host, adding a new leaf, merging the two best hosts, "
825
+ "and splitting the best host when considering where to place a new "
827
+ "For more information see:\n\n"
828
+ getTechnicalInformation().toString();
832
* Returns an instance of a TechnicalInformation object, containing
833
* detailed information about the technical background of this class,
834
* e.g., paper reference or book this class is based on.
836
* @return the technical information about this class
838
public TechnicalInformation getTechnicalInformation() {
839
TechnicalInformation result;
840
TechnicalInformation additional;
842
result = new TechnicalInformation(Type.ARTICLE);
843
result.setValue(Field.AUTHOR, "D. Fisher");
844
result.setValue(Field.YEAR, "1987");
845
result.setValue(Field.TITLE, "Knowledge acquisition via incremental conceptual clustering");
846
result.setValue(Field.JOURNAL, "Machine Learning");
847
result.setValue(Field.VOLUME, "2");
848
result.setValue(Field.NUMBER, "2");
849
result.setValue(Field.PAGES, "139-172");
851
additional = result.add(Type.ARTICLE);
852
additional.setValue(Field.AUTHOR, "J. H. Gennari and P. Langley and D. Fisher");
853
additional.setValue(Field.YEAR, "1990");
854
additional.setValue(Field.TITLE, "Models of incremental concept formation");
855
additional.setValue(Field.JOURNAL, "Artificial Intelligence");
856
additional.setValue(Field.VOLUME, "40");
857
additional.setValue(Field.PAGES, "11-61");
863
* Returns default capabilities of the clusterer.
865
* @return the capabilities of this clusterer
867
public Capabilities getCapabilities() {
868
Capabilities result = super.getCapabilities();
871
result.enable(Capability.NOMINAL_ATTRIBUTES);
872
result.enable(Capability.NUMERIC_ATTRIBUTES);
873
result.enable(Capability.DATE_ATTRIBUTES);
874
result.enable(Capability.MISSING_VALUES);
877
result.setMinimumNumberInstances(0);
883
* Builds the clusterer.
885
* @param data the training instances.
886
* @throws Exception if something goes wrong.
888
public void buildClusterer(Instances data) throws Exception {
889
m_numberOfClusters = -1;
894
// can clusterer handle the data?
895
getCapabilities().testWithFail(data);
897
// randomize the instances
898
data = new Instances(data);
899
data.randomize(new Random(getSeed()));
901
for (int i = 0; i < data.numInstances(); i++) {
902
updateClusterer(data.instance(i));
909
* Singals the end of the updating.
911
public void updateFinished() {
912
determineNumberOfClusters();
916
* Classifies a given instance.
918
* @param instance the instance to be assigned to a cluster
919
* @return the number of the assigned cluster as an interger
920
* if the class is enumerated, otherwise the predicted value
921
* @throws Exception if instance could not be classified
924
public int clusterInstance(Instance instance) throws Exception {
925
CNode host = m_cobwebTree;
928
determineNumberOfClusters();
931
if (host.m_children == null) {
936
host.updateStats(instance, false);
937
temp = host.findHost(instance, true);
938
host.updateStats(instance, true);
943
} while (temp != null);
945
return host.m_clusterNum;
949
* determines the number of clusters if necessary
951
* @see #m_numberOfClusters
952
* @see #m_numberOfClustersDetermined
954
protected void determineNumberOfClusters() {
955
if ( !m_numberOfClustersDetermined
956
&& (m_cobwebTree != null) ) {
957
int[] numClusts = new int [1];
960
m_cobwebTree.assignClusterNums(numClusts);
962
catch (Exception e) {
966
m_numberOfClusters = numClusts[0];
968
m_numberOfClustersDetermined = true;
973
* Returns the number of clusters.
975
* @return the number of clusters
977
public int numberOfClusters() {
978
determineNumberOfClusters();
979
return m_numberOfClusters;
983
* Adds an instance to the clusterer.
985
* @param newInstance the instance to be added
986
* @throws Exception if something goes wrong
988
public void updateClusterer(Instance newInstance) throws Exception {
989
m_numberOfClustersDetermined = false;
991
if (m_cobwebTree == null) {
992
m_cobwebTree = new CNode(newInstance.numAttributes(), newInstance);
994
m_cobwebTree.addInstance(newInstance);
999
* Adds an instance to the Cobweb tree.
1001
* @param newInstance the instance to be added
1002
* @throws Exception if something goes wrong
1003
* @deprecated updateClusterer(Instance) should be used instead
1004
* @see #updateClusterer(Instance)
1006
public void addInstance(Instance newInstance) throws Exception {
1007
updateClusterer(newInstance);
1011
* Returns an enumeration describing the available options.
1013
* @return an enumeration of all the available options.
1015
public Enumeration listOptions() {
1016
Vector result = new Vector();
1018
result.addElement(new Option(
1021
"A", 1,"-A <acuity>"));
1023
result.addElement(new Option(
1025
+"\t(default=0.002)",
1026
"C", 1,"-C <cutoff>"));
1028
Enumeration en = super.listOptions();
1029
while (en.hasMoreElements())
1030
result.addElement(en.nextElement());
1032
return result.elements();
1036
* Parses a given list of options. <p/>
1038
<!-- options-start -->
1039
* Valid options are: <p/>
1041
* <pre> -A <acuity>
1043
* (default=1.0)</pre>
1045
* <pre> -C <cutoff>
1047
* (default=0.002)</pre>
1049
* <pre> -S <num>
1050
* Random number seed.
1051
* (default 42)</pre>
1053
<!-- options-end -->
1055
* @param options the list of options as an array of strings
1056
* @throws Exception if an option is not supported
1058
public void setOptions(String[] options) throws Exception {
1059
String optionString;
1061
optionString = Utils.getOption('A', options);
1062
if (optionString.length() != 0) {
1063
Double temp = new Double(optionString);
1064
setAcuity(temp.doubleValue());
1069
optionString = Utils.getOption('C', options);
1070
if (optionString.length() != 0) {
1071
Double temp = new Double(optionString);
1072
setCutoff(temp.doubleValue());
1075
m_cutoff = 0.01 * Cobweb.m_normal;
1078
super.setOptions(options);
1082
* Returns the tip text for this property
1083
* @return tip text for this property suitable for
1084
* displaying in the explorer/experimenter gui
1086
public String acuityTipText() {
1087
return "set the minimum standard deviation for numeric attributes";
1092
* @param a the acuity value
1094
public void setAcuity(double a) {
1099
* get the acuity value
1100
* @return the acuity
1102
public double getAcuity() {
1107
* Returns the tip text for this property
1108
* @return tip text for this property suitable for
1109
* displaying in the explorer/experimenter gui
1111
public String cutoffTipText() {
1112
return "set the category utility threshold by which to prune nodes";
1117
* @param c the cutof
1119
public void setCutoff(double c) {
1125
* @return the cutoff
1127
public double getCutoff() {
1132
* Returns the tip text for this property
1133
* @return tip text for this property suitable for
1134
* displaying in the explorer/experimenter gui
1136
public String saveInstanceDataTipText() {
1137
return "save instance information for visualization purposes";
1141
* Get the value of saveInstances.
1143
* @return Value of saveInstances.
1145
public boolean getSaveInstanceData() {
1147
return m_saveInstances;
1151
* Set the value of saveInstances.
1153
* @param newsaveInstances Value to assign to saveInstances.
1155
public void setSaveInstanceData(boolean newsaveInstances) {
1157
m_saveInstances = newsaveInstances;
1161
* Gets the current settings of Cobweb.
1163
* @return an array of strings suitable for passing to setOptions()
1165
public String[] getOptions() {
1167
Vector<String> result;
1170
result = new Vector<String>();
1173
result.add("" + m_acuity);
1175
result.add("" + m_cutoff);
1177
options = super.getOptions();
1178
for (i = 0; i < options.length; i++)
1179
result.add(options[i]);
1181
return result.toArray(new String[result.size()]);
1185
* Returns a description of the clusterer as a string.
1187
* @return a string describing the clusterer.
1189
public String toString() {
1190
StringBuffer text = new StringBuffer();
1191
if (m_cobwebTree == null) {
1192
return "Cobweb hasn't been built yet!";
1195
m_cobwebTree.dumpTree(0, text);
1196
return "Number of merges: "
1197
+ m_numberMerges+"\nNumber of splits: "
1198
+ m_numberSplits+"\nNumber of clusters: "
1199
+ numberOfClusters() +"\n"+text.toString()+"\n\n";
1205
* Returns the type of graphs this class
1207
* @return Drawable.TREE
1209
public int graphType() {
1210
return Drawable.TREE;
1214
* Generates the graph string of the Cobweb tree
1216
* @return a <code>String</code> value
1217
* @throws Exception if an error occurs
1219
public String graph() throws Exception {
1220
StringBuffer text = new StringBuffer();
1222
text.append("digraph CobwebTree {\n");
1223
m_cobwebTree.graphTree(text);
1225
return text.toString();
1231
* @param argv the commandline options
1233
public static void main(String[] argv) {
1234
runClusterer(new Cobweb(), argv);