2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
* SubspaceClusterDefinition.java
19
* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
23
package weka.datagenerators.clusterers;
25
import weka.core.Option;
26
import weka.core.Range;
27
import weka.core.SelectedTag;
28
import weka.core.Utils;
29
import weka.datagenerators.ClusterDefinition;
30
import weka.datagenerators.ClusterGenerator;
32
import java.util.Enumeration;
33
import java.util.Random;
34
import java.util.StringTokenizer;
35
import java.util.Vector;
38
<!-- globalinfo-start -->
39
* A single cluster for the SubspaceCluster datagenerator
41
<!-- globalinfo-end -->
43
<!-- options-start -->
44
* Valid options are: <p/>
46
* <pre> -A <range>
47
* Generates randomly distributed instances in the cluster.</pre>
49
* <pre> -U <range>
50
* Generates uniformly distributed instances in the cluster.</pre>
52
* <pre> -G <range>
53
* Generates gaussian distributed instances in the cluster.</pre>
55
* <pre> -D <num>,<num>
56
* The attribute min/max (-A and -U) or mean/stddev (-G) for
59
* <pre> -N <num>..<num>
60
* The range of number of instances per cluster (default 1..50).</pre>
63
* Uses integer instead of continuous values (default continuous).</pre>
67
* @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
68
* @author FracPete (fracpete at waikato dot ac dot nz)
69
* @version $Revision: 1.4 $
70
* @see SubspaceCluster
72
public class SubspaceClusterDefinition
73
extends ClusterDefinition {
75
/** for serialization */
76
static final long serialVersionUID = 3135678125044007231L;
79
protected int m_clustertype;
81
/** cluster subtypes */
82
protected int m_clustersubtype;
84
/** number of attributes the cluster is defined for */
85
protected int m_numClusterAttributes;
87
/** number of instances for this cluster */
88
protected int m_numInstances;
90
/** minimal number of instances for this cluster */
91
protected int m_MinInstNum;
93
/** maximal number of instances for this cluster */
94
protected int m_MaxInstNum;
96
/** range of attributes */
97
protected Range m_AttrIndexRange;
99
/** attributes of this cluster */
100
protected boolean[] m_attributes;
102
/** global indices of the attributes of the cluster */
103
protected int[] m_attrIndices;
105
/** ranges of each attribute (min); not used if gaussian */
106
protected double[] m_minValue;
108
/** ranges of each attribute (max); not used if gaussian */
109
protected double[] m_maxValue;
111
/** mean ; only used if gaussian */
112
protected double[] m_meanValue;
114
/** standarddev; only used if gaussian */
115
protected double[] m_stddevValue;
118
* initializes the cluster, without a parent cluster (necessary for GOE)
120
public SubspaceClusterDefinition() {
125
* initializes the cluster with default values
127
* @param parent the datagenerator this cluster belongs to
129
public SubspaceClusterDefinition(ClusterGenerator parent) {
134
* sets the default values
136
* @throws Exception if setting of defaults fails
138
protected void setDefaults() throws Exception {
139
setClusterType(defaultClusterType());
140
setClusterSubType(defaultClusterSubType());
141
setMinInstNum(defaultMinInstNum());
142
setMaxInstNum(defaultMaxInstNum());
143
setAttrIndexRange(defaultAttrIndexRange());
144
m_numClusterAttributes = 1;
145
setValuesList(defaultValuesList());
150
* Returns a string describing this data generator.
152
* @return a description of the data generator suitable for
153
* displaying in the explorer/experimenter gui
155
public String globalInfo() {
156
return "A single cluster for the SubspaceCluster datagenerator";
159
* Returns an enumeration describing the available options.
161
* @return an enumeration of all the available options
163
public Enumeration listOptions() {
164
Vector result = new Vector();
166
result.addElement(new Option(
167
"\tGenerates randomly distributed instances in the cluster.",
168
"A", 1, "-A <range>"));
170
result.addElement(new Option(
171
"\tGenerates uniformly distributed instances in the cluster.",
172
"U", 1, "-U <range>"));
174
result.addElement(new Option(
175
"\tGenerates gaussian distributed instances in the cluster.",
176
"G", 1, "-G <range>"));
178
result.addElement(new Option(
179
"\tThe attribute min/max (-A and -U) or mean/stddev (-G) for\n"
181
"D", 1, "-D <num>,<num>"));
183
result.addElement(new Option(
184
"\tThe range of number of instances per cluster (default "
185
+ defaultMinInstNum() + ".." + defaultMaxInstNum() + ").",
186
"N", 1, "-N <num>..<num>"));
188
result.addElement(new Option(
189
"\tUses integer instead of continuous values (default continuous).",
192
return result.elements();
196
* Parses a list of options for this object. <p/>
198
<!-- options-start -->
199
* Valid options are: <p/>
201
* <pre> -A <range>
202
* Generates randomly distributed instances in the cluster.</pre>
204
* <pre> -U <range>
205
* Generates uniformly distributed instances in the cluster.</pre>
207
* <pre> -G <range>
208
* Generates gaussian distributed instances in the cluster.</pre>
210
* <pre> -D <num>,<num>
211
* The attribute min/max (-A and -U) or mean/stddev (-G) for
214
* <pre> -N <num>..<num>
215
* The range of number of instances per cluster (default 1..50).</pre>
218
* Uses integer instead of continuous values (default continuous).</pre>
222
* @param options the list of options as an array of strings
223
* @throws Exception if an option is not supported
225
public void setOptions(String[] options) throws Exception {
233
tmpStr = Utils.getOption('A', options);
234
if (tmpStr.length() != 0) {
238
SubspaceCluster.UNIFORM_RANDOM, SubspaceCluster.TAGS_CLUSTERTYPE));
242
tmpStr = Utils.getOption('U', options);
243
if (tmpStr.length() != 0) {
247
SubspaceCluster.TOTAL_UNIFORM, SubspaceCluster.TAGS_CLUSTERTYPE));
251
tmpStr = Utils.getOption('G', options);
252
if (tmpStr.length() != 0) {
256
SubspaceCluster.GAUSSIAN, SubspaceCluster.TAGS_CLUSTERTYPE));
260
// default is uniform/random
264
SubspaceCluster.UNIFORM_RANDOM, SubspaceCluster.TAGS_CLUSTERTYPE));
265
else if (typeCount > 1)
266
throw new Exception("Only one cluster type can be specified!");
268
setAttrIndexRange(fromToStr);
270
tmpStr = Utils.getOption('D', options);
272
if (tmpStr.length() != 0)
273
setMeanStddev(tmpStr);
275
setMeanStddev(defaultMeanStddev());
278
if (tmpStr.length() != 0)
279
setValuesList(tmpStr);
281
setValuesList(defaultValuesList());
284
tmpStr = Utils.getOption('N', options);
285
if (tmpStr.length() != 0)
288
setInstNums(defaultMinInstNum() + ".." + defaultMaxInstNum());
290
if (Utils.getFlag('I', options))
293
SubspaceCluster.INTEGER, SubspaceCluster.TAGS_CLUSTERSUBTYPE));
297
SubspaceCluster.CONTINUOUS, SubspaceCluster.TAGS_CLUSTERSUBTYPE));
301
* Gets the current settings of this cluster definition.
303
* @return an array of strings suitable for passing to setOptions
305
public String[] getOptions() {
308
result = new Vector();
312
result.add("" + getAttrIndexRange());
314
result.add("" + getValuesList());
316
else if (isUniform()) {
318
result.add("" + getAttrIndexRange());
320
result.add("" + getValuesList());
322
else if (isGaussian()) {
324
result.add("" + getAttrIndexRange());
326
result.add("" + getMeanStddev());
330
result.add("" + getInstNums());
332
if (m_clustersubtype == SubspaceCluster.INTEGER)
335
return (String[]) result.toArray(new String[result.size()]);
339
* Make a string from the attributes list.
341
* @return the attributes as string
343
public String attributesToString() {
344
StringBuffer text = new StringBuffer();
346
for (int i = 0; i < m_attributes.length; i++) {
347
if (m_attributes[i]) {
349
text.append(" Attribute: " + i);
350
text.append(" Mean: "+ m_meanValue[j]);
351
text.append(" StdDev: "+m_stddevValue[j]+"\n%");
354
text.append(" Attribute: " + i);
355
text.append(" Range: "+ m_minValue[j]);
356
text.append(" - "+m_maxValue[j]+"\n%");
361
return text.toString();
365
* Make a string from the cluster features.
367
* @return the cluster features as string
369
public String toString() {
370
StringBuffer text = new StringBuffer();
371
text.append("attributes " + attributesToString() + "\n");
372
text.append("number of instances " + getInstNums());
373
return text.toString();
377
* sets the parent datagenerator this cluster belongs to
378
* @param parent the parent datagenerator
380
public void setParent(SubspaceCluster parent) {
381
super.setParent(parent);
382
m_AttrIndexRange.setUpper(getParent().getNumAttributes());
386
* returns the default attribute index range
388
* @return the default attribute index range
390
protected String defaultAttrIndexRange() {
395
* Sets which attributes are used in the cluster
396
* attributes among the selection will be discretized.
398
* @param rangeList a string representing the list of attributes. Since
399
* the string will typically come from a user, attributes are indexed from
401
* eg: first-3,5,6-last
403
public void setAttrIndexRange(String rangeList) {
404
m_numClusterAttributes = 0;
405
if (m_AttrIndexRange == null)
406
m_AttrIndexRange = new Range();
407
m_AttrIndexRange.setRanges(rangeList);
409
if (getParent() != null) {
410
m_AttrIndexRange.setUpper(getParent().getNumAttributes());
411
m_attributes = new boolean [getParent().getNumAttributes()];
412
for (int i = 0; i < m_attributes.length; i++) {
413
if (m_AttrIndexRange.isInRange(i)) {
414
m_numClusterAttributes++;
415
m_attributes[i] = true;
418
m_attributes[i] = false;
422
//store translation from attr in cluster to attr in whole dataset
423
m_attrIndices = new int[m_numClusterAttributes];
425
for (int i = 0; i < m_attributes.length; i++) {
426
if (m_AttrIndexRange.isInRange(i)) {
428
m_attrIndices[clusterI] = i;
435
* returns the attribute range(s).
437
* @return the attribute range(s).
439
public String getAttrIndexRange() {
440
return m_AttrIndexRange.getRanges();
444
* Returns the tip text for this property
445
* @return tip text for this property suitable for
446
* displaying in the explorer/experimenter gui
448
public String attrIndexRangeTipText() {
449
return "The attribute range(s).";
452
public boolean[] getAttributes() {
456
public double[] getMinValue() {
460
public double[] getMaxValue() {
464
public double[] getMeanValue() {
468
public double[] getStddevValue() {
469
return m_stddevValue;
472
public int getNumInstances () {
473
return m_numInstances;
477
* returns the default cluster type
479
* @return the default cluster type
481
protected SelectedTag defaultClusterType() {
482
return new SelectedTag(
483
SubspaceCluster.UNIFORM_RANDOM, SubspaceCluster.TAGS_CLUSTERTYPE);
487
* Gets the cluster type.
489
* @return the cluster type
490
* @see SubspaceCluster#TAGS_CLUSTERTYPE
492
public SelectedTag getClusterType() {
493
return new SelectedTag(m_clustertype, SubspaceCluster.TAGS_CLUSTERTYPE);
497
* Sets the cluster type.
499
* @param value the new cluster type.
500
* @see SubspaceCluster#TAGS_CLUSTERTYPE
502
public void setClusterType(SelectedTag value) {
503
if (value.getTags() == SubspaceCluster.TAGS_CLUSTERTYPE)
504
m_clustertype = value.getSelectedTag().getID();
508
* Returns the tip text for this property
509
* @return tip text for this property suitable for
510
* displaying in the explorer/experimenter gui
512
public String clusterTypeTipText() {
513
return "The type of cluster to use.";
517
* returns the default cluster sub type
519
* @return the default cluster sub type
521
protected SelectedTag defaultClusterSubType() {
522
return new SelectedTag(
523
SubspaceCluster.CONTINUOUS, SubspaceCluster.TAGS_CLUSTERSUBTYPE);
527
* Gets the cluster sub type.
529
* @return the cluster sub type
530
* @see SubspaceCluster#TAGS_CLUSTERSUBTYPE
532
public SelectedTag getClusterSubType() {
533
return new SelectedTag(
534
m_clustersubtype, SubspaceCluster.TAGS_CLUSTERSUBTYPE);
538
* Sets the cluster sub type.
540
* @param value the new cluster sub type.
541
* @see SubspaceCluster#TAGS_CLUSTERSUBTYPE
543
public void setClusterSubType(SelectedTag value) {
544
if (value.getTags() == SubspaceCluster.TAGS_CLUSTERSUBTYPE)
545
m_clustersubtype = value.getSelectedTag().getID();
549
* Returns the tip text for this property
550
* @return tip text for this property suitable for
551
* displaying in the explorer/experimenter gui
553
public String clusterSubTypeTipText() {
554
return "The sub-type of cluster to use.";
558
* checks, whether cluster type is random
560
* @return true if cluster type is random
562
public boolean isRandom() {
563
return (m_clustertype == SubspaceCluster.UNIFORM_RANDOM);
567
* checks, whether cluster type is uniform
569
* @return true if cluster type is uniform
571
public boolean isUniform() {
572
return (m_clustertype == SubspaceCluster.TOTAL_UNIFORM);
576
* checks, whether cluster type is gaussian
578
* @return true if cluster type is gaussian
580
public boolean isGaussian() {
581
return (m_clustertype == SubspaceCluster.GAUSSIAN);
585
* checks, whether cluster sub type is continuous
587
* @return true if cluster sub type is continuous
589
public boolean isContinuous() {
590
return (m_clustertype == SubspaceCluster.CONTINUOUS);
594
* checks, whether cluster sub type is integer
596
* @return true if cluster sub type is integer
598
public boolean isInteger() {
599
return (m_clustertype == SubspaceCluster.INTEGER);
603
* Sets the upper and lower boundary for instances for this cluster.
605
* @param fromTo the string containing the upper and lower boundary for
606
* instances per cluster separated by ..
608
protected void setInstNums(String fromTo) {
609
int i = fromTo.indexOf("..");
612
String from = fromTo.substring(0, i);
613
m_MinInstNum = Integer.parseInt(from);
614
if (i < fromTo.length()) {
615
String to = fromTo.substring(i + 2, fromTo.length());
616
m_MaxInstNum = Integer.parseInt(to);
619
m_MaxInstNum = m_MinInstNum;
624
* Get a string with the upper and lower boundary for the
625
* number of instances for this cluster.
627
* @return the string containing the upper and lower boundary for
628
* instances per cluster separated by ..
630
protected String getInstNums() {
631
String text = new String(""+m_MinInstNum+".."+m_MaxInstNum);
636
* Returns the tip text for this property
637
* @return tip text for this property suitable for
638
* displaying in the explorer/experimenter gui
640
protected String instNumsTipText() {
641
return "The lower and upper boundary for the number of instances in this cluster.";
645
* returns the default min number of instances
647
* @return the default min number of instances
649
protected int defaultMinInstNum() {
654
* Gets the lower boundary for instances per cluster.
656
* @return the lower boundary for instances per cluster
658
public int getMinInstNum() {
663
* Sets the lower boundary for instances per cluster.
665
* @param newMinInstNum new lower boundary for instances per cluster
667
public void setMinInstNum(int newMinInstNum) {
668
m_MinInstNum = newMinInstNum;
672
* Returns the tip text for this property
674
* @return tip text for this property suitable for
675
* displaying in the explorer/experimenter gui
677
public String minInstNumTipText() {
678
return "The lower boundary for instances per cluster.";
682
* returns the default max number of instances
684
* @return the default max number of instances
686
protected int defaultMaxInstNum() {
691
* Gets the upper boundary for instances per cluster.
693
* @return the upper boundary for instances per cluster
695
public int getMaxInstNum() {
700
* Sets the upper boundary for instances per cluster.
702
* @param newMaxInstNum new upper boundary for instances per cluster
704
public void setMaxInstNum(int newMaxInstNum) {
705
m_MaxInstNum = newMaxInstNum;
709
* Returns the tip text for this property
711
* @return tip text for this property suitable for
712
* displaying in the explorer/experimenter gui
714
public String maxInstNumTipText() {
715
return "The upper boundary for instances per cluster.";
719
* Sets the real number of instances for this cluster.
721
* @param r random number generator
723
public void setNumInstances(Random r) {
724
if (m_MaxInstNum > m_MinInstNum)
725
m_numInstances = (int)(r.nextDouble()
726
* (m_MaxInstNum - m_MinInstNum) + m_MinInstNum);
728
m_numInstances = m_MinInstNum;
732
* returns the default values list
734
* @return the default values list
736
protected String defaultValuesList() {
741
* Sets the ranges for each attribute.
743
* @param fromToList the comma-separated list of values, alternating min and
744
* max, one pair per cluster attribute
745
* @throws Exception if values are not correct in number or value
747
public void setValuesList(String fromToList) throws Exception {
748
m_minValue = new double [m_numClusterAttributes];
749
m_maxValue = new double [m_numClusterAttributes];
750
setValuesList(fromToList, m_minValue, m_maxValue, "D");
751
SubspaceCluster parent = (SubspaceCluster) getParent();
753
for (int i = 0; i < m_numClusterAttributes; i++) {
754
if (m_minValue[i] > m_maxValue[i])
755
throw new Exception("Min must be smaller than max.");
757
if (getParent() != null) {
758
// boolean values are only 0.0 and 1.0
759
if (parent.isBoolean(m_attrIndices[i])) {
760
parent.getNumValues()[m_attrIndices[i]] = 2;
761
if (((m_minValue[i] != 0.0) && (m_minValue[i] != 1.0)) ||
762
((m_maxValue[i] != 0.0) && (m_maxValue[i] != 1.0)))
763
throw new Exception("Ranges for boolean must be 0 or 1 only.");
766
if (parent.isNominal(m_attrIndices[i])) {
767
// nominal values: attributes range might have to be enlarged
768
double rest = m_minValue[i] - Math.rint(m_minValue[i]);
770
throw new Exception(" Ranges for nominal must be integer");
771
rest = m_maxValue[i] - Math.rint(m_maxValue[i]);
773
throw new Exception("Ranges for nominal must be integer");
774
if (m_minValue[i] < 0.0)
775
throw new Exception("Range for nominal must start with number 0.0 or higher");
776
if (m_maxValue[i] + 1 > parent.getNumValues()[m_attrIndices[i]]) {
777
// add new values to attribute
778
// (actual format is not yet defined)
779
parent.getNumValues()[m_attrIndices[i]] = (int)m_maxValue[i] + 1;
787
* returns the range for each attribute as string
789
public String getValuesList() {
795
if (m_minValue != null) {
796
for (i = 0; i < m_minValue.length; i++) {
799
result += "" + m_minValue[i] + "," + m_maxValue[i];
807
* Returns the tip text for this property
808
* @return tip text for this property suitable for
809
* displaying in the explorer/experimenter gui
811
public String valuesListTipText() {
812
return "The range for each each attribute as string.";
816
* returns the default mean/stddev list
818
protected String defaultMeanStddev() {
823
* Sets mean and standarddeviation.
825
* @param meanstddev the string containing the upper and lower boundary for
826
* instances per cluster separated by ..
827
* @throws Exception if values are not correct in number or value
829
public void setMeanStddev(String meanstddev) throws Exception {
830
m_meanValue = new double [m_numClusterAttributes];
831
m_stddevValue = new double [m_numClusterAttributes];
832
setValuesList(meanstddev, m_meanValue, m_stddevValue, "D");
836
* returns the current mean/stddev setup
838
public String getMeanStddev() {
844
if (m_meanValue != null) {
845
for (i = 0; i < m_meanValue.length; i++) {
848
result += "" + m_meanValue[i] + "," + m_stddevValue[i];
856
* Returns the tip text for this property
857
* @return tip text for this property suitable for
858
* displaying in the explorer/experimenter gui
860
public String meanStddevTipText() {
861
return "The mean and stddev, in case of gaussian.";
865
* Sets the ranges for each attribute.
867
* @param fromToList the comma-separated list of values; they are assigned
868
* alternately to <code>first</code> and <code>second</code>
869
* @param first the "from's"
870
* @param second the "to's"
871
* @param optionLetter the option, from which the list came
872
* @throws Exception if values are not correct in number or value
874
public void setValuesList(String fromToList, double[] first, double[] second,
875
String optionLetter) throws Exception {
880
tok = new StringTokenizer(fromToList, ",");
881
if (tok.countTokens() != first.length + second.length)
883
"Wrong number of values for option '-" + optionLetter + "'.");
886
while (tok.hasMoreTokens()) {
887
first[index] = Double.parseDouble(tok.nextToken());
888
second[index] = Double.parseDouble(tok.nextToken());