/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/*
 * AbstractClusterer.java
 * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 */
23
package weka.clusterers;
25
import weka.core.Capabilities;
26
import weka.core.CapabilitiesHandler;
27
import weka.core.Instance;
28
import weka.core.Instances;
29
import weka.core.RevisionHandler;
30
import weka.core.SerializedObject;
31
import weka.core.Utils;
32
import weka.core.Capabilities.Capability;
34
import java.io.Serializable;
/**
 * Abstract base class for clusterers.
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 1.1 $
 */
42
public abstract class AbstractClusterer
43
implements Clusterer, Cloneable, Serializable, CapabilitiesHandler, RevisionHandler {
45
/** for serialization */
46
private static final long serialVersionUID = -6099962589663877632L;
53
* Generates a clusterer. Has to initialize all fields of the clusterer
54
* that are not being set via options.
56
* @param data set of instances serving as training data
57
* @exception Exception if the clusterer has not been
58
* generated successfully
60
public abstract void buildClusterer(Instances data) throws Exception;
63
* Classifies a given instance. Either this or distributionForInstance()
64
* needs to be implemented by subclasses.
66
* @param instance the instance to be assigned to a cluster
67
* @return the number of the assigned cluster as an integer
68
* @exception Exception if instance could not be clustered
71
public int clusterInstance(Instance instance) throws Exception {
73
double [] dist = distributionForInstance(instance);
76
throw new Exception("Null distribution predicted");
79
if (Utils.sum(dist) <= 0) {
80
throw new Exception("Unable to cluster instance");
82
return Utils.maxIndex(dist);
86
* Predicts the cluster memberships for a given instance. Either
87
* this or clusterInstance() needs to be implemented by subclasses.
89
* @param instance the instance to be assigned a cluster.
90
* @return an array containing the estimated membership
91
* probabilities of the test instance in each cluster (this
92
* should sum to at most 1)
93
* @exception Exception if distribution could not be
94
* computed successfully
96
public double[] distributionForInstance(Instance instance)
99
double[] d = new double[numberOfClusters()];
101
d[clusterInstance(instance)] = 1.0;
107
* Returns the number of clusters.
109
* @return the number of clusters generated for a training dataset.
110
* @exception Exception if number of clusters could not be returned
113
public abstract int numberOfClusters() throws Exception;
116
* Creates a new instance of a clusterer given it's class name and
117
* (optional) arguments to pass to it's setOptions method. If the
118
* clusterer implements OptionHandler and the options parameter is
119
* non-null, the clusterer will have it's options set.
121
* @param clustererName the fully qualified class name of the clusterer
122
* @param options an array of options suitable for passing to setOptions. May
124
* @return the newly created search object, ready for use.
125
* @exception Exception if the clusterer class name is invalid, or the
126
* options supplied are not acceptable to the clusterer.
128
public static Clusterer forName(String clustererName,
129
String [] options) throws Exception {
130
return (Clusterer)Utils.forName(Clusterer.class,
136
* Creates a deep copy of the given clusterer using serialization.
138
* @param model the clusterer to copy
139
* @return a deep copy of the clusterer
140
* @exception Exception if an error occurs
142
public static Clusterer makeCopy(Clusterer model) throws Exception {
143
return (Clusterer) new SerializedObject(model).getObject();
147
* Creates copies of the current clusterer. Note that this method
148
* now uses Serialization to perform a deep copy, so the Clusterer
149
* object must be fully Serializable. Any currently built model will
150
* now be copied as well.
152
* @param model an example clusterer to copy
153
* @param num the number of clusterer copies to create.
154
* @return an array of clusterers.
155
* @exception Exception if an error occurs
157
public static Clusterer [] makeCopies(Clusterer model,
158
int num) throws Exception {
160
throw new Exception("No model clusterer set");
162
Clusterer [] clusterers = new Clusterer [num];
163
SerializedObject so = new SerializedObject(model);
164
for(int i = 0; i < clusterers.length; i++) {
165
clusterers[i] = (Clusterer) so.getObject();
171
* Returns the Capabilities of this clusterer. Derived classifiers have to
172
* override this method to enable capabilities.
174
* @return the capabilities of this object
177
public Capabilities getCapabilities() {
180
result = new Capabilities(this);
181
result.enable(Capability.NO_CLASS);
187
* runs the clusterer instance with the given options.
189
* @param clusterer the clusterer to run
190
* @param options the commandline options
192
protected static void runClusterer(Clusterer clusterer, String[] options) {
194
System.out.println(ClusterEvaluation.evaluateClusterer(clusterer, options));
196
catch (Exception e) {
197
if ( (e.getMessage() == null)
198
|| ( (e.getMessage() != null)
199
&& (e.getMessage().indexOf("General options") == -1) ) )
202
System.err.println(e.getMessage());