2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
23
package weka.estimators;
25
import java.io.FileOutputStream;
26
import java.io.PrintWriter;
31
* Contains static utility functions for Estimators.<p>
33
* @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
34
* @version $Revision: 1.3 $
36
public class EstimatorUtils {
39
* Find the minimum distance between values
40
* @param inst sorted instances, sorted
41
* @param attrIndex index of the attribute, they are sorted after
42
* @return the minimal distance
44
public static double findMinDistance(Instances inst, int attrIndex) {
45
double min = Double.MAX_VALUE;
46
int numInst = inst.numInstances();
48
if (numInst < 2) return min;
50
Instance instance = null;
54
{ instance = inst.instance(begin); }
55
} while (begin < numInst && instance.isMissing(attrIndex));
57
double secondValue = inst.instance(begin).value(attrIndex);
58
for (int i = begin; i < numInst && !inst.instance(i).isMissing(attrIndex); i++) {
59
double firstValue = secondValue;
60
secondValue = inst.instance(i).value(attrIndex);
61
if (secondValue != firstValue) {
62
diff = secondValue - firstValue;
63
if (diff < min && diff > 0.0) {
72
* Find the minimum and the maximum of the attribute and return it in
73
* the last parameter..
74
* @param inst instances used to build the estimator
75
* @param attrIndex index of the attribute
76
* @param minMax the array to return minimum and maximum in
77
* @return number of not missing values
78
* @exception Exception if parameter minMax wasn't initialized properly
80
public static int getMinMax(Instances inst, int attrIndex, double [] minMax)
82
double min = Double.NaN;
83
double max = Double.NaN;
84
Instance instance = null;
85
int numNotMissing = 0;
86
if ((minMax == null) || (minMax.length < 2)) {
87
throw new Exception("Error in Program, privat method getMinMax");
90
Enumeration enumInst = inst.enumerateInstances();
91
if (enumInst.hasMoreElements()) {
93
instance = (Instance) enumInst.nextElement();
94
} while (instance.isMissing(attrIndex) && (enumInst.hasMoreElements()));
96
// add values if not missing
97
if (!instance.isMissing(attrIndex)) {
99
min = instance.value(attrIndex);
100
max = instance.value(attrIndex);
102
while (enumInst.hasMoreElements()) {
103
instance = (Instance) enumInst.nextElement();
104
if (!instance.isMissing(attrIndex)) {
106
if (instance.value(attrIndex) < min) {
107
min = (instance.value(attrIndex));
109
if (instance.value(attrIndex) > max) {
110
max = (instance.value(attrIndex));
118
return numNotMissing;
122
* Returns a dataset that contains all instances of a certain class value.
124
* @param data dataset to select the instances from
125
* @param attrIndex index of the relevant attribute
126
* @param classIndex index of the class attribute
127
* @param classValue the relevant class value
128
* @return a dataset with only
130
public static Vector getInstancesFromClass(Instances data, int attrIndex,
132
double classValue, Instances workData) {
133
//Oops.pln("getInstancesFromClass classValue"+classValue+" workData"+data.numInstances());
134
Vector dataPlusInfo = new Vector(0);
136
int numClassValue = 0;
137
//workData = new Instances(data, 0);
138
for (int i = 0; i < data.numInstances(); i++) {
139
if (!data.instance(i).isMissing(attrIndex)) {
141
if (data.instance(i).value(classIndex) == classValue) {
142
workData.add(data.instance(i));
148
Double alphaFactor = new Double((double)numClassValue/(double)num);
149
dataPlusInfo.add(workData);
150
dataPlusInfo.add(alphaFactor);
156
* Returns a dataset that contains of all instances of a certain class value.
157
* @param data dataset to select the instances from
158
* @param classIndex index of the class attribute
159
* @param classValue the class value
160
* @return a dataset with only instances of one class value
162
public static Instances getInstancesFromClass(Instances data, int classIndex,
164
Instances workData = new Instances(data, 0);
165
for (int i = 0; i < data.numInstances(); i++) {
166
if (data.instance(i).value(classIndex) == classValue) {
167
workData.add(data.instance(i));
177
* Output of an n points of a density curve.
178
* Filename is parameter f + ".curv".
180
* @param f string to build filename
185
* @throws Exception if something goes wrong
187
public static void writeCurve(String f, Estimator est,
188
double min, double max,
189
int numPoints) throws Exception {
191
PrintWriter output = null;
192
StringBuffer text = new StringBuffer("");
194
if (f.length() != 0) {
195
// add attribute indexnumber to filename and extension .hist
196
String name = f + ".curv";
197
output = new PrintWriter(new FileOutputStream(name));
202
double diff = (max - min) / ((double)numPoints - 1.0);
204
text.append("" + min + " " + est.getProbability(min) + " \n");
206
for (double value = min + diff; value < max; value += diff) {
207
text.append("" + value + " " + est.getProbability(value) + " \n");
209
text.append("" + max + " " + est.getProbability(max) + " \n");
210
} catch (Exception ex) {
211
ex.printStackTrace();
212
System.out.println(ex.getMessage());
214
output.println(text.toString());
217
if (output != null) {
223
* Output of an n points of a density curve.
224
* Filename is parameter f + ".curv".
226
* @param f string to build filename
233
* @throws Exception if something goes wrong
235
public static void writeCurve(String f, Estimator est,
238
double min, double max,
239
int numPoints) throws Exception {
241
PrintWriter output = null;
242
StringBuffer text = new StringBuffer("");
244
if (f.length() != 0) {
245
// add attribute indexnumber to filename and extension .hist
246
String name = f + ".curv";
247
output = new PrintWriter(new FileOutputStream(name));
252
double diff = (max - min) / ((double)numPoints - 1.0);
254
text.append("" + min + " " +
255
est.getProbability(min) * classEst.getProbability(classIndex)
258
for (double value = min + diff; value < max; value += diff) {
259
text.append("" + value + " " +
260
est.getProbability(value) * classEst.getProbability(classIndex)
263
text.append("" + max + " " +
264
est.getProbability(max) * classEst.getProbability(classIndex)
266
} catch (Exception ex) {
267
ex.printStackTrace();
268
System.out.println(ex.getMessage());
270
output.println(text.toString());
273
if (output != null) {
280
* Returns a dataset that contains of all instances of a certain value
281
* for the given attribute.
282
* @param data dataset to select the instances from
283
* @param index the index of the attribute
285
* @return a subdataset with only instances of one value for the attribute
287
public static Instances getInstancesFromValue(Instances data, int index,
289
Instances workData = new Instances(data, 0);
290
for (int i = 0; i < data.numInstances(); i++) {
291
if (data.instance(i).value(index) == v) {
292
workData.add(data.instance(i));
300
* Returns a string representing the cutpoints
302
public static String cutpointsToString(double [] cutPoints, boolean [] cutAndLeft) {
303
StringBuffer text = new StringBuffer("");
304
if (cutPoints == null) {
305
text.append("\n# no cutpoints found - attribute \n");
307
text.append("\n#* "+cutPoints.length+" cutpoint(s) -\n");
308
for (int i = 0; i < cutPoints.length; i++) {
309
text.append("# "+cutPoints[i]+" ");
310
text.append(""+cutAndLeft[i]+"\n");
312
text.append("# end\n");
314
return text.toString();