2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
25
import java.io.Serializable;
28
* A utility class that contains summary information on
29
* the values that appear in a dataset for a particular attribute.
31
* @author <a href="mailto:len@reeltwo.com">Len Trigg</a>
32
* @version $Revision: 1.9 $
34
public class AttributeStats
35
implements Serializable {
37
/** Serialization version identifier. */
38
private static final long serialVersionUID = 4434688832743939380L;
40
/** The number of int-like values */
41
public int intCount = 0;
43
/** The number of real-like values (i.e. have a fractional part) */
44
public int realCount = 0;
46
/** The number of missing values */
47
public int missingCount = 0;
49
/** The number of distinct values */
50
public int distinctCount = 0;
52
/** The number of values that only appear once */
53
public int uniqueCount = 0;
55
/**
 * The total number of values (i.e. number of instances). toString() uses
 * this as the denominator of every percentage it prints.
 */
56
public int totalCount = 0;
58
/**
 * Stats on numeric value distributions. May be null: addDistinct() only
 * updates it after a null check.
 */
59
// perhaps Stats should be moved from weka.experiment to weka.core
60
public weka.experiment.Stats numericStats;
62
/**
 * Counts of each nominal value, indexed by the value truncated to int
 * (see addDistinct()). May be null; toString() labels the attribute "Nom"
 * when this array is non-null and "Num" otherwise.
 */
63
public int [] nominalCounts;
66
* Updates the counters for one more observed distinct value.
68
* @param value the value that has just been seen
69
* @param count the number of times the value appeared
71
// Records one distinct attribute value together with the number of times it
// occurred, updating the nominal count table and the numeric statistics when
// they are present.
// NOTE(review): the bare numbers interleaved with the code skip 72-76, 78-81,
// 84 and 88-93, so statements and closing braces appear to be missing from
// this view -- confirm against the complete file before changing logic here.
protected void addDistinct(double value, int count) {
77
// Holds when truncating value to int and widening it back to double gives a
// number that Utils.eq accepts as equal to value, i.e. value has no
// fractional part (presumably Utils.eq compares within a small tolerance --
// the helper is not visible here, confirm).
// NOTE(review): the body of this branch is not visible in this view.
if (Utils.eq(value, (double)((int)value))) {
82
// The per-value table is optional; skip it when no array has been set.
if (nominalCounts != null) {
83
// value, truncated to int, is used directly as the array index. The slot is
// overwritten with count rather than incremented.
nominalCounts[(int)value] = count;
85
// The numeric statistics object is optional as well.
if (numericStats != null) {
86
// Passes the value and its occurrence count to Stats (presumably count acts
// as the multiplicity of the value -- see weka.experiment.Stats, confirm).
numericStats.add(value, count);
87
// Presumably refreshes the statistics derived from the running sums after
// every addition -- confirm against weka.experiment.Stats.
numericStats.calculateDerived();
94
* Returns a human readable representation of this AttributeStats instance.
96
* @return a String representing these AttributeStats.
98
// Builds a small text table: a header row of column titles followed by a row
// of counts and percentages for this attribute.
// NOTE(review): the bare numbers interleaved with the code skip 107, 110-114,
// 120, 124 and 137-139; the declaration of percent, an else and several
// closing braces are not visible in this view -- confirm against the complete
// file before changing logic here.
public String toString() {
100
StringBuffer sb = new StringBuffer();
101
// Header row: Type, Nom, Int, Real, Missing, Unique, Dist.
sb.append(Utils.padLeft("Type", 4)).append(Utils.padLeft("Nom", 5));
102
sb.append(Utils.padLeft("Int", 5)).append(Utils.padLeft("Real", 5));
103
sb.append(Utils.padLeft("Missing", 12));
104
sb.append(Utils.padLeft("Unique", 12));
105
sb.append(Utils.padLeft("Dist", 6));
106
// When a nominal count table exists, add one header column "C[i]" per entry.
if (nominalCounts != null) {
108
for (int i = 0; i < nominalCounts.length; i++) {
109
sb.append(Utils.padLeft("C[" + i + "]", 5));
115
// Share of int-like values, in whole percent. The 100.0 literal forces
// floating-point arithmetic, so totalCount == 0 does not throw; the quotient
// is NaN or infinite and Math.round turns that into 0 or a saturated maximum.
percent = Math.round(100.0 * intCount / totalCount);
116
// "Nom" row: the int-like percentage is printed in the Nom column and the
// Int column shows 0.
if (nominalCounts != null) {
117
sb.append(Utils.padLeft("Nom", 4)).append(' ');
118
sb.append(Utils.padLeft("" + percent, 3)).append("% ");
119
sb.append(Utils.padLeft("" + 0, 3)).append("% ");
121
// "Num" row: the Nom column shows 0 and the int-like percentage goes in the
// Int column.
// NOTE(review): presumably these three lines form the else branch of the
// check above (the embedded numbering skips 120) -- confirm.
sb.append(Utils.padLeft("Num", 4)).append(' ');
122
sb.append(Utils.padLeft("" + 0, 3)).append("% ");
123
sb.append(Utils.padLeft("" + percent, 3)).append("% ");
125
// Real column: share of values with a fractional part.
percent = Math.round(100.0 * realCount / totalCount);
126
sb.append(Utils.padLeft("" + percent, 3)).append("% ");
127
// Missing column: absolute count, then " /" and the percentage of the total.
sb.append(Utils.padLeft("" + missingCount, 5)).append(" /");
128
percent = Math.round(100.0 * missingCount / totalCount);
129
sb.append(Utils.padLeft("" + percent, 3)).append("% ");
130
// Unique column: same "count / percent" layout as Missing.
sb.append(Utils.padLeft("" + uniqueCount, 5)).append(" /");
131
percent = Math.round(100.0 * uniqueCount / totalCount);
132
sb.append(Utils.padLeft("" + percent, 3)).append("% ");
133
// Dist column: number of distinct values (count only, no percentage).
sb.append(Utils.padLeft("" + distinctCount, 5)).append(' ');
134
// One data column per nominal count, matching the "C[i]" header columns.
if (nominalCounts != null) {
135
for (int i = 0; i < nominalCounts.length; i++) {
136
sb.append(Utils.padLeft("" + nominalCounts[i], 5));
140
return sb.toString();