/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
package org.apache.commons.math.stat.descriptive;
19
import java.io.Serializable;
21
import org.apache.commons.math.MathRuntimeException;
22
import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
23
import org.apache.commons.math.stat.descriptive.moment.Mean;
24
import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
25
import org.apache.commons.math.stat.descriptive.moment.Variance;
26
import org.apache.commons.math.stat.descriptive.rank.Max;
27
import org.apache.commons.math.stat.descriptive.rank.Min;
28
import org.apache.commons.math.stat.descriptive.summary.Sum;
29
import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
30
import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
31
import org.apache.commons.math.util.MathUtils;
35
* Computes summary statistics for a stream of data values added using the
36
* {@link #addValue(double) addValue} method. The data values are not stored in
37
* memory, so this class can be used to compute statistics for very large data
41
* The {@link StorelessUnivariateStatistic} instances used to maintain summary
42
* state and compute statistics are configurable via setters. For example, the
43
* default implementation for the variance can be overridden by calling
44
* {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
45
* these methods must implement the {@link StorelessUnivariateStatistic}
46
* interface and configuration must be completed before <code>addValue</code>
47
* is called. No configuration is necessary to use the default, commons-math
48
* provided implementations.
51
* Note: This class is not thread-safe. Use
52
* {@link SynchronizedSummaryStatistics} if concurrent access from multiple
53
* threads is required.
55
* @version $Revision: 791728 $ $Date: 2009-07-07 03:17:50 -0400 (Tue, 07 Jul 2009) $
57
public class SummaryStatistics implements StatisticalSummary, Serializable {
59
/** Serialization UID */
60
private static final long serialVersionUID = -2021321786743555871L;
63
* Construct a SummaryStatistics instance
65
public SummaryStatistics() {
69
* A copy constructor. Creates a deep-copy of the {@code original}.
71
* @param original the {@code SummaryStatistics} instance to copy
73
public SummaryStatistics(SummaryStatistics original) {
77
/** count of values that have been added */
80
/** SecondMoment is used to compute the mean and variance */
81
protected SecondMoment secondMoment = new SecondMoment();
83
/** sum of values that have been added */
84
protected Sum sum = new Sum();
86
/** sum of the square of each value that has been added */
87
protected SumOfSquares sumsq = new SumOfSquares();
89
/** min of values that have been added */
90
protected Min min = new Min();
92
/** max of values that have been added */
93
protected Max max = new Max();
95
/** sumLog of values that have been added */
96
protected SumOfLogs sumLog = new SumOfLogs();
98
/** geoMean of values that have been added */
99
protected GeometricMean geoMean = new GeometricMean(sumLog);
101
/** mean of values that have been added */
102
protected Mean mean = new Mean();
104
/** variance of values that have been added */
105
protected Variance variance = new Variance();
107
/** Sum statistic implementation - can be reset by setter. */
108
private StorelessUnivariateStatistic sumImpl = sum;
110
/** Sum of squares statistic implementation - can be reset by setter. */
111
private StorelessUnivariateStatistic sumsqImpl = sumsq;
113
/** Minimum statistic implementation - can be reset by setter. */
114
private StorelessUnivariateStatistic minImpl = min;
116
/** Maximum statistic implementation - can be reset by setter. */
117
private StorelessUnivariateStatistic maxImpl = max;
119
/** Sum of log statistic implementation - can be reset by setter. */
120
private StorelessUnivariateStatistic sumLogImpl = sumLog;
122
/** Geometric mean statistic implementation - can be reset by setter. */
123
private StorelessUnivariateStatistic geoMeanImpl = geoMean;
125
/** Mean statistic implementation - can be reset by setter. */
126
private StorelessUnivariateStatistic meanImpl = mean;
128
/** Variance statistic implementation - can be reset by setter. */
129
private StorelessUnivariateStatistic varianceImpl = variance;
132
* Return a {@link StatisticalSummaryValues} instance reporting current
134
* @return Current values of statistics
136
public StatisticalSummary getSummary() {
137
return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
138
getMax(), getMin(), getSum());
142
* Add a value to the data
143
* @param value the value to add
145
public void addValue(double value) {
146
sumImpl.increment(value);
147
sumsqImpl.increment(value);
148
minImpl.increment(value);
149
maxImpl.increment(value);
150
sumLogImpl.increment(value);
151
secondMoment.increment(value);
152
// If mean, variance or geomean have been overridden,
153
// need to increment these
154
if (!(meanImpl instanceof Mean)) {
155
meanImpl.increment(value);
157
if (!(varianceImpl instanceof Variance)) {
158
varianceImpl.increment(value);
160
if (!(geoMeanImpl instanceof GeometricMean)) {
161
geoMeanImpl.increment(value);
167
* Returns the number of available values
168
* @return The number of available values
175
* Returns the sum of the values that have been added
176
* @return The sum or <code>Double.NaN</code> if no values have been added
178
public double getSum() {
179
return sumImpl.getResult();
183
* Returns the sum of the squares of the values that have been added.
185
* Double.NaN is returned if no values have been added.
187
* @return The sum of squares
189
public double getSumsq() {
190
return sumsqImpl.getResult();
194
* Returns the mean of the values that have been added.
196
* Double.NaN is returned if no values have been added.
200
public double getMean() {
201
if (mean == meanImpl) {
202
return new Mean(secondMoment).getResult();
204
return meanImpl.getResult();
209
* Returns the standard deviation of the values that have been added.
211
* Double.NaN is returned if no values have been added.
213
* @return the standard deviation
215
public double getStandardDeviation() {
216
double stdDev = Double.NaN;
219
stdDev = Math.sqrt(getVariance());
228
* Returns the variance of the values that have been added.
230
* Double.NaN is returned if no values have been added.
232
* @return the variance
234
public double getVariance() {
235
if (varianceImpl == variance) {
236
return new Variance(secondMoment).getResult();
238
return varianceImpl.getResult();
243
* Returns the maximum of the values that have been added.
245
* Double.NaN is returned if no values have been added.
247
* @return the maximum
249
public double getMax() {
250
return maxImpl.getResult();
254
* Returns the minimum of the values that have been added.
256
* Double.NaN is returned if no values have been added.
258
* @return the minimum
260
public double getMin() {
261
return minImpl.getResult();
265
* Returns the geometric mean of the values that have been added.
267
* Double.NaN is returned if no values have been added.
269
* @return the geometric mean
271
public double getGeometricMean() {
272
return geoMeanImpl.getResult();
276
* Returns the sum of the logs of the values that have been added.
278
* Double.NaN is returned if no values have been added.
280
* @return the sum of logs
283
public double getSumOfLogs() {
284
return sumLogImpl.getResult();
288
* Returns a statistic related to the Second Central Moment. Specifically,
289
* what is returned is the sum of squared deviations from the sample mean
290
* among the values that have been added.
292
* Returns <code>Double.NaN</code> if no data values have been added and
293
* returns <code>0</code> if there is just one value in the data set.</p>
295
* @return second central moment statistic
298
public double getSecondMoment() {
299
return secondMoment.getResult();
303
* Generates a text report displaying summary statistics from values that
305
* @return String with line feeds displaying statistics
309
public String toString() {
310
StringBuffer outBuffer = new StringBuffer();
312
outBuffer.append("SummaryStatistics:").append(endl);
313
outBuffer.append("n: ").append(getN()).append(endl);
314
outBuffer.append("min: ").append(getMin()).append(endl);
315
outBuffer.append("max: ").append(getMax()).append(endl);
316
outBuffer.append("mean: ").append(getMean()).append(endl);
317
outBuffer.append("geometric mean: ").append(getGeometricMean())
319
outBuffer.append("variance: ").append(getVariance()).append(endl);
320
outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
321
outBuffer.append("standard deviation: ").append(getStandardDeviation())
323
outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
324
return outBuffer.toString();
328
* Resets all statistics and storage
330
public void clear() {
338
secondMoment.clear();
339
if (meanImpl != mean) {
342
if (varianceImpl != variance) {
343
varianceImpl.clear();
348
* Returns true iff <code>object</code> is a
349
* <code>SummaryStatistics</code> instance and all statistics have the
350
* same values as this.
351
* @param object the object to test equality against.
352
* @return true if object equals this
355
public boolean equals(Object object) {
356
if (object == this) {
359
if (object instanceof SummaryStatistics == false) {
362
SummaryStatistics stat = (SummaryStatistics)object;
363
return (MathUtils.equals(stat.getGeometricMean(), this.getGeometricMean()) &&
364
MathUtils.equals(stat.getMax(), this.getMax()) &&
365
MathUtils.equals(stat.getMean(), this.getMean()) &&
366
MathUtils.equals(stat.getMin(), this.getMin()) &&
367
MathUtils.equals(stat.getN(), this.getN()) &&
368
MathUtils.equals(stat.getSum(), this.getSum()) &&
369
MathUtils.equals(stat.getSumsq(), this.getSumsq()) &&
370
MathUtils.equals(stat.getVariance(),
371
this.getVariance()));
375
* Returns hash code based on values of statistics
379
public int hashCode() {
380
int result = 31 + MathUtils.hash(getGeometricMean());
381
result = result * 31 + MathUtils.hash(getGeometricMean());
382
result = result * 31 + MathUtils.hash(getMax());
383
result = result * 31 + MathUtils.hash(getMean());
384
result = result * 31 + MathUtils.hash(getMin());
385
result = result * 31 + MathUtils.hash(getN());
386
result = result * 31 + MathUtils.hash(getSum());
387
result = result * 31 + MathUtils.hash(getSumsq());
388
result = result * 31 + MathUtils.hash(getVariance());
392
// Getters and setters for statistics implementations
394
* Returns the currently configured Sum implementation
395
* @return the StorelessUnivariateStatistic implementing the sum
398
public StorelessUnivariateStatistic getSumImpl() {
404
* Sets the implementation for the Sum.
407
* This method must be activated before any data has been added - i.e.,
408
* before {@link #addValue(double) addValue} has been used to add data;
409
* otherwise an IllegalStateException will be thrown.
411
* @param sumImpl the StorelessUnivariateStatistic instance to use for
413
* @throws IllegalStateException if data has already been added (i.e if n >
417
public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
419
this.sumImpl = sumImpl;
423
* Returns the currently configured sum of squares implementation
424
* @return the StorelessUnivariateStatistic implementing the sum of squares
427
public StorelessUnivariateStatistic getSumsqImpl() {
433
* Sets the implementation for the sum of squares.
436
* This method must be activated before any data has been added - i.e.,
437
* before {@link #addValue(double) addValue} has been used to add data;
438
* otherwise an IllegalStateException will be thrown.
440
* @param sumsqImpl the StorelessUnivariateStatistic instance to use for
441
* computing the sum of squares
442
* @throws IllegalStateException if data has already been added (i.e if n >
446
public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
448
this.sumsqImpl = sumsqImpl;
452
* Returns the currently configured minimum implementation
453
* @return the StorelessUnivariateStatistic implementing the minimum
456
public StorelessUnivariateStatistic getMinImpl() {
462
* Sets the implementation for the minimum.
465
* This method must be activated before any data has been added - i.e.,
466
* before {@link #addValue(double) addValue} has been used to add data;
467
* otherwise an IllegalStateException will be thrown.
469
* @param minImpl the StorelessUnivariateStatistic instance to use for
470
* computing the minimum
471
* @throws IllegalStateException if data has already been added (i.e if n >
475
public void setMinImpl(StorelessUnivariateStatistic minImpl) {
477
this.minImpl = minImpl;
481
* Returns the currently configured maximum implementation
482
* @return the StorelessUnivariateStatistic implementing the maximum
485
public StorelessUnivariateStatistic getMaxImpl() {
491
* Sets the implementation for the maximum.
494
* This method must be activated before any data has been added - i.e.,
495
* before {@link #addValue(double) addValue} has been used to add data;
496
* otherwise an IllegalStateException will be thrown.
498
* @param maxImpl the StorelessUnivariateStatistic instance to use for
499
* computing the maximum
500
* @throws IllegalStateException if data has already been added (i.e if n >
504
public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
506
this.maxImpl = maxImpl;
510
* Returns the currently configured sum of logs implementation
511
* @return the StorelessUnivariateStatistic implementing the log sum
514
public StorelessUnivariateStatistic getSumLogImpl() {
520
* Sets the implementation for the sum of logs.
523
* This method must be activated before any data has been added - i.e.,
524
* before {@link #addValue(double) addValue} has been used to add data;
525
* otherwise an IllegalStateException will be thrown.
527
* @param sumLogImpl the StorelessUnivariateStatistic instance to use for
528
* computing the log sum
529
* @throws IllegalStateException if data has already been added (i.e if n >
533
public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
535
this.sumLogImpl = sumLogImpl;
536
geoMean.setSumLogImpl(sumLogImpl);
540
* Returns the currently configured geometric mean implementation
541
* @return the StorelessUnivariateStatistic implementing the geometric mean
544
public StorelessUnivariateStatistic getGeoMeanImpl() {
550
* Sets the implementation for the geometric mean.
553
* This method must be activated before any data has been added - i.e.,
554
* before {@link #addValue(double) addValue} has been used to add data;
555
* otherwise an IllegalStateException will be thrown.
557
* @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
558
* computing the geometric mean
559
* @throws IllegalStateException if data has already been added (i.e if n >
563
public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
565
this.geoMeanImpl = geoMeanImpl;
569
* Returns the currently configured mean implementation
570
* @return the StorelessUnivariateStatistic implementing the mean
573
public StorelessUnivariateStatistic getMeanImpl() {
579
* Sets the implementation for the mean.
582
* This method must be activated before any data has been added - i.e.,
583
* before {@link #addValue(double) addValue} has been used to add data;
584
* otherwise an IllegalStateException will be thrown.
586
* @param meanImpl the StorelessUnivariateStatistic instance to use for
588
* @throws IllegalStateException if data has already been added (i.e if n >
592
public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
594
this.meanImpl = meanImpl;
598
* Returns the currently configured variance implementation
599
* @return the StorelessUnivariateStatistic implementing the variance
602
public StorelessUnivariateStatistic getVarianceImpl() {
608
* Sets the implementation for the variance.
611
* This method must be activated before any data has been added - i.e.,
612
* before {@link #addValue(double) addValue} has been used to add data;
613
* otherwise an IllegalStateException will be thrown.
615
* @param varianceImpl the StorelessUnivariateStatistic instance to use for
616
* computing the variance
617
* @throws IllegalStateException if data has already been added (i.e if n >
621
public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
623
this.varianceImpl = varianceImpl;
627
* Throws IllegalStateException if n > 0.
629
private void checkEmpty() {
631
throw MathRuntimeException.createIllegalStateException(
632
"{0} values have been added before statistic is configured",
638
* Returns a copy of this SummaryStatistics instance with the same internal state.
640
* @return a copy of this
642
public SummaryStatistics copy() {
643
SummaryStatistics result = new SummaryStatistics();
649
* Copies source to dest.
650
* <p>Neither source nor dest can be null.</p>
652
* @param source SummaryStatistics to copy
653
* @param dest SummaryStatistics to copy to
654
* @throws NullPointerException if either source or dest is null
656
public static void copy(SummaryStatistics source, SummaryStatistics dest) {
657
dest.maxImpl = source.maxImpl.copy();
658
dest.meanImpl = source.meanImpl.copy();
659
dest.minImpl = source.minImpl.copy();
660
dest.sumImpl = source.sumImpl.copy();
661
dest.varianceImpl = source.varianceImpl.copy();
662
dest.sumLogImpl = source.sumLogImpl.copy();
663
dest.sumsqImpl = source.sumsqImpl.copy();
664
if (source.getGeoMeanImpl() instanceof GeometricMean) {
665
// Keep geoMeanImpl, sumLogImpl in synch
666
dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
668
dest.geoMeanImpl = source.geoMeanImpl.copy();
670
SecondMoment.copy(source.secondMoment, dest.secondMoment);
673
// Make sure that if stat == statImpl in source, same
674
// holds in dest; otherwise copy stat
675
if (source.geoMean == source.geoMeanImpl) {
676
dest.geoMean = (GeometricMean) dest.geoMeanImpl;
678
GeometricMean.copy(source.geoMean, dest.geoMean);
680
if (source.max == source.maxImpl) {
681
dest.max = (Max) dest.maxImpl;
683
Max.copy(source.max, dest.max);
685
if (source.mean == source.meanImpl) {
686
dest.mean = (Mean) dest.meanImpl;
688
Mean.copy(source.mean, dest.mean);
690
if (source.min == source.minImpl) {
691
dest.min = (Min) dest.minImpl;
693
Min.copy(source.min, dest.min);
695
if (source.sum == source.sumImpl) {
696
dest.sum = (Sum) dest.sumImpl;
698
Sum.copy(source.sum, dest.sum);
700
if (source.variance == source.varianceImpl) {
701
dest.variance = (Variance) dest.varianceImpl;
703
Variance.copy(source.variance, dest.variance);
705
if (source.sumLog == source.sumLogImpl) {
706
dest.sumLog = (SumOfLogs) dest.sumLogImpl;
708
SumOfLogs.copy(source.sumLog, dest.sumLog);
710
if (source.sumsq == source.sumsqImpl) {
711
dest.sumsq = (SumOfSquares) dest.sumsqImpl;
713
SumOfSquares.copy(source.sumsq, dest.sumsq);