2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
17
package org.apache.commons.math.stat.descriptive;
19
import java.io.Serializable;
20
import java.lang.reflect.InvocationTargetException;
21
import java.util.Arrays;
23
import org.apache.commons.math.MathRuntimeException;
24
import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
25
import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
26
import org.apache.commons.math.stat.descriptive.moment.Mean;
27
import org.apache.commons.math.stat.descriptive.moment.Skewness;
28
import org.apache.commons.math.stat.descriptive.moment.Variance;
29
import org.apache.commons.math.stat.descriptive.rank.Max;
30
import org.apache.commons.math.stat.descriptive.rank.Min;
31
import org.apache.commons.math.stat.descriptive.rank.Percentile;
32
import org.apache.commons.math.stat.descriptive.summary.Sum;
33
import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
34
import org.apache.commons.math.util.ResizableDoubleArray;
38
* Maintains a dataset of values of a single variable and computes descriptive
39
* statistics based on stored data. The {@link #getWindowSize() windowSize}
40
* property sets a limit on the number of values that can be stored in the
41
* dataset. The default value, INFINITE_WINDOW, puts no limit on the size of
42
* the dataset. This value should be used with caution, as the backing store
43
* will grow without bound in this case. For very large datasets,
44
* {@link SummaryStatistics}, which does not store the dataset, should be used
45
* instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and
46
* more values are added than can be stored in the dataset, new values are
47
* added in a "rolling" manner, with new values replacing the "oldest" values
50
* <p>Note: this class is not threadsafe. Use
51
* {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple
52
* threads is required.</p>
54
* @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
56
public class DescriptiveStatistics implements StatisticalSummary, Serializable {
58
/** Serialization UID */
59
private static final long serialVersionUID = 4133067267405273064L;
61
/** hold the window size **/
62
protected int windowSize = INFINITE_WINDOW;
67
/** Backing store for the data values currently held in the dataset. */
protected ResizableDoubleArray eDA = new ResizableDoubleArray();
69
/** Mean statistic implementation - can be reset by setter. */
70
private UnivariateStatistic meanImpl = new Mean();
72
/** Geometric mean statistic implementation - can be reset by setter. */
73
private UnivariateStatistic geometricMeanImpl = new GeometricMean();
75
/** Kurtosis statistic implementation - can be reset by setter. */
76
private UnivariateStatistic kurtosisImpl = new Kurtosis();
78
/** Maximum statistic implementation - can be reset by setter. */
79
private UnivariateStatistic maxImpl = new Max();
81
/** Minimum statistic implementation - can be reset by setter. */
82
private UnivariateStatistic minImpl = new Min();
84
/** Percentile statistic implementation - can be reset by setter. */
85
private UnivariateStatistic percentileImpl = new Percentile();
87
/** Skewness statistic implementation - can be reset by setter. */
88
private UnivariateStatistic skewnessImpl = new Skewness();
90
/** Variance statistic implementation - can be reset by setter. */
91
private UnivariateStatistic varianceImpl = new Variance();
93
/** Sum of squares statistic implementation - can be reset by setter. */
94
private UnivariateStatistic sumsqImpl = new SumOfSquares();
96
/** Sum statistic implementation - can be reset by setter. */
97
private UnivariateStatistic sumImpl = new Sum();
100
* Construct a DescriptiveStatistics instance with an infinite window
102
public DescriptiveStatistics() {
    // Nothing to configure here: the field initializers already select
    // INFINITE_WINDOW and the default statistic implementations.
}
106
* Construct a DescriptiveStatistics instance with the specified window
108
* @param window the window size.
110
public DescriptiveStatistics(int window) {
    // Route through the setter so the requested size is validated
    // exactly as it would be for a later setWindowSize call.
    this.setWindowSize(window);
}
115
* Copy constructor. Construct a new DescriptiveStatistics instance that
116
* is a copy of original.
118
* @param original DescriptiveStatistics instance to copy
120
public DescriptiveStatistics(DescriptiveStatistics original) {
    // Populate this freshly created instance from the original's state
    // using the static copy helper.
    DescriptiveStatistics.copy(original, this);
}
125
* Represents an infinite window size. When the {@link #getWindowSize()}
126
* returns this value, there is no limit to the number of data values
127
* that can be stored in the dataset.
129
public static final int INFINITE_WINDOW = -1;
132
* Adds the value to the dataset. If the dataset is at the maximum size
133
* (i.e., the number of stored elements equals the currently configured
134
* windowSize), the first (oldest) element in the dataset is discarded
135
* to make room for the new value.
137
* @param v the value to be added
139
public void addValue(double v) {
140
if (windowSize != INFINITE_WINDOW) {
141
if (getN() == windowSize) {
142
eDA.addElementRolling(v);
143
} else if (getN() < windowSize) {
152
* Removes the most recent value from the dataset.
154
public void removeMostRecentValue() {
155
eDA.discardMostRecentElements(1);
159
* Replaces the most recently stored value with the given value.
160
* There must be at least one element stored to call this method.
162
* @param v the value to replace the most recent stored value
163
* @return replaced value
165
public double replaceMostRecentValue(double v) {
166
return eDA.substituteMostRecentElement(v);
170
* Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
171
* arithmetic mean </a> of the available values
172
* @return The mean or Double.NaN if no values have been added.
174
public double getMean() {
175
return apply(meanImpl);
179
* Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
180
* geometric mean </a> of the available values
181
* @return The geometricMean, Double.NaN if no values have been added,
182
* or if the product of the available values is less than or equal to 0.
184
public double getGeometricMean() {
185
return apply(geometricMeanImpl);
189
* Returns the variance of the available values.
190
* @return The variance, Double.NaN if no values have been added
191
* or 0.0 for a single value set.
193
public double getVariance() {
194
return apply(varianceImpl);
198
* Returns the standard deviation of the available values.
199
* @return The standard deviation, Double.NaN if no values have been added
200
* or 0.0 for a single value set.
202
public double getStandardDeviation() {
203
double stdDev = Double.NaN;
206
stdDev = Math.sqrt(getVariance());
215
* Returns the skewness of the available values. Skewness is a
216
* measure of the asymmetry of a given distribution.
217
* @return The skewness, Double.NaN if no values have been added
218
* or 0.0 for a value set &lt;=2.
220
public double getSkewness() {
221
return apply(skewnessImpl);
225
* Returns the Kurtosis of the available values. Kurtosis is a
226
* measure of the "peakedness" of a distribution
227
* @return The kurtosis, Double.NaN if no values have been added, or 0.0
228
* for a value set &lt;=3.
230
public double getKurtosis() {
231
return apply(kurtosisImpl);
235
* Returns the maximum of the available values
236
* @return The max or Double.NaN if no values have been added.
238
public double getMax() {
239
return apply(maxImpl);
243
* Returns the minimum of the available values
244
* @return The min or Double.NaN if no values have been added.
246
public double getMin() {
247
return apply(minImpl);
251
* Returns the number of available values
252
* @return The number of available values
255
return eDA.getNumElements();
259
* Returns the sum of the values that have been added to the dataset.
260
* @return The sum or Double.NaN if no values have been added
262
public double getSum() {
263
return apply(sumImpl);
267
* Returns the sum of the squares of the available values.
268
* @return The sum of the squares or Double.NaN if no
269
* values have been added.
271
public double getSumsq() {
272
return apply(sumsqImpl);
276
* Resets all statistics and storage
278
public void clear() {
284
* Returns the maximum number of values that can be stored in the
285
* dataset, or INFINITE_WINDOW (-1) if there is no limit.
287
* @return The current window size or -1 if it is infinite.
289
public int getWindowSize() {
294
* WindowSize controls the number of values which contribute
295
* to the reported statistics. For example, if
296
* windowSize is set to 3 and the values {1,2,3,4,5}
297
* have been added <strong> in that order</strong>
298
* then the <i>available values</i> are {3,4,5} and all
299
* reported statistics will be based on these values
300
* @param windowSize sets the size of the window.
302
public void setWindowSize(int windowSize) {
303
if (windowSize < 1) {
304
if (windowSize != INFINITE_WINDOW) {
305
throw MathRuntimeException.createIllegalArgumentException(
306
"window size must be positive ({0})", windowSize);
310
this.windowSize = windowSize;
312
// We need to check to see if we need to discard elements
313
// from the front of the array. If the windowSize is less than
314
// the current number of elements.
315
if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
316
eDA.discardFrontElements(eDA.getNumElements() - windowSize);
321
* Returns the current set of values in an array of double primitives.
322
* The order of addition is preserved. The returned array is a fresh
323
* copy of the underlying data -- i.e., it is not a reference to the
326
* @return returns the current set of numbers in the order in which they
327
* were added to this set
329
public double[] getValues() {
330
return eDA.getElements();
334
* Returns the current set of values in an array of double primitives,
335
* sorted in ascending order. The returned array is a fresh
336
* copy of the underlying data -- i.e., it is not a reference to the
338
* @return returns the current set of
339
* numbers sorted in ascending order
341
public double[] getSortedValues() {
342
double[] sort = getValues();
348
* Returns the element at the specified index
349
* @param index The Index of the element
350
* @return return the element at the specified index
352
public double getElement(int index) {
353
return eDA.getElement(index);
357
* Returns an estimate for the pth percentile of the stored values.
359
* The implementation provided here follows the first estimation procedure presented
360
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
362
* <strong>Preconditions</strong>:<ul>
363
* <li><code>0 &lt; p &le; 100</code> (otherwise an
364
* <code>IllegalArgumentException</code> is thrown)</li>
365
* <li>at least one value must be stored (returns <code>Double.NaN
366
* </code> otherwise)</li>
369
* @param p the requested percentile (scaled from 0 - 100)
370
* @return An estimate for the pth percentile of the stored data
371
* @throws IllegalArgumentException if percentile implementation has been
372
* overridden and the supplied implementation does not support setQuantile
375
public double getPercentile(double p) {
376
if (percentileImpl instanceof Percentile) {
377
((Percentile) percentileImpl).setQuantile(p);
380
percentileImpl.getClass().getMethod("setQuantile",
381
new Class[] {Double.TYPE}).invoke(percentileImpl,
382
new Object[] {Double.valueOf(p)});
383
} catch (NoSuchMethodException e1) { // Setter guard should prevent
384
throw MathRuntimeException.createIllegalArgumentException(
385
"percentile implementation {0} does not support setQuantile",
386
percentileImpl.getClass().getName());
387
} catch (IllegalAccessException e2) {
388
throw MathRuntimeException.createIllegalArgumentException(
389
"cannot access setQuantile method in percentile implementation {0}",
390
percentileImpl.getClass().getName());
391
} catch (InvocationTargetException e3) {
392
throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
395
return apply(percentileImpl);
399
* Generates a text report displaying univariate statistics from values
400
* that have been added. Each statistic is displayed on a separate
403
* @return String with line feeds displaying statistics
406
public String toString() {
407
StringBuffer outBuffer = new StringBuffer();
409
outBuffer.append("DescriptiveStatistics:").append(endl);
410
outBuffer.append("n: ").append(getN()).append(endl);
411
outBuffer.append("min: ").append(getMin()).append(endl);
412
outBuffer.append("max: ").append(getMax()).append(endl);
413
outBuffer.append("mean: ").append(getMean()).append(endl);
414
outBuffer.append("std dev: ").append(getStandardDeviation())
416
outBuffer.append("median: ").append(getPercentile(50)).append(endl);
417
outBuffer.append("skewness: ").append(getSkewness()).append(endl);
418
outBuffer.append("kurtosis: ").append(getKurtosis()).append(endl);
419
return outBuffer.toString();
423
* Apply the given statistic to the data associated with this set of statistics.
424
* @param stat the statistic to apply
425
* @return the computed value of the statistic.
427
public double apply(UnivariateStatistic stat) {
428
return stat.evaluate(eDA.getInternalValues(), eDA.start(), eDA.getNumElements());
431
// Implementation getters and setter
434
* Returns the currently configured mean implementation.
436
* @return the UnivariateStatistic implementing the mean
439
public synchronized UnivariateStatistic getMeanImpl() {
444
* <p>Sets the implementation for the mean.</p>
446
* @param meanImpl the UnivariateStatistic instance to use
447
* for computing the mean
450
public synchronized void setMeanImpl(UnivariateStatistic meanImpl) {
451
this.meanImpl = meanImpl;
455
* Returns the currently configured geometric mean implementation.
457
* @return the UnivariateStatistic implementing the geometric mean
460
public synchronized UnivariateStatistic getGeometricMeanImpl() {
461
return geometricMeanImpl;
465
* <p>Sets the implementation for the geometric mean.</p>
467
* @param geometricMeanImpl the UnivariateStatistic instance to use
468
* for computing the geometric mean
471
public synchronized void setGeometricMeanImpl(
472
UnivariateStatistic geometricMeanImpl) {
473
this.geometricMeanImpl = geometricMeanImpl;
477
* Returns the currently configured kurtosis implementation.
479
* @return the UnivariateStatistic implementing the kurtosis
482
public synchronized UnivariateStatistic getKurtosisImpl() {
487
* <p>Sets the implementation for the kurtosis.</p>
489
* @param kurtosisImpl the UnivariateStatistic instance to use
490
* for computing the kurtosis
493
public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) {
494
this.kurtosisImpl = kurtosisImpl;
498
* Returns the currently configured maximum implementation.
500
* @return the UnivariateStatistic implementing the maximum
503
public synchronized UnivariateStatistic getMaxImpl() {
508
* <p>Sets the implementation for the maximum.</p>
510
* @param maxImpl the UnivariateStatistic instance to use
511
* for computing the maximum
514
public synchronized void setMaxImpl(UnivariateStatistic maxImpl) {
515
this.maxImpl = maxImpl;
519
* Returns the currently configured minimum implementation.
521
* @return the UnivariateStatistic implementing the minimum
524
public synchronized UnivariateStatistic getMinImpl() {
529
* <p>Sets the implementation for the minimum.</p>
531
* @param minImpl the UnivariateStatistic instance to use
532
* for computing the minimum
535
public synchronized void setMinImpl(UnivariateStatistic minImpl) {
536
this.minImpl = minImpl;
540
* Returns the currently configured percentile implementation.
542
* @return the UnivariateStatistic implementing the percentile
545
public synchronized UnivariateStatistic getPercentileImpl() {
546
return percentileImpl;
550
* Sets the implementation to be used by {@link #getPercentile(double)}.
551
* The supplied <code>UnivariateStatistic</code> must provide a
552
* <code>setQuantile(double)</code> method; otherwise
553
* <code>IllegalArgumentException</code> is thrown.
555
* @param percentileImpl the percentileImpl to set
556
* @throws IllegalArgumentException if the supplied implementation does not
557
* provide a <code>setQuantile</code> method
560
public synchronized void setPercentileImpl(
561
UnivariateStatistic percentileImpl) {
563
percentileImpl.getClass().getMethod("setQuantile",
564
new Class[] {Double.TYPE}).invoke(percentileImpl,
565
new Object[] {Double.valueOf(50.0d)});
566
} catch (NoSuchMethodException e1) {
567
throw MathRuntimeException.createIllegalArgumentException(
568
"percentile implementation {0} does not support setQuantile",
569
percentileImpl.getClass().getName());
570
} catch (IllegalAccessException e2) {
571
throw MathRuntimeException.createIllegalArgumentException(
572
"cannot access setQuantile method in percentile implementation {0}",
573
percentileImpl.getClass().getName());
574
} catch (InvocationTargetException e3) {
575
throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
577
this.percentileImpl = percentileImpl;
581
* Returns the currently configured skewness implementation.
583
* @return the UnivariateStatistic implementing the skewness
586
public synchronized UnivariateStatistic getSkewnessImpl() {
591
* <p>Sets the implementation for the skewness.</p>
593
* @param skewnessImpl the UnivariateStatistic instance to use
594
* for computing the skewness
597
public synchronized void setSkewnessImpl(
598
UnivariateStatistic skewnessImpl) {
599
this.skewnessImpl = skewnessImpl;
603
* Returns the currently configured variance implementation.
605
* @return the UnivariateStatistic implementing the variance
608
public synchronized UnivariateStatistic getVarianceImpl() {
613
* <p>Sets the implementation for the variance.</p>
615
* @param varianceImpl the UnivariateStatistic instance to use
616
* for computing the variance
619
public synchronized void setVarianceImpl(
620
UnivariateStatistic varianceImpl) {
621
this.varianceImpl = varianceImpl;
625
* Returns the currently configured sum of squares implementation.
627
* @return the UnivariateStatistic implementing the sum of squares
630
public synchronized UnivariateStatistic getSumsqImpl() {
635
* <p>Sets the implementation for the sum of squares.</p>
637
* @param sumsqImpl the UnivariateStatistic instance to use
638
* for computing the sum of squares
641
public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) {
642
this.sumsqImpl = sumsqImpl;
646
* Returns the currently configured sum implementation.
648
* @return the UnivariateStatistic implementing the sum
651
public synchronized UnivariateStatistic getSumImpl() {
656
* <p>Sets the implementation for the sum.</p>
658
* @param sumImpl the UnivariateStatistic instance to use
659
* for computing the sum
662
public synchronized void setSumImpl(UnivariateStatistic sumImpl) {
663
this.sumImpl = sumImpl;
667
* Returns a copy of this DescriptiveStatistics instance with the same internal state.
669
* @return a copy of this
671
public DescriptiveStatistics copy() {
672
DescriptiveStatistics result = new DescriptiveStatistics();
678
* Copies source to dest.
679
* <p>Neither source nor dest can be null.</p>
681
* @param source DescriptiveStatistics to copy
682
* @param dest DescriptiveStatistics to copy to
683
* @throws NullPointerException if either source or dest is null
685
public static void copy(DescriptiveStatistics source, DescriptiveStatistics dest) {
686
// Copy data and window size
687
dest.eDA = source.eDA.copy();
688
dest.windowSize = source.windowSize;
690
// Copy implementations
691
dest.maxImpl = source.maxImpl.copy();
692
dest.meanImpl = source.meanImpl.copy();
693
dest.minImpl = source.minImpl.copy();
694
dest.sumImpl = source.sumImpl.copy();
695
dest.varianceImpl = source.varianceImpl.copy();
696
dest.sumsqImpl = source.sumsqImpl.copy();
697
dest.geometricMeanImpl = source.geometricMeanImpl.copy();
698
dest.kurtosisImpl = source.kurtosisImpl;
699
dest.skewnessImpl = source.skewnessImpl;
700
dest.percentileImpl = source.percentileImpl;