2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
17
package org.apache.commons.math.stat.inference;
19
import org.apache.commons.math.MathException;
20
import org.apache.commons.math.stat.descriptive.summary.Sum;
21
import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
23
import org.apache.commons.math.distribution.FDistribution;
24
import org.apache.commons.math.distribution.FDistributionImpl;
26
import java.util.Collection;
27
import java.util.Iterator;
31
* Implements one-way ANOVA statistics defined in the {@link OneWayAnova}
35
* {@link org.apache.commons.math.distribution.FDistribution
36
* commons-math F Distribution implementation} to estimate exact p-values.</p>
38
* <p>This implementation is based on a description at
39
* http://faculty.vassar.edu/lowry/ch13pt1.html</p>
41
* Abbreviations: bg = between groups,
43
* ss = sum of squared deviations
47
* @version $Revision$ $Date$
49
public class OneWayAnovaImpl implements OneWayAnova {
52
* Default constructor.
54
public OneWayAnovaImpl() {
59
* This implementation computes the F statistic using the definitional
63
* msbg = between group mean square
64
* mswg = within group mean square</pre>
65
* are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
68
public double anovaFValue(Collection categoryData)
69
throws IllegalArgumentException, MathException {
70
AnovaStats a = anovaStats(categoryData);
76
* This implementation uses the
77
* {@link org.apache.commons.math.distribution.FDistribution
78
* commons-math F Distribution implementation} to estimate the exact
79
* p-value, using the formula<pre>
80
* p = 1 - cumulativeProbability(F)</pre>
81
* where <code>F</code> is the F value and <code>cumulativeProbability</code>
82
* is the commons-math implementation of the F distribution.</p>
84
public double anovaPValue(Collection categoryData)
85
throws IllegalArgumentException, MathException {
86
AnovaStats a = anovaStats(categoryData);
87
FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg);
88
return 1.0 - fdist.cumulativeProbability(a.F);
93
* This implementation uses the
94
* {@link org.apache.commons.math.distribution.FDistribution
95
* commons-math F Distribution implementation} to estimate the exact
96
* p-value, using the formula<pre>
97
* p = 1 - cumulativeProbability(F)</pre>
98
* where <code>F</code> is the F value and <code>cumulativeProbability</code>
99
* is the commons-math implementation of the F distribution.</p>
100
* <p>True is returned iff the estimated p-value is less than alpha.</p>
102
public boolean anovaTest(Collection categoryData, double alpha)
103
throws IllegalArgumentException, MathException {
104
if ((alpha <= 0) || (alpha > 0.5)) {
105
throw new IllegalArgumentException("bad significance level: " + alpha);
107
return (anovaPValue(categoryData) < alpha);
112
* This method actually does the calculations (except P-value).
114
* @param categoryData <code>Collection</code> of <code>double[]</code>
115
* arrays each containing data for one category
116
* @return computed AnovaStats
117
* @throws IllegalArgumentException if categoryData does not meet
118
* preconditions specified in the interface definition
119
* @throws MathException if an error occurs computing the Anova stats
121
private AnovaStats anovaStats(Collection categoryData)
122
throws IllegalArgumentException, MathException {
124
// check if we have enough categories
125
if (categoryData.size() < 2) {
126
throw new IllegalArgumentException(
127
"ANOVA: two or more categories required");
130
// check that each category has enough data and that every element is a double[]
131
for (Iterator iterator = categoryData.iterator(); iterator.hasNext();) {
134
array = (double[])iterator.next();
135
} catch (ClassCastException ex) {
136
throw new IllegalArgumentException(
137
"ANOVA: categoryData contains non-double[] elements.");
139
if (array.length <= 1) {
140
throw new IllegalArgumentException(
141
"ANOVA: one element of categoryData has fewer than 2 values.");
147
Sum totsum = new Sum();
148
SumOfSquares totsumsq = new SumOfSquares();
151
for (Iterator iterator = categoryData.iterator(); iterator.hasNext();) {
152
double[] data = (double[])iterator.next();
155
SumOfSquares sumsq = new SumOfSquares();
158
for (int i = 0; i < data.length; i++) {
159
double val = data[i];
164
sumsq.increment(val);
166
// for all categories
168
totsum.increment(val);
169
totsumsq.increment(val);
172
double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num;
175
double sst = totsumsq.getResult() - totsum.getResult() *
176
totsum.getResult()/totnum;
177
double ssbg = sst - sswg;
178
int dfbg = categoryData.size() - 1;
179
double msbg = ssbg/dfbg;
180
double mswg = sswg/dfwg;
181
double F = msbg/mswg;
183
return new AnovaStats(dfbg, dfwg, F);
187
Convenience class to pass dfbg, dfwg, F values around within OneWayAnovaImpl.
188
No get/set methods provided.
190
private static class AnovaStats {
197
* @param dfbg degrees of freedom in numerator (between groups)
198
* @param dfwg degrees of freedom in denominator (within groups)
201
AnovaStats(int dfbg, int dfwg, double F) {
b'\\ No newline at end of file'