/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math.stat.inference;

import org.apache.commons.math.MathException;
import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.distribution.ChiSquaredDistribution;
import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;

25
* Implements Chi-Square test statistics defined in the
26
* {@link UnknownDistributionChiSquareTest} interface.
28
* @version $Revision: 775470 $ $Date: 2009-05-16 10:29:07 -0400 (Sat, 16 May 2009) $
30
public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
32
/** Distribution used to compute inference statistics. */
33
private ChiSquaredDistribution distribution;
36
* Construct a ChiSquareTestImpl
38
public ChiSquareTestImpl() {
39
this(new ChiSquaredDistributionImpl(1.0));
43
* Create a test instance using the given distribution for computing
44
* inference statistics.
45
* @param x distribution used to compute inference statistics.
48
public ChiSquareTestImpl(ChiSquaredDistribution x) {
54
* <p><strong>Note: </strong>This implementation rescales the
55
* <code>expected</code> array if necessary to ensure that the sum of the
56
* expected and observed counts are equal.</p>
58
* @param observed array of observed frequency counts
59
* @param expected array of expected frequency counts
60
* @return chi-square test statistic
61
* @throws IllegalArgumentException if preconditions are not met
62
* or length is less than 2
64
public double chiSquare(double[] expected, long[] observed)
65
throws IllegalArgumentException {
66
if (expected.length < 2) {
67
throw MathRuntimeException.createIllegalArgumentException(
68
"expected array length = {0}, must be at least 2",
71
if (expected.length != observed.length) {
72
throw MathRuntimeException.createIllegalArgumentException(
73
"dimension mismatch {0} != {1}", expected.length, observed.length);
75
checkPositive(expected);
76
checkNonNegative(observed);
77
double sumExpected = 0d;
78
double sumObserved = 0d;
79
for (int i = 0; i < observed.length; i++) {
80
sumExpected += expected[i];
81
sumObserved += observed[i];
84
boolean rescale = false;
85
if (Math.abs(sumExpected - sumObserved) > 10E-6) {
86
ratio = sumObserved / sumExpected;
91
for (int i = 0; i < observed.length; i++) {
93
dev = (observed[i] - ratio * expected[i]);
94
sumSq += dev * dev / (ratio * expected[i]);
96
dev = (observed[i] - expected[i]);
97
sumSq += dev * dev / expected[i];
105
* <p><strong>Note: </strong>This implementation rescales the
106
* <code>expected</code> array if necessary to ensure that the sum of the
107
* expected and observed counts are equal.</p>
109
* @param observed array of observed frequency counts
110
* @param expected array of expected frequency counts
112
* @throws IllegalArgumentException if preconditions are not met
113
* @throws MathException if an error occurs computing the p-value
115
public double chiSquareTest(double[] expected, long[] observed)
116
throws IllegalArgumentException, MathException {
117
distribution.setDegreesOfFreedom(expected.length - 1.0);
118
return 1.0 - distribution.cumulativeProbability(
119
chiSquare(expected, observed));
124
* <p><strong>Note: </strong>This implementation rescales the
125
* <code>expected</code> array if necessary to ensure that the sum of the
126
* expected and observed counts are equal.</p>
128
* @param observed array of observed frequency counts
129
* @param expected array of expected frequency counts
130
* @param alpha significance level of the test
131
* @return true iff null hypothesis can be rejected with confidence
133
* @throws IllegalArgumentException if preconditions are not met
134
* @throws MathException if an error occurs performing the test
136
public boolean chiSquareTest(double[] expected, long[] observed,
137
double alpha) throws IllegalArgumentException, MathException {
138
if ((alpha <= 0) || (alpha > 0.5)) {
139
throw MathRuntimeException.createIllegalArgumentException(
140
"out of bounds significance level {0}, must be between {1} and {2}",
143
return (chiSquareTest(expected, observed) < alpha);
147
* @param counts array representation of 2-way table
148
* @return chi-square test statistic
149
* @throws IllegalArgumentException if preconditions are not met
151
public double chiSquare(long[][] counts) throws IllegalArgumentException {
154
int nRows = counts.length;
155
int nCols = counts[0].length;
157
// compute row, column and total sums
158
double[] rowSum = new double[nRows];
159
double[] colSum = new double[nCols];
161
for (int row = 0; row < nRows; row++) {
162
for (int col = 0; col < nCols; col++) {
163
rowSum[row] += counts[row][col];
164
colSum[col] += counts[row][col];
165
total += counts[row][col];
169
// compute expected counts and chi-square
171
double expected = 0.0d;
172
for (int row = 0; row < nRows; row++) {
173
for (int col = 0; col < nCols; col++) {
174
expected = (rowSum[row] * colSum[col]) / total;
175
sumSq += ((counts[row][col] - expected) *
176
(counts[row][col] - expected)) / expected;
183
* @param counts array representation of 2-way table
185
* @throws IllegalArgumentException if preconditions are not met
186
* @throws MathException if an error occurs computing the p-value
188
public double chiSquareTest(long[][] counts)
189
throws IllegalArgumentException, MathException {
191
double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
192
distribution.setDegreesOfFreedom(df);
193
return 1 - distribution.cumulativeProbability(chiSquare(counts));
197
* @param counts array representation of 2-way table
198
* @param alpha significance level of the test
199
* @return true iff null hypothesis can be rejected with confidence
201
* @throws IllegalArgumentException if preconditions are not met
202
* @throws MathException if an error occurs performing the test
204
public boolean chiSquareTest(long[][] counts, double alpha)
205
throws IllegalArgumentException, MathException {
206
if ((alpha <= 0) || (alpha > 0.5)) {
207
throw MathRuntimeException.createIllegalArgumentException(
208
"out of bounds significance level {0}, must be between {1} and {2}",
211
return (chiSquareTest(counts) < alpha);
215
* @param observed1 array of observed frequency counts of the first data set
216
* @param observed2 array of observed frequency counts of the second data set
217
* @return chi-square test statistic
218
* @throws IllegalArgumentException if preconditions are not met
221
public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
222
throws IllegalArgumentException {
224
// Make sure lengths are same
225
if (observed1.length < 2) {
226
throw MathRuntimeException.createIllegalArgumentException(
227
"observed array length = {0}, must be at least 2",
230
if (observed1.length != observed2.length) {
231
throw MathRuntimeException.createIllegalArgumentException(
232
"dimension mismatch {0} != {1}",
233
observed1.length, observed2.length);
236
// Ensure non-negative counts
237
checkNonNegative(observed1);
238
checkNonNegative(observed2);
240
// Compute and compare count sums
243
boolean unequalCounts = false;
245
for (int i = 0; i < observed1.length; i++) {
246
countSum1 += observed1[i];
247
countSum2 += observed2[i];
249
// Ensure neither sample is uniformly 0
250
if (countSum1 == 0) {
251
throw MathRuntimeException.createIllegalArgumentException(
252
"observed counts are all 0 in first observed array");
254
if (countSum2 == 0) {
255
throw MathRuntimeException.createIllegalArgumentException(
256
"observed counts are all 0 in second observed array");
258
// Compare and compute weight only if different
259
unequalCounts = (countSum1 != countSum2);
261
weight = Math.sqrt((double) countSum1 / (double) countSum2);
263
// Compute ChiSquare statistic
268
for (int i = 0; i < observed1.length; i++) {
269
if (observed1[i] == 0 && observed2[i] == 0) {
270
throw MathRuntimeException.createIllegalArgumentException(
271
"observed counts are both zero for entry {0}", i);
275
if (unequalCounts) { // apply weights
276
dev = obs1/weight - obs2 * weight;
280
sumSq += (dev * dev) / (obs1 + obs2);
287
* @param observed1 array of observed frequency counts of the first data set
288
* @param observed2 array of observed frequency counts of the second data set
290
* @throws IllegalArgumentException if preconditions are not met
291
* @throws MathException if an error occurs computing the p-value
294
public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
295
throws IllegalArgumentException, MathException {
296
distribution.setDegreesOfFreedom((double) observed1.length - 1);
297
return 1 - distribution.cumulativeProbability(
298
chiSquareDataSetsComparison(observed1, observed2));
302
* @param observed1 array of observed frequency counts of the first data set
303
* @param observed2 array of observed frequency counts of the second data set
304
* @param alpha significance level of the test
305
* @return true iff null hypothesis can be rejected with confidence
307
* @throws IllegalArgumentException if preconditions are not met
308
* @throws MathException if an error occurs performing the test
311
public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
312
double alpha) throws IllegalArgumentException, MathException {
313
if ((alpha <= 0) || (alpha > 0.5)) {
314
throw MathRuntimeException.createIllegalArgumentException(
315
"out of bounds significance level {0}, must be between {1} and {2}",
318
return (chiSquareTestDataSetsComparison(observed1, observed2) < alpha);
322
* Checks to make sure that the input long[][] array is rectangular,
323
* has at least 2 rows and 2 columns, and has all non-negative entries,
324
* throwing IllegalArgumentException if any of these checks fail.
326
* @param in input 2-way table to check
327
* @throws IllegalArgumentException if the array is not valid
329
private void checkArray(long[][] in) throws IllegalArgumentException {
332
throw MathRuntimeException.createIllegalArgumentException(
333
"invalid row dimension: {0} (must be at least 2)",
337
if (in[0].length < 2) {
338
throw MathRuntimeException.createIllegalArgumentException(
339
"invalid column dimension: {0} (must be at least 2)",
343
checkRectangular(in);
344
checkNonNegative(in);
348
//--------------------- Private array methods -- should find a utility home for these
351
* Throws IllegalArgumentException if the input array is not rectangular.
353
* @param in array to be tested
354
* @throws NullPointerException if input array is null
355
* @throws IllegalArgumentException if input array is not rectangular
357
private void checkRectangular(long[][] in) {
358
for (int i = 1; i < in.length; i++) {
359
if (in[i].length != in[0].length) {
360
throw MathRuntimeException.createIllegalArgumentException(
361
"some rows have length {0} while others have length {1}",
362
in[i].length, in[0].length);
368
* Check all entries of the input array are > 0.
370
* @param in array to be tested
371
* @exception IllegalArgumentException if one entry is not positive
373
private void checkPositive(double[] in) throws IllegalArgumentException {
374
for (int i = 0; i < in.length; i++) {
376
throw MathRuntimeException.createIllegalArgumentException(
377
"element {0} is not positive: {1}",
384
* Check all entries of the input array are >= 0.
386
* @param in array to be tested
387
* @exception IllegalArgumentException if one entry is negative
389
private void checkNonNegative(long[] in) throws IllegalArgumentException {
390
for (int i = 0; i < in.length; i++) {
392
throw MathRuntimeException.createIllegalArgumentException(
393
"element {0} is negative: {1}",
400
* Check all entries of the input array are >= 0.
402
* @param in array to be tested
403
* @exception IllegalArgumentException if one entry is negative
405
private void checkNonNegative(long[][] in) throws IllegalArgumentException {
406
for (int i = 0; i < in.length; i ++) {
407
for (int j = 0; j < in[i].length; j++) {
409
throw MathRuntimeException.createIllegalArgumentException(
410
"element ({0}, {1}) is negative: {2}",
418
* Modify the distribution used to compute inference statistics.
421
* the new distribution
424
public void setDistribution(ChiSquaredDistribution value) {
425
distribution = value;