2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
17
package org.apache.commons.math.stat.correlation;
19
import org.apache.commons.math.MathException;
20
import org.apache.commons.math.MathRuntimeException;
21
import org.apache.commons.math.distribution.TDistribution;
22
import org.apache.commons.math.distribution.TDistributionImpl;
23
import org.apache.commons.math.linear.RealMatrix;
24
import org.apache.commons.math.linear.BlockRealMatrix;
25
import org.apache.commons.math.stat.regression.SimpleRegression;
28
* Computes Pearson's product-moment correlation coefficients for pairs of arrays
29
* or columns of a matrix.
31
* <p>The constructors that take <code>RealMatrix</code> or
32
* <code>double[][]</code> arguments generate correlation matrices. The
33
* columns of the input matrices are assumed to represent variable values.
34
* Correlations are given by the formula</p>
35
* <code>cor(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code>
36
* where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code>
37
* is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.
39
* @version $Revision: 799857 $ $Date: 2009-08-01 09:07:12 -0400 (Sat, 01 Aug 2009) $
42
public class PearsonsCorrelation {
44
/** correlation matrix */
45
private final RealMatrix correlationMatrix;
47
/** number of observations */
48
private final int nObs;
51
* Create a PearsonsCorrelation instance without data
53
public PearsonsCorrelation() {
55
correlationMatrix = null;
60
* Create a PearsonsCorrelation from a rectangular array
61
* whose columns represent values of variables to be correlated.
63
* @param data rectangular array with columns representing variables
64
* @throws IllegalArgumentException if the input data array is not
65
* rectangular with at least two rows and two columns.
67
public PearsonsCorrelation(double[][] data) {
68
this(new BlockRealMatrix(data));
72
* Create a PearsonsCorrelation from a RealMatrix whose columns
73
* represent variables to be correlated.
75
* @param matrix matrix with columns representing variables to correlate
77
public PearsonsCorrelation(RealMatrix matrix) {
78
checkSufficientData(matrix);
79
nObs = matrix.getRowDimension();
80
correlationMatrix = computeCorrelationMatrix(matrix);
84
* Create a PearsonsCorrelation from a {@link Covariance}. The correlation
85
* matrix is computed by scaling the Covariance's covariance matrix.
86
* The Covariance instance must have been created from a data matrix with
87
* columns representing variable values.
89
* @param covariance Covariance instance
91
public PearsonsCorrelation(Covariance covariance) {
92
RealMatrix covarianceMatrix = covariance.getCovarianceMatrix();
93
if (covarianceMatrix == null) {
94
throw MathRuntimeException.createIllegalArgumentException("covariance matrix is null");
96
nObs = covariance.getN();
97
correlationMatrix = covarianceToCorrelation(covarianceMatrix);
101
* Create a PearsonsCorrelation from a covariance matrix. The correlation
102
* matrix is computed by scaling the covariance matrix.
104
* @param covarianceMatrix covariance matrix
105
* @param numberOfObservations the number of observations in the dataset used to compute
106
* the covariance matrix
108
public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) {
109
nObs = numberOfObservations;
110
correlationMatrix = covarianceToCorrelation(covarianceMatrix);
115
* Returns the correlation matrix
117
* @return correlation matrix
119
public RealMatrix getCorrelationMatrix() {
120
return correlationMatrix;
124
* Returns a matrix of standard errors associated with the estimates
125
* in the correlation matrix.<br/>
126
* <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard
127
* error associated with <code>getCorrelationMatrix.getEntry(i,j)</code>
128
* <p>The formula used to compute the standard error is <br/>
129
* <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code>
130
* where <code>r</code> is the estimated correlation coefficient and
131
* <code>n</code> is the number of observations in the source dataset.</p>
133
* @return matrix of correlation standard errors
135
public RealMatrix getCorrelationStandardErrors() {
136
int nVars = correlationMatrix.getColumnDimension();
137
double[][] out = new double[nVars][nVars];
138
for (int i = 0; i < nVars; i++) {
139
for (int j = 0; j < nVars; j++) {
140
double r = correlationMatrix.getEntry(i, j);
141
out[i][j] = Math.sqrt((1 - r * r) /(nObs - 2));
144
return new BlockRealMatrix(out);
148
* Returns a matrix of p-values associated with the (two-sided) null
149
* hypothesis that the corresponding correlation coefficient is zero.
150
* <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
151
* that a random variable distributed as <code>t<sub>n-2</sub></code> takes
152
* a value with absolute value greater than or equal to <br>
153
* <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
154
* <p>The values in the matrix are sometimes referred to as the
155
* <i>significance</i> of the corresponding correlation coefficients.</p>
157
* @return matrix of p-values
158
* @throws MathException if an error occurs estimating probabilities
160
public RealMatrix getCorrelationPValues() throws MathException {
161
TDistribution tDistribution = new TDistributionImpl(nObs - 2);
162
int nVars = correlationMatrix.getColumnDimension();
163
double[][] out = new double[nVars][nVars];
164
for (int i = 0; i < nVars; i++) {
165
for (int j = 0; j < nVars; j++) {
169
double r = correlationMatrix.getEntry(i, j);
170
double t = Math.abs(r * Math.sqrt((nObs - 2)/(1 - r * r)));
171
out[i][j] = 2 * (1 - tDistribution.cumulativeProbability(t));
175
return new BlockRealMatrix(out);
180
* Computes the correlation matrix for the columns of the
183
* @param matrix matrix with columns representing variables to correlate
184
* @return correlation matrix
186
public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
187
int nVars = matrix.getColumnDimension();
188
RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
189
for (int i = 0; i < nVars; i++) {
190
for (int j = 0; j < i; j++) {
191
double corr = correlation(matrix.getColumn(i), matrix.getColumn(j));
192
outMatrix.setEntry(i, j, corr);
193
outMatrix.setEntry(j, i, corr);
195
outMatrix.setEntry(i, i, 1d);
201
* Computes the correlation matrix for the columns of the
202
* input rectangular array. The colums of the array represent values
203
* of variables to be correlated.
205
* @param data matrix with columns representing variables to correlate
206
* @return correlation matrix
208
public RealMatrix computeCorrelationMatrix(double[][] data) {
209
return computeCorrelationMatrix(new BlockRealMatrix(data));
213
* Computes the Pearson's product-moment correlation coefficient between the two arrays.
215
* </p>Throws IllegalArgumentException if the arrays do not have the same length
216
* or their common length is less than 2</p>
218
* @param xArray first data array
219
* @param yArray second data array
220
* @return Returns Pearson's correlation coefficient for the two arrays
221
* @throws IllegalArgumentException if the arrays lengths do not match or
222
* there is insufficient data
224
public double correlation(final double[] xArray, final double[] yArray) throws IllegalArgumentException {
225
SimpleRegression regression = new SimpleRegression();
226
if(xArray.length == yArray.length && xArray.length > 1) {
227
for(int i=0; i<xArray.length; i++) {
228
regression.addData(xArray[i], yArray[i]);
230
return regression.getR();
233
throw MathRuntimeException.createIllegalArgumentException(
234
"invalid array dimensions. xArray has size {0}; yArray has {1} elements",
235
xArray.length, yArray.length);
240
* Derives a correlation matrix from a covariance matrix.
242
* <p>Uses the formula <br/>
243
* <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where
244
* <code>r(·,·)</code> is the correlation coefficient and
245
* <code>s(·)</code> means standard deviation.</p>
247
* @param covarianceMatrix the covariance matrix
248
* @return correlation matrix
250
public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) {
251
int nVars = covarianceMatrix.getColumnDimension();
252
RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
253
for (int i = 0; i < nVars; i++) {
254
double sigma = Math.sqrt(covarianceMatrix.getEntry(i, i));
255
outMatrix.setEntry(i, i, 1d);
256
for (int j = 0; j < i; j++) {
257
double entry = covarianceMatrix.getEntry(i, j) /
258
(sigma * Math.sqrt(covarianceMatrix.getEntry(j, j)));
259
outMatrix.setEntry(i, j, entry);
260
outMatrix.setEntry(j, i, entry);
267
* Throws IllegalArgumentException of the matrix does not have at least
268
* two columns and two rows
270
* @param matrix matrix to check for sufficiency
272
private void checkSufficientData(final RealMatrix matrix) {
273
int nRows = matrix.getRowDimension();
274
int nCols = matrix.getColumnDimension();
275
if (nRows < 2 || nCols < 2) {
276
throw MathRuntimeException.createIllegalArgumentException(
277
"insufficient data: only {0} rows and {1} columns.",