1
/***************************************************************************
2
File : ShapiroWilkTest.cpp
4
--------------------------------------------------------------------
5
Copyright : (C) 2010 by Ion Vasilief
6
Email (use @ for *) : ion_vasilief*yahoo.fr
7
Description : Normality test. The code was taken from R (see file swilk.c)
9
***************************************************************************/
11
/***************************************************************************
13
* This program is free software; you can redistribute it and/or modify *
14
* it under the terms of the GNU General Public License as published by *
15
* the Free Software Foundation; either version 2 of the License, or *
16
* (at your option) any later version. *
18
* This program is distributed in the hope that it will be useful, *
19
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
20
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
21
* GNU General Public License for more details. *
23
* You should have received a copy of the GNU General Public License *
24
* along with this program; if not, write to the Free Software *
25
* Foundation, Inc., 51 Franklin Street, Fifth Floor, *
26
* Boston, MA 02110-1301 USA *
28
***************************************************************************/
30
#include <ShapiroWilkTest.h>
31
#include <gsl/gsl_randist.h>
32
#include <gsl/gsl_cdf.h>
33
#include <gsl/gsl_sort.h>
36
# define min(a, b) ((a) > (b) ? (b) : (a))
40
# define sign(a) ((a) >= 0 ? 1.0 : -1.0)
43
ShapiroWilkTest::ShapiroWilkTest(ApplicationWindow *parent, const QString& sample)
44
: StatisticTest(parent, 0.0, 0.05, sample),
48
if (d_n < 3 || d_n > 5000){
49
QMessageBox::critical(parent, QObject::tr("Attention!"), QObject::tr("Sample size must be between 3 and 5000."));
52
setObjectName(QObject::tr("Shapiro-Wilk Normality Test"));
59
gsl_sort(d_data, 1, d_n);// the data must be sorted first
60
swilk(&init, d_data, &n, &n1, &n2, a, &d_w, &d_pValue, &error);
64
QString ShapiroWilkTest::logInfo()
66
QString s = "\n[" + QDateTime::currentDateTime().toString(Qt::LocalDate)+ " \"" + d_table->objectName() + "\"]\t";
67
s += QObject::tr("Normality Test (Shapiro - Wilk)") + "\n\n";
68
return s + infoString();
71
QString ShapiroWilkTest::shortLogInfo()
73
return infoString(false);
76
QString ShapiroWilkTest::infoString(bool header)
78
ApplicationWindow *app = (ApplicationWindow *)parent();
79
QLocale l = app->locale();
80
int p = app->d_decimal_digits;
83
lst << QObject::tr("Dataset");
84
lst << QObject::tr("N");
85
lst << QObject::tr("W");
86
lst << QObject::tr("P Value");
88
lst << QString::number(d_n);
89
lst << l.toString(d_w, 'g', p);
90
lst << l.toString(d_pValue, 'g', p);
92
QFontMetrics fm(app->font());
94
foreach(QString aux, lst){
95
int aw = fm.width(aux);
102
for (int i = 0; i < 4; i++){
103
QString aux = lst[i];
104
int spaces = ceil((double)(width - fm.width(aux))/(double)fm.width(QLatin1Char(' '))) + 1;
105
head += aux + QString(spaces, QLatin1Char(' '));
108
head += QObject::tr("Decision");
115
for (int i = 4; i < lst.size(); i++){
116
QString aux = lst[i];
117
int spaces = ceil((double)(width - fm.width(aux))/(double)fm.width(QLatin1Char(' '))) + 1;
118
val += aux + QString(spaces, QLatin1Char(' '));
121
if (d_pValue >= d_significance_level)
122
val += QObject::tr("Normal at %1 level").arg(l.toString(d_significance_level));
124
val += QObject::tr("Not normal at %1 level").arg(l.toString(d_significance_level));
127
int scores = ceil((double)fm.width(val)/(double)fm.width(QLatin1Char('-')));
128
s +="\n" + QString(scores, QLatin1Char('-')) + "\n";
131
return s + val + "\n";
134
void ShapiroWilkTest::swilk(int *init,/* logical: is a[] already initialized ? */
135
double *x, int *n, int *n1, int *n2,
136
double *a,/* coefficients a[] */
137
double *w, double *pw, int *ifault)
140
/* ALGORITHM AS R94 APPL. STATIST. (1995) vol.44, no.4, 547-551.
142
Calculates the Shapiro-Wilk W test and its significance level
145
/* Initialized data */
146
const static double zero = 0.f;
147
const static double one = 1.f;
148
const static double two = 2.f;
150
const static double small = 1e-19f;
152
/* polynomial coefficients */
153
const static double g[2] = { -2.273f,.459f };
155
c1[6] = { 0.f,.221157f,-.147981f,-2.07119f, 4.434685f, -2.706056f },
156
c2[6] = { 0.f,.042981f,-.293762f,-1.752461f,5.682633f, -3.582633f };
157
const static double c3[4] = { .544f,-.39978f,.025054f,-6.714e-4f };
158
const static double c4[4] = { 1.3822f,-.77857f,.062767f,-.0020322f };
159
const static double c5[4] = { -1.5861f,-.31082f,-.083751f,.0038915f };
160
const static double c6[3] = { -.4803f,-.082676f,.0030302f };
161
const static double c7[2] = { .164f,.533f };
162
const static double c8[2] = { .1736f,.315f };
163
const static double c9[2] = { .256f,-.00635f };
165
/* System generated locals */
169
Auxiliary routines : poly() {below}
171
/* Local variables */
172
int i, j, ncens, i1, nn2;
174
double zbar, ssassx, summ2, ssumm2, gamma, delta, range;
175
double a1, a2, an, bf, ld, m, s, sa, xi, sx, xx, y, w1;
176
double fac, asa, an25, ssa, z90f, sax, zfm, z95f, zsd, z99f, rsn, ssx, xsx;
182
if (*n < 3) { *ifault = 1; return;
187
if (*n2 < nn2) { *ifault = 3; return;
189
if (*n1 < 3) { *ifault = 1; return;
192
if (ncens < 0 || (ncens > 0 && *n < 20)) { *ifault = 4; return;
195
delta = (double) ncens / an;
196
if (delta > .8f) { *ifault = 5; return;
198
} /* just for -Wall:*/ else { delta = 0.f; }
200
--a; /* so we can keep using 1-based indices */
202
/* If INIT is false (always when called from R),
203
* calculate coefficients a[] for the test statistic W */
206
const static double sqrth = .70710678f;/* = sqrt(1/2), was .70711f */
211
for (i = 1; i <= *n2; ++i) {
212
a[i] = gsl_cdf_ugaussian_Pinv ((i - .375f) / an25);
214
summ2 += r__1 * r__1;
217
ssumm2 = sqrt(summ2);
218
rsn = one / sqrt(an);
219
a1 = poly(c1, 6, rsn) - a[1] / ssumm2;
224
a2 = -a[2] / ssumm2 + poly(c2, 6, rsn);
225
fac = sqrt((summ2 - two * (a[1] * a[1]) - two * (a[2] * a[2]))
226
/ (one - two * (a1 * a1) - two * (a2 * a2)));
230
fac = sqrt((summ2 - two * (a[1] * a[1])) /
231
( one - two * (a1 * a1)));
234
for (i = i1; i <= nn2; ++i)
240
/* If W is input as negative, calculate significance level of -W */
248
/* Check for zero range */
250
range = x[*n1 - 1] - x[0];
255
/* Check for correct sort order on range - scaled X */
257
/* *ifault = 7; <-- a no-op, since it is changed below, in ANY CASE! */
263
for (i = 1; i < *n1; --j) {
265
if (xx - xi > small) {
266
/* Fortran had: print *, "ANYTHING"
267
* but do NOT; it *does* happen with sorted x (on Intel GNU/linux 32bit):
268
* shapiro.test(c(-1.7, -1,-1,-.73,-.61,-.5,-.24, .45,.62,.81,1))
275
sa += sign(i - j) * a[min(i,j)];
282
/* Calculate W statistic as squared correlation
283
between data and coefficients */
287
ssa = ssx = sax = zero;
289
for (i = 0; i < *n1; ++i, --j) {
291
asa = sign(i - j) * a[1+min(i,j)] - sa;
294
xsx = x[i] / range - sx;
300
/* W1 equals (1-W) calculated to avoid excessive rounding error
301
for W very near 1 (a potential problem in very large samples) */
303
ssassx = sqrt(ssa * ssx);
304
w1 = (ssassx - sax) * (ssassx + sax) / (ssa * ssx);
308
/* Calculate significance level for W */
310
if (*n == 3) {/* exact P value : */
311
const static double pi6 = 1.90985931710274;/* = 6/pi, was 1.909859f */
312
const static double stqr= 1.04719755119660;/* = asin(sqrt(3/4)), was 1.047198f */
313
*pw = pi6 * (asin(sqrt(*w)) - stqr);
314
if(*pw < 0.) *pw = 0.;
320
gamma = poly(g, 2, an);
322
*pw = 1e-99;/* an "obvious" value, was 'small' which was 1e-19f */
327
s = exp(poly(c4, 4, an));
328
} else {/* n >= 12 */
330
s = exp(poly(c6, 3, xx));
332
/*DBG printf("c(w1=%g, w=%g, y=%g, m=%g, s=%g)\n",w1,*w,y,m,s); */
334
if (ncens > 0) {/* <==> n > n1 --- not happening currently when called from R */
336
/* Censoring by proportion NCENS/N.
337
Calculate mean and sd of normal equivalent deviate of W. */
339
const static double three = 3.f;
341
const static double z90 = 1.2816f;
342
const static double z95 = 1.6449f;
343
const static double z99 = 2.3263f;
344
const static double zm = 1.7509f;
345
const static double zss = .56268f;
346
const static double bf1 = .8378f;
348
const static double xx90 = .556;
349
const static double xx95 = .622;
353
r__1 = pow(xx90, (double) xx);
354
z90f = z90 + bf * pow(poly(c7, 2, r__1), (double) ld);
355
r__1 = pow(xx95, (double) xx);
356
z95f = z95 + bf * pow(poly(c8, 2, r__1), (double) ld);
357
z99f = z99 + bf * pow(poly(c9, 2, xx), (double)ld);
359
/* Regress Z90F,...,Z99F on normal deviates Z90,...,Z99 to get
360
pseudo-mean and pseudo-sd of z as the slope and intercept */
362
zfm = (z90f + z95f + z99f) / three;
363
zsd = (z90 * (z90f - zfm) +
364
z95 * (z95f - zfm) + z99 * (z99f - zfm)) / zss;
365
zbar = zfm - zsd * zm;
369
//*pw = pnorm((double) y, (double)m, (double)s, 0/* upper tail */, 0);
370
*pw = gsl_cdf_gaussian_Q(y - m, s);
371
/* = alnorm_(dble((Y - M)/S), 1); */
376
double ShapiroWilkTest::poly(const double *cc, int nord, double x)
378
/* Algorithm AS 181.2 Appl. Statist. (1982) Vol. 31, No. 2
380
Calculates the algebraic polynomial of order nord-1 with
381
array of coefficients cc. Zero order coefficient is cc(1) = cc[0]
383
/* Local variables */
385
double p, ret_val;/* preserve precision! */
390
for (j = nord - 2; j > 0; j--)