1
mean(v::AbstractArray, dim::Int) = sum(v,dim)/size(v,dim)
3
weighted_mean(v::AbstractArray, w::AbstractArray) = sum(v.*w)/sum(w)
5
## median absolute deviation with known center with consistency adjustment
6
mad(v::AbstractArray, center::Number) = 1.4826 * median(abs(v - center))
8
## median absolute deviation
9
mad(v::AbstractArray) = mad(v, median(v))
11
## maximum likelihood estimate of skewness with known mean m
12
function skewness(v::AbstractVector, m::Number)
14
empirical_third_centered_moment = 0.0
15
empirical_variance = 0.0
17
empirical_third_centered_moment += (x_i - m)^3
18
empirical_variance += (x_i - m)^2
20
empirical_third_centered_moment /= n
21
empirical_variance /= n
22
return empirical_third_centered_moment / (empirical_variance^1.5)
25
## maximum likelihood estimate of skewness
26
skewness(v::AbstractVector) = skewness(v, mean(v))
28
## maximum likelihood estimate of kurtosis with known mean m
29
function kurtosis(v::AbstractVector, m::Number)
31
empirical_fourth_centered_moment = 0.0
32
empirical_variance = 0.0
34
empirical_fourth_centered_moment += (x_i - m)^4
35
empirical_variance += (x_i - m)^2
37
empirical_fourth_centered_moment /= n
38
empirical_variance /= n
39
return (empirical_fourth_centered_moment / (empirical_variance^2)) - 3.0
42
## maximum likelihood estimate of kurtosis
43
kurtosis(v::AbstractVector) = kurtosis(v, mean(v))
46
function dist(m::AbstractMatrix)
48
d = Array(Float64, n, n)
52
x = norm(m[i, :] - m[j, :])
60
## order (aka, rank), resolving ties using the mean rank
61
function tiedrank(v::AbstractArray)
64
ord = Array(Float64, n)
69
while j + 1 <= n && v[place[i]] == v[place[j + 1]]
74
m = sum(i:j) / (j - i + 1)
87
tiedrank(X::AbstractMatrix) = tiedrank(reshape(X, length(X)))
88
function tiedrank(X::AbstractMatrix, dim::Int)
89
retmat = apply(hcat, amap(tiedrank, X, 3 - dim))
90
return dim == 1 ? retmat : retmat'
93
## spearman covariance functions ##
95
# spearman covariance between two vectors
96
cov_spearman(x::AbstractVector, y::AbstractVector, corrected::Bool) = cov(tiedrank(x), tiedrank(y), corrected)
98
# spearman covariance over all pairs of columns of two matrices
99
cov_spearman(X::AbstractMatrix, Y::AbstractMatrix, corrected::Bool) = [cov_spearman(X[:,i], Y[:,j], corrected) for i = 1:size(X, 2), j = 1:size(Y,2)]
100
cov_spearman(x::AbstractVector, Y::AbstractMatrix, corrected::Bool) = [cov_spearman(x, Y[:,i], corrected) for i = 1:size(Y, 2)]
101
cov_spearman(X::AbstractMatrix, y::AbstractVector, corrected::Bool) = [cov_spearman(X[:,i], y, corrected) for i = 1:size(X, 2)]
103
# spearman covariance over all pairs of columns of a matrix
104
cov_spearman(X::AbstractMatrix, corrected::Bool) = cov(tiedrank(X, 1), corrected)
106
cov_spearman(x) = cov_spearman(x, true)
107
cov_spearman(x, y) = cov_spearman(x, y, true)
109
## spearman correlation functions ##
111
# spearman correlation between two vectors
112
cor_spearman(x::AbstractVector, y::AbstractVector, corrected::Bool) = cor(tiedrank(x), tiedrank(y), corrected)
114
# spearman correlation over all pairs of columns of two matrices
115
cor_spearman(X::AbstractMatrix, Y::AbstractMatrix, corrected::Bool) = cor(tiedrank(X, 1), tiedrank(Y, 1))
116
cor_spearman(X::AbstractMatrix, y::AbstractVector, corrected::Bool) = cor(tiedrank(X, 1), tiedrank(y))
117
cor_spearman(x::AbstractVector, Y::AbstractMatrix, corrected::Bool) = cor(tiedrank(x), tiedrank(Y, 1))
119
# spearman correlation over all pairs of columns of a matrix
120
cor_spearman(X::AbstractMatrix, corrected::Bool) = cor(tiedrank(X, 1), corrected)
122
cor_spearman(x) = cor_spearman(x, true)
123
cor_spearman(x, y) = cor_spearman(x, y, true)
125
## autocorrelation at a specific lag
126
autocor(v::AbstractVector, lag::Int) = cor(v[1:end-lag], v[1+lag:end])
128
## autocorrelation at a default lag of 1
129
autocor(v::AbstractVector) = autocor(v, 1)
131
quantile(v::AbstractVector) = quantile(v,[.0,.25,.5,.75,1.0])
132
percentile(v::AbstractVector) = quantile(v,[1:99]/100)
133
quartile(v::AbstractVector) = quantile(v,[.25,.5,.75])
134
quintile(v::AbstractVector) = quantile(v,[.2,.4,.6,.8])
135
decile(v::AbstractVector) = quantile(v,[.1,.2,.3,.4,.5,.6,.7,.8,.9])
136
iqr(v::AbstractVector) = quantile(v,[0.25,0.75])
138
## run-length encoding
139
function rle{T}(v::Vector{T})
145
lengths = Array(Int, n)
148
if v[i] == current_value
151
values[total_values] = current_value
153
lengths[total_lengths] = current_length
159
values[total_values] = current_value
160
lengths[total_lengths] = current_length
161
return (values[1:total_values], lengths[1:total_lengths])
164
## inverse run-length encoding
165
function inverse_rle{T}(values::Vector{T}, lengths::Vector{Int})
166
total_n = sum(lengths)
168
res = Array(T, total_n)
181
## old stats tests ##
183
@test abs(autocor([1, 2, 3, 4, 5]) - 1.0) < 10e-8
185
@test iqr([1, 2, 3, 4, 5]) == [2.0, 4.0]
187
z = [true, true, false, false, true, false, true, true, true]
188
values, lengths = rle(z)
189
@test values == [true, false, true, false, true]
190
@test lengths == [2, 2, 1, 1, 3]
191
@test inverse_rle(values, lengths) == z
193
z = [true, true, false, false, true, false, true, true, true, false]
194
values, lengths = rle(z)
195
@test values == [true, false, true, false, true, false]
196
@test lengths == [2, 2, 1, 1, 3, 1]
197
@test inverse_rle(values, lengths) == z
200
d = [0.0 sqrt(2); sqrt(2) 0.0]
201
@test norm(dist(m) - d) < 10e-8
203
m = [3.0 1.0; 5.0 1.0]
204
d = [0.0 2.0; 2.0 0.0]
205
@test norm(dist(m) - d) < 10e-8
207
m = [1 0 0; 0 1 0 ; 1 0 1]
208
d = [0.0 sqrt(2) 1.0; sqrt(2) 0.0 sqrt(3); 1.0 sqrt(3) 0.0]
209
@test norm(dist(m) - d) < 10e-8
211
@assert_approx_eq cov_spearman(X, y)[1] cov_spearman(X[:,1],y)
212
@assert_approx_eq cov_spearman(X) cov_spearman(X, X)
213
@assert_approx_eq cov_spearman(X, y) [-0.25, -0.1875]