3
A collection of moving functions for masked arrays and time series
5
:author: Pierre GF Gerard-Marchant & Matt Knox
6
:contact: pierregm_at_uga_dot_edu - mattknox_ca_at_hotmail_dot_com
7
:version: $Id: filters.py 2819 2007-03-03 23:00:20Z pierregm $
9
__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author: pierregm $)"
11
__revision__ = "$Revision: 2819 $"
12
__date__ = '$Date: 2007-03-03 18:00:20 -0500 (Sat, 03 Mar 2007) $'
15
from numpy import bool_, float_
18
from scipy.signal import convolve, get_window
20
import maskedarray as MA
21
from maskedarray import MaskedArray, nomask, getmask, getmaskarray, masked
24
from timeseries.cseries import MA_mov_stddev, MA_mov_sum, MA_mov_average, \
27
__all__ = ['mov_sum', 'mov_median',
28
'mov_average', 'mov_mean', 'mov_average_expw',
29
'mov_stddev', 'mov_var', 'mov_covar', 'mov_corr',
30
'cmov_average', 'cmov_mean', 'cmov_window'
33
def _process_result_dict(orig_data, result_dict):
34
"process the results from the c function"
36
rarray = result_dict['array']
37
rtype = result_dict['array'].dtype
38
rmask = result_dict['mask']
40
# makes a copy of the appropriate type
41
data = orig_data.astype(rtype).copy()
42
data.flat = result_dict['array'].ravel()
43
if not hasattr(data, '__setmask__'):
44
data = data.view(MA.MaskedArray)
45
data.__setmask__(rmask)
48
def _moving_func(data, cfunc, kwargs):
51
kwargs['array'] = data
53
result_dict = cfunc(**kwargs)
54
return _process_result_dict(data, result_dict)
57
for i in range(data.shape[-1]):
58
kwargs['array'] = data[:,i]
59
result_dict = cfunc(**kwargs)
62
rtype = result_dict['array'].dtype
63
result = data.astype(rtype)
64
print data.dtype, result.dtype
66
rmask = result_dict['mask']
68
curr_col = marray(result_dict['array'], mask=rmask, copy=False)
69
result[:,i] = curr_col
74
raise ValueError, "Data should be at most 2D"
76
#...............................................................................
77
def mov_sum(data, span, dtype=None):
78
"""Calculates the moving sum of a series.
85
kwargs = {'span':span}
87
kwargs['dtype'] = dtype
89
return _moving_func(data, MA_mov_sum, kwargs)
90
#...............................................................................
91
def mov_median(data, span, dtype=None):
92
"""Calculates the moving median of a series.
99
kwargs = {'span':span}
100
if dtype is not None:
101
kwargs['dtype'] = dtype
103
return _moving_func(data, MA_mov_median, kwargs)
104
#...............................................................................
105
def mov_average(data, span, dtype=None):
106
"""Calculates the moving average of a series.
113
kwargs = {'span':span}
114
if dtype is not None:
115
kwargs['dtype'] = dtype
117
return _moving_func(data, MA_mov_average, kwargs)
118
mov_mean = mov_average
119
#...............................................................................
120
def _mov_var_stddev(data, span, is_variance, bias, dtype):
121
"helper function for mov_var and mov_stddev functions"
123
kwargs = {'span':span,
124
'is_variance':is_variance,
126
if dtype is not None:
127
kwargs['dtype'] = dtype
129
return _moving_func(data, MA_mov_stddev, kwargs)
130
#...............................................................................
131
def mov_var(data, span, bias=False, dtype=None):
132
"""Calculates the moving variance of a 1-D array.
140
return _mov_var_stddev(data=data, span=span,
141
is_variance=1, bias=int(bias), dtype=dtype)
142
#...............................................................................
143
def mov_stddev(data, span, bias=False, dtype=None):
144
"""Calculates the moving standard deviation of a 1-D array.
152
return _mov_var_stddev(data=data, span=span,
153
is_variance=0, bias=int(bias), dtype=dtype)
154
#...............................................................................
155
def mov_covar(x, y, span, bias=False, dtype=None):
156
"""Calculates the moving covariance of two 1-D arrays.
165
result = x - mov_average(x, span, dtype=dtype)
166
result = result * (y - mov_average(y, span, dtype=dtype))
168
if bias: denom = span
169
else: denom = span - 1
172
#...............................................................................
173
def mov_corr(x, y, span, dtype=None):
174
"""Calculates the moving correlation of two 1-D arrays.
182
result = mov_covar(x, y, span, bias=True, dtype=dtype)
183
result = result / mov_stddev(x, span, bias=True, dtype=dtype)
184
result = result / mov_stddev(y, span, bias=True, dtype=dtype)
187
#...............................................................................
188
def mov_average_expw(data, span, tol=1e-6):
189
"""Calculates the exponentially weighted moving average of a series.
194
Time periods. The smoothing factor is 2/(span + 1)
195
tol : float, *[1e-6]*
196
Tolerance for the definition of the mask. When data contains masked
197
values, this parameter determinea what points in the result should be masked.
198
Values in the result that would not be "significantly" impacted (as
199
determined by this parameter) by the masked values are left unmasked."""
201
data = marray(data, copy=True, subok=True)
202
ismasked = (data._mask is not nomask)
203
data._mask = N.zeros(data.shape, bool_)
206
k = 2./float(span + 1)
207
def expmave_sub(a, b):
208
return a + k * (b - a)
210
data._data.flat = N.frompyfunc(expmave_sub, 2, 1).accumulate(_data)
212
_unmasked = N.logical_not(data._mask).astype(float_)
213
marker = 1. - N.frompyfunc(expmave_sub, 2, 1).accumulate(_unmasked)
214
data._mask[marker > tol] = True
218
#...............................................................................
219
def cmov_window(data, span, window_type):
220
"""Applies a centered moving window of type window_type and size span on the
223
Returns a (subclass of) MaskedArray. The k first and k last data are always
224
masked (with k=span//2). When data has a missing value at position i, the
225
result has missing values in the interval [i-k:i+k+1].
230
Data to process. The array should be at most 2D. On 2D arrays, the window
231
is applied recursively on each column.
233
The width of the window.
234
window_type : string/tuple/float
235
Window type (see Notes)
240
The recognized window types are: boxcar, triang, blackman, hamming, hanning,
241
bartlett, parzen, bohman, blackmanharris, nuttall, barthann, kaiser (needs beta),
242
gaussian (needs std), general_gaussian (needs power, width), slepian (needs width).
243
If the window requires parameters, the window_type argument should be a tuple
244
with the first argument the string name of the window, and the next arguments
245
the needed parameters. If window_type is a floating point number, it is interpreted
246
as the beta parameter of the kaiser window.
248
Note also that only boxcar has been thoroughly tested."""
250
data = marray(data, copy=True, subok=True)
251
if data._mask is nomask:
252
data._mask = N.zeros(data.shape, bool_)
253
window = get_window(window_type, span, fftbins=False)
254
(n, k) = (len(data), span//2)
257
data._data.flat = convolve(data._data, window)[k:n+k] / float(span)
258
data._mask[:] = ((convolve(getmaskarray(data), window) > 0)[k:n+k])
260
for i in range(data.shape[-1]):
261
_data = data._data[:,i]
262
_data.flat = convolve(_data, window)[k:n+k] / float(span)
263
data._mask[:,i] = (convolve(data._mask[:,i], window) > 0)[k:n+k]
265
raise ValueError, "Data should be at most 2D"
266
data._mask[:k] = data._mask[-k:] = True
269
def cmov_average(data, span):
270
"""Computes the centered moving average of size span on the data.
272
Returns a (subclass of) MaskedArray. The k first and k last data are always
273
masked (with k=span//2). When data has a missing value at position i,
274
the result has missing values in the interval [i-k:i+k+1].
278
Data to process. The array should be at most 2D. On 2D arrays, the window
279
is applied recursively on each column.
281
The width of the window."""
282
return cmov_window(data, span, 'boxcar')
284
cmov_mean = cmov_average
287
param_doc['data'] = \
289
Data must be an ndarray (or subclass). In particular, note that
290
TimeSeries objects are valid here."""
294
First array to be included in the calculation. x must be an ndarray (or
295
subclass). In particular, note that TimeSeries objects are valid here."""
299
Second array to be included in the calculation. y must be an ndarray (or
300
subclass). In particular, note that TimeSeries objects are valid here."""
302
param_doc['span'] = \
304
Time periods to use for each calculation."""
306
param_doc['bias'] = \
307
"""bias : boolean (*False*)
308
If False, Normalization is by (N-1) where N == span (unbiased
309
estimate). If True then normalization is by N."""
311
param_doc['dtype'] = \
312
"""dtype : numpy data type specification (*None*)
313
dtype for the result"""
319
The result is always a masked array (preserves subclass attributes). The
320
result at index i uses values from [i-span:i+1], and will be masked for the
321
first `span` values. The result will also be masked at i if any of the
322
input values in the slice [i-span:i+1] are masked."""
326
# generate function doc strings
327
for fn in (x for x in __all__ if x[:4] == 'mov_' and x[4:] != 'mean'):
328
fdoc = _g[fn].func_doc
329
for prm, dc in param_doc.iteritems():
330
fdoc = fdoc.replace('$$'+prm+'$$', dc)
331
fdoc += mov_result_doc
332
_g[fn].func_doc = fdoc
335
###############################################################################
336
if __name__ == '__main__':
337
from timeseries import time_series, today
338
from maskedarray.testutils import assert_equal, assert_almost_equal
340
series = time_series(N.arange(10),start_date=today('D'))
342
filtered = mov_sum(series,3)
343
assert_equal(filtered, [0,1,3,6,9,12,15,18,21,24])
344
assert_equal(filtered._mask, [1,1,0,0,0,0,0,0,0,0])
345
assert_equal(filtered._dates, series._dates)
346
assert_equal(series, N.arange(10))
348
filtered = mov_average(series,3)
349
assert_equal(filtered, [0,1,1,2,3,4,5,6,7,8])
350
assert_equal(filtered._mask, [1,1,0,0,0,0,0,0,0,0])
351
assert_equal(filtered._dates, series._dates)
352
assert_equal(series, N.arange(10))
354
filtered = mov_average(series._data,3)
355
assert_equal(filtered, [0,1,1,2,3,4,5,6,7,8])
356
assert_equal(filtered._mask, [1,1,0,0,0,0,0,0,0,0])
357
assert_equal(series, N.arange(10))