19
34
# Main purpose of this is to allow custom kernels and to allow speed up
20
35
# from finite support.
22
def __init__(self, shape, h = 1.0, domain = None):
37
def __init__(self, shape, h = 1.0, domain = None, norm = None):
24
39
shape should be a lambda taking and returning numeric type.
26
41
For sanity it should always return positive or zero but this isn't
42
enforced incase you want to do weird things. Bear in mind that the
43
statistical tests etc. may not be valid for non-positive kernels.
45
The bandwidth of the kernel is supplied as h.
47
You may specify a domain as a list of 2 values [min,max], in which case
48
kernel will be treated as zero outside these values. This will speed up
51
You may also specify the normalisation constant for the supplied Kernel.
52
If you do this number will be stored and used as the normalisation
53
without calculation. It is recommended you do this if you know the
54
constant, to speed up calculation. In particular if the shape function
55
provided is already normalised you should provide
62
self._normconst = norm
29
63
self.domain = domain
30
# TODO: Add checking code that shape is valid
67
raise TypeError("shape must be a callable object/function")
72
"""Getter for kernel bandwidth, h"""
74
def seth(self, value):
75
"""Setter for kernel bandwidth, h"""
77
h = property(geth, seth, doc="Kernel Bandwidth")
79
def inDomain(self, xs, ys, x):
81
Returns the filtered (xs, ys) based on the Kernel domain centred on x
83
# Disable black-list functions: filter used for speed instead of
85
# pylint: disable-msg=W0141
87
"""Used for filter to check if point is in the domain"""
89
return u >= self.domain[0] and u <= self.domain[1]
91
if self.domain is None:
94
filtered = filter(isInDomain, zip(xs, ys))
96
xs, ys = zip(*filtered)
34
101
def smooth(self, xs, ys, x):
35
102
"""Returns the kernel smoothing estimate for point x based on x-values
36
103
xs and y-values ys.
37
104
Not expected to be called by the user.
39
# TODO: make filtering more efficient
40
# TODO: Implement WARPing algorithm when possible
41
if self.domain is None:
42
filtered = zip(xs, ys)
46
for xx,yy in zip(xs,ys)
47
if (xx-x)/self.h >= self.domain[0]
48
and (xx-x)/self.h <= self.domain[1]
106
xs, ys = self.inDomain(xs, ys, x)
52
xs,ys = zip(*filtered)
53
109
w = np.sum([self((xx-x)/self.h) for xx in xs])
54
v = np.sum([yy*self((xx-x)/self.h) for xx, yy in zip(xs,ys)])
110
v = np.sum([yy*self((xx-x)/self.h) for xx, yy in zip(xs, ys)])
59
115
def smoothvar(self, xs, ys, x):
60
116
"""Returns the kernel smoothing estimate of the variance at point x.
62
# TODO: linear interpolation of the fit can speed this up for large
63
# datasets without noticable error.
64
# TODO: this is thoroughly inefficient way of doing this at the moment
65
# but it works for small dataset - remove repeated w calc.
66
if self.domain is None:
67
filtered = zip(xs, ys)
71
for xx,yy in zip(xs, ys)
72
if (xx-x)/self.h >= self.domain[0]
73
and (xx-x)/self.h <= self.domain[1]
77
xs,ys = zip(*filtered)
118
xs, ys = self.inDomain(xs, ys, x)
78
121
fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])
80
subtract(ys, fittedvals)
122
sqresid = square( subtract(ys, fittedvals) )
82
123
w = np.sum([self((xx-x)/self.h) for xx in xs])
83
v = np.sum([rr*self((xx-x)/self.h) for xx, rr in zip(xs, rr)])
124
v = np.sum([rr*self((xx-x)/self.h) for xx, rr in zip(xs, sqresid)])
88
129
def smoothconf(self, xs, ys, x):
89
130
"""Returns the kernel smoothing estimate with confidence 1sigma bounds
91
# TODO: This is a HORRIBLE implementation - need unhorribleising
92
# Also without the correct L2Norm and norm_const this will be out by a
94
if self.domain is None:
95
filtered = zip(xs, ys)
99
for xx,yy in zip(xs, ys)
100
if (xx-x)/self.h >= self.domain[0]
101
and (xx-x)/self.h <= self.domain[1]
104
if len(filtered) > 0:
105
xs,ys = zip(*filtered)
132
xs, ys = self.inDomain(xs, ys, x)
106
135
fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])
107
136
sqresid = square(
108
137
subtract(ys, fittedvals)
110
139
w = np.sum([self((xx-x)/self.h) for xx in xs])
111
140
v = np.sum([rr*self((xx-x)/self.h) for xx, rr in zip(xs, sqresid)])
113
142
sd = np.sqrt(var)
115
144
yhat = self.smooth(xs, ys, x)
116
err = sd * K/np.sqrt(w)
117
return (yhat-err, yhat, yhat+err)
145
err = sd * K / np.sqrt(w * self.h * self.norm_const)
146
return (yhat - err, yhat, yhat + err)
119
148
return (np.nan, np.nan, np.nan)
121
# TODO: make this a property
122
151
def L2Norm(self):
123
152
"""Returns the integral of the square of the kernal from -inf to inf"""
124
#TODO: yeah right. For now just stick a number here
125
# easy enough to sort this for the specific kernels
128
# TODO: make this a property
129
def norm_const(self, x):
130
"""Normalising constant for kernel (integral from -inf to inf)"""
131
# TODO: again, this needs sorting
153
if self._L2Norm is None:
154
L2Func = lambda x: (self.norm_const*self._shape(x))**2
155
if self.domain is None:
156
self._L2Norm = scipy.integrate.quad(L2Func, -inf, inf)[0]
158
self._L2Norm = scipy.integrate.quad(L2Func, self.domain[0],
163
def norm_const(self):
165
Normalising constant for kernel (integral from -inf to inf)
167
if self._normconst is None:
168
if self.domain is None:
169
quadres = scipy.integrate.quad(self._shape, -inf, inf)
171
quadres = scipy.integrate.quad(self._shape, self.domain[0],
173
self._normconst = 1.0/(quadres[0])
174
return self._normconst
177
"""This returns the normalised weight at distance x"""
178
return self.norm_const*self._shape(x)
134
180
def __call__(self, x):
182
This simply returns the value of the kernel function at x
184
Does the same as weight if the function is normalised
135
186
return self._shape(x)
139
class Default(object):
189
# pylint: disable-msg=R0903
190
class DefaultKernel(CustomKernel):
140
191
"""Represents the default kernel - should not be used directly
141
192
This contains no functionality and acts as a placeholder for the consuming
146
class Uniform(Custom):
147
def __init__(self, h=1.0):
149
self._shape = lambda x: 1.0
150
self.domain = [-1.0,1.0]
152
class Triangular(Custom):
153
def __init__(self, h=1.0):
155
self._shape = lambda x: 1-abs(x)
156
self.domain = [-1.0,1.0]
158
class Epanechnikov(Custom):
159
def __init__(self, h=1.0):
161
self._shape = lambda x: (1-x*x)
162
self.domain = [-1.0,1.0]
164
class Biweight(Custom):
165
def __init__(self, h=1.0):
167
self._shape = lambda x: (1-x*x)**2
168
self.domain = [-1.0,1.0]
170
class Triweight(Custom):
171
def __init__(self, h=1.0):
173
self._shape = lambda x: (1-x*x)**3
174
self.domain = [-1.0,1.0]
176
class Gaussian(Custom):
177
def __init__(self, h=1.0):
179
self._shape = lambda x: np.exp(-x**2/2.0)
182
def smooth(self, xs, ys, x):
196
# pylint: enable-msg=R0903
198
class Uniform(CustomKernel):
199
def __init__(self, h=1.0):
200
CustomKernel.__init__(self, shape=lambda x: 0.5, h=h,
201
domain=[-1.0, 1.0], norm = 1.0)
204
class Triangular(CustomKernel):
205
def __init__(self, h=1.0):
206
CustomKernel.__init__(self, shape=lambda x: 1 - abs(x), h=h,
207
domain=[-1.0, 1.0], norm = 1.0)
208
self._L2Norm = 2.0/3.0
210
class Epanechnikov(CustomKernel):
211
def __init__(self, h=1.0):
212
CustomKernel.__init__(self, shape=lambda x: 0.75*(1 - x*x), h=h,
213
domain=[-1.0, 1.0], norm = 1.0)
216
class Biweight(CustomKernel):
217
def __init__(self, h=1.0):
218
CustomKernel.__init__(self, shape=lambda x: 0.9375*(1 - x*x)**2, h=h,
219
domain=[-1.0, 1.0], norm = 1.0)
220
self._L2Norm = 5.0/7.0
222
def smooth(self, xs, ys, x):
223
"""Returns the kernel smoothing estimate for point x based on x-values
225
Not expected to be called by the user.
227
Special implementation optimised for Biweight.
229
xs, ys = self.inDomain(xs, ys, x)
232
w = np.sum(square(subtract(1, square(divide(subtract(xs, x),
234
v = np.sum(multiply(ys, square(subtract(1, square(divide(
235
subtract(xs, x), self.h))))))
240
def smoothvar(self, xs, ys, x):
242
Returns the kernel smoothing estimate of the variance at point x.
244
xs, ys = self.inDomain(xs, ys, x)
247
fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])
248
rs = square(subtract(ys, fittedvals))
249
w = np.sum(square(subtract(1.0, square(divide(subtract(xs, x),
251
v = np.sum(multiply(rs, square(subtract(1, square(divide(
252
subtract(xs, x), self.h))))))
257
def smoothconf(self, xs, ys, x):
258
"""Returns the kernel smoothing estimate with confidence 1sigma bounds
260
xs, ys = self.inDomain(xs, ys, x)
263
fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])
265
subtract(ys, fittedvals)
267
w = np.sum(square(subtract(1.0, square(divide(subtract(xs, x),
269
v = np.sum(multiply(rs, square(subtract(1, square(divide(
270
subtract(xs, x), self.h))))))
274
yhat = self.smooth(xs, ys, x)
275
err = sd * K / np.sqrt(0.9375 * w * self.h)
276
return (yhat - err, yhat, yhat + err)
278
return (np.nan, np.nan, np.nan)
280
class Triweight(CustomKernel):
281
def __init__(self, h=1.0):
282
CustomKernel.__init__(self, shape=lambda x: 1.09375*(1 - x*x)**3, h=h,
283
domain=[-1.0, 1.0], norm = 1.0)
284
self._L2Norm = 350.0/429.0
286
class Gaussian(CustomKernel):
288
Gaussian (Normal) Kernel
290
K(u) = 1 / (sqrt(2*pi)) exp(-0.5 u**2)
292
def __init__(self, h=1.0):
293
CustomKernel.__init__(self, shape = lambda x: 0.3989422804014327 *
294
np.exp(-x**2/2.0), h = h, domain = None, norm = 1.0)
295
self._L2Norm = 1.0/(2.0*np.sqrt(np.pi))
297
def smooth(self, xs, ys, x):
298
"""Returns the kernel smoothing estimate for point x based on x-values
300
Not expected to be called by the user.
302
Special implementation optimised for Gaussian.
304
w = np.sum(exp(multiply(square(divide(subtract(xs, x),
306
v = np.sum(multiply(ys, exp(multiply(square(divide(subtract(xs, x),
216
def norm_const(self):
217
return 0.39894228 # 1/sqrt(2 pi)
220
return 0.86226925 # sqrt(pi)/2
222
class Cosine(Custom):
310
class Cosine(CustomKernel):
314
K(u) = pi/4 cos(0.5 * pi * u) between -1.0 and 1.0
223
316
def __init__(self, h=1.0):
225
self._shape = lambda x: np.cos(np.pi/2.0 * x)
226
self.domain = [-1.0,1.0]
b'\\ No newline at end of file'
317
CustomKernel.__init__(self, shape=lambda x: 0.78539816339744828 *
318
np.cos(np.pi/2.0 * x), h=h, domain=[-1.0, 1.0], norm = 1.0)
319
self._L2Norm = np.pi**2/16.0
b'\\ No newline at end of file'