~vincent-vincentdavis/statsmodels/update_formatting

~vincent-vincentdavis/statsmodels/update_formatting_descstats

« back to all changes in this revision

Viewing changes to scikits/statsmodels/sandbox/kernel.py

Committer: Vincent Davis
Date: 2010-04-29 02:23:39 UTC
mfrom: (1967.1.9 statsmodels_trunk4)
Revision ID: vincent@vincentdavis.net-20100429022339-dmq5ts3181rpqgq1

upadted from truck

files modified:
scikits/statsmodels/sandbox/kernel.py

scikits/statsmodels/sandbox/smoothers.py

Show diffs side-by-side

added added

removed removed

scikits/statsmodels/sandbox/kernel.py

# -*- coding: utf-8 -*-

"""

This models contains the Kernels for Kernel smoothing.

Hopefully in the future they may be reused/extended for other kernel based method

Hopefully in the future they may be reused/extended for other kernel based

method

References:

----------

Pointwise Kernel Confidence Bounds

(smoothconf)

http://fedc.wiwi.hu-berlin.de/xplore/ebooks/html/anr/anrhtmlframe62.html

"""

# pylint: disable-msg=C0103

# pylint: disable-msg=W0142

# pylint: disable-msg=E1101

# pylint: disable-msg=E0611

import numpy as np

from numpy import exp, multiply, square, divide, subtract

import scipy.integrate

from numpy import exp, multiply, square, divide, subtract, inf

class Custom(object):

class CustomKernel(object):

"""

Generic 1D Kernel object.

Can be constructed by selecting a standard named Kernel,

# Main purpose of this is to allow custom kernels and to allow speed up

# from finite support.

def __init__(self, shape, h = 1.0, domain = None):

def __init__(self, shape, h = 1.0, domain = None, norm = None):

"""

shape should be a lambda taking and returning numeric type.

For sanity it should always return positive or zero but this isn't

enforces.

enforced incase you want to do weird things. Bear in mind that the

statistical tests etc. may not be valid for non-positive kernels.

The bandwidth of the kernel is supplied as h.

You may specify a domain as a list of 2 values [min,max], in which case

kernel will be treated as zero outside these values. This will speed up

calculation.

You may also specify the normalisation constant for the supplied Kernel.

If you do this number will be stored and used as the normalisation

without calculation. It is recommended you do this if you know the

constant, to speed up calculation. In particular if the shape function

provided is already normalised you should provide

norm = 1.0

norm = True

"""

if norm is True:

norm = 1.0

self._normconst = norm

self.domain = domain

# TODO: Add checking code that shape is valid

self._shape = shape

self.h = h

if callable(shape):

self._shape = shape

else:

raise TypeError("shape must be a callable object/function")

self._h = h

self._L2Norm = None

def geth(self):

"""Getter for kernel bandwidth, h"""

return self._h

def seth(self, value):

"""Setter for kernel bandwidth, h"""

self._h = value

h = property(geth, seth, doc="Kernel Bandwidth")

def inDomain(self, xs, ys, x):

"""

Returns the filtered (xs, ys) based on the Kernel domain centred on x

"""

# Disable black-list functions: filter used for speed instead of

# list-comprehension

# pylint: disable-msg=W0141

def isInDomain(xy):

"""Used for filter to check if point is in the domain"""

u = (xy[0]-x)/self.h

return u >= self.domain[0] and u <= self.domain[1]

if self.domain is None:

return (xs, ys)

else:

filtered = filter(isInDomain, zip(xs, ys))

if len(filtered) > 0:

xs, ys = zip(*filtered)

return (xs, ys)

else:

return ([], [])

100

101

def smooth(self, xs, ys, x):

102

"""Returns the kernel smoothing estimate for point x based on x-values

103

xs and y-values ys.

104

Not expected to be called by the user.

105

"""

# TODO: make filtering more efficient

# TODO: Implement WARPing algorithm when possible

if self.domain is None:

filtered = zip(xs, ys)

else:

filtered = [

(xx,yy)

for xx,yy in zip(xs,ys)

if (xx-x)/self.h >= self.domain[0]

and (xx-x)/self.h <= self.domain[1]

]

106

xs, ys = self.inDomain(xs, ys, x)

107

if len(filtered) > 0:

xs,ys = zip(*filtered)

108

if len(xs)>0:

109

w = np.sum([self((xx-x)/self.h) for xx in xs])

v = np.sum([yy*self((xx-x)/self.h) for xx, yy in zip(xs,ys)])

return v/w

110

v = np.sum([yy*self((xx-x)/self.h) for xx, yy in zip(xs, ys)])

111

return v / w

112

else:

113

return np.nan

114

115

def smoothvar(self, xs, ys, x):

116

"""Returns the kernel smoothing estimate of the variance at point x.

117

"""

# TODO: linear interpolation of the fit can speed this up for large

# datasets without noticable error.

# TODO: this is thoroughly inefficient way of doing this at the moment

# but it works for small dataset - remove repeated w calc.

if self.domain is None:

filtered = zip(xs, ys)

else:

filtered = [

(xx,yy)

for xx,yy in zip(xs, ys)

if (xx-x)/self.h >= self.domain[0]

and (xx-x)/self.h <= self.domain[1]

]

if len(filtered) > 0:

xs,ys = zip(*filtered)

118

xs, ys = self.inDomain(xs, ys, x)

119

120

if len(xs) > 0:

121

fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])

sqresid = square(

subtract(ys, fittedvals)

)

122

sqresid = square( subtract(ys, fittedvals) )

123

w = np.sum([self((xx-x)/self.h) for xx in xs])

v = np.sum([rr*self((xx-x)/self.h) for xx, rr in zip(xs, rr)])

var = v/w

124

v = np.sum([rr*self((xx-x)/self.h) for xx, rr in zip(xs, sqresid)])

125

return v / w

126

else:

127

return np.nan

128

129

def smoothconf(self, xs, ys, x):

130

"""Returns the kernel smoothing estimate with confidence 1sigma bounds

131

"""

# TODO: This is a HORRIBLE implementation - need unhorribleising

# Also without the correct L2Norm and norm_const this will be out by a

# factor.

if self.domain is None:

filtered = zip(xs, ys)

else:

filtered = [

(xx,yy)

for xx,yy in zip(xs, ys)

100

if (xx-x)/self.h >= self.domain[0]

101

and (xx-x)/self.h <= self.domain[1]

102

]

103

104

if len(filtered) > 0:

105

xs,ys = zip(*filtered)

132

xs, ys = self.inDomain(xs, ys, x)

133

134

if len(xs) > 0:

106

135

fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])

107

136

sqresid = square(

108

137

subtract(ys, fittedvals)

109

138

)

110

139

w = np.sum([self((xx-x)/self.h) for xx in xs])

111

140

v = np.sum([rr*self((xx-x)/self.h) for xx, rr in zip(xs, sqresid)])

112

var = v/w

141

var = v / w

113

142

sd = np.sqrt(var)

114

K = self.L2Norm()

143

K = self.L2Norm

115

144

yhat = self.smooth(xs, ys, x)

116

err = sd * K/np.sqrt(w)

117

return (yhat-err, yhat, yhat+err)

145

err = sd * K / np.sqrt(w * self.h * self.norm_const)

146

return (yhat - err, yhat, yhat + err)

118

147

else:

119

148

return (np.nan, np.nan, np.nan)

120

149

121

# TODO: make this a property

150

@property

122

151

def L2Norm(self):

123

152

"""Returns the integral of the square of the kernal from -inf to inf"""

124

#TODO: yeah right. For now just stick a number here

125

# easy enough to sort this for the specific kernels

126

return 1

127

128

# TODO: make this a property

129

def norm_const(self, x):

130

"""Normalising constant for kernel (integral from -inf to inf)"""

131

# TODO: again, this needs sorting

132

return 1

153

if self._L2Norm is None:

154

L2Func = lambda x: (self.norm_const*self._shape(x))**2

155

if self.domain is None:

156

self._L2Norm = scipy.integrate.quad(L2Func, -inf, inf)[0]

157

else:

158

self._L2Norm = scipy.integrate.quad(L2Func, self.domain[0],

159

self.domain[1])[0]

160

return self._L2Norm

161

162

@property

163

def norm_const(self):

164

"""

165

Normalising constant for kernel (integral from -inf to inf)

166

"""

167

if self._normconst is None:

168

if self.domain is None:

169

quadres = scipy.integrate.quad(self._shape, -inf, inf)

170

else:

171

quadres = scipy.integrate.quad(self._shape, self.domain[0],

172

self.domain[1])

173

self._normconst = 1.0/(quadres[0])

174

return self._normconst

175

176

def weight(self, x):

177

"""This returns the normalised weight at distance x"""

178

return self.norm_const*self._shape(x)

133

179

134

180

def __call__(self, x):

181

"""

182

This simply returns the value of the kernel function at x

183

184

Does the same as weight if the function is normalised

185

"""

135

186

return self._shape(x)

136

187

137

188

138

139

class Default(object):

189

# pylint: disable-msg=R0903

190

class DefaultKernel(CustomKernel):

140

191

"""Represents the default kernel - should not be used directly

141

192

This contains no functionality and acts as a placeholder for the consuming

142

193

function.

143

194

"""

144

195

pass

145

146

class Uniform(Custom):

147

def __init__(self, h=1.0):

148

self.h = h

149

self._shape = lambda x: 1.0

150

self.domain = [-1.0,1.0]

151

152

class Triangular(Custom):

153

def __init__(self, h=1.0):

154

self.h = h

155

self._shape = lambda x: 1-abs(x)

156

self.domain = [-1.0,1.0]

157

158

class Epanechnikov(Custom):

159

def __init__(self, h=1.0):

160

self.h = h

161

self._shape = lambda x: (1-x*x)

162

self.domain = [-1.0,1.0]

163

164

class Biweight(Custom):

165

def __init__(self, h=1.0):

166

self.h = h

167

self._shape = lambda x: (1-x*x)**2

168

self.domain = [-1.0,1.0]

169

170

class Triweight(Custom):

171

def __init__(self, h=1.0):

172

self.h = h

173

self._shape = lambda x: (1-x*x)**3

174

self.domain = [-1.0,1.0]

175

176

class Gaussian(Custom):

177

def __init__(self, h=1.0):

178

self.h = h

179

self._shape = lambda x: np.exp(-x**2/2.0)

180

self.domain = None

181

182

def smooth(self, xs, ys, x):

183

w = np.sum(

184

exp(

185

multiply(

186

square(

187

divide(

188

subtract(xs, x),

189

self.h

190

)

191

192

-0.5

193

)

194

)

195

)

196

197

v = np.sum(

198

multiply(

199

ys,

200

exp(

201

multiply(

202

square(

203

divide(

204

subtract( xs, x),

205

self.h

206

)

207

208

-0.5

209

)

210

)

211

)

212

)

213

196

# pylint: enable-msg=R0903

197

198

class Uniform(CustomKernel):

199

def __init__(self, h=1.0):

200

CustomKernel.__init__(self, shape=lambda x: 0.5, h=h,

201

domain=[-1.0, 1.0], norm = 1.0)

202

self._L2Norm = 0.5

203

204

class Triangular(CustomKernel):

205

def __init__(self, h=1.0):

206

CustomKernel.__init__(self, shape=lambda x: 1 - abs(x), h=h,

207

domain=[-1.0, 1.0], norm = 1.0)

208

self._L2Norm = 2.0/3.0

209

210

class Epanechnikov(CustomKernel):

211

def __init__(self, h=1.0):

212

CustomKernel.__init__(self, shape=lambda x: 0.75*(1 - x*x), h=h,

213

domain=[-1.0, 1.0], norm = 1.0)

214

self._L2Norm = 0.6

215

216

class Biweight(CustomKernel):

217

def __init__(self, h=1.0):

218

CustomKernel.__init__(self, shape=lambda x: 0.9375*(1 - x*x)**2, h=h,

219

domain=[-1.0, 1.0], norm = 1.0)

220

self._L2Norm = 5.0/7.0

221

222

def smooth(self, xs, ys, x):

223

"""Returns the kernel smoothing estimate for point x based on x-values

224

xs and y-values ys.

225

Not expected to be called by the user.

226

227

Special implementation optimised for Biweight.

228

"""

229

xs, ys = self.inDomain(xs, ys, x)

230

231

if len(xs) > 0:

232

w = np.sum(square(subtract(1, square(divide(subtract(xs, x),

233

self.h)))))

234

v = np.sum(multiply(ys, square(subtract(1, square(divide(

235

subtract(xs, x), self.h))))))

236

return v / w

237

else:

238

return np.nan

239

240

def smoothvar(self, xs, ys, x):

241

"""

242

Returns the kernel smoothing estimate of the variance at point x.

243

"""

244

xs, ys = self.inDomain(xs, ys, x)

245

246

if len(xs) > 0:

247

fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])

248

rs = square(subtract(ys, fittedvals))

249

w = np.sum(square(subtract(1.0, square(divide(subtract(xs, x),

250

self.h)))))

251

v = np.sum(multiply(rs, square(subtract(1, square(divide(

252

subtract(xs, x), self.h))))))

253

return v / w

254

else:

255

return np.nan

256

257

def smoothconf(self, xs, ys, x):

258

"""Returns the kernel smoothing estimate with confidence 1sigma bounds

259

"""

260

xs, ys = self.inDomain(xs, ys, x)

261

262

if len(xs) > 0:

263

fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])

264

rs = square(

265

subtract(ys, fittedvals)

266

)

267

w = np.sum(square(subtract(1.0, square(divide(subtract(xs, x),

268

self.h)))))

269

v = np.sum(multiply(rs, square(subtract(1, square(divide(

270

subtract(xs, x), self.h))))))

271

var = v / w

272

sd = np.sqrt(var)

273

K = self.L2Norm

274

yhat = self.smooth(xs, ys, x)

275

err = sd * K / np.sqrt(0.9375 * w * self.h)

276

return (yhat - err, yhat, yhat + err)

277

else:

278

return (np.nan, np.nan, np.nan)

279

280

class Triweight(CustomKernel):

281

def __init__(self, h=1.0):

282

CustomKernel.__init__(self, shape=lambda x: 1.09375*(1 - x*x)**3, h=h,

283

domain=[-1.0, 1.0], norm = 1.0)

284

self._L2Norm = 350.0/429.0

285

286

class Gaussian(CustomKernel):

287

"""

288

Gaussian (Normal) Kernel

289

290

K(u) = 1 / (sqrt(2*pi)) exp(-0.5 u**2)

291

"""

292

def __init__(self, h=1.0):

293

CustomKernel.__init__(self, shape = lambda x: 0.3989422804014327 *

294

np.exp(-x**2/2.0), h = h, domain = None, norm = 1.0)

295

self._L2Norm = 1.0/(2.0*np.sqrt(np.pi))

296

297

def smooth(self, xs, ys, x):

298

"""Returns the kernel smoothing estimate for point x based on x-values

299

xs and y-values ys.

300

Not expected to be called by the user.

301

302

Special implementation optimised for Gaussian.

303

"""

304

w = np.sum(exp(multiply(square(divide(subtract(xs, x),

305

self.h)),-0.5)))

306

v = np.sum(multiply(ys, exp(multiply(square(divide(subtract(xs, x),

307

self.h)), -0.5))))

214

308

return v/w

215

216

def norm_const(self):

217

return 0.39894228 # 1/sqrt(2 pi)

218

219

def L2Norm(self):

220

return 0.86226925 # sqrt(pi)/2

221

309

222

class Cosine(Custom):

310

class Cosine(CustomKernel):

311

"""

312

Cosine Kernel

313

314

K(u) = pi/4 cos(0.5 * pi * u) between -1.0 and 1.0

315

"""

223

316

def __init__(self, h=1.0):

224

self.h = h

225

self._shape = lambda x: np.cos(np.pi/2.0 * x)

226

self.domain = [-1.0,1.0]

b'\\ No newline at end of file'

317

CustomKernel.__init__(self, shape=lambda x: 0.78539816339744828 *

318

np.cos(np.pi/2.0 * x), h=h, domain=[-1.0, 1.0], norm = 1.0)

319

self._L2Norm = np.pi**2/16.0

b'\\ No newline at end of file'

Older »