~scipystats/+junk/skipper-revjp

« back to all changes in this revision

Viewing changes to nipy/fixes/scipy/stats/models/regression.py

Committer: joep
Date: 2009-08-18 02:04:29 UTC
mfrom: (1799.2.14 skipper-working)
Revision ID: josef.pktd@gmail.com-20090818020429-td9xt6ay2acg5dr9

merge from skipper, have criss-cross with lot's of conflicts in ols_fstat.py and model.py

files modified:
nipy/fixes/scipy/stats/models/TODO.txt

nipy/fixes/scipy/stats/models/contrast.py

nipy/fixes/scipy/stats/models/examples/ols_fstat.py

nipy/fixes/scipy/stats/models/model.py

nipy/fixes/scipy/stats/models/regression.py

nipy/fixes/scipy/stats/models/tests/test_contrast.py

nipy/fixes/scipy/stats/models/tests/test_regression.py

Show diffs side-by-side

added added

removed removed

nipy/fixes/scipy/stats/models/regression.py

"""

This module implements some standard regression models: OLS and WLS

models, as well as an AR(p) regression model.

Models are specified with a design matrix and are fit using their

'fit' method.

This module implements some standard regression models:

Generalized Least Squares (GLS),

Ordinary Least Squares (OLS),

and Weighted Least Squares (WLS),

as well as an GLS model with autoregressive error terms GLSAR(p)

Models are specified with an endogenous response variable and an

exogenous design matrix and are fit using their `fit` method.

Subclasses that have more complicated covariance matrices

should write over the 'whiten' method as the fit method

'Introduction to Linear Regression Analysis', Douglas C. Montgomery,

Elizabeth A. Peck, G. Geoffrey Vining. Wiley, 2006.

Econometrics references for regression models:

R. Davidson and J.G. MacKinnon. "Econometric Theory and Methods," Oxford,

2004.

W. Green. "Econometric Analysis," 5th ed., Pearson, 2003.

"""

__docformat__ = 'restructuredtext en'

from models.model import LikelihoodModel, LikelihoodModelResults

from models import tools

from models.tools import add_constant

from scipy import stats, derivative

from scipy.stats.stats import ss # could be avoided to eliminate overhead

from scipy import stats

from scipy.stats.stats import ss

class GLS(LikelihoodModel):

"""

Generalized least squares model with a general covariance structure

Generalized least squares model with a general covariance structure.

Parameters

----------

llf : float

`llf` is the value of the maximum likelihood function of the model.

#TODO: clarify between results llf and model llf.

#model llf requires the parameters, and results llf, evaluates the llf

#at the calculated parameters

normalized_cov_params : array

`normalized_cov_params` is a p x p array that is the inverse of ...

#TODO: clarify

In matrix notation this can be written (X^(T)X)^(-1)

sigma :

106

Returns the Fisher information matrix.

107

108

initialize

100

#TODO: initialize as a public method?

109

(Re)-initialize a model.

101

110

102

111

newton

103

112

Used to solve the maximum likelihood problem.

113

Not currently implemented.

104

114

105

115

predict

106

116

Returns the fitted values given the parameters and exogenous design.

110

120

111

121

whiten

112

122

TODO

113

114

115

Formulas

116

--------

117

calc_params attribute:

118

..math :: X^{+}\approx(X^{T}X)^{-1}X^{T}

119

120

123

"""

121

124

def __init__(self, endog, exog, sigma=None):

122

125

self.sigma = sigma

128

131

def initialize(self):

129

132

self.wdesign = self.whiten(self._exog)

130

133

#JP: calc_params is not an informative name, but anything better?

134

#SS: gen_inv? it's the generalized inverse

135

# ie., beta = calc_params dot y

131

136

self.calc_params = np.linalg.pinv(self.wdesign)

132

137

self.normalized_cov_params = np.dot(self.calc_params,

133

138

np.transpose(self.calc_params))

141

146

else:

142

147

return Y

143

148

144

#TODO: Do we need df_model and df_resid defined twice?

145

149

def fit(self):

146

150

"""

147

151

Full fit of the model including estimate of covariance matrix,

187

191

Uncentered sum of squares

188

192

189

193

The whitened response variable

190

191

Formulas

192

--------

193

Adjusted R-squared for models with an intercept

194

.. math :: 1-\frac{\left(n-1\right)}{\left(n-p\right)}\left(1-R^{2}\right)

195

196

R-squared for models with an intercept

197

.. math :: 1-\frac{\sum e_{i}^{2}}{\sum(y_{i}-\overline{y})^{2}}

198

194

"""

199

195

Z = self.whiten(self._endog)

200

#JP put beta=np.dot(self.calc_params, Z) on separate line with temp variable

201

# for better readability

196

beta = np.dot(self.calc_params, Z)

202

197

# should this use lstsq instead?

203

lfit = RegressionResults(self, np.dot(self.calc_params, Z),

198

lfit = RegressionResults(self, beta,

204

199

normalized_cov_params=self.normalized_cov_params)

205

200

lfit.predict = np.dot(self._exog, lfit.params)

206

201

# lfit.resid = Z - np.dot(self.wdesign, lfit.params)

207

202

#TODO: why was the above in the original? do we care about whitened resids?

208

203

# lfit.resid = Y - np.dot(self._exog, lfit.params)

204

#TODO: check discussion in D&M

209

205

lfit.Z = Z # not a good name wendog analogy to wdesign

210

206

lfit.df_resid = self.df_resid

211

207

lfit.df_model = self.df_model

238

234

239

235

lfit.uTSS = ss(lfit.Z)

240

236

# Centered R2 for models with intercepts

241

# would be different for no constant regression...

242

# if self.hascons is True:

243

237

lfit.Rsq = 1 - lfit.SSR/lfit.cTSS

244

# else:

245

# lfit.Rsq = 1 - lfit.SSR/lfit.uTSS

246

238

lfit.ESS = ss(lfit.predict - np.mean(lfit.Z))

247

239

lfit.SSR = ss(lfit.resid)

248

240

lfit.adjRsq = 1 - (lfit.nobs - 1)/(lfit.nobs - (lfit.df_model+1))\

253

245

lfit.F = lfit.MSE_model/lfit.MSE_resid

254

246

lfit.F_p = 1 - stats.f.cdf(lfit.F, lfit.df_model, lfit.df_resid)

255

247

lfit.bse = np.sqrt(np.diag(lfit.cov_params()))

256

lfit.llf = self.logLike(lfit.params)

248

lfit.llf = self.loglike(lfit.params)

257

249

lfit.aic = -2 * lfit.llf + 2*(self.df_model+1)

258

250

lfit.bic = -2 * lfit.llf + np.log(lfit.nobs)*(self.df_model+1)

259

251

260

def logLike(self, params):

252

def loglike(self, params):

261

253

"""

262

254

Returns the value of the gaussian loglikelihood function at b.

263

255

264

256

Given the whitened design matrix, the loglikelihood is evaluated

265

at the parameter vector `b` for the dependent variable `Y`.

257

at the parameter vector `params` for the dependent variable `Y`.

266

258

267

259

Parameters

268

260

----------

269

261

`params` : array-like

270

The parameter estimates. Must be of length df_model.

262

The parameter estimates.

271

263

272

264

Returns

273

265

-------

284

276

285

277

The BIC (or Schwartz Criterion) is

286

278

.. math:: \text{BIC}=\log\frac{SSR}{n}+\frac{K}{n}\log n

287

288

279

289

References

290

----------

291

.. [1] W. Green. "Econometric Analysis," 5th ed., Pearson, 2003.

292

280

"""

293

281

nobs = float(self._exog.shape[0])

294

282

nobs2 = nobs / 2.0

297

285

llf -= (1+np.log(np.pi/nobs2))*nobs2 # with constant

298

286

return llf

299

287

300

def score(self, params):

301

"""

302

Score function of the classical OLS Model.

303

304

The gradient of logL with respect to params

305

306

Parameters

307

----------

308

params : array-like

309

310

"""

311

#JP: this is generic and should go into LikeliHoodModel

312

return derivative(self.llf, params, dx=1e-04, n=1, order=3)

313

314

def information(self, params):

315

"""

316

Fisher information matrix of model

317

"""

318

raise NotImplementedError

319

320

321

def newton(self, params):

322

"""

323

"""

324

raise NotImplementedError

325

326

288

327

289

class WLS(GLS):

328

#FIXME: update the example to the correct values returned once tested

329

290

"""

330

A regression model with diagonal but non-identity covariance

331

structure. The weights are presumed to be

332

(proportional to the) inverse of the

291

A regression model with diagonal but non-identity covariance structure.

292

The weights are presumed to be (proportional to the) inverse of the

333

293

variance of the observations.

334

294

335

>>> import numpy as np

336

>>>1

337

>>> from models.tools import add_constant

338

>>> from models.regression import WLS

339

>>>

295

Parameters

296

----------

297

298

Methods

299

-------

300

301

Attributes

302

----------

303

304

Examples

305

---------

306

>>>import numpy as np

307

>>>import models

308

>>>import numpy as np

340

309

>>> Y = [1,3,4,5,2,3,4]

341

310

>>> X = range(1,8)

342

>>> X = add_constant(X)

343

>>>

344

>>> model = WLS(Y,X, weights=range(1,8))

345

>>> results = model.fit()

346

>>>

311

>>> X = models.tools.add_constant(X)

312

>>> model1 = models.WLS(Y,X, weights=range(1,8))

313

>>> results = model1.fit()

347

314

>>> results.params

348

315

array([ 0.0952381 , 2.91666667])

349

316

>>> results.t()

350

array([ 0.35684428, 2.0652652 ])

317

array([ 0.61890419, 3.58195812])

351

318

>>> print results.Tcontrast([0,1])

352

319

353

320

>>> print results.Fcontrast(np.identity(2))

354

355

"""

321

<F contrast: F=81.213965087281849, p=0.00015411866558, df_denom=5, df_num=2> """

356

322

357

323

def __init__(self, endog, exog, weights=1):

358

324

weights = np.array(weights)

359

if weights.shape == (): # scalar

325

if weights.shape == ():

360

326

self.weights = weights

361

327

else:

362

328

design_rows = exog.shape[0]

375

341

if X.ndim == 1:

376

342

return X * np.sqrt(self.weights)

377

343

elif X.ndim == 2:

378

if np.shape(self.weights) == (): # 0-d weights

344

if np.shape(self.weights) == ():

379

345

whitened = np.sqrt(self.weights)*X

380

346

else:

381

347

whitened = np.sqrt(self.weights)[:,None]*X

387

353

388

354

Parameters

389

355

----------

390

`design`: array-like

391

This is your design matrix. Data are assumed to be column ordered

392

with observations in rows.

356

`endog` : array-like

357

1d vector of response/dependent variable

358

359

`exog`: array-like

360

Column ordered (observations in rows) design matrix.

393

361

394

362

Methods

395

363

-------

396

model.llf(b=self.params, Y)

397

Returns the log-likelihood of the parameter estimates

398

399

Parameters

400

----------

401

b : array-like

402

`b` is an array of parameter estimates the log-likelihood of which

403

is to be tested.

404

Y : array-like

405

`Y` is the vector of dependent variables.

406

model.__init___(design)

407

Creates a `OLS` from a design.

408

364

409

365

Attributes

410

366

----------

450

406

"""

451

407

def __init__(self, endog, exog=None):

452

408

super(OLS, self).__init__(endog, exog)

409

410

def loglike(self, params):

411

'''

412

The likelihood function for the clasical OLS model.

413

414

Parameters

415

----------

416

params : array-like

417

The coefficients with which to estimate the loglikelihood.

418

419

Returns

420

-------

421

The concentrated likelihood function evaluated at params.

422

'''

423

nobs2 = self._endog.shape[0]/2.

424

return -nobs2*np.log(2*np.pi)-nobs2*np.log(1/(2*nobs2) *\

425

np.dot(np.transpose(self._endog -

426

np.dot(self._exog, params)),

427

(self._endog - np.dot(self._exog,params)))) -\

428

nobs2

453

429

454

430

def whiten(self, Y):

455

431

"""

Older »