This module implements some standard regression models:

Generalized Least Squares (GLS),
Ordinary Least Squares (OLS),
and Weighted Least Squares (WLS),
as well as a GLS model with autoregressive error terms, GLSAR(p).

Models are specified with an endogenous response variable and an
exogenous design matrix and are fit using their `fit` method.
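
For example, a minimal sketch of the intended usage, with import paths
matching the imports in this module::

    >>> from models.regression import OLS
    >>> from models.tools import add_constant
    >>> Y = [1,3,4,5,2,3,4]
    >>> X = add_constant(range(1,8))
    >>> results = OLS(Y, X).fit()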

Subclasses that have more complicated covariance matrices
should override the 'whiten' method, as the `fit` method
prewhitens the response by calling 'whiten'.
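
For instance, a model with known error covariance Sigma might whiten both
the response and the design with a Cholesky factor of Sigma^(-1) (a sketch
only; `cholsigmainv` is a hypothetical attribute holding that factor)::

    def whiten(self, X):
        # premultiply by the Cholesky factor of the inverse error covariance
        return np.dot(self.cholsigmainv, X)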

General reference for regression models:

'Introduction to Linear Regression Analysis', Douglas C. Montgomery,
    Elizabeth A. Peck, G. Geoffrey Vining. Wiley, 2006.

Econometrics references for regression models:

R. Davidson and J.G. MacKinnon. "Econometric Theory and Methods," Oxford,
    2004.

W. Greene. "Econometric Analysis," 5th ed., Pearson, 2003.
"""

__docformat__ = 'restructuredtext en'
from models.model import LikelihoodModel, LikelihoodModelResults
from models import tools
from models.tools import add_constant

import numpy as np
from scipy import stats, derivative
from scipy.stats.stats import ss

class GLS(LikelihoodModel):
    """
    Generalized least squares model with a general covariance structure.

    Attributes
    ----------
    llf : float
        `llf` is the value of the maximum likelihood function of the model.
        #TODO: clarify between results llf and model llf.
        #model llf requires the parameters, and results llf evaluates the
        #llf at the calculated parameters
    normalized_cov_params : array
        `normalized_cov_params` is a p x p array that is the inverse of
        the cross-product of the whitened design matrix.
        In matrix notation this can be written (X^(T)X)^(-1).
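
        For a full-column-rank design, the Moore-Penrose pseudoinverse used
        in `initialize` satisfies pinv(X) pinv(X)^(T) = (X^(T)X)^(-1), which
        is exactly how `normalized_cov_params` is computed below; a quick
        numerical check::

            >>> import numpy as np
            >>> X = np.array([[1., 1], [1, 2], [1, 3], [1, 4]])
            >>> P = np.linalg.pinv(X)
            >>> np.allclose(np.dot(P, P.T), np.linalg.inv(np.dot(X.T, X)))
            True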
    """

    def initialize(self):
        self.wdesign = self.whiten(self._exog)
        #JP: calc_params is not an informative name, but anything better?
        #SS: gen_inv? it's the generalized inverse
        # i.e., beta = calc_params dot y
        self.calc_params = np.linalg.pinv(self.wdesign)
        self.normalized_cov_params = np.dot(self.calc_params,
                                            np.transpose(self.calc_params))

    def fit(self):
        """
        Full fit of the model. The returned RegressionResults instance
        includes, among others, the following attributes:

        uTSS
            Uncentered sum of squares.
        Z
            The whitened response variable.
        adjRsq
            Adjusted R-squared for models with an intercept:

            .. math:: 1-\frac{\left(n-1\right)}{\left(n-p\right)}\left(1-R^{2}\right)

        Rsq
            R-squared for models with an intercept:

            .. math:: 1-\frac{\sum e_{i}^{2}}{\sum(y_{i}-\overline{y})^{2}}
        """
        Z = self.whiten(self._endog)
        # fit via the pseudoinverse; should this use lstsq instead?
        beta = np.dot(self.calc_params, Z)
        lfit = RegressionResults(self, beta,
                       normalized_cov_params=self.normalized_cov_params)
        lfit.predict = np.dot(self._exog, lfit.params)
        # lfit.resid = Z - np.dot(self.wdesign, lfit.params)
        #TODO: why was the above in the original? do we care about whitened resids?
        # lfit.resid = Y - np.dot(self._exog, lfit.params)
        #TODO: check discussion in D&M
        lfit.Z = Z    # not a good name; wendog, by analogy to wdesign?
        lfit.df_resid = self.df_resid
        lfit.df_model = self.df_model
        lfit.uTSS = ss(lfit.Z)
        lfit.cTSS = ss(lfit.Z - np.mean(lfit.Z))
        lfit.SSR = ss(lfit.resid)
        lfit.ESS = ss(lfit.predict - np.mean(lfit.Z))
        # Centered R2 for models with intercepts;
        # would be different for no-constant regression...
        # if self.hascons is True:
        lfit.Rsq = 1 - lfit.SSR/lfit.cTSS
        # else:
        #     lfit.Rsq = 1 - lfit.SSR/lfit.uTSS
        lfit.adjRsq = 1 - (lfit.nobs - 1)/(lfit.nobs - (lfit.df_model+1))\
                *(1 - lfit.Rsq)
        lfit.MSE_model = lfit.ESS/lfit.df_model
        lfit.MSE_resid = lfit.SSR/lfit.df_resid
        lfit.F = lfit.MSE_model/lfit.MSE_resid
        lfit.F_p = 1 - stats.f.cdf(lfit.F, lfit.df_model, lfit.df_resid)
        lfit.bse = np.sqrt(np.diag(lfit.cov_params()))
        lfit.llf = self.loglike(lfit.params)
        # information criteria with df_model+1 estimated parameters
        lfit.aic = -2 * lfit.llf + 2*(self.df_model+1)
        lfit.bic = -2 * lfit.llf + np.log(lfit.nobs)*(self.df_model+1)
        return lfit

    def loglike(self, params):
        """
        Returns the value of the gaussian loglikelihood function at `params`.

        Given the whitened design matrix, the loglikelihood is evaluated
        at the parameter vector `params` for the dependent variable `Y`.

        Parameters
        ----------
        params : array-like
            The parameter estimates.
        """
        nobs2 = self._endog.shape[0]/2.
        SSR = ss(self.whiten(self._endog) - np.dot(self.wdesign, params))
        llf = -np.log(SSR) * nobs2           # concentrated likelihood
        llf -= (1+np.log(np.pi/nobs2))*nobs2 # with constant
        return llf

    def score(self, params):
        """
        Score function of the classical OLS model.

        The gradient of logL with respect to params.
        """
        #JP: this is generic and should go into LikelihoodModel
        return derivative(self.loglike, params, dx=1e-04, n=1, order=3)
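        # An analytic alternative for the whitened gaussian model (a sketch,
        # with sigma2 profiled out as in `loglike`; not used here):
        #   resid = self.whiten(self._endog) - np.dot(self.wdesign, params)
        #   sigma2 = np.dot(resid, resid) / self._endog.shape[0]
        #   score  = np.dot(np.transpose(self.wdesign), resid) / sigma2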

    def information(self, params):
        """
        Fisher information matrix of the model.
        """
        raise NotImplementedError

    def newton(self, params):
        raise NotImplementedError


#FIXME: update the example to the correct values returned once tested
class WLS(GLS):
    """
    A regression model with diagonal but non-identity covariance structure.
    The weights are presumed to be (proportional to the) inverse of the
    variance of the observations.

    Examples
    --------
    >>> import numpy as np
    >>> from models.tools import add_constant
    >>> from models.regression import WLS
    >>> Y = [1,3,4,5,2,3,4]
    >>> X = range(1,8)
    >>> X = add_constant(X)
    >>> model = WLS(Y, X, weights=range(1,8))
    >>> results = model.fit()
    >>> results.params
    array([ 0.0952381 ,  2.91666667])
    >>> results.t()
    array([ 0.35684428,  2.0652652 ])
    >>> print results.Tcontrast([0,1])
    <T contrast: effect=2.91666666667, sd=1.41224801095, t=2.06526519708, df_denom=5>
    >>> print results.Fcontrast(np.identity(2))
    <F contrast: F=26.9986072423, df_denom=5, df_num=2>
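
    The weights enter the fit through `whiten`, which scales each observation
    by the square root of its weight (a sketch of the intended behavior; the
    actual method may handle shapes differently)::

        def whiten(self, X):
            X = np.asarray(X)
            if X.ndim == 1:
                return np.sqrt(self.weights) * X
            # scale each row of a 2d design by its observation's weight
            return np.sqrt(self.weights)[:, None] * X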
    """

    def __init__(self, endog, exog, weights=1):
        weights = np.array(weights)
        if weights.shape == ():    # scalar
            self.weights = weights
        else:
            design_rows = exog.shape[0]

class OLS(WLS):
    """
    A simple ordinary least squares model.

    Parameters
    ----------
    endog : array-like
        1d vector of the response/dependent variable.
    exog : array-like
        Column ordered (observations in rows) design matrix.

    Methods
    -------
    model.loglike(params)
        Returns the log-likelihood of the parameter estimates `params` for
        the dependent variable `Y`.
    model.__init__(endog, exog)
        Creates an `OLS` model from a response vector and a design matrix.
    """

    def __init__(self, endog, exog=None):
        super(OLS, self).__init__(endog, exog)

    def loglike(self, params):
        """
        The loglikelihood function for the classical OLS model.

        Parameters
        ----------
        params : array-like
            The coefficients with which to estimate the loglikelihood.

        Returns
        -------
        The concentrated loglikelihood function evaluated at `params`.
        """
        nobs2 = self._endog.shape[0]/2.
        return -nobs2*np.log(2*np.pi)-nobs2*np.log(1/(2*nobs2) *\
                np.dot(np.transpose(self._endog -
                    np.dot(self._exog, params)),
                    (self._endog - np.dot(self._exog, params)))) -\
                nobs2
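        # Derivation note: substituting the MLE sigma2_hat = SSR/nobs into
        #   -nobs2*log(2*pi*sigma2) - SSR/(2*sigma2)
        # collapses the final term to -nobs2, giving the trailing -nobs2
        # above; this is why the docstring calls the result the concentrated
        # loglikelihood.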
def whiten(self, Y):