4
from scipy.sandbox.models import survival, model
9
A simple little class for working with discrete random vectors.
12
def __init__(self, x, w=None):
14
if self.x.shape == ():
15
self.x = N.array([self.x])
16
self.n = self.x.shape[0]
18
w = N.ones(self.n, N.float64)
20
if w.shape[0] != self.n:
21
raise ValueError, 'incompatible shape for weights w'
22
if N.any(N.less(w, 0)):
23
raise ValueError, 'weights should be non-negative'
26
def mean(self, f=None):
31
return (fx * self.w).sum()
35
dx = self.x - N.multiply.outer(mu, self.x.shape[1])
36
return N.dot(dx, N.transpose(dx))
38
class observation(survival.right_censored):
40
def __getitem__(self, item):
41
if self.namespace is not None:
42
return self.namespace[item]
44
return getattr(self, item)
46
def __init__(self, time, delta, namespace=None):
47
self.namespace = namespace
48
survival.right_censored.__init__(self, time, delta)
50
def __call__(self, formula, time=None, **extra):
51
return formula(namespace=self, time=time, **extra)
53
class coxph(model.likelihood_model):
55
def __init__(self, subjects, formula, time_dependent=False):
56
self.subjects, self.formula = subjects, formula
57
self.time_dependent = time_dependent
58
self.initialize(self.subjects)
60
def initialize(self, subjects):
63
for i in range(len(subjects)):
66
if not self.failures.has_key(s.time):
67
self.failures[s.time] = [i]
69
self.failures[s.time].append(i)
71
self.failure_times = self.failures.keys()
72
self.failure_times.sort()
75
if self.time_dependent:
76
self.cachedir = tempfile.mkdtemp()
82
for t in self.failures.keys():
83
if self.time_dependent:
84
d = N.array([s(self.formula, time=t)
85
for s in self.subjects]).astype('<f8')
87
dfile = file(tempfile.mkstemp(dir=self.cachedir)[1], 'w')
91
self.design[t] = N.memmap(dfile.name,
95
d = N.array([s(self.formula, time=t)
96
for s in self.subjects]).astype(N.float64)
100
self.risk[t] = N.compress([s.atrisk(t) for s in self.subjects],
101
N.arange(self.design[t].shape[0]),axis=-1)
103
shutil.rmtree(self.cachedir, ignore_errors=True)
105
def logL(self, b, ties='breslow'):
108
for t in self.failures.keys():
109
fail = self.failures[t]
112
Zb = N.dot(self.design[t], b)
114
logL += Zb[fail].sum()
116
if ties == 'breslow':
117
s = N.exp(Zb[risk]).sum()
118
logL -= N.log(N.exp(Zb[risk]).sum()) * d
119
elif ties == 'efron':
120
s = N.exp(Zb[risk]).sum()
121
r = N.exp(Zb[fail]).sum()
123
logL -= N.log(s - j * r / d)
125
raise NotImplementedError, 'Cox tie breaking method not implemented'
127
raise NotImplementedError, 'tie breaking method not recognized'
130
def score(self, b, ties='breslow'):
133
for t in self.failures.keys():
134
fail = self.failures[t]
139
score += Z[fail].sum()
141
if ties == 'breslow':
142
w = N.exp(N.dot(Z, b))
143
rv = discrete(Z[risk], w=w[risk])
144
score -= rv.mean() * d
145
elif ties == 'efron':
146
w = N.exp(N.dot(Z, b))
147
score += Z[fail].sum()
150
efron_w[fail] -= i * w[fail] / d
151
rv = discrete(Z[risk], w=efron_w[risk])
154
raise NotImplementedError, 'Cox tie breaking method not implemented'
156
raise NotImplementedError, 'tie breaking method not recognized'
157
# FIXME: score is an int. it has no shape
158
# is it that we shouldn't be using an int above
159
# or that we shouldn't be looking at shape here
160
if score.shape == ():
161
score = N.array([score])
164
def information(self, b, ties='breslow'):
168
for t in self.failures.keys():
169
fail = self.failures[t]
174
if ties == 'breslow':
175
w = N.exp(N.dot(Z, b))
176
rv = discrete(Z[risk], w=w[risk])
178
elif ties == 'efron':
179
w = N.exp(N.dot(Z, b))
180
score += Z[fail].sum()
183
efron_w[fail] -= i * w[fail] / d
184
rv = discrete(Z[risk], w=efron_w[risk])
187
raise NotImplementedError, 'Cox tie breaking method not implemented'
189
raise NotImplementedError, 'tie breaking method not recognized'
192
if __name__ == '__main__':
193
import numpy.random as R
195
X = N.array([0]*n + [1]*n)
198
Y = R.standard_exponential((2*n,)) / lin
199
delta = R.binomial(1, 0.9, size=(2*n,))
201
subjects = [observation(Y[i], delta[i]) for i in range(2*n)]
206
x = F.quantitative('X')
209
c = coxph(subjects, f)