# Last Change: Mon Jul 02 07:00 PM 2007 J

"""Module implementing GMM, a class to estimate Gaussian mixture models using
EM, and EM, a class which uses GMM instances to estimate model parameters with
the Expectation-Maximization algorithm."""

__docformat__ = 'restructuredtext'

# TODO:
#   - which methods to avoid va shrinking to 0 ? There are several options,
#   not sure which ones are appropriate
#   - improve EM trainer
import numpy as N
from numpy.random import randn

#import _c_densities as densities
import densities
from scipy.cluster.vq import kmeans2 as kmean
from gauss_mix import GmParamError
from misc import curry

#from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND

# Default regularization priors for RegularizedEM (assumed values)
_PRIOR_COUNT = 0.05
_COV_PRIOR = 0.1

# Error classes
class GmmError(Exception):
    """Base class for exceptions in this module."""
    def __init__(self):
        Exception.__init__(self)

class GmmParamError(GmmError):
    """Exception raised for errors in gmm params

    Attributes:
        expression -- input expression in which the error occurred
        message -- explanation of the error
    """
    def __init__(self, message):
        GmmError.__init__(self)
        self.message = message

    def __str__(self):
        return self.message

class MixtureModel(object):
    """Class to model mixture """
    # XXX: Is this really needed ?
    pass

class ExpMixtureModel(MixtureModel):
    """Class to model mixture of exponential pdf (eg Gaussian, exponential,
    Laplace, etc..). This is a special case because some parts of EM are common
    for those models..."""
    pass

class GMM(ExpMixtureModel):
    """ A class to model a Gaussian Mixture Model (GMM). An instance of this
    class is created by giving weights, means and variances in the ctor. An
    instantiated object can be sampled and trained by EM. """
    def init_kmean(self, data, niter = 5):
        """ Init the model with kmean."""
        k = self.gm.k
        d = self.gm.d
        init = data[0:k, :]

        # XXX: This is bogus initialization should do better (in kmean with CV)
        (code, label) = kmean(data, init, niter, minit = 'matrix')

        w = N.ones(k) / k
        mu = code.copy()
        if self.gm.mode == 'diag':
            va = N.zeros((k, d))
            for i in range(k):
                for j in range(d):
                    va[i, j] = N.cov(data[N.where(label==i), j], rowvar = 0)
        elif self.gm.mode == 'full':
            va = N.zeros((k*d, d))
            for i in range(k):
                va[i*d:i*d+d, :] = \
                        N.cov(data[N.where(label==i)], rowvar = 0)
        else:
            raise GmmParamError("mode " + str(self.gm.mode) + \
                    " not recognized")

        self.gm.set_param(w, mu, va)

        self.isinit = True

    def init_random(self, data):
        """ Init the model at random."""
        k = self.gm.k
        d = self.gm.d
        w = N.ones(k) / k
        mu = randn(k, d)
        if self.gm.mode == 'diag':
            va = N.fabs(randn(k, d))
        else:
            # If A is invertible, A'A is positive definite
            va = randn(k * d, d)
            for i in range(k):
                va[i*d:i*d+d] = N.dot( va[i*d:i*d+d],
                        va[i*d:i*d+d].T)

        self.gm.set_param(w, mu, va)

        self.isinit = True

    def init_test(self, data):
        """Use values already in the model as initialization.

        Useful for testing purposes when reproducibility is necessary. This
        does nothing but checking that the mixture model has valid initial
        values."""
        try:
            self.gm.check_state()
            self.isinit = True
        except GmParamError, e:
            print "Model is not properly initialized, cannot init EM."
            raise ValueError("Message was %s" % str(e))

    def __init__(self, gm, init = 'kmean'):
        """Initialize a mixture model.

        Initialize the model from a GM instance. This class implements all the
        necessary functionalities for EM.

        :Parameters:
            gm : GM
                the mixture model to train.
            init : string
                initialization method to use."""
        self.gm = gm

        # Possible init methods
        init_methods = {'kmean': self.init_kmean, 'random' : self.init_random,
                'test': self.init_test}

        if init not in init_methods:
            raise GmmParamError('init method %s not recognized' % str(init))

        self.init = init_methods[init]
        self.initst = init
        self.isinit = False

    def compute_responsabilities(self, data):
        """Compute responsabilities.

        Return normalized and non-normalized responsabilities for the model.

        Note
        ----
        Computes the latent variable distribution (a posteriori probability)
        knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
        i) = P[state = i | observation = data(t); w, mu, va]

        This is basically the E step of EM for finite mixtures."""
        # compute the gaussian pdf
        tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
        # multiply by the weight
        tgd *= self.gm.w
        # Normalize to get a pdf
        gd = tgd / N.sum(tgd, axis=1)[:, N.newaxis]

        return gd, tgd

    def compute_log_responsabilities(self, data):
        """Compute log responsabilities.

        Return normalized and non-normalized responsabilities for the model (in
        the log domain).

        Note
        ----
        Computes the latent variable distribution (a posteriori probability)
        knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
        i) = P[state = i | observation = data(t); w, mu, va]

        This is basically the E step of EM for finite mixtures."""
        # compute the gaussian pdf
        tgd = densities.multiple_gauss_den(data, self.gm.mu,
                                           self.gm.va, log = True)
        # multiply by the weight
        tgd += N.log(self.gm.w)
        # Normalize to get a (log) pdf
        gd = tgd - densities.logsumexp(tgd)[:, N.newaxis]

        return gd, tgd
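
    # The log-domain E step above exists because the linear-domain version can
    # underflow when the dimension is high or the components are well
    # separated: per-frame densities may fall below the smallest positive
    # double. Normalizing through logsumexp keeps everything representable.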

    def _update_em_diag(self, data, gamma, ngamma):
        """Computes update of the Gaussian Mixture Model (M step) from the
        responsabilities gamma and normalized responsabilities ngamma, for
        diagonal models."""
        #XXX: caching SS may decrease memory consumption, but is this possible ?
        k = self.gm.k
        d = self.gm.d
        n = data.shape[0]
        invn = 1.0 / n

        mu = N.zeros((k, d))
        va = N.zeros((k, d))

        for c in range(k):
            # first and second order statistics for component c
            x = N.dot(gamma.T[c:c+1, :], data)[0, :]
            xx = N.dot(gamma.T[c:c+1, :], data ** 2)[0, :]

            mu[c, :] = x / ngamma[c]
            va[c, :] = xx / ngamma[c] - mu[c, :] ** 2

        w = invn * ngamma

        return w, mu, va

    def _update_em_full(self, data, gamma, ngamma):
        """Computes update of the Gaussian Mixture Model (M step) from the
        responsabilities gamma and normalized responsabilities ngamma, for
        full models."""
        k = self.gm.k
        d = self.gm.d
        n = data.shape[0]
        invn = 1.0 / n

        # In full mode, this is the bottleneck: the triple loop
        # kills performances. This is pretty straightforward
        # algebra, so computing it in C should not be too difficult. The
        # real problem is to have valid covariance matrices, and to keep
        # them positive definite, maybe with special storage... Not sure
        # it is really worth the risk
        mu = N.zeros((k, d))
        va = N.zeros((k*d, d))

        #XXX: caching SS may decrease memory consumption
        for c in range(k):
            #x = N.sum(N.outer(gamma[:, c],
            #           N.ones((1, d))) * data, axis = 0)
            x = N.dot(gamma.T[c:c+1, :], data)[0, :]
            xx = N.zeros((d, d))

            # This should be much faster than recursing on n...
            for i in range(d):
                for j in range(d):
                    xx[i, j] = N.sum(data[:, i] * data[:, j] * gamma.T[c, :],
                            axis = 0)

            mu[c, :] = x / ngamma[c]
            va[c*d:c*d+d, :] = xx / ngamma[c] \
                    - N.outer(mu[c, :], mu[c, :])

        w = invn * ngamma

        return w, mu, va

    def update_em(self, data, gamma):
        """Computes update of the Gaussian Mixture Model (M step)
        from the a posteriori pdf, computed by gmm_posterior
        (the E step)."""
        ngamma = N.sum(gamma, axis = 0)

        if self.gm.mode == 'diag':
            w, mu, va = self._update_em_diag(data, gamma, ngamma)
        elif self.gm.mode == 'full':
            w, mu, va = self._update_em_full(data, gamma, ngamma)
        else:
            raise GmmParamError("varmode not recognized")

        self.gm.set_param(w, mu, va)

    def likelihood(self, data):
        """ Returns the current log likelihood of the model given
        the data."""
        # compute the gaussian pdf
        tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
        # multiply by the weight
        tgd *= self.gm.w

        return N.sum(N.log(N.sum(tgd, axis = 1)), axis = 0)

    def bic(self, data):
        """ Returns the BIC (Bayesian Information Criterion),
        also called Schwarz information criterion. Can be used
        to choose between different models which have different
        numbers of clusters. The BIC is defined as:

        BIC = 2 * ln(L) - k * ln(n)

        where:
            * ln(L) is the log-likelihood of the estimated model
            * k is the number of degrees of freedom
            * n is the number of frames

        Note that depending on the literature, BIC may be defined as the
        opposite of the definition given here. """
        if self.gm.mode == 'diag':
            # for a diagonal model, we have k - 1 (k weights, but one
            # constraint of normality) + k * d (means) + k * d (variances)
            free_deg = self.gm.k * (self.gm.d * 2 + 1) - 1
        elif self.gm.mode == 'full':
            # for a full model, we have k - 1 (k weights, but one constraint of
            # normality) + k * d (means) + k * d * d / 2 (each covariance
            # matrix has d ** 2 params, but the symmetry constraint roughly
            # halves that)
            if self.gm.d == 1:
                free_deg = self.gm.k * 3 - 1
            else:
                free_deg = self.gm.k * (self.gm.d + 1 + self.gm.d ** 2 / 2) - 1

        lk = self.likelihood(data)
        n = N.shape(data)[0]
        return bic(lk, free_deg, n)
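
    # Typical use of bic: train one GMM per candidate number of clusters on
    # the same data and keep the model with the highest BIC value.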

    def __repr__(self):
        repre = ""
        repre += "Gaussian Mixture Model\n"
        repre += " -> initialized by %s\n" % str(self.initst)
        repre += self.gm.__repr__()
        return repre

class EM:
    """An EM trainer. An EM trainer
    trains from data, with a model

    Not really useful yet"""
    def train(self, data, model, maxiter = 10, thresh = 1e-5, log = False):
        """Train a model using EM.

        Train a model using data, and stop when the likelihood increase
        between two consecutive iterations falls below a threshold, or when the
        number of iterations > maxiter, whichever comes first.

        :Parameters:
            data : ndarray
                contains the observed features, one row is one frame, i.e. one
                observation of dimension d
            model : GMM
                the model to train
            maxiter : int
                maximum number of iterations
            thresh : float
                if the slope of the likelihood falls below this value, the
                algorithm stops.

        :Returns:
            likelihood : ndarray
                one value per iteration.

        Note
        ----
        The model is trained, and its parameters updated accordingly, e.g. the
        results are put in the GMM instance."""
        if not isinstance(model, MixtureModel):
            raise TypeError("expect a MixtureModel as a model")

        # Initialize the data (may do nothing depending on the model)
        model.init(data)

        if log:
            like = self._train_simple_em_log(data, model, maxiter, thresh)
        else:
            like = self._train_simple_em(data, model, maxiter, thresh)
        return like

    def _train_simple_em(self, data, model, maxiter, thresh):
        # Likelihood is kept
        like = N.zeros(maxiter)

        # Em computation, with computation of the likelihood
        g, tgd = model.compute_responsabilities(data)
        # TODO: do it in log domain instead
        like[0] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
        model.update_em(data, g)
        for i in range(1, maxiter):
            g, tgd = model.compute_responsabilities(data)
            like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
            model.update_em(data, g)
            if has_em_converged(like[i], like[i-1], thresh):
                return like[0:i]
        return like

    def _train_simple_em_log(self, data, model, maxiter, thresh):
        # Likelihood is kept
        like = N.zeros(maxiter)

        # Em computation, with computation of the likelihood
        g, tgd = model.compute_log_responsabilities(data)
        like[0] = N.sum(densities.logsumexp(tgd), axis = 0)
        model.update_em(data, N.exp(g))
        for i in range(1, maxiter):
            g, tgd = model.compute_log_responsabilities(data)
            like[i] = N.sum(densities.logsumexp(tgd), axis = 0)
            model.update_em(data, N.exp(g))
            if has_em_converged(like[i], like[i-1], thresh):
                return like[0:i]
        return like

class RegularizedEM:
    # TODO: separate regularizer from EM class ?
    def __init__(self, pcnt = _PRIOR_COUNT, pval = _COV_PRIOR):
        """Create a regularized EM object.

        Covariance matrices are regularized after the E step.

        :Parameters:
            pcnt : float
                proportion of soft counts to be counted as prior counts (e.g.
                if you have 1000 samples and the prior_count is 0.1, then the
                prior would "weight" 100 samples).
            pval : float
                value of the prior used to regularize the covariances."""
        self.pcnt = pcnt
        self.pval = pval

    def train(self, data, model, maxiter = 20, thresh = 1e-5):
        """Train a model using EM.

        Train a model using data, and stop when the likelihood increase
        between two consecutive iterations falls below a threshold, or when the
        number of iterations > maxiter, whichever comes first.

        :Parameters:
            data : ndarray
                contains the observed features, one row is one frame, i.e. one
                observation of dimension d
            model : GMM
                the model to train
            maxiter : int
                maximum number of iterations
            thresh : float
                if the slope of the likelihood falls below this value, the
                algorithm stops.

        :Returns:
            likelihood : ndarray
                one value per iteration.

        Note
        ----
        The model is trained, and its parameters updated accordingly, e.g. the
        results are put in the GMM instance."""
        # Build the regularizer for the requested covariance mode
        if model.gm.mode == 'diag':
            regularize = curry(regularize_diag, np = self.pcnt, prior =
                    self.pval * N.ones(model.gm.d))
        elif model.gm.mode == 'full':
            regularize = curry(regularize_full, np = self.pcnt, prior =
                    self.pval * N.eye(model.gm.d))
        else:
            raise ValueError("unknown variance mode")

        model.init(data)
        regularize(model.gm.va)

        # Likelihood is kept
        like = N.empty(maxiter, N.float)

        # Em computation, with computation of the likelihood
        g, tgd = model.compute_log_responsabilities(data)
        model.update_em(data, N.exp(g))
        regularize(model.gm.va)

        like[0] = N.sum(densities.logsumexp(tgd), axis = 0)
        for i in range(1, maxiter):
            g, tgd = model.compute_log_responsabilities(data)
            model.update_em(data, N.exp(g))
            regularize(model.gm.va)

            like[i] = N.sum(densities.logsumexp(tgd), axis = 0)
            if has_em_converged(like[i], like[i-1], thresh):
                return like[0:i]
        return like

def bic(lk, deg, n):
    """ Expects lk to be the log likelihood, deg the number of degrees of
    freedom and n the number of frames. """
    return 2 * lk - deg * N.log(n)

def has_em_converged(like, plike, thresh):
    """ given likelihood of current iteration like and previous
    iteration plike, returns true if converged: based on comparison
    of the slope of the likelihood with thresh"""
    diff = N.abs(like - plike)
    avg = 0.5 * (N.abs(like) + N.abs(plike))

    if diff / avg < thresh:
        return True
    else:
        return False

def regularize_diag(va, np, prior):
    """np * n is the number of prior counts (np is a proportion, and n is the
    number of points).

    diagonal variance version"""
    k = va.shape[0]

    for i in range(k):
        va[i] *= 1. / (1 + np)
        va[i] += np / (1. + np) * prior

def regularize_full(va, np, prior):
    """np * n is the number of prior counts (np is a proportion, and n is the
    number of points).

    full covariance version"""
    d = va.shape[1]
    k = va.shape[0] / d

    for i in range(k):
        va[i*d:i*d+d, :] *= 1. / (1 + np)
        va[i*d:i*d+d, :] += np / (1. + np) * prior

if __name__ == "__main__":
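    # A minimal usage sketch: sample data from a known diagonal mixture and
    # re-estimate its parameters with EM. It assumes gauss_mix.GM provides
    # gen_param, fromvalues and sample; treat those names as assumptions and
    # adjust to the actual GM API if it differs.
    from gauss_mix import GM

    d = 2           # dimension of each observation
    k = 3           # number of Gaussian components

    # Build a random reference model and sample training data from it
    w, mu, va = GM.gen_param(d, k, 'diag', spread = 1.5)
    gm = GM.fromvalues(w, mu, va)
    data = gm.sample(1000)

    # Train a fresh model of the same structure, initialized by k-means
    lgm = GM(d, k, mode = 'diag')
    gmm = GMM(lgm, init = 'kmean')
    em = EM()
    like = em.train(data, gmm, maxiter = 30, thresh = 1e-8)

    print "log likelihood by iteration:", like
    print "BIC of the trained model:", gmm.bic(data)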