1
# Copyright (c) 2006 Carnegie Mellon University
3
# You may copy and modify this freely under the same terms as
"""Sphinx-III acoustic models.

This module provides a class which wraps a set of acoustic models, as
used by SphinxTrain, Sphinx-III, and PocketSphinx.  It provides
functions for computing Gaussian mixture densities for acoustic
feature vectors.
"""
14
__author__ = "David Huggins-Daines <dhuggins@cs.cmu.edu>"
15
__version__ = "$Revision: 10963 $"
28
class S3Model(object):
    """A set of Sphinx-III format acoustic model parameters.

    Reads the model definition, Gaussian means and variances, mixture
    weights and transition matrices from a model directory, and
    computes Gaussian mixture (senone) scores for frames of acoustic
    features.
    """

    def __init__(self, path=None, topn=4):
        """Create a model, optionally loading it from a directory.

        path -- model directory to load.  When None, nothing is read
                and read() may be called later.
        topn -- number of best-scoring densities per feature stream
                that contribute to a senone score.
        """
        self.topn = topn
        # NOTE(review): the floor values were not visible in the
        # reviewed excerpt; 1e-5 is assumed for both -- confirm.
        self.mwfloor = 1e-5
        self.varfloor = 1e-5
        if path is not None:
            self.read(path)

    def read(self, path):
        """Load all model files found in directory `path`.

        Besides loading, this normalizes the transition matrices and
        mixture weights, floors the variances, and replaces the stored
        variances by 1 / (2 * variance) so that cb_compute() only has
        to multiply.
        """
        self.mdef = s3mdef.open(os.path.join(path, "mdef"))
        self.mean = s3gau.open(os.path.join(path, "means"))
        self.var = s3gau.open(os.path.join(path, "variances"))
        self.mixw = s3mixw.open(os.path.join(path, "mixture_weights"))
        self.tmat = s3tmat.open(os.path.join(path, "transition_matrices"))

        # Normalize transition matrices and mixture weights so that
        # each row sums to one; mixture weights are floored afterwards
        # so that their logarithm stays finite.
        for t in range(len(self.tmat)):
            tmat = self.tmat[t]
            self.tmat[t] = (tmat.T / tmat.sum(1)).T
        for t in range(len(self.mixw)):
            mixw = self.mixw[t]
            self.mixw[t] = (mixw.T / mixw.sum(1)).T.clip(self.mwfloor, 1.0)

        # Floor variances and precompute normalizing and inverse
        # variance terms.  self.norm is indexed [codebook, feature,
        # density], matching the lookups in cb_compute().
        self.norm = numpy.empty((len(self.var),
                                 len(self.var[0]),
                                 len(self.var[0][0])), 'd')
        log_2pi = numpy.log(2 * numpy.pi)
        for m, mgau in enumerate(self.var):
            for f, feat in enumerate(mgau):
                fvar = feat.clip(self.varfloor, numpy.inf)
                # log of 1/sqrt((2*pi)^N * det(var))
                det = numpy.log(fvar).sum(1)
                self.norm[m, f] = -0.5 * (det + log_2pi * feat.shape[1])
                # "Invert" variances in place: store 1 / (2 * var)
                feat[:] = 1 / (fvar * 2)

        # Construct senone to codebook mapping
        senmgau_path = os.path.join(path, "senmgau")
        if os.access(senmgau_path, os.F_OK):
            self.senmgau = s3file.S3File(senmgau_path).read1d()
        elif len(self.mean) == 1:
            # A single shared codebook: every senone maps to index 0
            # (index 1 would be out of range for self.mean).
            self.senmgau = numpy.zeros(len(self.mixw), dtype=int)
        else:
            # One codebook per senone.
            self.senmgau = numpy.arange(0, len(self.mixw))
        self.senscr = numpy.ones(len(self.mixw)) * WORSTSCORE

    def cb_compute(self, mgau, feat, obs):
        """Compute codebook #mgau feature #feat for obs.

        Returns one log density per Gaussian in the codebook, using the
        precomputed normalizer and the stored 1 / (2 * variance) terms.
        """
        mean = self.mean[mgau][feat]
        ivar = self.var[mgau][feat]
        norm = self.norm[mgau][feat]
        diff = obs - mean
        dist = (diff * ivar * diff).sum(1)
        return norm - dist

    def senone_compute(self, senones, *features):
        """Compute senone scores for given list of senones and a
        frame of acoustic features.

        senones  -- indices of the senones to evaluate.
        features -- one observation vector per feature stream.

        Returns the scores of all senones, scaled so that the best one
        is 1.0.  Senones that were not requested keep WORSTSCORE as
        their log score before scaling.
        """
        cbs = {}  # codebook index -> per-feature log densities (cache)
        self.senscr[:] = WORSTSCORE
        for s in senones:
            # int() so the codebook id is usable as an index even when
            # the mapping was loaded as a non-integer array.
            m = int(self.senmgau[s])
            if m not in cbs:
                cbs[m] = [self.cb_compute(m, f, features[f])
                          for f in range(len(self.mean[m]))]
            score = 0
            for f in range(len(features)):
                # Compute densities and scale by mixture weights
                d = cbs[m][f] + numpy.log(self.mixw[s, f])
                # Take top-N densities
                d = d.take(d.argsort()[-self.topn:])
                # Multiply into output score; logaddexp avoids the
                # underflow of exp() for very negative log densities.
                score += numpy.logaddexp.reduce(d)
            self.senscr[s] = score
        return numpy.exp(self.senscr - self.senscr.max())