1
# Copyright (c) 2006 Carnegie Mellon University
3
# You may copy and modify this freely under the same terms as
6
"""Read/write Sphinx-III Gaussian parameter count files.
8
This module reads and writes the expected Gaussian mixture occupancy
count files created by SphinxTrain's implementation of the
Forward-Backward algorithm for training (semi-)continuous HMMs.
"""
13
__author__ = "David Huggins-Daines <dhuggins@cs.cmu.edu>"
14
__version__ = "$Revision: 8994 $"
16
from struct import unpack, pack
17
from numpy import array, reshape, shape, fromstring
18
from s3file import S3File
21
def open(filename, mode="rb", attr={"version":1.0}):
    """Open a Sphinx-III Gaussian count file for reading.

    Args:
        filename: path handed unchanged to S3GauCntFile.
        mode: must be "r" or "rb"; no other mode is supported.
        attr: accepted for signature compatibility; this function never
            reads or modifies it.

    Returns:
        An S3GauCntFile constructed from ``filename`` and ``mode``.

    Raises:
        Exception: if ``mode`` is anything other than "r" or "rb".
    """
    if mode not in ("r", "rb"):
        # Call-style raise parses on both Python 2 and Python 3; the old
        # ``raise Exception, "..."`` statement form is Python-2-only.
        raise Exception("mode must be 'r' or 'rb'")
    return S3GauCntFile(filename, mode)
27
def accumdirs(accumdirs):
28
"Read and accumulate counts from several directories"
# NOTE(review): the statements that bind `d` and `gauden` are not visible in
# this view; presumably `d` is one entry of `accumdirs` and `gauden` holds the
# running totals -- confirm against the complete file.
32
# Open the "gauden_counts" file located in directory `d` for binary reading.
subgau = S3GauCntFile(os.path.join(d, "gauden_counts"), "rb")
39
# Add the counts from `subgau` into `gauden` in place, one entry at a time:
# `m` indexes gauden.mean and `f` indexes the items of gauden.mean[m].
for m, mgau in enumerate(gauden.mean):
40
for f, feat in enumerate(mgau):
41
gauden.mean[m][f] += subgau.mean[m][f]
42
gauden.var[m][f] += subgau.var[m][f]
43
gauden.dnom[m][f] += subgau.dnom[m][f]
46
def accumdirs_full(accumdirs):
47
"Read and accumulate full-covariance counts from several directories"
# NOTE(review): the statements that bind `d` and `gauden` are not visible in
# this view; presumably `d` is one entry of `accumdirs` and `gauden` holds the
# running totals -- confirm against the complete file.
51
# Open the "gauden_counts" file located in directory `d` for binary reading,
# using the full-covariance reader class.
subgau = S3FullGauCntFile(os.path.join(d, "gauden_counts"), "rb")
58
# Add the counts from `subgau` into `gauden` in place, one entry at a time:
# `m` indexes gauden.mean and `f` indexes the items of gauden.mean[m].
for m, mgau in enumerate(gauden.mean):
59
for f, feat in enumerate(mgau):
60
gauden.mean[m][f] += subgau.mean[m][f]
61
gauden.var[m][f] += subgau.var[m][f]
62
gauden.dnom[m][f] += subgau.dnom[m][f]
65
def open_full(filename, mode="rb", attr={"version":1.0}):
    """Open a Sphinx-III full-covariance Gaussian count file for reading.

    Args:
        filename: path handed unchanged to S3FullGauCntFile.
        mode: must be "r" or "rb"; no other mode is supported.
        attr: accepted for signature compatibility; this function never
            reads or modifies it.

    Returns:
        An S3FullGauCntFile constructed from ``filename`` and ``mode``.

    Raises:
        Exception: if ``mode`` is anything other than "r" or "rb".
    """
    if mode not in ("r", "rb"):
        # Call-style raise parses on both Python 2 and Python 3; the old
        # ``raise Exception, "..."`` statement form is Python-2-only.
        # The message wording matches the one raised by open().
        raise Exception("mode must be 'r' or 'rb'")
    return S3FullGauCntFile(filename, mode)
71
# Reader class for Gaussian count files, built on the S3File base class.
class S3GauCntFile(S3File):
72
"Read Sphinx-III format Gaussian count files"
73
# Constructor: `file` and `mode` are forwarded unchanged to S3File.__init__.
def __init__(self, file, mode):
74
S3File.__init__(self, file, mode)
# NOTE(review): any statements that followed the base initializer call are
# not visible in this view.
77
def readgauheader(self):
    """Parse the header of a version "1.0" Gaussian count file.

    Seeks to ``self.data_start`` and reads six unsigned 32-bit integers,
    in order, into ``has_mean``, ``has_var``, ``pass2var``, ``n_mgau``,
    ``density`` and ``n_feat``.  It then reads ``n_feat`` further unsigned
    integers into the tuple ``veclen`` and stores their sum in ``blk``.
    Every value is decoded with the byte-order prefix held in ``self.swap``.

    Raises:
        Exception: if ``self.fileattr["version"]`` is not the string "1.0".
    """
    version = self.fileattr["version"]
    if version != "1.0":
        raise Exception("Version mismatch: must be 1.0 but is " + version)

    stream = self.fh
    uint_fmt = self.swap + "I"
    stream.seek(self.data_start, 0)

    # The six scalar header fields are stored back to back, one uint each.
    for field in ("has_mean", "has_var", "pass2var",
                  "n_mgau", "density", "n_feat"):
        (value,) = unpack(uint_fmt, stream.read(4))
        setattr(self, field, value)

    # One length per feature stream; blk is the total of those lengths.
    count = self.n_feat
    self.veclen = unpack(self.swap + "I" * count, stream.read(4 * count))
    self.blk = sum(self.veclen)
95
# NOTE(review): the `def` line owning the next three statements is not
# visible in this view. They fill `mean` and `var` from two successive
# _loadgau() calls and `dnom` from read3d().
self.mean = self._loadgau()
97
self.var = self._loadgau()
98
self.dnom = self.read3d()
101
# NOTE(review): the `def` line for the routine below is not visible;
# presumably this is the body of _loadgau() -- confirm.
# Read a 4-byte unsigned count and require it to equal
# n_mgau * density * blk before reading the payload.
self._nfloats = unpack(self.swap + "I", self.fh.read(4))[0]
102
if self._nfloats != self.n_mgau * self.density * self.blk:
103
raise Exception(("Number of data points %d doesn't match "
104
+ "total %d = %d*%d*%d")
107
self.n_mgau * self.density * self.blk,
108
self.n_mgau, self.density, self.blk))
109
# Read _nfloats * 4 bytes and decode them as a flat array using numpy
# type code 'f'.
spam = self.fh.read(self._nfloats * 4)
110
data = fromstring(spam, 'f')
112
# NOTE(review): whatever condition guards this byteswap is not visible here;
# presumably it applies only when the file byte order is not native -- confirm.
data = data.byteswap()
115
# For each (i, j) pair, take the next density * veclen[j] values of `data`
# and view them as a (density, veclen[j]) array.
# NOTE(review): the initialisation/advance of `r` and what happens to `gmm`
# afterwards are not visible in this view.
for i in range(0, self.n_mgau):
118
for j in range(0, self.n_feat):
119
rnext = r + self.density * self.veclen[j];
120
gmm = reshape(data[r:rnext], (self.density, self.veclen[j]))
125
# Full-covariance variant of the reader; subclasses S3GauCntFile.
class S3FullGauCntFile(S3GauCntFile):
126
"Read Sphinx-III format Gaussian full covariance matrix files"
# NOTE(review): the `def` line owning the next three statements is not
# visible in this view. `mean` comes from _loadgau(), `var` from
# _loadfullgau(), and `dnom` from read3d().
130
self.mean = self._loadgau()
132
self.var = self._loadfullgau()
133
self.dnom = self.read3d()
135
def _loadfullgau(self):
136
self._nfloats = unpack(self.swap + "I", self.fh.read(4))[0]
137
if self._nfloats != self.n_mgau * self.density * self.blk * self.blk:
138
raise Exception(("Number of data points %d doesn't match "
139
+ "total %d = %d*%d*%d*%d")
142
self.n_mgau * self.density * self.blk * self.blk,
143
self.n_mgau, self.density, self.blk, self.blk))
144
spam = self.fh.read(self._nfloats * 4)
145
data = fromstring(spam, 'f')
147
data = data.byteswap()
150
for i in range(0, self.n_mgau):
153
for j in range(0, self.n_feat):
154
rnext = r + self.density * self.veclen[j] * self.veclen[j];
155
gmm = reshape(data[r:rnext], (self.density,