1
/* ====================================================================
2
* Copyright (c) 1994-2005 Carnegie Mellon University. All rights
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in
14
* the documentation and/or other materials provided with the
17
* This work was supported in part by funding from the Defense Advanced
18
* Research Projects Agency and the National Science Foundation of the
19
* United States of America, and the CMU Sphinx Speech Consortium.
21
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
22
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
25
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
* ====================================================================
36
/******************************************************************************
37
* Main routine for computing the distribution for CDCN.
38
* Coded by Bhiksha Raj, June 94
39
******************************************************************************/
46
#include "parse_cmd_ln.h"
48
int main(int argc, char **argv)
50
vector_t *vector, *buff;
51
float **mean, **variance, *c;
52
float noisec, noisemean[64], noisevar[64];
53
float atemp, noise_threshold, noise_width;
54
int numnoise, numspch, numvecs, Ndim, Nmodes;
55
int maxframes, vector_alloc;
56
int i, j, k, *nbin, *bin;
57
int superiter, stride;
60
parse_cmd_ln(argc, argv);
61
Ndim = cmd_ln_int32("-ceplen");
62
Nmodes = cmd_ln_int32("-nmodes");
63
stride = cmd_ln_int32("-stride");
64
maxframes = cmd_ln_int32("-maxframes");
65
noise_width = cmd_ln_float32("-noisewidth");
66
/* FIXME: this will break when we have terabytes of memory... */
71
* by default I assume the program is to be run from data
72
* only with no initial set of Gaussians. i.e. go to vq and then
75
corpus_set_mfcc_dir(cmd_ln_str("-cepdir"));
76
corpus_set_mfcc_ext(cmd_ln_str("-cepext"));
77
if (corpus_set_ctl_filename(cmd_ln_str("-ctlfn")) != S3_SUCCESS)
78
E_FATAL("Failed to read control file %s\n", cmd_ln_str("-ctlfn"));
80
if (corpus_init() != S3_SUCCESS)
81
E_FATAL("Corpus initialization failed\n");
85
for (j = 0; j < Ndim; ++j) {
90
/* Read in all frames (you can set a maximum to avoid dying) */
91
/* Pick a reasonable size (about 60MB) to start with. */
92
E_INFO("Allocating 100000 frames initially\n");
93
vector_alloc = 1000000;
94
vector = (vector_t *) ckd_calloc_2d(vector_alloc, Ndim, sizeof(float));
95
E_INFO("Reading frames... ");
96
while (corpus_next_utt() && (numvecs < maxframes)) {
97
corpus_get_generic_featurevec(&buff, &length, Ndim);
98
for (x = 0; x < length; x += stride) {
99
if (numvecs >= vector_alloc) {
100
vector_alloc = numvecs + 1000000;
101
E_INFOCONT(" (Reallocating to %d frames) ", vector_alloc);
102
vector = ckd_realloc(vector, sizeof(vector_t *) * vector_alloc);
103
vector[0] = ckd_realloc(vector[0],
104
Ndim*sizeof(float)*vector_alloc);
105
for (j = 1; j < vector_alloc; ++j)
106
vector[j] = vector[0] + j * Ndim;
108
memcpy(vector[numvecs], buff[x], Ndim*sizeof(float));
114
E_INFOCONT("%d vectors in all\n", numvecs);
117
E_FATAL(("This is silly! You have given me only 0 vectors to compute a DISTRIBUTION!\nI am miffed! I am quitting!\n"));
120
* Compute threshold for the noise mode as the minimum c[0] + thresholding
122
noise_threshold = vector[0][0];
123
for (i = 0; i < numvecs; ++i)
124
if (vector[i][0] < noise_threshold)
125
noise_threshold = vector[i][0];
126
noise_threshold += noise_width;
127
E_INFO("Noise threshold = %f\n", noise_threshold);
131
for (i = 0; i < numvecs; ++i) {
132
if (vector[i][0] <= noise_threshold) {
133
for (j = 0; j < Ndim; ++j) {
134
noisemean[j] += vector[i][j];
135
noisevar[j] += vector[i][j] * vector[i][j];
139
for (j = 0; j < Ndim; ++j)
140
vector[numspch][j] = vector[i][j];
145
("%d vectors found below noise threshold %f, %d vectors found above it\n",
146
numnoise, noise_threshold, numspch);
150
* Compute noise statistics
152
for (j = 0; j < Ndim; ++j) {
153
noisemean[j] /= (float) numnoise;
155
noisevar[j] / (float) numnoise -
156
noisemean[j] * noisemean[j];
158
noisec = (float) numnoise / (float) numvecs;
159
Nmodes -= 1; /* ACCOUNT FOR NOISE MODE : Rest of modes = total modes-1 */
162
* We Vector Quantize to obtain the initial values for the EM.
163
* If this codebook already exists, we skip the VQ and directly
164
* compute the variances and c[]s after obtaining the mean values
165
* as the code words in the existing codebook
169
* do this only if we are not requesting a restart from a previous
170
* temp statistics file.
172
if (cmd_ln_str("-cbfn") == NULL) {
174
* allocate the mean and variance and c arrays.
176
c = (float *) ckd_calloc(Nmodes, sizeof(float));
177
mean = (float **) ckd_calloc_2d(Nmodes, Ndim, sizeof(float));
178
variance = (float **) ckd_calloc_2d(Nmodes, Ndim, sizeof(float));
180
nbin = (int *) ckd_calloc(Nmodes, sizeof(int)); /* no of vectors in a mode */
183
* The vector_quantize routine performs VQ with a Mahalanobis metric
184
* and returns the codes as the means and the weights as the variances
185
* of the initial estimates of the modes, which will further be
186
* employed in EM. Note that the variances are purely diagonal
187
* We initialize all initial c[] to be equal
190
bin = (int *) ckd_calloc(numspch, sizeof(int));
193
vector_quantize(mean, Nmodes, vector, numspch, Ndim, bin,
194
cmd_ln_int32("-vqiter"),
195
cmd_ln_float32("-vqthresh"));
197
for (i = 0; i < Nmodes; ++i)
198
c[i] = 1.0 / (float) Nmodes;
199
for (k = 0; k < Nmodes; ++k) {
201
for (i = 0; i < Ndim; ++i)
204
for (i = 0; i < numspch; ++i) {
205
for (j = 0; j < Ndim; ++j) {
206
atemp = (vector[i][j] - mean[bin[i]][j]);
207
variance[bin[i]][j] += atemp * atemp;
211
for (k = 0; k < Nmodes; ++k) {
212
for (j = 0; j < Ndim; ++j)
213
variance[k][j] /= nbin[k];
216
ckd_free(bin); /* We do not need this array anymore */
217
ckd_free(nbin); /* Chappie not needed anymore */
220
* if initialize = 0 ===> I want to skip the VQ and go to EM
223
if (!read_backup_distribution
224
(cmd_ln_str("-cbfn"), &mean, &variance, &c, &Nmodes, Ndim))
225
E_FATAL(("Unable to read initial distribution\n"));
228
for (superiter = 0; superiter < 1; ++superiter) {
229
estimate_multi_modals(vector, numspch, Ndim, Nmodes, mean,
230
variance, c, cmd_ln_str("-tmpfn"),
231
cmd_ln_int32("-emiter"),
232
cmd_ln_float32("-emthresh"));
233
if (store_distribution
234
(cmd_ln_str("-outfn"), Nmodes, Ndim, noisec, noisemean, noisevar, c,
235
mean, variance) != 0) {
236
E_FATAL("Unable to open %s to store distribution\n",
237
cmd_ln_str("-tmpfn"));
241
ckd_free_2d((void **)vector);
243
ckd_free_2d((void **)mean);
244
ckd_free_2d((void **)variance);