1
/* ====================================================================
2
* Copyright (c) 1994-2000 Carnegie Mellon University. All rights
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in
14
* the documentation and/or other materials provided with the
17
* This work was supported in part by funding from the Defense Advanced
18
* Research Projects Agency and the National Science Foundation of the
19
* United States of America, and the CMU Sphinx Speech Consortium.
21
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
22
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
25
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
* ====================================================================
36
/*********************************************************************
38
* File: train_cmd_ln.c
43
* Eric H. Thayer (eht@cs.cmu.edu)
44
*********************************************************************/
46
#include "train_cmd_ln.h"
48
#include <sphinxbase/cmd_ln.h>
49
#include <sphinxbase/feat.h>
50
#include <sphinxbase/err.h>
53
#include <sys_compat/file.h>
58
#include <sys/types.h>
62
validate_writeable_dir(char *switch_name, void *arg)
69
E_ERROR("%s is a necessary switch\n", switch_name);
74
if (stat(path, &s) < 0) {
75
E_ERROR("%s %s does not exist or is inaccessible\n", switch_name, path);
80
if (!S_ISDIR(s.st_mode)) {
81
E_ERROR("%s %s is not a directory\n", switch_name, path);
86
/* Test the write-permission bits with bitwise '&'.  A logical '&&' here
   would be true for any non-zero st_mode, so every directory would be
   reported as writeable. */
if ((s.st_mode & S_IWOTH) ||
((s.st_uid == getuid()) && (s.st_mode & S_IWUSR)) ||
((s.st_gid == getgid()) && (s.st_mode & S_IWGRP))) {
92
E_ERROR("%s %s is not writeable\n", switch_name, path);
99
/* Do no validation for now. Need to figure out a WIN32-compatible way */
105
validate_opt_writeable_dir(char *switch_name, void *arg)
115
if (stat(path, &s) < 0) {
116
E_ERROR("%s %s does not exist or is inaccessible\n", switch_name, path);
121
if (!S_ISDIR(s.st_mode)) {
122
E_ERROR("%s %s is not a directory\n", switch_name, path);
127
/* Test the write-permission bits with bitwise '&'.  A logical '&&' here
   would be true for any non-zero st_mode, so every directory would be
   reported as writeable. */
if ((s.st_mode & S_IWOTH) ||
((s.st_uid == getuid()) && (s.st_mode & S_IWUSR)) ||
((s.st_gid == getgid()) && (s.st_mode & S_IWGRP))) {
133
E_ERROR("%s %s is not writeable\n", switch_name, path);
140
/* Do no validation for now. Need to figure out a WIN32-compatible way */
147
validate_readable_dir(char *switch_name, void *arg)
154
E_ERROR("%s is a necessary switch\n", switch_name);
159
if (stat(path, &s) < 0) {
160
E_ERROR("%s %s does not exist or is inaccessible\n", switch_name, path);
165
if (!S_ISDIR(s.st_mode)) {
166
E_ERROR("%s %s is not a directory\n", switch_name, path);
171
/* Test the read-permission bits with bitwise '&'.  A logical '&&' here
   would be true for any non-zero st_mode, so every directory would be
   reported as readable. */
if ((s.st_mode & S_IROTH) ||
((s.st_uid == getuid()) && (s.st_mode & S_IRUSR)) ||
((s.st_gid == getgid()) && (s.st_mode & S_IRGRP))) {
177
E_ERROR("%s %s is not readable\n", switch_name, path);
184
/* Do no validation for now. Need to figure out a WIN32 compatible
192
validate_agc(char *switch_name, void *arg)
194
if ((strcmp(arg, "max") == 0) || (strcmp(arg, "emax") == 0) || (strcmp(arg, "none") == 0)) {
198
E_ERROR("Unknown agc type %s %s\n", switch_name, arg);
207
validate_cmn(char *switch_name, void *arg)
210
if ((strcmp(arg, "current") == 0) ||
211
(strcmp(arg, "none") == 0) ||
212
(strcmp(arg, "prior") == 0)) {
216
E_ERROR("Unknown CMN type %s %s\n", switch_name, arg);
222
/* defines, parses and (partially) validates the arguments
223
given on the command line */
226
train_cmd_ln_parse(int argc, char *argv[])
231
const char helpstr[] =
233
Strictly speaking, bw only implements the first-part of the Baum-Welch \n\
234
algorithm. That is it go through forward and backward algortihm and\n\
235
collect the necessary statistics for parameter estimation.\n\
237
The advantage of this architecture is that researcher can easily write \n\
238
programs to do parameter estimation and they have no need to tweak the \n\
239
huge and usually difficult Baum-Welch algorithm. \n\
241
In terms of functionality, one important thing you need to know is option \n\
242
-part and -npart. They can allow you to split the training into N equal parts\n\
243
Say, if there are M utterances in your control file, then this \n\
244
will enable you to run the training separately on each (M/N)th \n\
245
part. This flag may be set to specify which of these parts you want to \n\
246
currently train on. As an example, if your total number of parts (-npart) is 3, \n\
247
-part can take one of the values 1,2 or 3. \n\
249
To control the speed of the training, -abeam (control alpha search) \n\
250
and -bbeam (control beta search) can be used to control the searching \n\
251
time. Notice that if the beams are too small, the path may not reach \n\
252
the end of the search and results in estimation error \n\
253
Too many lost path may also cause training set likelihood not unable to increase \n\
255
Several options allow the user to control the behaviour of bw such \n\
256
that silence or pauses can be taken care\n\
258
Finally, one can use the viterbi training mode of the code. Notice \n\
259
though, the code is not always tested by CMU's researcher \n\
261
I also included the following paragraph from Rita's web page. ";
263
const char examplestr[]=
265
Command used to train continuous HMM \n\
266
(Beware, this only illustrates how to use this command, for detail on \n\
267
how to tune it, please consult the manual. ) \n\
269
-moddeffn mdef -ts2cbfn .cont.\n\
270
-mixwfn mixw -tmatfn tmatn -meanfn mean -varfn var\n\
271
-dictfn dict -fdictfn fillerdict \n\
272
-ctlfn control_files \n\
274
-cepdir feature_dir -cepext mfc \n\
275
-lsnfn transcription \n\
276
-accumdir accumdir \n\
277
-abeam 1e-200 -bbeam 1e-200 \n\
278
-meanreest yes -varreest yes \n\
279
-tmatreest yes -feat 1s_12c_12d_3p_12dd \n\
282
If yo want to do parallel training for N machines. Run N trainers with \n\
289
static arg_t defn[] = {
293
"Shows the usage of the tool"},
298
"Shows example of how to use the tool"},
303
"Default directory for acoustic model files (mdef, means, variances, transition_matrices, noisedict)" },
308
"The model definition file for the model inventory to train" },
313
"The transition matrix parameter file name"},
318
"The mixture weight parameter file name"},
323
"The mean parameter file name"},
328
"The var parameter file name"},
333
"Variances are full covariance matrices"},
338
"Evaluate Gaussian densities using diagonals only"},
343
"Mixing weight smoothing floor" },
348
"Transition probability smoothing floor" },
353
"The minimum variance"},
358
"Compute output probabilities based this number of top scoring densities."},
363
"The content word dictionary" },
368
"The filler word dictionary (e.g. SIL, SILb, ++COUGH++)" },
373
"The training corpus control file" },
378
"The number of utterances to skip at the beginning of a control file" },
382
"-1", /* until eof */
383
"The number of utterances to process in the (skipped) control file" },
388
"Identifies the corpus part number (range 1..NPART)" },
393
"Partition the corpus into this many equal sized subsets" },
398
"The cepstrum file extension" },
403
"The cepstrum data root directory" },
408
"Phone segmentation file extension" },
413
"Phone segmentation file root directory" },
418
"Phone segmentation file output root directory" },
423
"The sentence transcript file directory"},
428
"The sentence transcript file extension"},
433
"The corpus word transcript file"},
438
"A path where accumulated counts are to be written." },
443
"Evaluate alpha values subject to this beam"},
448
"Evaluate beta values (update reestimation sums) subject to this beam"},
453
"Reestimate variances"},
463
"Reestimate mixing weights"},
468
"Reestimate transition probability matrices"},
473
"An MLLR transformation file to apply to the means of the model"},
478
"Codebook-to-MLLR-class mapping file name" },
483
"Tied-state-to-codebook mapping file name" },
488
"Controls whether profiling information is displayed"},
493
"Controls whether Viterbi training is done"},
498
"Reestimate variances based on prior means"},
503
"State posterior probability floor for reestimation. States below this are not counted"},
508
"Maximum # of frames for an utt ( 0 => no fixed limit )"},
513
"Checkpoint the reestimation sums every -chkptintv utts" },
518
"Output full path of utterance to bw log output" },
520
{ "-fullsuffixmatch",
523
"Expect utterance id in transcript to be a suffix of the partial path in the control file" },
528
"Apply LDA in accumulation of statistics only (NOTE: no dimensionality reduction will be done)."},
533
"Dump state/mixture posterior probabilities to files in this directory" },
538
"Directory that contains lattice files" },
543
"Whether to do MMIE training or not" },
548
"how to get different context for Viterbi run on lattice, such as rand, best or ci. \n\
549
\t\t\trand: randomly take the left and right context \n\
550
\t\t\tbest: take the left and right context with the best acoustic score \n\
551
\t\t\tci: use context-independent hmm for word boundary models" },
556
"Denominator or Numerator lattice. Use denlat or numlat" },
561
"Language model weight" },
564
cepstral_to_feature_command_line_macro(),
565
{NULL, 0, NULL, NULL}
568
cmd_ln_parse(defn, argc, argv, 1);
571
isHelp = cmd_ln_int32("-help");
572
isExample = cmd_ln_int32("-example");
575
printf("%s\n\n",helpstr);
579
printf("%s\n\n",examplestr);
582
if(isHelp || isExample){
583
E_INFO("User asked for help or example.\n");