2
## ====================================================================
4
## Copyright (c) 1996-2000 Carnegie Mellon University. All rights
7
## Redistribution and use in source and binary forms, with or without
8
## modification, are permitted provided that the following conditions
11
## 1. Redistributions of source code must retain the above copyright
12
## notice, this list of conditions and the following disclaimer.
14
## 2. Redistributions in binary form must reproduce the above copyright
15
## notice, this list of conditions and the following disclaimer in
16
## the documentation and/or other materials provided with the
19
## This work was supported in part by funding from the Defense Advanced
20
## Research Projects Agency and the National Science Foundation of the
21
## United States of America, and the CMU Sphinx Speech Consortium.
23
## THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
24
## ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
25
## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26
## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
27
## NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28
## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29
## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30
## DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31
## THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32
## (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33
## OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
## ====================================================================
37
## Author: Ricky Houghton
38
## Author: David Huggins-Daines
44
use File::Spec::Functions;
47
use lib catdir(dirname($0), updir(), 'lib');
48
use SphinxTrain::Config;
49
use SphinxTrain::Util;
51
#***************************************************************************
52
# This script launches all the untied context-dependent training jobs in the
53
# proper order. First it cleans up the directories, then initializes the
54
# untied models from the CI models, and launches the baum-welch and norm jobs
55
# for the required number of iterations. Within each iteration it launches as
56
# many baumwelch jobs as the number of parts we wish to split the training into.
57
#***************************************************************************
59
# Command-line arguments, both optional: the iteration number and the number
# of parts the training is split into.
my ($iter, $n_parts) = @ARGV;
60
# Default to iteration 1 when no iteration is given.
$iter = 1 unless defined $iter;
61
# When no part count is given, use $ST::CFG_NPART if it is defined, else 1.
$n_parts = (defined($ST::CFG_NPART) ? $ST::CFG_NPART : 1) unless defined $n_parts;
62
# Cap the number of parts at 15; otherwise we use all memory in norm and die
63
$n_parts = 15 if $n_parts > 15;
65
# Create the model_parameters directory under the base directory.
# The return value of mkdir is not checked.
my $modeldir = "$ST::CFG_BASE_DIR/model_parameters";
66
mkdir ($modeldir,0777);
68
# Package globals declared via `use vars`.
# NOTE(review): no assignment to $MODEL_TYPE is visible in this view, yet it is
# interpolated into $logdir below -- confirm it is set before that point.
use vars qw($MLLT_FILE $MODEL_TYPE $CI_MODEL_TYPE);
69
# Path built as <CFG_MODEL_DIR>/<CFG_EXPTNAME>.mllt; it is not referenced
# again in the visible code.
$MLLT_FILE = catfile($ST::CFG_MODEL_DIR, "${ST::CFG_EXPTNAME}.mllt");
71
$CI_MODEL_TYPE = 'ci';
73
$| = 1; # Turn on autoflushing
74
# Log directory for this module: <CFG_LOG_DIR>/30.<MODEL_TYPE>_hmm_untied.
my $logdir = "${ST::CFG_LOG_DIR}/30.${MODEL_TYPE}_hmm_untied";
75
mkdir ("$logdir",0777);
77
#Read npart_untied from variables.def
80
# Clean up junk from earlier runs
81
# Phase 1: remove the accumulator, log and queue-manager directories left by
# earlier runs.
Log ("MODULE: 30 Training Context Dependent models\n");
82
Log("Phase 1: Cleaning up directories:");
83
# Don't do this on a queue, because of NFS bugs
# (the cleanup is skipped when the queue type is 'Queue::PBS').
# NOTE(review): the closing brace of this `unless` block is not visible in
# this view -- confirm which statements it encloses.
84
unless ($ST::CFG_QUEUE_TYPE eq 'Queue::PBS') {
85
LogProgress("\taccumulator...");
86
# presumably File::Path's rmtree in its legacy (dir, verbose, safe) form --
# confirm where rmtree is imported from.
rmtree ($ST::CFG_BWACCUM_DIR, 0, 1);
87
mkdir ($ST::CFG_BWACCUM_DIR,0777);
89
LogProgress("logs...");
90
# NOTE(review): no mkdir of $logdir follows this rmtree in the visible
# code -- confirm the log directory is recreated before it is used.
rmtree($logdir, 0, 1);
92
LogProgress("qmanager...\n");
93
rmtree ($ST::CFG_QMGR_DIR, 0, 1);
94
mkdir ($ST::CFG_QMGR_DIR,0777);
95
LogStatus('completed');
97
# For the first iteration, initialize the models.
98
# Run the initialization step and stop the script with its return value when
# that value is true.
# NOTE(review): declarations of $return_value and @deps are not visible in
# this view.
$return_value = Initialize();
99
exit ($return_value) if ($return_value);
100
Log("Phase 3: Forward-Backward");
103
# Call baum_welch with iter part and n_parts,
104
# once done call norm_and_launchbw.pl
# One baum_welch.pl job is launched per part (1..$n_parts); each LaunchScript
# return value is collected in @deps.
# NOTE(review): the loop's braces are not visible in this view.
106
for (my $i=1; $i<=$n_parts; $i++)
108
push @deps, LaunchScript("bw.$iter.$i", ['baum_welch.pl', $iter, $i, $n_parts])
110
# The norm job receives a reference to @deps -- presumably so it runs only
# after the baum_welch jobs finish; confirm against LaunchScript.
LaunchScript("norm.$iter", ['norm_and_launchbw.pl', $iter, $n_parts], \@deps);
111
# For the first iteration (i.e. the one that was called from the
112
# command line or a parent script), wait until completion or error
114
$return_value = WaitForConvergence($logdir);
121
# NOTE(review): these statements presumably form the body of Initialize(),
# whose `sub` header is not visible in this view -- confirm.
# Directory of the CI model parameters and directory for the untied CD model
# parameters, both under <CFG_BASE_DIR>/model_parameters.
my $cihmmdir = "${ST::CFG_BASE_DIR}/model_parameters/${ST::CFG_EXPTNAME}.${CI_MODEL_TYPE}_${ST::CFG_DIRLABEL}";
122
my $cdhmmdir = "${ST::CFG_BASE_DIR}/model_parameters/${ST::CFG_EXPTNAME}.${MODEL_TYPE}_${ST::CFG_DIRLABEL}_untied";
123
mkdir ($cdhmmdir,0777);
125
# Same path as the $logdir built earlier in the file; whether this `my`
# opens a new scope depends on lines not visible here.
my $logdir = "${ST::CFG_LOG_DIR}/30.${MODEL_TYPE}_hmm_untied";
126
mkdir ($logdir,0777);
128
Log ("Phase 2: Initialization");
129
# Output path of the untied model definition and the log file for the step
# that generates it.
my $untiedmdef = "${ST::CFG_BASE_DIR}/model_architecture/${ST::CFG_EXPTNAME}.untied.mdef";
130
my $logfile = "$logdir/${ST::CFG_EXPTNAME}.make_alltriphonelist.log";
132
# Aligned transcripts and the list of aligned files are obtained as a result
133
# of (03.) forced alignment or (04.) VTLN
134
# FIXME: This should go in SphinxTrain::Util
# Forced-alignment output takes precedence over VTLN output; otherwise the
# configured file list and transcript file are used.
# $listoffiles is assigned here but not used in the visible code.
135
my ($listoffiles, $transcriptfile);
136
if ($ST::CFG_FORCEDALIGN eq "yes") {
137
$listoffiles = "$ST::CFG_BASE_DIR/falignout/${ST::CFG_EXPTNAME}.alignedfiles";
138
$transcriptfile = "$ST::CFG_BASE_DIR/falignout/${ST::CFG_EXPTNAME}.alignedtranscripts";
139
} elsif ($ST::CFG_VTLN eq "yes") {
140
$listoffiles = "$ST::CFG_BASE_DIR/vtlnout/${ST::CFG_EXPTNAME}.alignedfiles";
141
$transcriptfile = "$ST::CFG_BASE_DIR/vtlnout/${ST::CFG_EXPTNAME}.alignedtranscripts";
# NOTE(review): the `} else {` that presumably separates the fallback below
# from the elsif branch is not visible in this view.
143
$listoffiles = $ST::CFG_LISTOFFILES;
144
$transcriptfile = $ST::CFG_TRANSCRIPTFILE;
147
# Run the mk_mdef_gen tool, logging to $logfile, with the raw phone list, the
# dictionary returned by GetDict(), the filler dictionary and the selected
# transcript file as inputs; the untied model definition goes to $untiedmdef.
# NOTE(review): $rv is not checked in the visible code -- confirm a failure
# here is handled on lines outside this view.
my $rv = RunTool('mk_mdef_gen', $logfile, 0,
148
-phnlstfn => $ST::CFG_RAWPHONEFILE,
149
-dictfn => GetDict(),
150
-fdictfn => $ST::CFG_FILLERDICT,
151
-lsnfn => $transcriptfile,
152
-ountiedmdef => $untiedmdef,
153
-n_state_pm => $ST::CFG_STATESPERHMM);
156
# Log file for the step that copies CI model parameters to the CD models.
$logfile = "$logdir/${ST::CFG_EXPTNAME}.copycitocd.log";
158
# Destination model definition: the untied mdef under model_architecture.
my $cd_mdeffile = "${ST::CFG_BASE_DIR}/model_architecture/${ST::CFG_EXPTNAME}.untied.mdef";
160
# Arguments for the init_mixw tool: source files come from the CI model
# directory ($cihmmdir) and the CI mdef, destination files go to the untied
# CD model directory ($cdhmmdir) and $cd_mdeffile.
# NOTE(review): the callee name before this argument list is not visible in
# this view; presumably RunTool, as in the mk_mdef_gen call -- confirm.
('init_mixw', $logfile, 0,
161
-src_moddeffn => "${ST::CFG_BASE_DIR}/model_architecture/${ST::CFG_EXPTNAME}.ci.mdef",
162
-src_ts2cbfn => $ST::CFG_HMM_TYPE,
163
-src_mixwfn => "$cihmmdir/mixture_weights",
164
-src_meanfn => "$cihmmdir/means",
165
-src_varfn => "$cihmmdir/variances",
166
-src_tmatfn => "$cihmmdir/transition_matrices",
167
-dest_moddeffn => $cd_mdeffile,
168
-dest_ts2cbfn => $ST::CFG_HMM_TYPE,
169
-dest_mixwfn => "$cdhmmdir/mixture_weights",
170
-dest_meanfn => "$cdhmmdir/means",
171
-dest_varfn => "$cdhmmdir/variances",
172
-dest_tmatfn => "$cdhmmdir/transition_matrices",
173
-fullvar => $ST::CFG_FULLVAR);