1
/* ====================================================================
2
* Copyright (c) 2000 Carnegie Mellon University. All rights
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in
14
* the documentation and/or other materials provided with the
17
* This work was supported in part by funding from the Defense Advanced
18
* Research Projects Agency and the National Science Foundation of the
19
* United States of America, and the CMU Sphinx Speech Consortium.
21
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
22
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
25
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
* ====================================================================
36
/*********************************************************************
38
* Multi-function routine to generate mdef for context-independent
39
* training, untied training, and all-triphones mdef for state tying.
41
* if (triphonelist) make CI phone list and CD phone list
42
* if alltriphones mdef needed, make mdef
43
* if (rawphonelist) Make ci phone list,
44
* if cimdef needed, make mdef
45
* Generate alltriphones list from dictionary
46
* if alltriphones mdef needed, make mdef
47
* if neither triphonelist nor rawphonelist is given, quit
48
* Count triphones and triphone types in transcript
49
* Adjust threshold according to min-occ and maxtriphones
55
* Rita Singh (rsingh@cs.cmu.edu)
57
*********************************************************************/
63
#include <sphinxbase/cmd_ln.h>
64
#include <sphinxbase/ckd_alloc.h>
65
#include "parse_cmd_ln.h"
68
#include "mk_untied.h"
70
int main (int argc, char **argv)
72
heapelement_t **CDheap=NULL;
73
hashelement_t **CDhash=NULL;
74
phnhashelement_t **CIhash=NULL;
75
dicthashelement_t **dicthash=NULL;
76
int32 cilistsize=0, cdheapsize=0, threshold, tph_list_given, ncd;
77
const char *phnlist, *incimdef, *triphnlist, *incdmdef;
78
const char *lsnfile, *dictfn, *fillerdictfn;
80
const char *cimdeffn, *alltphnmdeffn, *untiedmdeffn, *countfn;
83
parse_cmd_ln(argc,argv);
85
/* Test all flags before beginning */
86
cimdeffn = cmd_ln_str("-ocimdef");
87
alltphnmdeffn = cmd_ln_str("-oalltphnmdef");
88
untiedmdeffn = cmd_ln_str("-ountiedmdef");
89
countfn = cmd_ln_str("-ocountfn");
90
ignore_wpos = cmd_ln_int32("-ignorewpos");
92
if (cimdeffn) E_INFO("Will write CI mdef file %s\n",cimdeffn);
94
E_INFO("Will write alltriphone mdef file %s\n",alltphnmdeffn);
95
if (untiedmdeffn) E_INFO("Will write untied mdef file %s\n",untiedmdeffn);
96
if (countfn) E_INFO("Will write triphone counts file %s\n",countfn);
98
if (!cimdeffn && !alltphnmdeffn && !untiedmdeffn && !countfn)
99
E_FATAL("No output mdef files or count files specified!\n");
101
dictfn = cmd_ln_str("-dictfn");
102
fillerdictfn = cmd_ln_str("-fdictfn");
103
lsnfile = cmd_ln_str("-lsnfn");
104
if ((untiedmdeffn || countfn) && (!lsnfile || !dictfn)) {
105
E_WARN("Either dictionary or transcript file not given!\n");
106
if (untiedmdeffn) E_WARN("Untied mdef will not be made\n");
107
if (countfn) E_WARN("Phone counts will not be generated\n");
108
untiedmdeffn = countfn = NULL;
111
phnlist = cmd_ln_str("-phnlstfn");
112
triphnlist = cmd_ln_str("-triphnlstfn");
113
incimdef = cmd_ln_str("-inCImdef");
114
incdmdef = cmd_ln_str("-inCDmdef");
115
if (!incdmdef && !incimdef && !phnlist && !triphnlist)
116
E_FATAL("No input mdefs or phone list given\n");
119
E_WARN("Both -triphnlist %s and -phnlist given.\n",triphnlist);
120
E_WARN("Ignoring -phnlist %s\n",phnlist);
121
phnlist = triphnlist;
123
tph_list_given = (triphnlist || incdmdef) ? 1 : 0;
126
if (incimdef || phnlist){
127
E_WARN("Using only input CD mdef %s!\n",incdmdef);
128
E_WARN("Using only triphones from input CD mdef %s!\n",incdmdef);
129
if (incimdef) E_WARN("CImdef %s will be ignored\n",incimdef);
130
if (phnlist) E_WARN("phonelist %s will be ignored\n",phnlist);
131
incimdef = phnlist = NULL;
133
make_ci_list_cd_hash_frm_mdef(incdmdef,&CIlist,&cilistsize,
138
make_ci_list_cd_hash_frm_phnlist(phnlist,&CIlist,
139
&cilistsize,&CDhash,&ncd);
141
if (CIlist) ckd_free_2d((void**)CIlist);
142
make_ci_list_frm_mdef(incimdef,&CIlist,&cilistsize);
146
make_mdef_from_list(cimdeffn,CIlist,cilistsize,NULL,0,argv[0]);
148
if (!tph_list_given && !cimdeffn) {
149
read_dict(dictfn, fillerdictfn, &dicthash);
150
if (CDhash) freehash(CDhash);
151
make_dict_triphone_list (dicthash, &CDhash, ignore_wpos);
156
make_CD_heap(CDhash,threshold,&CDheap,&cdheapsize);
157
make_mdef_from_list(alltphnmdeffn,CIlist,cilistsize,
158
CDheap,cdheapsize,argv[0]);
160
if (countfn || untiedmdeffn)
161
count_triphones(lsnfile, dicthash, CDhash, &CIhash, ignore_wpos);
163
print_counts(countfn,CIhash,CDhash);
166
threshold = find_threshold(CDhash);
167
make_CD_heap(CDhash,threshold,&CDheap,&cdheapsize);
168
make_mdef_from_list(untiedmdeffn,CIlist,cilistsize,
169
CDheap,cdheapsize,argv[0]);