2
2
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4
4
* Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
6
6
* The contents of this file are subject to the terms of either the GNU Lesser
7
7
* General Public License Version 2.1 only ("LGPL") or the Common Development and
8
8
* Distribution License ("CDDL")(collectively, the "License"). You may not use this
9
9
* file except in compliance with the License. You can obtain a copy of the CDDL at
10
10
* http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
12
12
* specific language governing permissions and limitations under the License. When
13
13
* distributing the software, include this License Header Notice in each file and
14
14
* include the full text of the License in the License file as well as the
15
15
* following notice:
17
17
* NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
19
19
* For Covered Software in this distribution, this License shall be governed by the
21
21
* Any litigation relating to this License shall be subject to the jurisdiction of
22
22
* the Federal Courts of the Northern District of California and the state courts
23
23
* of the State of California, with venue lying in Santa Clara County, California.
27
27
* If you wish your version of this file to be governed by only the CDDL or only
28
28
* the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
29
29
* include this software in this distribution under the [CDDL or LGPL Version 2.1]
32
32
* Version 2.1, or to extend the choice of license to its licensees as provided
33
33
* above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
34
34
* Version 2 license, then the option applies only if the new code is made subject
35
* to such option by the copyright holder.
35
* to such option by the copyright holder.
38
38
#ifdef HAVE_CONFIG_H
63
63
typedef std::vector<TState> iterator;
66
getLevelSize(int lvl) { return m_LevelSizes[lvl]; }
68
return m_LevelSizes[lvl];
69
getN() { return m_N; }
72
78
beginLevel(int lvl, iterator& it);
75
next(iterator& it) { ++(it.back()); adjustIterator(it); }
83
++(it.back()); adjustIterator(it);
78
isEnd(iterator& it) { return (((it.back().getIdx()) + 1) == getLevelSize(it.back().getLevel())); }
89
return(((it.back().getIdx()) + 1) == getLevelSize(it.back().getLevel()));
81
93
getNodePtr(TState s);
111
123
CIterateThreadSlm::beginLevel(int lvl, iterator& it)
114
if (lvl > m_N) return false;
115
for (int i=0; i <= lvl; ++i)
126
if (lvl > (int) m_N) return false;
127
for (int i = 0; i <= lvl; ++i)
116
128
it.push_back(TState(i, 0));
117
129
adjustIterator(it);
133
145
CIterateThreadSlm::adjustIterator(iterator& it)
135
147
// if (!isEnd(it)) {
136
for (int lvl = it.size()-2; lvl >= 0; --lvl) {
148
for (int lvl = it.size() - 2; lvl >= 0; --lvl) {
137
149
int sz = getLevelSize(lvl);
138
unsigned child = (it[lvl+1]).getIdx();
139
while ((it[lvl].getIdx() < (sz-1)) && ( (((TNode*)getNodePtr(it[lvl]))+1)->ch() <= child )) {
150
unsigned child = (it[lvl + 1]).getIdx();
151
while ((it[lvl].getIdx() < (sz - 1)) &&
152
((((TNode*)getNodePtr(it[lvl])) + 1)->ch() <= child)) {
148
162
printf("Usage:\n");
149
163
printf(" tslminfo [options] threaded_slm_file\n");
150
164
printf("\nDescription:\n");
151
printf(" tslminfo tell information of a threaded back-off language model 'threaded_slm_file'. It can also print the model to ARPA format.");
152
printf(" When no options given, slminfo will only print number of items in each level of the language model.\n");
166
" tslminfo tell information of a threaded back-off language model 'threaded_slm_file'. It can also print the model to ARPA format.");
168
" When no options given, slminfo will only print number of items in each level of the language model.\n");
153
169
printf("\nOptions:\n");
154
170
printf(" -v # Verbose mode, printing arpa format.\n");
155
printf(" -p # Prefer normal probability instead of -log(Pr) which is default. Valid under -v option.\n");
156
printf(" -l dict_file # Lexicon. Valid under -v option. Substitute the word-id with word-text in the output.\n");
172
" -p # Prefer normal probability instead of -log(Pr) which is default. Valid under -v option.\n");
174
" -l dict_file # Lexicon. Valid under -v option. Substitute the word-id with word-text in the output.\n");
161
static bool verbose = false;
179
static bool verbose = false;
162
180
static char *lexicon_filename = NULL;
163
static bool use_log_pr = true;
181
static bool use_log_pr = true;
165
183
static struct option long_options[] =
167
{"verbose", 0, 0, 'v'},
169
{"lexicon", 1, 0, 'l'},
185
{ "verbose", 0, 0, 'v' },
187
{ "lexicon", 1, 0, 'l' },
173
static void getParameters(int argc, char* argv[])
192
getParameters(int argc, char* argv[])
175
int c, option_index = 0;
176
while ((c=getopt_long(argc, argv, "vpl:", long_options, &option_index)) != -1)
194
int c, option_index = 0;
196
getopt_long(argc, argv, "vpl:", long_options,
197
&option_index)) != -1) {
183
203
lexicon_filename = strdup(optarg);
186
206
use_log_pr = false;
192
212
if (use_log_pr == false && !verbose) ShowUsage();
193
213
if (lexicon_filename != NULL && !verbose) ShowUsage();
194
if (optind != argc-1) ShowUsage();
214
if (optind != argc - 1) ShowUsage();
197
217
typedef std::map<unsigned int, std::string> TReverseLexicon;
201
PrintARPA(CIterateThreadSlm& itslm, const char* lexicon_filename, bool use_log_pr)
221
PrintARPA(CIterateThreadSlm& itslm,
222
const char* lexicon_filename,
203
static unsigned int id;
225
static unsigned int id;
204
226
static char word[10240];
206
228
TReverseLexicon* plexicon = NULL;
219
241
while (*p == ' ' || *p == '\t')
221
243
if (!(*p >= '0' && *p <= '9')) continue;
222
for (id=0; *p >= '0' && *p <= '9'; ++p)
223
id = 10*id + (*p - '0');
244
for (id = 0; *p >= '0' && *p <= '9'; ++p)
245
id = 10 * id + (*p - '0');
224
246
(*plexicon)[id] = std::string(word);
230
252
CIterateThreadSlm::iterator it;
231
253
for (int lvl = 0; lvl <= itslm.getN(); ++lvl) {
232
printf("\\%d-gram\\%d\n", lvl, itslm.getLevelSize(lvl)-1);
233
for (itslm.beginLevel(lvl,it); !itslm.isEnd(it); itslm.next(it)){
234
for (int i=1; i < lvl; ++i) {
235
CIterateThreadSlm::TNode*pn = (CIterateThreadSlm::TNode*)itslm.getNodePtr(it[i]);
254
printf("\\%d-gram\\%d\n", lvl, itslm.getLevelSize(lvl) - 1);
255
for (itslm.beginLevel(lvl, it); !itslm.isEnd(it); itslm.next(it)) {
256
for (int i = 1; i < lvl; ++i) {
257
CIterateThreadSlm::TNode*pn =
258
(CIterateThreadSlm::TNode*)itslm.getNodePtr(it[i]);
236
259
if (plexicon != NULL)
237
260
printf("%s ", (*plexicon)[pn->wid()].c_str());
239
262
printf("%9d ", pn->wid());
241
264
if (lvl < itslm.getN()) {
242
CIterateThreadSlm::TNode*pn = (CIterateThreadSlm::TNode*)itslm.getNodePtr(it[lvl]);
265
CIterateThreadSlm::TNode*pn =
266
(CIterateThreadSlm::TNode*)itslm.getNodePtr(it[lvl]);
244
268
if (plexicon != NULL)
245
269
printf("%s ", ((*plexicon)[pn->wid()]).c_str());
252
276
printf("%16.12lf %16.12lf ", pr, bow);
253
277
printf("(%1u,%u)\n", pn->bol(), pn->bon());
255
CIterateThreadSlm::TLeaf*pn = (CIterateThreadSlm::TLeaf*)itslm.getNodePtr(it[lvl]);
279
CIterateThreadSlm::TLeaf*pn =
280
(CIterateThreadSlm::TLeaf*)itslm.getNodePtr(it[lvl]);
257
282
if (plexicon != NULL)
258
283
printf("%s ", ((*plexicon)[pn->wid()]).c_str());
282
307
CIterateThreadSlm itslm;
284
if (itslm.load(argv[argc-1], true)) {
309
if (itslm.load(argv[argc - 1], true)) {
286
311
printf("Total %d level ngram: ", itslm.getN());
287
for (int lvl=1; lvl <=itslm.getN(); ++lvl)
288
printf("%d ", itslm.getLevelSize(lvl)-1);
289
printf((itslm.isUseLogPr())?" using -log(pr)\n":" using direct pr\n");
312
for (int lvl = 1; lvl <= itslm.getN(); ++lvl)
313
printf("%d ", itslm.getLevelSize(lvl) - 1);
315
(itslm.isUseLogPr()) ? " using -log(pr)\n" :
316
" using direct pr\n");
291
318
PrintARPA(itslm, lexicon_filename, use_log_pr);