2
2
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4
4
* Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
6
6
* The contents of this file are subject to the terms of either the GNU Lesser
7
7
* General Public License Version 2.1 only ("LGPL") or the Common Development and
8
8
* Distribution License ("CDDL")(collectively, the "License"). You may not use this
9
9
* file except in compliance with the License. You can obtain a copy of the CDDL at
10
10
* http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
12
12
* specific language governing permissions and limitations under the License. When
13
13
* distributing the software, include this License Header Notice in each file and
14
14
* include the full text of the License in the License file as well as the
15
15
* following notice:
17
17
* NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
19
19
* For Covered Software in this distribution, this License shall be governed by the
21
21
* Any litigation relating to this License shall be subject to the jurisdiction of
22
22
* the Federal Courts of the Northern District of California and the state courts
23
23
* of the State of California, with venue lying in Santa Clara County, California.
27
27
* If you wish your version of this file to be governed by only the CDDL or only
28
28
* the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
29
29
* include this software in this distribution under the [CDDL or LGPL Version 2.1]
32
32
* Version 2.1, or to extend the choice of license to its licensees as provided
33
33
* above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
34
34
* Version 2 license, then the option applies only if the new code is made subject
35
* to such option by the copyright holder.
35
* to such option by the copyright holder.
38
38
#ifdef HAVE_CONFIG_H
102
108
if (n == 0) return 0;
105
while (n > N) { --n; ++hw; }
107
void* pstate = ((TNode*)level[0]);
108
for (int lvl=0; lvl < n && pstate != NULL; ++lvl) {
109
int h = ((TNode*)pstate)->child;
110
int t = (((TNode*)pstate)+1)->child;
112
TLeaf* p = (TLeaf*)level[lvl+1];
113
pstate = (void*)binary_find_id(p+h, p+t, hw[lvl]);
114
m = (pstate != NULL)?(((TLeaf*)pstate) - p):(-1);
116
TNode* p = (TNode*)level[lvl+1];
117
pstate = (void*)binary_find_id(p+h, p+t, hw[lvl]);
118
m = (pstate != NULL)?(((TNode*)pstate) - p):(-1);
115
void* pstate = ((TNode*)level[0]);
116
for (int lvl = 0; lvl < n && pstate != NULL; ++lvl) {
117
int h = ((TNode*)pstate)->child;
118
int t = (((TNode*)pstate) + 1)->child;
120
TLeaf* p = (TLeaf*)level[lvl + 1];
121
pstate = (void*)binary_find_id(p + h, p + t, hw[lvl]);
122
m = (pstate != NULL) ? (((TLeaf*)pstate) - p) : (-1);
124
TNode* p = (TNode*)level[lvl + 1];
125
pstate = (void*)binary_find_id(p + h, p + t, hw[lvl]);
126
m = (pstate != NULL) ? (((TNode*)pstate) - p) : (-1);
125
CSIMSlmWithIteration::findBackOffState(int n, TSIMWordId*hw, unsigned & bol, unsigned& bon)
133
CSIMSlmWithIteration::findBackOffState(int n,
129
140
int idx = findState(n, hw);
130
if (idx >= 0 && ((TNode*)(level[n]))[idx].child < ((TNode*)(level[n]))[idx+1].child) {
141
if (idx >= 0 && ((TNode*)(level[n]))[idx].child <
142
((TNode*)(level[n]))[idx + 1].child) {
131
143
bol = n; bon = idx; return;
139
CSIMSlmWithIteration::getIdString(TLevelIterator& it, std::vector<TSIMWordId>& history)
151
CSIMSlmWithIteration::getIdString(TLevelIterator& it,
152
std::vector<TSIMWordId>& history)
142
for (int i=1, tmp_sz=it.m_history.size(); i < tmp_sz; ++i) {
155
for (int i = 1, tmp_sz = it.m_history.size(); i < tmp_sz; ++i) {
143
156
int idx = it.m_history[i];
145
158
history.push_back(((TLeaf*)(level[i]))[idx].id);
152
165
CSIMSlmWithIteration::beginLevelIteration(int lvl, TLevelIterator& it)
154
167
it.m_history.clear();
155
for (int i=0, tmp_sz=lvl; i <= tmp_sz; ++i)
168
for (int i = 0, tmp_sz = lvl; i <= tmp_sz; ++i)
156
169
it.m_history.push_back(0);
157
170
adjustIterator(it);
168
181
CSIMSlmWithIteration::isEnd(TLevelIterator& it)
170
return ((it.m_history.back()+1 >= sz[it.m_history.size()-1]));
183
return((it.m_history.back() + 1 >= sz[it.m_history.size() - 1]));
174
187
CSIMSlmWithIteration::adjustIterator(TLevelIterator& it)
176
189
int ch = it.m_history.back();
177
for (int i= it.m_history.size()-2; i >= 0; --i) {
190
for (int i = it.m_history.size() - 2; i >= 0; --i) {
179
192
int& parent = it.m_history[i];
180
193
TNode* pn = (TNode*)(level[i]);
181
while (parent < len && pn[parent+1].child <= ch)
194
while (parent < len && pn[parent + 1].child <= ch)
188
201
CSIMSlmWithIteration::getNodePtr(TLevelIterator& it)
190
int lvl = it.m_history.size()-1;
203
int lvl = it.m_history.size() - 1;
191
204
int idx = it.m_history.back();
193
return (((TLeaf*)(level[lvl]))+idx);
206
return(((TLeaf*)(level[lvl])) + idx);
195
return (((TNode*)(level[lvl]))+idx);
208
return(((TNode*)(level[lvl])) + idx);
202
216
printf("Usage:\n");
203
217
printf(" slmthread primitive_slm threaded_slm\n");
204
218
printf("\nDescription:\n");
205
printf(" slmthread add back-off-state for each slm node in the primitive_slm. ");
220
" slmthread add back-off-state for each slm node in the primitive_slm. ");
206
221
printf("Also it compresses 32-bit float into 16 bit representation.\n\n");
211
226
CThreadSlm::TNode* levels[16];
212
227
CThreadSlm::TLeaf* lastLevel;
214
int main(int argc, char* argv[])
230
main(int argc, char* argv[])
216
232
CValueCompressor vc;
217
233
unsigned int bol, bon;
235
251
bool usingLogPr = slm.isUseLogPr();
237
#define EffectivePr(a) (float((usingLogPr)?((a)/log(2.0)):(-log2((a)))))
238
#define OriginalPr(b) (float((usingLogPr)?((b)*log(2.0)):(exp2(-(b)))))
239
#define EffectiveBow(a) (float((usingLogPr)?(exp(-(a))):((a))))
240
#define OriginalBow(b) (float((usingLogPr)?(-log((b))):((b))))
253
#define EffectivePr(a) (float((usingLogPr) ? ((a) / log(2.0)) : (-log2((a)))))
254
#define OriginalPr(b) (float((usingLogPr) ? ((b) * log(2.0)) : (exp2(-(b)))))
255
#define EffectiveBow(a) (float((usingLogPr) ? (exp(-(a))) : ((a))))
256
#define OriginalBow(b) (float((usingLogPr) ? (-log((b))) : ((b))))
242
258
printf("\nfirst pass..."); fflush(stdout);
243
for (int lvl=0; lvl <= slm.getN(); ++lvl) {
259
for (int lvl = 0; lvl <= slm.getN(); ++lvl) {
244
260
CSIMSlmWithIteration::TLevelIterator it;
245
261
slm.beginLevelIteration(lvl, it);
246
262
for (; !slm.isEnd(it); slm.next(it)) {
269
285
// Following pr value should not be grouped, or as milestone values.
270
286
static float msprs[] = {
271
287
0.9, 0.8, 0.7, 0.6,
272
1.0/2, 1.0/4, 1.0/8, 1.0/16, 1.0/32, 1.0/64, 1.0/128,
273
1.0/256, 1.0/512, 1.0/1024, 1.0/2048, 1.0/4096, 1.0/8192,
274
1.0/16384, 1.0/32768, 1.0/65536
288
1.0 / 2, 1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32, 1.0 / 64, 1.0 / 128,
289
1.0 / 256, 1.0 / 512, 1.0 / 1024, 1.0 / 2048, 1.0 / 4096, 1.0 / 8192,
290
1.0 / 16384, 1.0 / 32768, 1.0 / 65536
277
for (unsigned i=0, sz=sizeof(msprs)/sizeof(float); i < sz; ++i) {
278
float real_pr = (usingLogPr)?(-log(msprs[i])):(msprs[i]);
293
for (unsigned i = 0, sz = sizeof(msprs) / sizeof(float); i < sz; ++i) {
294
float real_pr = (usingLogPr) ? (-log(msprs[i])) : (msprs[i]);
279
295
float eff_pr = EffectivePr(real_pr);
280
296
if (pr_eff.find(eff_pr) == pr_eff.end()) {
281
297
pr_eff[eff_pr] = real_pr;
292
308
0.00005, 0.00001, 0.000005, 0.000001, 0.0000005, 0.0000001
295
for (unsigned i=0, sz=sizeof(msbows)/sizeof(float); i < sz; ++i) {
296
float real_bow = (usingLogPr)?(-log(msbows[i])):(msbows[i]);
311
for (unsigned i = 0, sz = sizeof(msbows) / sizeof(float); i < sz; ++i) {
312
float real_bow = (usingLogPr) ? (-log(msbows[i])) : (msbows[i]);
297
313
float eff_bow = EffectiveBow(real_bow);
298
314
if (bow_eff.find(eff_bow) == bow_eff.end()) {
299
315
bow_eff[eff_bow] = real_bow;
326
342
printf("\nThreading the new model..."); fflush(stdout);
327
for (int lvl=0; lvl < slm.getN(); ++lvl) {
343
for (int lvl = 0; lvl < slm.getN(); ++lvl) {
328
344
levels[lvl] = new CThreadSlm::TNode[slm.getLevelSize(lvl)];
330
346
CSIMSlmWithIteration::TLevelIterator it;
367
383
nn.set_ch(pn->child);
369
assert(usingLogPr || (pr_table[idx_pr] > 0.0 && pr_table[idx_pr] < 1.0));
386
(pr_table[idx_pr] > 0.0 && pr_table[idx_pr] < 1.0));
370
387
assert(!usingLogPr || pr_table[idx_pr] > 0.0);
372
389
CSIMSlm::TNode* pn = (CSIMSlm::TNode*)slm.getNodePtr(it);
373
390
CThreadSlm::TNode& nn = levels[lvl][it.m_history.back()];
374
391
nn.set_ch(pn->child);
378
396
lastLevel = new CThreadSlm::TLeaf [slm.getLevelSize(slm.getN())];
379
397
CSIMSlmWithIteration::TLevelIterator it;
380
398
slm.beginLevelIteration(slm.getN(), it);
381
for (int lvl=slm.getN(); !slm.isEnd(it); slm.next(it)) {
399
for (int lvl = slm.getN(); !slm.isEnd(it); slm.next(it)) {
382
400
CSIMSlm::TLeaf* pn = slm.getNodePtr(it);
383
401
slm.getIdString(it, history);
384
402
slm.findBackOffState(lvl, &history[0], bol, bon);
415
433
fwrite(&len, sizeof(int), 1, fp);
417
435
fwrite(&pr_table[0], sizeof(float), pr_table.size(), fp);
418
for (int i = pr_table.size(), sz=(1 << CThreadSlm::BITS_PR); i < sz; ++i)
436
for (int i = pr_table.size(), sz = (1 << CThreadSlm::BITS_PR); i < sz; ++i)
419
437
fwrite(&dummy, sizeof(float), 1, fp);
421
439
fwrite(&bow_table[0], sizeof(float), bow_table.size(), fp);
422
for (int i = bow_table.size(), sz=(1 << CThreadSlm::BITS_BOW); i < sz; ++i)
440
for (int i = bow_table.size(), sz = (1 << CThreadSlm::BITS_BOW);
423
443
fwrite(&dummy, sizeof(float), 1, fp);
425
for (int lvl=0; lvl < N; ++lvl)
426
fwrite(levels[lvl], sizeof(CThreadSlm::TNode), slm.getLevelSize(lvl), fp);
445
for (int lvl = 0; lvl < N; ++lvl)
446
fwrite(levels[lvl], sizeof(CThreadSlm::TNode), slm.getLevelSize(
427
448
fwrite(lastLevel, sizeof(CThreadSlm::TLeaf), slm.getLevelSize(N), fp);
430
451
printf("done!\n"); fflush(stdout);
432
453
delete [] lastLevel;
433
for (int lvl=0; lvl < N; ++lvl)
434
delete []levels[lvl];
454
for (int lvl = 0; lvl < N; ++lvl)
455
delete [] levels[lvl];
436
457
bow_values.clear();