2
2
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4
4
* Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
6
6
* The contents of this file are subject to the terms of either the GNU Lesser
7
7
* General Public License Version 2.1 only ("LGPL") or the Common Development and
8
8
* Distribution License ("CDDL")(collectively, the "License"). You may not use this
9
9
* file except in compliance with the License. You can obtain a copy of the CDDL at
10
10
* http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
12
12
* specific language governing permissions and limitations under the License. When
13
13
* distributing the software, include this License Header Notice in each file and
14
14
* include the full text of the License in the License file as well as the
15
15
* following notice:
17
17
* NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
19
19
* For Covered Software in this distribution, this License shall be governed by the
21
21
* Any litigation relating to this License shall be subject to the jurisdiction of
22
22
* the Federal Courts of the Northern District of California and the state courts
23
23
* of the State of California, with venue lying in Santa Clara County, California.
27
27
* If you wish your version of this file to be governed by only the CDDL or only
28
28
* the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
29
29
* include this software in this distribution under the [CDDL or LGPL Version 2.1]
32
32
* Version 2.1, or to extend the choice of license to its licensees as provided
33
33
* above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
34
34
* Version 2 license, then the option applies only if the new code is made subject
35
* to such option by the copyright holder.
35
* to such option by the copyright holder.
38
38
#ifdef HAVE_CONFIG_H
61
61
static struct option long_options[] =
66
{"discount", 1, 0, 'd'},
67
{"wordcount",1, 0, 'w'},
68
{"breakid", 1, 0, 'b'},
69
{"excludeid",1, 0, 'e'},
63
{ "ngram", 1, 0, 'n' },
66
{ "discount", 1, 0, 'd' },
67
{ "wordcount", 1, 0, 'w' },
68
{ "breakid", 1, 0, 'b' },
69
{ "excludeid", 1, 0, 'e' },
74
static void ShowUsage(void)
78
80
slmbuild options idngram\n\
127
129
static CSlmBuilder builder;
128
130
static char* inputfilename = NULL;
129
131
static char* outfilename = NULL;
130
132
static std::vector<CSlmDiscounter *> discounter;
132
static void getParameters(int argc, char* argv[])
135
getParameters(int argc, char* argv[])
135
char *ac=NULL, *cuts=NULL, *idstring=NULL, *dis_str=NULL;
138
char *ac = NULL, *cuts = NULL, *idstring = NULL, *dis_str = NULL;
136
139
std::vector<TSIMWordId> ids;
137
140
std::vector<CSlmBuilder::FREQ_TYPE> threshold;
138
141
bool bUseLogPr = false;
140
while ((c=getopt_long(argc, argv, "lw:n:c:d:o:b:e:", long_options, NULL)) != -1)
144
getopt_long(argc, argv, "lw:n:c:d:o:b:e:", long_options,
142
146
int n, rmax, cut;
216
builder.SetUseLogPr(((bUseLogPr)?1:0));
217
if (optind == argc-1) {
220
builder.SetUseLogPr(((bUseLogPr) ? 1 : 0));
221
if (optind == argc - 1) {
218
222
inputfilename = strdup(argv[optind]);
219
223
builder.SetDiscounter(&(discounter[0]));
221
225
fprintf(stderr, "Parameter input_file error\n");
222
for (int i=optind; i < argc; ++i)
226
for (int i = optind; i < argc; ++i)
223
227
fprintf(stderr, "%s ", argv[i]);
224
228
fprintf(stderr, "\n");
229
int main(int argc, char* argv[])
234
main(int argc, char* argv[])
231
236
getParameters(argc, argv);
233
TSIMWordId * ngram = new TSIMWordId[N+1];
238
TSIMWordId * ngram = new TSIMWordId[N + 1];
234
239
CSlmBuilder::FREQ_TYPE freq;
236
241
printf("Reading and Processing raw idngram..."); fflush(stdout);
237
242
FILE *fp = fopen(inputfilename, "rb");
239
while (fread(ngram, sizeof(TSIMWordId), N, fp) == N && fread(&freq, sizeof(freq), 1, fp)==1) {
244
while (fread(ngram, sizeof(TSIMWordId), N, fp) == (size_t) N
245
&& fread(&freq, sizeof(freq), 1, fp) == 1) {
240
246
builder.AddNGram(ngram, freq);