2
2
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4
4
* Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
6
6
* The contents of this file are subject to the terms of either the GNU Lesser
7
7
* General Public License Version 2.1 only ("LGPL") or the Common Development and
8
8
* Distribution License ("CDDL")(collectively, the "License"). You may not use this
9
9
* file except in compliance with the License. You can obtain a copy of the CDDL at
10
10
* http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
12
12
* specific language governing permissions and limitations under the License. When
13
13
* distributing the software, include this License Header Notice in each file and
14
14
* include the full text of the License in the License file as well as the
15
15
* following notice:
17
17
* NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
19
19
* For Covered Software in this distribution, this License shall be governed by the
48
48
#include "idngram.h"
51
// Drains `merger` and writes id-ngram records to `out`. Each record written
// here is N word ids (prevItem.ids) followed by one frequency value.
// NOTE(review): this listing is fragmentary -- the template header, the
// braces and the loop/termination logic around these statements are not
// visible, so the exact control flow must be confirmed against the full file.
void DoIdngramMerge(FILE*out, CMultiWayFileMerger<CSIM_IdngramFreq<N> > &merger)
54
// Holds the n-gram whose frequency is currently being accumulated.
CSIM_IdngramFreq<N> prevItem;
56
// Ask the merger for its current best entry; it is dereferenced twice to
// reach the unit/paragraph info.
// NOTE(review): presumably the smallest pending item -- confirm in
// CMultiWayFileMerger.
file_para<CSIM_IdngramFreq<N> > * ppara = merger.getBest();
57
TUnitAndParaInfo<CSIM_IdngramFreq<N> > & upi = *(*ppara);
59
// Flush the pending item, but only if it carries a non-zero frequency.
// NOTE(review): presumably the end-of-input flush -- the guarding condition
// is not visible here.
if (prevItem.freq != 0) {
60
fwrite(prevItem.ids, sizeof(TSIMWordId), N, out);
61
// NOTE(review): assumes freq is an unsigned int; fwrite results are ignored.
fwrite(&(prevItem.freq), sizeof(unsigned int), 1, out);
65
CSIM_IdngramFreq<N>& ng = upi.unit;
66
// A different n-gram than the pending one: emit the pending record first.
if (!(prevItem == ng)) {
67
if (prevItem.freq != 0) {
68
fwrite(prevItem.ids, sizeof(TSIMWordId), N, out);
69
fwrite(&(prevItem.freq), sizeof(unsigned int), 1, out);
73
// Same n-gram as the pending one (presumably the else branch): add its count.
// NOTE(review): the assert only checks that freq is not already UINT_MAX;
// it does not prove that freq + ng.freq cannot wrap.
assert (prevItem.freq < UINT_MAX);
74
prevItem.freq += ng.freq;
81
// Registers one merger input per entry of `para_offsets`, all backed by the
// single file `swap`, then runs DoIdngramMerge<N> to write the result to `out`.
// NOTE(review): this listing is fragmentary -- the template header, braces
// and the declaration/update of `s` are not visible.
void ProcessingIdngramMerge(FILE *swap, FILE* out, std::vector<long>& para_offsets)
83
CMultiWayFileMerger<CSIM_IdngramFreq<N> > merger;
85
// NOTE(review): `int i` is compared against the unsigned size() result.
for (int i=0; i < para_offsets.size(); ++i) {
86
// NOTE(review): presumably `s` is the start offset of the paragraph and
// para_offsets[i] its end offset -- confirm against addPara's declaration.
merger.addPara(swap, s, para_offsets[i]);
89
DoIdngramMerge<N>(out, merger);
93
// Registers every file in `file_list` as one merger input, then runs
// DoIdngramMerge<N> to write the result to `out`.
// NOTE(review): this listing is fragmentary -- the template header and
// braces are not visible.
void ProcessingIdngramMerge(FILE* out, std::vector<FILE* >& file_list)
95
CMultiWayFileMerger<CSIM_IdngramFreq<N> > merger;
96
// NOTE(review): `int i` is compared against the unsigned size() result.
for (int i=0; i < file_list.size(); ++i) {
97
// Seek to the end so that ftell() below reports the end position of the file.
// NOTE(review): the fseek/ftell return values are not checked.
fseek(file_list[i], 0, SEEK_END);
98
// Passes 0 and the end position; presumably the (start, end) byte range of
// the whole file -- confirm against addPara's declaration.
merger.addPara(file_list[i], 0, ftell(file_list[i]));
100
DoIdngramMerge<N>(out, merger);
51
// Drains `merger` and writes id-ngram records to `out`. Each record written
// here is N word ids (prevItem.ids) followed by one frequency value.
// NOTE(review): this listing is fragmentary -- the template header, closing
// braces and the loop/termination logic around these statements are not
// visible, so the exact control flow must be confirmed against the full file.
void DoIdngramMerge(FILE*out,
52
CMultiWayFileMerger<CSIM_IdngramFreq<N> > &merger){
54
// Holds the n-gram whose frequency is currently being accumulated.
CSIM_IdngramFreq<N> prevItem;
56
// Ask the merger for its current best entry; it is dereferenced twice to
// reach the unit/paragraph info.
// NOTE(review): presumably the smallest pending item -- confirm in
// CMultiWayFileMerger.
file_para<CSIM_IdngramFreq<N> > * ppara = merger.getBest();
57
TUnitAndParaInfo<CSIM_IdngramFreq<N> > & upi = *(*ppara);
59
// Flush the pending item, but only if it carries a non-zero frequency.
// NOTE(review): presumably the end-of-input flush -- the guarding condition
// is not visible here.
if (prevItem.freq != 0) {
60
fwrite(prevItem.ids, sizeof(TSIMWordId), N, out);
61
// NOTE(review): assumes freq is an unsigned int; fwrite results are ignored.
fwrite(&(prevItem.freq), sizeof(unsigned int), 1, out);
65
CSIM_IdngramFreq<N>& ng = upi.unit;
66
// A different n-gram than the pending one: emit the pending record first.
if (!(prevItem == ng)) {
67
if (prevItem.freq != 0) {
68
fwrite(prevItem.ids, sizeof(TSIMWordId), N, out);
69
fwrite(&(prevItem.freq), sizeof(unsigned int), 1, out);
73
// Same n-gram as the pending one (presumably the else branch): add its count.
// NOTE(review): the assert only checks that freq is not already UINT_MAX;
// it does not prove that freq + ng.freq cannot wrap.
assert(prevItem.freq < UINT_MAX);
74
prevItem.freq += ng.freq;
81
// Registers one merger input per entry of `para_offsets`, all backed by the
// single file `swap`, then runs DoIdngramMerge<N> to write the result to `out`.
// NOTE(review): this listing is fragmentary -- the template header, the
// parameter line declaring `out`, and the declaration/update of `s` are not
// visible.
void ProcessingIdngramMerge(FILE *swap,
83
std::vector<long>& para_offsets){
84
CMultiWayFileMerger<CSIM_IdngramFreq<N> > merger;
86
for (size_t i = 0; i < para_offsets.size(); i++) {
87
// NOTE(review): presumably `s` is the start offset of the paragraph and
// para_offsets[i] its end offset -- confirm against addPara's declaration.
merger.addPara(swap, s, para_offsets[i]);
90
DoIdngramMerge<N>(out, merger);
94
// Registers every file in `file_list` as one merger input, then runs
// DoIdngramMerge<N> to write the result to `out`.
// NOTE(review): this listing is fragmentary -- the template header and
// closing braces are not visible.
void ProcessingIdngramMerge(FILE* out, std::vector<FILE* >& file_list){
95
CMultiWayFileMerger<CSIM_IdngramFreq<N> > merger;
96
for (size_t i = 0; i < file_list.size(); ++i) {
97
// Seek to the end so that ftell() below reports the end position of the file.
// NOTE(review): the fseek/ftell return values are not checked.
fseek(file_list[i], 0, SEEK_END);
98
// Passes 0 and the end position; presumably the (start, end) byte range of
// the whole file -- confirm against addPara's declaration.
merger.addPara(file_list[i], 0, ftell(file_list[i]));
100
DoIdngramMerge<N>(out, merger);