2
* Copyright (c) 2003 Nara Institute of Science and Technology
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
14
* 3. The name Nara Institute of Science and Technology may not be used to
15
* endorse or promote products derived from this software without
16
* specific prior written permission.
18
* THIS SOFTWARE IS PROVIDED BY Nara Institute of Science and Technology
19
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
21
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Nara Institute
22
* of Science and Technology BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
24
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
* $Id: dartsdic.cpp,v 1.15 2003/07/30 16:06:57 kazuma-t Exp $
45
/*
 * Double-array type from the Darts library, instantiated with
 * <char, unsigned char, long, unsigned long>.
 * NOTE(review): the name this typedef introduces is on a line that is not
 * visible in this excerpt; presumably it is the DoubleArrayL used further
 * down -- confirm.
 */
typedef Darts::DoubleArrayImpl<char, unsigned char, long, unsigned long>
55
/*
 * Table of entries collected while building: key string -> long value.
 * It is a multimap, so one key may be stored with several values.
 */
typedef std::multimap<std::string, long> Hash;
56
/* Element (key/value pair) type of Hash; used to construct entries for insert(). */
typedef Hash::value_type HashVal;
64
/*
 * da_open: set up a darts_t handle from three files.
 *
 *   daname  - mapped with cha_mmap_file(); the mapping is stored in
 *             da->da_mmap and its cha_mmap_map() value is handed to the
 *             double array via setArray()
 *   lexname - mapped with cha_mmap_file() into da->lex_mmap
 *   datname - mapped with cha_mmap_file() into da->dat_mmap
 *
 * The handle itself is obtained from cha_malloc(); the double array object
 * is created with `new`.
 *
 * NOTE(review): the declaration of `da`, the place where `darts` is attached
 * to the handle, and the return statement are on lines not visible in this
 * excerpt.
 * NOTE(review): no result is checked here; presumably cha_malloc() and
 * cha_mmap_file() do not return on failure -- confirm.
 */
da_open(char *daname, char *lexname, char *datname)
67
DoubleArrayL *darts = new DoubleArrayL;
69
da = (darts_t*)cha_malloc(sizeof(darts_t));
70
da->da_mmap = cha_mmap_file(daname);
71
/* The double array works directly on the mapped file contents. */
darts->setArray(cha_mmap_map(da->da_mmap));
73
da->lex_mmap = cha_mmap_file(lexname);
74
da->dat_mmap = cha_mmap_file(datname);
80
/*
 * da_lookup: run a commonPrefixSearch() for `key`.
 *
 *   key, key_len - key bytes and their length, forwarded to the search
 *   indecies     - caller-provided array passed as the result buffer
 *   num          - passed as the third argument; presumably the capacity of
 *                  `indecies` -- confirm against the Darts API
 *
 * NOTE(review): the object expression commonPrefixSearch() is invoked on
 * (and the return) is on a line not visible in this excerpt.
 */
da_lookup(darts_t *da, char *key, int key_len, long *indecies, int num)
83
->commonPrefixSearch(key, indecies, num, key_len);
86
/* Shorthand for the cha_mmap_map() value of handle d's lex file mapping. */
#define lex_map(d) cha_mmap_map((d)->lex_mmap)
87
/* Shorthand for the cha_mmap_map() value of handle d's dat file mapping. */
#define dat_map(d) cha_mmap_map((d)->dat_mmap)
90
/*
 * da_get_lex: copy the record found at byte offset `index` of the lex map
 * into `lex_data`.
 *
 * Record layout as read here:
 *   short            key length   -> stored to *key_len
 *   short            entry count  -> num
 *   da_lex_t[count]  entries      -> copied into lex_data[0 .. count-1]
 *
 * NOTE(review): the entry count is not checked against the capacity of
 * lex_data; the caller must supply enough room -- confirm the bound used by
 * callers.
 * NOTE(review): the two shorts are read through a cast pointer, which assumes
 * `base` is suitably aligned for short and that the file was written with the
 * same short size and byte order -- confirm.
 * NOTE(review): the declarations of `i` and `num`, the end of the loop and
 * the return statement are on lines not visible in this excerpt.
 */
da_get_lex(darts_t *da, long index, da_lex_t *lex_data, int *key_len)
93
char *base = (char *)lex_map(da) + index;
95
*key_len = ((short *)base)[0];
96
num = ((short *)base)[1];
97
/* Skip the two-short header; the entries follow immediately. */
base += sizeof(short) * 2;
99
for (i = 0; i < num; i++) {
100
memcpy((void*)(lex_data + i),
101
(void*)base, sizeof(da_lex_t));
102
base += sizeof(da_lex_t);
109
/*
 * da_get_lex_base: takes a darts_t handle.
 * NOTE(review): return type and body are not visible in this excerpt; going
 * by the name it presumably yields lex_map(da) -- confirm.
 */
da_get_lex_base(darts_t *da)
115
/*
 * da_get_dat_base: takes a darts_t handle.
 * NOTE(review): return type and body are not visible in this excerpt; going
 * by the name it presumably yields dat_map(da) -- confirm.
 */
da_get_dat_base(darts_t *da)
121
/*
 * da_build_new: create a builder.
 *
 * The da_build_t comes from cha_malloc(); it is given an empty Hash in
 * `entries` and a heap-allocated std::string copy of `path` in `path`.
 *
 *   path - C string copied into a std::string; must not be NULL, since
 *          constructing std::string from a null pointer is undefined
 *
 * NOTE(review): `entries` and `path` are created with `new` and no matching
 * delete is visible in this excerpt -- confirm where a builder is released.
 * NOTE(review): the declaration of `builder` and the return statement are on
 * lines not visible in this excerpt.
 */
da_build_new(char *path)
125
builder = (da_build_t*)cha_malloc(sizeof(da_build_t));
126
builder->entries = new Hash;
127
builder->path = new std::string(path);
133
/*
 * da_build_add: record the pair (key, val) in the builder's table.
 *
 * `key` is copied into the std::string held by the HashVal, so the table does
 * not keep a pointer to the caller's buffer.  The table is a multimap, so
 * adding a key that is already present keeps both entries.
 */
da_build_add(da_build_t *builder, char *key, long val)
135
builder->entries->insert(HashVal(key, val));
139
/*
 * redump_lex: append one record to lexfile.
 *
 *   key_len - written as the first short of the record
 *   indices - byte offsets relative to `tmpfile`; one da_lex_t is copied
 *             from each offset
 *   tmpfile - base address the offsets are added to (used as a memory
 *             pointer here, not as a FILE)
 *   lexfile - output stream
 *
 * Record written: short key_len, short indices.size(), then the da_lex_t
 * entries in the order given by `indices`.
 *
 * `index` holds ftell(lexfile) taken before anything is written.
 * NOTE(review): presumably `index` is the return value (the return statement
 * is not visible in this excerpt); the caller in da_build_dump treats a
 * negative result as an error, which matches ftell() failing with -1.
 * NOTE(review): both (short) casts silently truncate values that do not fit
 * in a short, and none of the fwrite() results are checked.
 * NOTE(review): the declaration of `buf` is on a line not visible here.
 */
redump_lex(size_t key_len, std::vector<long>& indices,
140
char* tmpfile, FILE* lexfile)
142
long index = ftell(lexfile);
145
buf = (short)key_len;
146
fwrite(&buf, sizeof(short), 1, lexfile);
147
buf = (short)indices.size();
148
fwrite(&buf, sizeof(short), 1, lexfile);
149
for (std::vector<long>::iterator i = indices.begin();
150
i != indices.end(); i++) {
151
da_lex_t* lex = (da_lex_t*)(tmpfile + *i);
152
fwrite(lex, sizeof(da_lex_t), 1, lexfile);
159
/*
 * da_build_dump: write every collected entry to lexfile, then build and save
 * the double array that indexes them.
 *
 *   builder - supplies the entries table and the output path
 *   tmpfile - base address forwarded to redump_lex()
 *   lexfile - stream that redump_lex() appends records to
 *
 * For each distinct key, the values stored under it are gathered into
 * lex_indices and passed to redump_lex(); the value it yields is stored as
 * that key's value for the double array.  Progress counts are printed to
 * std::cerr.
 * Returns the number of entries in the table (not the count printed as
 * "keys").
 *
 * NOTE(review): keys, lens and vals are allocated with new[] and no delete[]
 * is visible in this excerpt -- looks like a leak; confirm.
 * NOTE(review): the declarations of `size` and `da`, the increment of `size`
 * and any reset of lex_indices between keys are on lines not visible here.
 * NOTE(review): the results of da.build() and da.save() are not checked.
 */
da_build_dump(da_build_t* builder, char* tmpfile, FILE* lexfile)
161
Hash::iterator i, last;
162
Hash* entries = builder->entries;
163
/*
 * One slot per entry is an upper bound: there can be no more distinct keys
 * than entries.
 */
char** keys = new char*[entries->size()];
164
size_t* lens = new size_t[entries->size()];
165
long* vals = new long[entries->size()];
167
std::vector<long> lex_indices;
169
std::cerr << entries->size() << " entries" << std::endl;
171
i = entries->begin();
172
while (i != entries->end()) {
173
const std::string& key = i->first;
174
/* [i, last) is the run of entries that share this key. */
last = entries->upper_bound(key);
176
for (; i != last; i++) {
177
lex_indices.push_back(i->second);
179
lens[size] = key.size();
180
/* (const char*)keys[size] = key.data();*/
181
/*
 * Points into the std::string owned by the multimap element, so it is only
 * valid while `entries` is left unmodified.  The cast drops const.
 */
keys[size] = (char*)key.data();
182
vals[size] = redump_lex(lens[size], lex_indices, tmpfile, lexfile);
183
/* A negative result is reported together with the key, then cha_exit_perror() is called. */
if (vals[size] < 0) {
184
std::cerr << "Unexpected error at " << key << std::endl;
185
cha_exit_perror("build darts file");
189
std::cerr << size << " keys" << std::endl;
192
/*
 * The multimap was walked in its sorted key order, so keys[] is sorted;
 * presumably build() relies on that -- confirm against the Darts docs.
 */
da.build(size, keys, lens, vals);
193
da.save(builder->path->c_str(), "wb");
195
return builder->entries->size();