1
by Keita Maehara
Import upstream version 2.3.4 |
1 |
/*
|
2 |
* KAKASI (Kanji Kana Simple inversion program)
|
|
3 |
* $Id: dict.c,v 1.8 2001/09/03 10:02:38 rug Exp $
|
|
4 |
* Copyright (C) 1992
|
|
5 |
* Hironobu Takahashi (takahasi@tiny.or.jp)
|
|
6 |
*
|
|
7 |
* This program is free software; you can redistribute it and/or modify
|
|
8 |
* it under the terms of the GNU General Public License as published by
|
|
9 |
* the Free Software Foundation; either versions 2, or (at your option)
|
|
10 |
* any later version.
|
|
11 |
*
|
|
12 |
* This program is distributed in the hope that it will be useful
|
|
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15 |
* GNU General Public License for more details.
|
|
16 |
*
|
|
17 |
* You should have received a copy of the GNU General Public License
|
|
18 |
* along with KAKASI, see the file COPYING. If not, write to the Free
|
|
19 |
* Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
|
|
20 |
* 02111-1307, USA.
|
|
21 |
*/
|
|
22 |
/*
|
|
23 |
Modified by NOKUBI Takatsugu <knok@daionet.gr.jp>
|
|
24 |
1999/03/04
|
|
25 |
Rename PERLMOD macro to LIBRARY
|
|
26 |
1999/01/11
|
|
27 |
Add PERLMOD macro.
|
|
28 |
*/
|
|
29 |
||
30 |
#ifdef HAVE_CONFIG_H
|
|
31 |
# include <config.h>
|
|
32 |
#endif
|
|
33 |
||
34 |
#include <stdio.h> |
|
35 |
#include <ctype.h> |
|
36 |
#ifdef HAVE_STRING_H
|
|
37 |
# include <string.h>
|
|
38 |
#else
|
|
39 |
# include <strings.h>
|
|
40 |
#endif
|
|
41 |
#ifdef HAVE_MALLOC_H
|
|
42 |
# include <malloc.h>
|
|
43 |
#endif
|
|
44 |
#include <stdlib.h> |
|
45 |
#include "kakasi.h" |
|
46 |
||
47 |
#define BUFLEN 1024
|
|
48 |
||
49 |
#define IALLOCSIZE (1024*100)
|
|
50 |
#define CELLALLOC 5000
|
|
51 |
||
52 |
#ifndef KANWADICT
|
|
53 |
#define KANWADICT "./kanwadict"
|
|
54 |
#endif
|
|
55 |
||
56 |
/* variables for memory management */
|
|
57 |
void ** ary_charalloc = NULL; |
|
58 |
void ** ary_cellalloc = NULL; |
|
59 |
size_t ary_size_charalloc = -1; |
|
60 |
size_t ary_cur_charalloc = -1; |
|
61 |
size_t ary_size_cellalloc = -1; |
|
62 |
size_t ary_cur_cellalloc = -1; |
|
63 |
int point_charalloc = 0; |
|
64 |
unsigned char *ptr_charalloc = NULL; |
|
65 |
int point_cellalloc = 0; |
|
66 |
struct kanji_yomi *ptr_cellalloc = NULL; |
|
67 |
||
68 |
struct kanji_yomi *jisyo_table[0x80][0x80]; /* hash table */ |
|
69 |
int kanwa_load[0x80][0x80]; /* Is kanwadict put? */ |
|
70 |
struct kanwa_entry kanwa[0x60][0x60]; /* ujis onl 0xa0 is shift to code. */ |
|
71 |
||
72 |
static void |
|
73 |
add_ary_charalloc(ptr) |
|
74 |
void * ptr; |
|
75 |
{
|
|
76 |
ary_cur_charalloc ++; |
|
77 |
if (ary_charalloc == NULL || ary_cur_charalloc > ary_size_charalloc) { |
|
78 |
ary_size_charalloc += CELLALLOC; |
|
79 |
ary_charalloc = realloc(ary_charalloc, |
|
80 |
sizeof(void *) * ary_size_charalloc +1); |
|
81 |
}
|
|
82 |
ary_charalloc[ary_cur_charalloc] = ptr; |
|
83 |
}
|
|
84 |
||
85 |
static void |
|
86 |
add_ary_cellalloc(ptr) |
|
87 |
void * ptr; |
|
88 |
{
|
|
89 |
ary_cur_cellalloc ++; |
|
90 |
if (ary_cellalloc == NULL || ary_cur_cellalloc > ary_size_cellalloc) { |
|
91 |
ary_size_cellalloc += CELLALLOC; |
|
92 |
ary_cellalloc = realloc(ary_cellalloc, |
|
93 |
sizeof(void *) * ary_size_cellalloc +1); |
|
94 |
}
|
|
95 |
ary_cellalloc[ary_cur_cellalloc] = ptr; |
|
96 |
}
|
|
97 |
||
98 |
static unsigned char * |
|
99 |
charalloc(length) |
|
100 |
int length; |
|
101 |
{
|
|
102 |
unsigned char *ret; |
|
103 |
||
104 |
if ((ptr_charalloc == NULL) || (point_charalloc+length >= IALLOCSIZE)) { |
|
105 |
ptr_charalloc = (unsigned char *)malloc(IALLOCSIZE); |
|
106 |
add_ary_charalloc(ptr_charalloc); |
|
107 |
point_charalloc = 0; |
|
108 |
}
|
|
109 |
ret = ptr_charalloc+point_charalloc; |
|
110 |
point_charalloc += length; |
|
111 |
return ret; |
|
112 |
}
|
|
113 |
||
114 |
static struct kanji_yomi * |
|
115 |
cellalloc() |
|
116 |
{
|
|
117 |
||
118 |
if ((ptr_cellalloc == NULL) || (point_cellalloc >= CELLALLOC)) { |
|
119 |
char *cptr; |
|
120 |
cptr = malloc((CELLALLOC+1)*sizeof(struct kanji_yomi)); |
|
121 |
add_ary_cellalloc(cptr); |
|
122 |
if ((int)cptr & 7) cptr += 8 - ((int)cptr & 7); |
|
123 |
ptr_cellalloc = (struct kanji_yomi *) cptr; |
|
124 |
point_cellalloc = 0; |
|
125 |
}
|
|
126 |
++ point_cellalloc; |
|
127 |
return ptr_cellalloc ++; |
|
128 |
}
|
|
129 |
||
130 |
void init_jisyo() |
|
131 |
{
|
|
132 |
int c1, c2; |
|
133 |
||
134 |
for(c1 = 0; c1 < 0x80; c1 ++) |
|
135 |
for(c2 = 0; c2 < 0x80; c2 ++) |
|
136 |
jisyo_table[c1][c2] = NULL; |
|
137 |
}
|
|
138 |
||
139 |
/*
 * Convert the NUL-terminated line in BUFFER in place, stripping the
 * escape sequences ESC $ @ / ESC $ B (switch to two-byte mode) and
 * ESC ( B / ESC ( J (switch back), and setting the high bit on both
 * bytes of every pair seen while in two-byte mode.  Any other ESC is
 * copied through unchanged.
 *
 * The result is never longer than the input.  A byte left without a
 * partner at the end of the string is copied as-is, so the scan can
 * never step over the terminating NUL.
 */
static void jis2ujis_jisyo(buffer)
    unsigned char *buffer;
{
    unsigned char *src = buffer;    /* read position */
    unsigned char *dst = buffer;    /* write position, never ahead of src */
    int kanji = 0;                  /* non-zero while in two-byte mode */

    while (*src != '\0') {
        if (*src == '\033') {
            if ((src[1] == '$') && ((src[2] == '@') || (src[2] == 'B'))) {
                kanji = 1;
                src += 2;
            } else if ((src[1] == '(') && ((src[2] == 'B') || (src[2] == 'J'))) {
                kanji = 0;
                src += 2;
            } else {
                *dst++ = *src;
            }
        } else if (kanji && (src[1] != '\0')) {
            /* A complete pair: convert both bytes. */
            *dst++ = *src++ | 0x80;
            *dst++ = *src | 0x80;
        } else {
            /* Single-byte text, or a truncated pair at end of string. */
            *dst++ = *src;
        }
        ++src;
    }
    *dst = '\0';
}
|
|
171 |
||
172 |
static void add_item(yomi, kanji, tail) |
|
173 |
unsigned char *yomi; |
|
174 |
unsigned char *kanji; |
|
175 |
int tail; |
|
176 |
{
|
|
177 |
unsigned char *q, *ptr_kanji, *ptr_yomi; |
|
178 |
struct kanji_yomi *ptr_kanji_yomi, **ptr; |
|
179 |
int length, c1, c2; |
|
180 |
||
181 |
/* Is the head a kanji? */
|
|
182 |
if (kanji[0] < 0xb0) return; |
|
183 |
||
184 |
/* Isn't a HANKAKU character contained? Convert ITAIJI. */
|
|
185 |
for (q = kanji;; q += 2) { |
|
186 |
c1 = q[0]; c2 = q[1]; |
|
187 |
if ((c1 == '\0') || (c2 == '\0')) break; |
|
188 |
if ((c1 <= 0xa0) || (c2 <= 0xa0)) return; |
|
189 |
itaijiknj(&c1, &c2); |
|
190 |
q[0] = c1; q[1] = c2; |
|
191 |
}
|
|
192 |
||
193 |
/* Isn't the one except for the KANA contained? A KATAKANA changes into the HIRAGANA. */
|
|
194 |
for (q = yomi; (q[0] != '\0') && (q[1] != '\0'); q += 2) { |
|
195 |
if (*q < 0xa1) return; |
|
196 |
if (*q == 0xa5) *q = 0xa4; |
|
197 |
if ((*q != 0xa4) && |
|
198 |
((q[0] != 0xa1) || (q[1] != 0xbc)) && /* Prolonged sound */ |
|
199 |
((q[0] != 0xa1) || (q[1] != 0xab)) && /* Sonant */ |
|
200 |
((q[0] != 0xa1) || (q[1] != 0xac))) /* Half-sonant */ |
|
201 |
return; |
|
202 |
}
|
|
203 |
||
204 |
/* A cell because of the reading is made. */
|
|
205 |
||
206 |
length = strlen((const char *)kanji); |
|
207 |
ptr_kanji = charalloc(length-1); |
|
208 |
strcpy((char *)ptr_kanji, (const char *)(kanji+2)); |
|
209 |
ptr_yomi = charalloc(strlen((const char *)yomi)+1); |
|
210 |
strcpy((char *)ptr_yomi, (const char *)yomi); |
|
211 |
||
212 |
ptr_kanji_yomi = cellalloc(); |
|
213 |
ptr_kanji_yomi->next = NULL; |
|
214 |
ptr_kanji_yomi->length = tail ? length+1 : length; |
|
215 |
ptr_kanji_yomi->kanji = ptr_kanji; |
|
216 |
ptr_kanji_yomi->yomi = ptr_yomi; |
|
217 |
ptr_kanji_yomi->tail = tail; |
|
218 |
||
219 |
/* It is connected in search of the end of the link of the internal dictionary. */
|
|
220 |
for (ptr = &(jisyo_table[kanji[0]-0x80][kanji[1]-0x80]); |
|
221 |
*ptr != NULL; |
|
222 |
ptr = &((*ptr)->next)); |
|
223 |
*ptr = ptr_kanji_yomi; |
|
224 |
}
|
|
225 |
||
226 |
void add_jisyo(filename) |
|
227 |
char *filename; |
|
228 |
{
|
|
229 |
FILE *jisyo_fp; |
|
230 |
unsigned char buffer[BUFLEN]; |
|
231 |
unsigned char *p; |
|
232 |
unsigned char *yomi, *kanji; |
|
233 |
int tail; |
|
234 |
extern char *ialloc(); |
|
235 |
||
236 |
if ((jisyo_fp = fopen(filename, "rb")) == NULL) { |
|
237 |
perror(filename); |
|
238 |
exit(0); |
|
239 |
}
|
|
240 |
while(fgets((char *)buffer, BUFLEN, jisyo_fp)) { |
|
241 |
/* If there is the one except for the KANA at the head, to the next */
|
|
242 |
if ((buffer[0] < 0xa0) && (buffer[0] != '\033')) continue; |
|
243 |
||
244 |
/* A line is changed into ujis. */
|
|
245 |
jis2ujis_jisyo(buffer); |
|
246 |
||
247 |
yomi = buffer; |
|
248 |
/* The next ward is looked for. */
|
|
249 |
for (p = buffer; (*p != ' ') && (*p != '\011') && (*p != ','); ++ p) { |
|
250 |
if ((*p == '\0') || (*p == '\n')) goto next_line; |
|
251 |
}
|
|
252 |
||
253 |
if (isalpha(p[-1])) { /* An OKURIGANA is given if the last character is an alphabet. */ |
|
254 |
tail = p[-1]; |
|
255 |
p[-1] = '\0'; |
|
256 |
} else { |
|
257 |
tail = 0; |
|
258 |
p[0] = '\0'; |
|
259 |
}
|
|
260 |
||
261 |
/* The next ward is looked for. */
|
|
262 |
for (++ p; (*p == ' ') || (*p == '\011') || (*p == ','); ++ p) { |
|
263 |
if ((*p == '\0') || (*p == '\n')) goto next_line; |
|
264 |
}
|
|
265 |
||
266 |
if (*p == '/') { /* It seems to be the dictionary of SKK. */ |
|
267 |
for (;;) { |
|
268 |
kanji = p+1; |
|
269 |
/* The next ward is looked for. */
|
|
270 |
for (++ p; (*p != '/'); ++ p) { |
|
271 |
if ((*p == '\0')||(*p == '\n')||(*p == '[')) goto next_line; |
|
272 |
}
|
|
273 |
*p = '\0'; |
|
274 |
add_item(yomi, kanji, tail); |
|
275 |
}
|
|
276 |
} else { /* It seems to be a standard dictionary. */ |
|
277 |
kanji = p; |
|
278 |
/* The next ward is looked for. */
|
|
279 |
for (++ p; |
|
280 |
(*p != ' ') && (*p != '\n') && (*p != '\011') && |
|
281 |
(*p != '\0') && (*p != ',') |
|
282 |
; ++ p) { |
|
283 |
;
|
|
284 |
}
|
|
285 |
*p = '\0'; |
|
286 |
add_item(yomi, kanji, tail); |
|
287 |
}
|
|
288 |
next_line:; |
|
289 |
}
|
|
290 |
fclose(jisyo_fp); |
|
291 |
}
|
|
292 |
||
293 |
/* The initialization of kanwa is done. Reading kanwa_load is actually cleared in
|
|
294 |
kanwa the part at the head. */
|
|
295 |
||
296 |
#ifdef LIBRARY
|
|
297 |
FILE *kanwadict = NULL; |
|
298 |
#else
|
|
299 |
static FILE *kanwadict; |
|
300 |
#endif
|
|
301 |
||
302 |
void init_kanwa() |
|
303 |
{
|
|
304 |
int i, j; |
|
305 |
||
306 |
char *kanwadictpath; |
|
307 |
||
308 |
kanwadictpath = (char*)getenv("KANWADICTPATH"); |
|
309 |
if (kanwadictpath == (char*)NULL) |
|
310 |
kanwadictpath = (char*)getenv("KANWADICT"); |
|
311 |
if (kanwadictpath == (char*)NULL) |
|
312 |
kanwadictpath = KANWADICT; |
|
313 |
||
314 |
if ((kanwadict = fopen(kanwadictpath,"rb")) == NULL) { |
|
315 |
perror(kanwadictpath); |
|
316 |
exit(2); |
|
317 |
}
|
|
318 |
||
319 |
if (fread((char *)kanwa, sizeof kanwa, 1, kanwadict) != 1) { |
|
320 |
perror(kanwadictpath); |
|
321 |
}
|
|
322 |
||
323 |
for (i = 0; i < 0x80; ++ i) |
|
324 |
for (j = 0; j < 0x80; ++ j) |
|
325 |
kanwa_load[i][j] = 0; |
|
326 |
}
|
|
327 |
||
328 |
/* An applicable part from kanwa if necessary is drawn. */
|
|
329 |
||
330 |
void add_kanwa(c1, c2) |
|
331 |
int c1; |
|
332 |
int c2; |
|
333 |
{
|
|
334 |
unsigned char *ptr_yomi, *ptr_kanji; |
|
335 |
struct kanji_yomi *ptr_kanji_yomi, **ptr; |
|
336 |
int i; |
|
337 |
unsigned char tail, length; |
|
338 |
||
339 |
c1 &= 0x7f; |
|
340 |
c2 &= 0x7f; |
|
341 |
||
342 |
if (kanwa_load[c1][c2]) return; |
|
343 |
kanwa_load[c1][c2] = 1; |
|
344 |
||
345 |
/* It is finished when there is no description in the dictionary just in case. */
|
|
346 |
if (kanwa[c1-0x20][c2-0x20].entry == 0) return; |
|
347 |
/* It is moved to the fixed position of kanwadict. */
|
|
348 |
fseek(kanwadict, (long)(kanwa[c1-0x20][c2-0x20].index), 0L); |
|
349 |
||
350 |
/* The end of the link of the internal dictionary is looked for. */
|
|
351 |
for (ptr = &(jisyo_table[c1][c2]); |
|
352 |
*ptr != NULL; |
|
353 |
ptr = &((*ptr)->next)); |
|
354 |
||
355 |
for (i = 0; i < kanwa[c1-0x20][c2-0x20].entry; ++ i) { |
|
356 |
ptr_kanji_yomi = cellalloc(); |
|
357 |
||
358 |
fread(&tail, 1, 1, kanwadict); |
|
359 |
ptr_kanji_yomi->tail = tail; |
|
360 |
||
361 |
fread(&length, 1, 1, kanwadict); |
|
362 |
ptr_kanji = charalloc(length+1); |
|
363 |
fread(ptr_kanji, (int)length, 1, kanwadict); |
|
364 |
ptr_kanji[length] = '\0'; |
|
365 |
ptr_kanji_yomi->kanji = ptr_kanji; |
|
366 |
||
367 |
ptr_kanji_yomi->length = length + ((tail == 0) ? 2 : 3); |
|
368 |
||
369 |
fread(&length, 1, 1, kanwadict); |
|
370 |
ptr_yomi = charalloc(length+1); |
|
371 |
fread(ptr_yomi, (int)length, 1, kanwadict); |
|
372 |
ptr_yomi[length] = '\0'; |
|
373 |
ptr_kanji_yomi->yomi = ptr_yomi; |
|
374 |
||
375 |
ptr_kanji_yomi->next = NULL; |
|
376 |
||
377 |
*ptr = ptr_kanji_yomi; |
|
378 |
ptr = &(ptr_kanji_yomi->next); |
|
379 |
}
|
|
380 |
}
|