~ubuntu-branches/ubuntu/maverick/kakasi/maverick

1 by Keita Maehara
Import upstream version 2.3.4
1
/*
2
 * KAKASI (Kanji Kana Simple inversion program)
3
 * $Id: dict.c,v 1.8 2001/09/03 10:02:38 rug Exp $
4
 * Copyright (C) 1992
5
 * Hironobu Takahashi (takahasi@tiny.or.jp)
6
 *
7
 * This program is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either versions 2, or (at your option)
10
 * any later version.
11
 *
12
 * This program is distributed in the hope that it will be useful
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with KAKASI, see the file COPYING.  If not, write to the Free
19
 * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
20
 * 02111-1307, USA.
21
 */
22
/*
23
  Modified by NOKUBI Takatsugu <knok@daionet.gr.jp>
24
  1999/03/04
25
      Rename PERLMOD macro to LIBRARY
26
  1999/01/11
27
      Add PERLMOD macro.
28
*/
29
30
#ifdef HAVE_CONFIG_H
31
# include <config.h>
32
#endif
33
34
#include <stdio.h>
35
#include <ctype.h>
36
#ifdef HAVE_STRING_H
37
# include <string.h>
38
#else
39
# include <strings.h>
40
#endif
41
#ifdef HAVE_MALLOC_H
42
# include <malloc.h>
43
#endif
44
#include <stdlib.h>
45
#include "kakasi.h"
46
47
/* Size of the line buffer add_jisyo() uses when reading a dictionary file. */
#define BUFLEN 1024

/* Bytes in each string-storage chunk carved up by charalloc(). */
#define IALLOCSIZE (1024*100)
/* Cells in each chunk handed out by cellalloc(); the chunk registries
   (ary_charalloc / ary_cellalloc) also grow in steps of this size. */
#define CELLALLOC  5000

/* Dictionary file opened by init_kanwa() when neither the KANWADICTPATH nor
   the KANWADICT environment variable is set.  A build may predefine it. */
#if !defined(KANWADICT)
# define KANWADICT "./kanwadict"
#endif
55
56
/* variables for memory management */
57
void ** ary_charalloc = NULL;
58
void ** ary_cellalloc = NULL;
59
size_t ary_size_charalloc = -1;
60
size_t ary_cur_charalloc = -1;
61
size_t ary_size_cellalloc = -1;
62
size_t ary_cur_cellalloc = -1;
63
int point_charalloc = 0;
64
unsigned char *ptr_charalloc = NULL;
65
int point_cellalloc = 0;
66
struct kanji_yomi *ptr_cellalloc = NULL;
67
68
struct kanji_yomi *jisyo_table[0x80][0x80]; /* hash table */
69
int kanwa_load[0x80][0x80];                 /* Is kanwadict put? */
70
struct kanwa_entry kanwa[0x60][0x60];       /* ujis onl 0xa0 is shift to code. */
71
72
static void
73
add_ary_charalloc(ptr)
74
     void * ptr;
75
{
76
    ary_cur_charalloc ++;
77
    if (ary_charalloc == NULL || ary_cur_charalloc > ary_size_charalloc) {
78
	ary_size_charalloc += CELLALLOC;
79
	ary_charalloc = realloc(ary_charalloc, 
80
				sizeof(void *) * ary_size_charalloc +1);
81
    }
82
    ary_charalloc[ary_cur_charalloc] = ptr;
83
}
84
85
static void
86
add_ary_cellalloc(ptr)
87
     void * ptr;
88
{
89
    ary_cur_cellalloc ++;
90
    if (ary_cellalloc == NULL || ary_cur_cellalloc > ary_size_cellalloc) {
91
	ary_size_cellalloc += CELLALLOC;
92
	ary_cellalloc = realloc(ary_cellalloc, 
93
				sizeof(void *) * ary_size_cellalloc +1);
94
    }
95
    ary_cellalloc[ary_cur_cellalloc] = ptr;
96
}
97
98
static unsigned char *
99
charalloc(length)
100
     int length;
101
{
102
    unsigned char *ret;
103
104
    if ((ptr_charalloc == NULL) || (point_charalloc+length >= IALLOCSIZE)) {
105
	ptr_charalloc = (unsigned char *)malloc(IALLOCSIZE);
106
	add_ary_charalloc(ptr_charalloc);
107
	point_charalloc = 0;
108
    }
109
    ret = ptr_charalloc+point_charalloc;
110
    point_charalloc += length;
111
    return ret;
112
}
113
114
static struct kanji_yomi *
115
cellalloc()
116
{
117
118
    if ((ptr_cellalloc == NULL) || (point_cellalloc >= CELLALLOC)) {
119
	char *cptr;
120
	cptr = malloc((CELLALLOC+1)*sizeof(struct kanji_yomi));
121
	add_ary_cellalloc(cptr);
122
	if ((int)cptr & 7) cptr += 8 - ((int)cptr & 7);
123
	ptr_cellalloc = (struct kanji_yomi *) cptr;
124
	point_cellalloc = 0;
125
    }
126
    ++ point_cellalloc;
127
    return ptr_cellalloc ++;
128
}
129
130
void init_jisyo()
131
{
132
    int c1, c2;
133
134
    for(c1 = 0; c1 < 0x80; c1 ++)
135
	for(c2 = 0; c2 < 0x80; c2 ++)
136
	    jisyo_table[c1][c2] = NULL;
137
}
138
139
/*
 * Convert a NUL-terminated line from JIS (ISO-2022-JP style escapes) to
 * EUC-JP ("ujis") in place.
 *
 *   ESC $ @  /  ESC $ B   switch to two-byte kanji mode (escape is dropped)
 *   ESC ( B  /  ESC ( J   switch back to single-byte mode (escape is dropped)
 *   any other ESC          is copied through unchanged
 *
 * In kanji mode each pair of bytes is copied with the high bit set; all other
 * bytes are copied as they are.  The output never grows, so writing
 * into the same buffer is safe.
 *
 * If the input ends in the middle of a two-byte character, the lone byte is
 * emitted (high bit set) and conversion stops.  Previously the loop stepped
 * over the terminating NUL and kept converting whatever followed in memory.
 */
static void jis2ujis_jisyo(unsigned char *buffer)
{
    unsigned char *p = buffer;  /* read position */
    unsigned char *q = buffer;  /* write position, never ahead of p */
    int kanji = 0;              /* non-zero while in two-byte mode */

    while (*p != '\0') {
        if (*p == '\033') {
            if ((p[1] == '$') && ((p[2] == '@') || (p[2] == 'B'))) {
                kanji = 1;
                p += 2;
            } else if ((p[1] == '(') && ((p[2] == 'B') || (p[2] == 'J'))) {
                kanji = 0;
                p += 2;
            } else {
                *q++ = *p;
            }
        } else if (kanji) {
            *q++ = *p++ | 0x80;
            if (*p == '\0') {
                break;          /* truncated character: stop at the NUL */
            }
            *q++ = *p | 0x80;
        } else {
            *q++ = *p;
        }
        ++p;
    }
    *q = '\0';
}
171
172
static void add_item(yomi, kanji, tail)
173
     unsigned char *yomi;
174
     unsigned char *kanji;
175
     int tail;
176
{
177
    unsigned char *q, *ptr_kanji, *ptr_yomi;
178
    struct kanji_yomi *ptr_kanji_yomi, **ptr;
179
    int length, c1, c2;
180
181
    /* Is the head a kanji? */
182
    if (kanji[0] < 0xb0) return;
183
184
    /* Isn't a HANKAKU character contained? Convert ITAIJI. */
185
    for (q = kanji;; q += 2) {
186
	c1 = q[0]; c2 = q[1];
187
	if ((c1 == '\0') || (c2 == '\0')) break;
188
	if ((c1 <= 0xa0) || (c2 <= 0xa0)) return;
189
	itaijiknj(&c1, &c2);
190
	q[0] = c1; q[1] = c2;
191
    }
192
193
    /* Isn't the one except for the KANA contained? A KATAKANA changes into the HIRAGANA. */
194
    for (q = yomi; (q[0] != '\0') && (q[1] != '\0'); q += 2) {
195
	if (*q < 0xa1) return;
196
	if (*q == 0xa5) *q = 0xa4;
197
	if ((*q != 0xa4) &&
198
	    ((q[0] != 0xa1) || (q[1] != 0xbc)) && /* Prolonged sound */ 
199
	    ((q[0] != 0xa1) || (q[1] != 0xab)) && /* Sonant */
200
	    ((q[0] != 0xa1) || (q[1] != 0xac)))   /* Half-sonant */
201
	    return;
202
    }
203
204
    /* A cell because of the reading is made. */
205
206
    length = strlen((const char *)kanji);
207
    ptr_kanji =	charalloc(length-1);
208
    strcpy((char *)ptr_kanji, (const char *)(kanji+2));
209
    ptr_yomi = charalloc(strlen((const char *)yomi)+1);
210
    strcpy((char *)ptr_yomi, (const char *)yomi);
211
212
    ptr_kanji_yomi = cellalloc();
213
    ptr_kanji_yomi->next = NULL;
214
    ptr_kanji_yomi->length = tail ? length+1 : length;
215
    ptr_kanji_yomi->kanji = ptr_kanji;
216
    ptr_kanji_yomi->yomi = ptr_yomi;
217
    ptr_kanji_yomi->tail = tail;
218
219
    /* It is connected in search of the end of the link of the internal dictionary. */
220
    for (ptr = &(jisyo_table[kanji[0]-0x80][kanji[1]-0x80]);
221
	 *ptr != NULL;
222
	 ptr = &((*ptr)->next));
223
    *ptr = ptr_kanji_yomi;
224
}
225
226
void add_jisyo(filename)
227
     char *filename;
228
{
229
    FILE *jisyo_fp;
230
    unsigned char buffer[BUFLEN];
231
    unsigned char *p;
232
    unsigned char *yomi, *kanji;
233
    int tail;
234
    extern char *ialloc();
235
236
    if ((jisyo_fp = fopen(filename, "rb")) == NULL) {
237
	perror(filename);
238
	exit(0);
239
    }
240
    while(fgets((char *)buffer, BUFLEN, jisyo_fp)) {
241
	/* If there is the one except for the KANA at the head, to the next */
242
	if ((buffer[0] < 0xa0) && (buffer[0] != '\033')) continue;
243
244
	/* A line is changed into ujis. */
245
	jis2ujis_jisyo(buffer);
246
247
	yomi = buffer;
248
	/* The next ward is looked for. */
249
	for (p = buffer; (*p != ' ') && (*p != '\011') && (*p != ','); ++ p) {
250
	    if ((*p == '\0') || (*p == '\n')) goto next_line;
251
	}
252
253
	if (isalpha(p[-1])) { /* An OKURIGANA is given if the last character is an alphabet. */
254
	    tail = p[-1];
255
	    p[-1] = '\0';
256
	} else {
257
	    tail = 0;
258
	    p[0] = '\0';
259
	}
260
261
	/* The next ward is looked for. */
262
	for (++ p; (*p == ' ') || (*p == '\011') || (*p == ','); ++ p) {
263
	    if ((*p == '\0') || (*p == '\n')) goto next_line;
264
	}
265
266
	if (*p == '/') { /* It seems to be the dictionary of SKK. */
267
	    for (;;) {
268
		kanji = p+1;
269
		/* The next ward is looked for. */
270
		for (++ p; (*p != '/'); ++ p) {
271
		    if ((*p == '\0')||(*p == '\n')||(*p == '[')) goto next_line;
272
		}
273
		*p = '\0';
274
		add_item(yomi, kanji, tail);
275
	    }
276
	} else { /* It seems to be a standard dictionary. */
277
	    kanji = p;
278
	    /* The next ward is looked for. */
279
	    for (++ p; 
280
		 (*p != ' ') && (*p != '\n') && (*p != '\011') &&
281
		 (*p != '\0') && (*p != ',')
282
		 ; ++ p) {
283
		;
284
	    }
285
	    *p = '\0';
286
	    add_item(yomi, kanji, tail);
287
	}
288
      next_line:;
289
    }
290
    fclose(jisyo_fp);
291
}
292
293
/* Initialization of kanwa: open the kanwadict file, read the index table at
   its head into kanwa, and clear kanwa_load. */
295
296
/* Stream for the kanwadict file: opened by init_kanwa(), read by add_kanwa().
   It has external linkage in LIBRARY builds and is file-local otherwise. */
#if defined(LIBRARY)
FILE *kanwadict = NULL;
#else
static FILE *kanwadict = NULL;
#endif
301
302
void init_kanwa()
303
{
304
    int i, j;
305
    
306
    char *kanwadictpath;
307
308
    kanwadictpath = (char*)getenv("KANWADICTPATH");
309
    if (kanwadictpath == (char*)NULL)
310
	kanwadictpath = (char*)getenv("KANWADICT");
311
    if (kanwadictpath == (char*)NULL)
312
	kanwadictpath = KANWADICT;
313
314
    if ((kanwadict = fopen(kanwadictpath,"rb")) == NULL) {
315
	perror(kanwadictpath);
316
	exit(2);
317
    }
318
319
    if (fread((char *)kanwa, sizeof kanwa, 1, kanwadict) != 1) {
320
	perror(kanwadictpath);
321
    }
322
323
    for (i = 0; i < 0x80; ++ i)
324
	for (j = 0; j < 0x80; ++ j)
325
	    kanwa_load[i][j] = 0;
326
}
327
328
/* Load the kanwadict entries for one leading character on first request. */
329
330
void add_kanwa(c1, c2)
331
     int c1;
332
     int c2;
333
{
334
    unsigned char *ptr_yomi, *ptr_kanji;
335
    struct kanji_yomi *ptr_kanji_yomi, **ptr;
336
    int i;
337
    unsigned char tail, length;
338
339
    c1 &= 0x7f;
340
    c2 &= 0x7f;
341
342
    if (kanwa_load[c1][c2]) return;
343
    kanwa_load[c1][c2] = 1;
344
345
    /* It is finished when there is no description in the dictionary just in case. */
346
    if (kanwa[c1-0x20][c2-0x20].entry == 0) return;
347
    /* It is moved to the fixed position of kanwadict. */
348
    fseek(kanwadict, (long)(kanwa[c1-0x20][c2-0x20].index), 0L);
349
350
    /* The end of the link of the internal dictionary is looked for. */
351
    for (ptr = &(jisyo_table[c1][c2]);
352
	 *ptr != NULL;
353
	 ptr = &((*ptr)->next));
354
355
    for (i = 0; i < kanwa[c1-0x20][c2-0x20].entry; ++ i) {
356
	ptr_kanji_yomi = cellalloc();
357
358
	fread(&tail, 1, 1, kanwadict);
359
	ptr_kanji_yomi->tail = tail;
360
361
	fread(&length, 1, 1, kanwadict);
362
	ptr_kanji = charalloc(length+1);
363
	fread(ptr_kanji, (int)length, 1, kanwadict);
364
	ptr_kanji[length] = '\0';
365
	ptr_kanji_yomi->kanji = ptr_kanji;
366
367
	ptr_kanji_yomi->length = length + ((tail == 0) ? 2 : 3);
368
369
	fread(&length, 1, 1, kanwadict);
370
	ptr_yomi = charalloc(length+1);
371
	fread(ptr_yomi, (int)length, 1, kanwadict);
372
	ptr_yomi[length] = '\0';
373
	ptr_kanji_yomi->yomi = ptr_yomi;
374
375
	ptr_kanji_yomi->next = NULL;
376
377
	*ptr = ptr_kanji_yomi;
378
	ptr = &(ptr_kanji_yomi->next);
379
    }
380
}