2
* KAKASI (Kanji Kana Simple inversion program)
3
* $Id: kakasi.c,v 1.20 2001/09/02 11:43:21 takesako Exp $
5
* Hironobu Takahashi (takahasi@tiny.or.jp)
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2, or (at your option)
12
* This program is distributed in the hope that it will be useful
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
* GNU General Public License for more details.
17
* You should have received a copy of the GNU General Public License
18
* along with KAKASI, see the file COPYING. If not, write to the Free
19
* Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
23
Modified by NOKUBI Takatsugu <knok@daionet.gr.jp>
25
Fix kakasi_do returning no values.
27
Add more variable initialization routines.
29
Add initialization routine for some variables to function kakasi_getopt_argv.
31
Rename PERLMOD macro to LIBRARY
47
# include "libkakasi.h"
52
/* FIXME: this macro should be removed in the future. */
56
#define KAKASI_ATTR static
59
int romaji_type = HEPBURN;
60
int romaji_capitalize = 0;
61
int romaji_upcase = 0;
64
int furigana_mode = 0;
68
int wakatigaki_mode = 0;
69
int terminate_done = 0;
71
#endif /* WAKATIGAKI */
75
char cr_eat_string[KAKASIBUF];
76
Character n[KAKASIBUF];
79
extern FILE *kanwadict;
80
static int (*proc[8])()={NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
81
/* ASCII, JISROMAN, KATAKANA, GRAPHIC, ZENKAKU-KIGOU, ZENKAKU-KATAKANA, ZENKAKU-HIRAGANA, KANJI, */
84
extern int input_term_type;
85
extern int output_term_type;
87
/* variables for memory management */
88
extern void ** ary_charalloc;
89
extern void ** ary_cellalloc;
90
extern size_t ary_size_charalloc;
91
extern size_t ary_cur_charalloc;
92
extern size_t ary_size_cellalloc;
93
extern size_t ary_cur_cellalloc;
94
extern int point_charalloc;
95
extern unsigned char *ptr_charalloc;
96
extern int point_cellalloc;
97
extern struct kanji_yomi *ptr_cellalloc;
101
static void digest_start_copy PARAMS((Character *c, Character *r));
102
static void put_separator PARAMS((void));
103
static void putchars PARAMS((Character *results));
104
static void digest_out PARAMS((Character *c, int ret));
105
static int digest PARAMS((Character *c, int clen, Character *r, int rlen, int type, int (*proc)(void)));
106
static void digest_shift PARAMS((Character *c, int s));
108
static void free_jisyo PARAMS((void));
112
digest_start_copy(c, r)
120
if ((r->type == OTHER) && (r->c1 == 0)) return;
130
if(! terminate_done) {
131
if (separator_out == 1)
137
if (separator_out == 1)
139
#endif /* WAKATIGAKI */
146
while(results->c1 != '\0') {
162
if (romaji_capitalize) {
163
if ((n[0].type == ASCII) || (n[0].type == JISROMAN))
164
if (('a' <= n[0].c1) && (n[0].c1 <= 'z'))
165
n[0].c1 = n[0].c1 - 0x20;
166
} else if (romaji_upcase) {
167
for (ptr = n; ptr->c1 != '\0'; ++ptr) {
168
if ((ptr->type == ASCII) || (ptr->type == JISROMAN))
169
if (('a' <= ptr->c1) && (ptr->c1 <= 'z'))
170
ptr->c1 = ptr->c1 - 0x20;
175
if ((kanji_digest) && (furigana_mode)) {
176
for (i = 0; i < ret; ++ i)
185
} else if ((kanji_digest) && (wakatigaki_mode)) {
186
for (i = 0; i < ret; ++ i)
188
#endif /* WAKATIGAKI */
192
if (flush_mode) fflush(stdout);
196
digest(c, clen, r, rlen, type, proc)
208
ret = (* proc)(c, n);
209
if (ret == 0) ret = 1;
211
/* The code below stores a terminator in r[rlen+1], so rlen must stop one
   element short of the end of the KAKASIBUF-sized result buffer. */
if ((ret < 0) && (rlen < KAKASIBUF - 1)) {
213
if(new.type == type) {
214
r[rlen].type = c[clen].type = type;
215
r[rlen].c1 = c[clen].c1 = new.c1;
216
r[rlen].c2 = c[clen].c2 = new.c2;
217
r[rlen+1].type = c[clen+1].type = OTHER;
218
r[rlen+1].c1 = c[clen+1].c1 = '\0';
219
return digest(c, clen+1, r, rlen+1, type, proc);
220
} else if (cr_eat_mode) {
221
if ((rlen < KAKASIBUF -1) && /* keep in check a buffer overflow */
222
((new.type == ASCII) || (new.type == JISROMAN) || (new.type == OTHER))) {
223
for (p = cr_eat_string; *p != '\0'; ++ p) {
224
if ((unsigned)(*p) == new.c1) {
225
r[rlen].type = new.type;
228
r[rlen+1].type = OTHER;
230
return digest(c, clen, r, rlen+1, type, proc);
244
if ((r[i].type == type) && (k > 0)) {
247
c[j].type = r[i].type;
265
for (i = 0;; ++ i) { /* Yes, I know following lines can be written in
266
1 line, but I have doubts of compatibilities.. */
267
c[i].type = c[i+s].type;
270
if (c[i+s].c1 == '\0')
282
kakasi_getopt_argv(argc, argv)
291
Character c[KAKASIBUF], r[KAKASIBUF];
292
int clen, ptype, pctype;
293
static int (*proc[8])()={NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
294
/* ASCII, JISROMAN, KATAKANA, GRAPHIC, ZENKAKU-KIGOU, ZENKAKU-KATAKANA, ZENKAKU-HIRAGANA, KANJI, */
299
/* Initialize some variables. */
300
for (i = 0; i < 8; i ++) {
303
romaji_type = HEPBURN;
308
romaji_capitalize = 0;
315
#endif /* WAKATIGAKI*/
316
input_term_type = UNKNOWN;
317
output_term_type = UNKNOWN;
322
if ((*argv)[0] != '-') break;
326
case 'j': proc[0] = a2j; break;
327
case 'E': proc[0] = a2E; break;
328
default: proc[0] = NULL;
333
case 'a': proc[1] = j2a; break;
334
case 'E': proc[1] = j2E; break;
335
default: proc[1] = NULL;
340
case 'a': proc[2] = g2a; break;
341
case 'j': proc[2] = g2j; break;
342
case 'E': proc[2] = g2E; break;
343
default: proc[2] = NULL;
348
case 'a': proc[3] = k2a; break;
349
case 'j': proc[3] = k2j; break;
350
case 'K': proc[3] = k2K; break;
351
case 'H': proc[3] = k2H; break;
352
default: proc[3] = NULL;
357
case 'a': proc[4] = E2a; break;
358
case 'j': proc[4] = E2j; break;
359
default: proc[4] = NULL;
364
case 'a': proc[5] = K2a; break;
365
case 'j': proc[5] = K2j; break;
366
case 'k': proc[5] = K2k; break;
367
case 'H': proc[5] = K2H; break;
368
default: proc[5] = NULL;
373
case 'a': proc[6] = H2a; break;
374
case 'j': proc[6] = H2j; break;
375
case 'k': proc[6] = H2k; break;
376
case 'K': proc[6] = H2K; break;
377
default: proc[6] = NULL;
382
case 'a': proc[7] = J2a; break;
383
case 'j': proc[7] = J2j; break;
384
case 'k': proc[7] = J2k; break;
385
case 'K': proc[7] = J2K; break;
386
case 'H': proc[7] = J2H; break;
387
default: proc[7] = NULL;
391
if ((*argv)[2] != '\0')
392
set_input_term(term_type_str((*argv)+2));
396
set_input_term(term_type_str(*(++ argv)));
400
if ((*argv)[2] != '\0')
401
set_output_term(term_type_str((*argv)+2));
405
set_output_term(term_type_str(*(++ argv)));
409
if ((*argv)[2] == 'k')
410
romaji_type = KUNREI;
423
/* The option text comes straight from the command line.  Limit it with a
   precision so the 4-byte prefix, the copied text and the terminating NUL
   always fit in cr_eat_string[KAKASIBUF]. */
sprintf(cr_eat_string, "\011\012\015 %.*s", (int)(KAKASIBUF - 5), (*argv)+2);
426
romaji_capitalize = 1;
439
/* The option text comes straight from the command line.  Limit it with a
   precision so the 4-byte prefix, the copied text and the terminating NUL
   always fit in cr_eat_string[KAKASIBUF]. */
sprintf(cr_eat_string, "\011\012\015 %.*s", (int)(KAKASIBUF - 5), (*argv)+2);
444
#endif /* WAKATIGAKI */
448
fprintf(stderr, "KAKASI - Kanji Kana Simple Inverter Version %s\n", VERSION);
449
fprintf(stderr, "Copyright (C) 1992-1999 Hironobu Takahashi. All rights reserved.\n");
450
fprintf(stderr, "\n");
451
fprintf(stderr, "Usage: kakasi -a[jE] -j[aE] -g[ajE] -k[ajKH] -E[aj] -K[ajkH] -H[ajkK] -J[ajkKH]\n");
452
fprintf(stderr, " -i{oldjis,newjis,dec,euc,sjis} -o{oldjis,newjis,dec,euc,sjis}\n");
453
fprintf(stderr, " -r{hepburn,kunrei} -p -s -f -c\"chars\" [jisyo1, jisyo2,,,]\n");
454
fprintf(stderr, "\n");
455
fprintf(stderr, " Character Sets:\n");
456
fprintf(stderr, " a: ascii j: jisroman g: graphic k: kana (j,k defined in jisx0201)\n");
457
fprintf(stderr, " E: kigou K: katakana H: hiragana J: kanji(E,K,H,J defined in jisx0208)\n");
458
fprintf(stderr, "\n");
459
fprintf(stderr, " Options:\n");
460
fprintf(stderr, " -i: input coding system -o: output coding system\n");
461
fprintf(stderr, " -r: romaji conversion system\n");
462
fprintf(stderr, " -p: list all readings (with -J option)\n");
463
fprintf(stderr, " -s: insert separate characters (with -J option)\n");
464
fprintf(stderr, " -f: furigana mode (with -J option)\n");
465
fprintf(stderr, " -c: skip chars within jukugo (with -J option: default TAB CR LF BLANK)\n");
466
fprintf(stderr, " -C: romaji Capitalize (with -Ja or -Jj option)\n");
467
fprintf(stderr, " -U: romaji Upcase (with -Ja or -Jj option)\n");
468
fprintf(stderr, " -u: call fflush() after 1 character output\n");
470
fprintf(stderr, " -w: wakatigaki mode\n");
471
#endif /* WAKATIGAKI */
472
fprintf(stderr, "\n");
473
fprintf(stderr, "Report bugs to <bug-kakasi@namazu.org>.\n");
481
if ((input_term_type != UNKNOWN) && (output_term_type == UNKNOWN))
482
set_output_term(input_term_type);
489
if (proc[7] != NULL) {
490
for (; argc > 0; -- argc)
491
add_jisyo(*(argv ++));
502
Character c[KAKASIBUF], r[KAKASIBUF];
503
int clen, ptype, pctype;
512
if ((c[0].type == OTHER) && (c[0].c1 == 0xff)) break;
523
if ((c[0].type != OTHER) && (c[0].type != pctype)) {
527
if ((*proc[(int)(c[0].type)]) == NULL) {
528
putkanji(c); digest_shift(c, 1); -- clen;
529
if (flush_mode) fflush(stdout);
531
digest_start_copy(c, r);
532
clen = digest(c, clen, r, clen, (int)(c[0].type), *proc[(int)(c[0].type)]);
536
#endif /* WAKATIGAKI */
539
if (c[0].c1 >= 0xb0) {
543
} else if ((c[0].c1 == 0xa1) && /* character codes \241\270, \241\271, \241\272 */
544
(c[0].c2 >= 0xb8 && c[0].c2 <= 0xba)) {
547
} else if ((c[0].c1 == 0xa5) && /* character codes \245\365, \245\366 */
548
(c[0].c2 >= 0xf5 && c[0].c2 <= 0xf6)) {
551
#endif /* WAKATIGAKI */
552
} else if (c[0].c1 == 0xa4) {
555
} else if ((c[0].c1 == 0xa1) && /* character codes \241\263, \241\264, \241\265, \241\266 */
556
(c[0].c2 >= 0xb3 && c[0].c2 <= 0xb6)) {
558
#endif /* WAKATIGAKI */
559
} else if (c[0].c1 == 0xa5) {
561
} else if ((c[0].c1 == 0xa1) && (c[0].c2 == 0xbc)) {
566
if (ptype != pctype) {
577
#endif /* WAKATIGAKI */
579
if ((*proc[ptype]) == NULL) {
580
putkanji(c); digest_shift(c, 1); -- clen;
581
if (flush_mode) fflush(stdout);
583
digest_start_copy(c, r);
584
clen = digest(c, clen, r, clen, JIS83, *proc[ptype]);
588
#endif /* WAKATIGAKI */
593
#endif /* WAKATIGAKI */
594
putkanji(c); digest_shift(c, 1); -- clen;
596
if (flush_mode) fflush(stdout);
605
char *ret = getpbstr();
615
kakasi_close_kanwadict()
617
if (kanwadict != NULL) {
631
for (x = 0; x <= ary_cur_charalloc; x ++) {
632
free(ary_charalloc[x]);
637
for (x = 0; x <= ary_cur_cellalloc; x ++) {
638
free(ary_cellalloc[x]);
645
ary_charalloc = NULL;
646
ary_cellalloc = NULL;
647
ary_size_charalloc = -1;
648
ary_cur_charalloc = -1;
649
ary_size_cellalloc = -1;
650
ary_cur_cellalloc = -1;
652
ptr_charalloc = NULL;
654
ptr_cellalloc = NULL;