3
Copyright (c) 2003-2005 uim Project http://uim.freedesktop.org/
7
Redistribution and use in source and binary forms, with or without
8
modification, are permitted provided that the following conditions
11
1. Redistributions of source code must retain the above copyright
12
notice, this list of conditions and the following disclaimer.
13
2. Redistributions in binary form must reproduce the above copyright
14
notice, this list of conditions and the following disclaimer in the
15
documentation and/or other materials provided with the distribution.
16
3. Neither the name of authors nor the names of its contributors
17
may be used to endorse or promote products derived from this software
18
without specific prior written permission.
20
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
21
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
24
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35
* SKK is a simple Japanese input method
37
* Many many things are to be implemented!
39
#include <sys/types.h>
50
#include <sys/socket.h>
51
#include <netinet/in.h>
58
/* Nonzero when ch satisfies skk_islower() or skk_isupper(); ch is expanded into both. */
#define skk_isalpha(ch) (skk_islower(ch) || skk_isupper(ch))
59
/*
 * Nonzero when ch, narrowed to unsigned char, lies in 'a'..'z'.
 * The argument is parenthesized so the cast covers the whole expression
 * passed in, not just its first operand.  ch is still expanded twice, so
 * do not pass an expression with side effects.
 */
#define skk_islower(ch) ((((unsigned char)(ch)) >= 'a') && (((unsigned char)(ch)) <= 'z'))
60
/*
 * Nonzero when ch, narrowed to unsigned char, lies in 'A'..'Z'.
 * The argument is parenthesized so the cast covers the whole expression
 * passed in, not just its first operand.  ch is still expanded twice, so
 * do not pass an expression with side effects.
 */
#define skk_isupper(ch) ((((unsigned char)(ch)) >= 'A') && (((unsigned char)(ch)) <= 'Z'))
61
/*
 * Nonzero when ch, narrowed to unsigned char, has no bit set above 0x7f.
 * The argument is parenthesized so the cast covers the whole expression
 * passed in, not just its first operand.
 */
#define skk_isascii(ch) ((((unsigned char)(ch)) & ~0x7f) == 0)
63
/*
 * Limit compared against the running index count in get_ignoring_indices().
 * Callers declare their index arrays with IGNORING_WORD_MAX + 1 elements.
 */
#define IGNORING_WORD_MAX 63
70
/* candidate array for each okurigana
72
* |C0|C1| .. |Cnr_real_cands| .. |Cnr_cands|
73
* <-------should be saved --><-- cache of master dict -->
75
struct skk_cand_array {
76
/* okurigana string */
79
int nr_cands; /* length of cands array allocated */
80
int nr_real_cands; /* length of read from file part */
81
/* candidate string */
84
/* this array was used and merged with okuri-nasi entry array */
86
/* link to its parent line */
87
struct skk_line *line;
90
/* skk dictionary line */
92
/* line index. head part */
94
/* line index. okurigana part. value will be 0 if it is okuri-nasi
97
/* array of candidate array for different okuri-gana */
99
struct skk_cand_array *cands;
100
/* modified or read from file */
102
/* link to next entry in the list */
103
struct skk_line *next;
106
/* skk dictionary file */
107
static struct dic_info {
108
/* address of mmap'ed dictionary file */
110
/* byte offset of first valid entry in mmap'ed region */
112
/* byte offset of first okuri-nasi entry */
114
/* size of dictionary file */
116
/* head of cached skk dictionary line list. LRU ordered */
117
struct skk_line head;
118
/* timestamp of personal dictionary */
119
time_t personal_dic_timestamp;
120
/* whether cached lines are modified or not */
122
/* length of cached lines */
124
/* skkserv is initialized */
126
/* skkserv port number */
131
struct skk_comp_array {
132
/* index of completion */
134
/* array of completion string */
140
struct skk_comp_array *next;
143
static char *sanitize_word(const char *str, const char *prefix);
144
static int is_purged_cand(const char *str);
145
static void merge_purged_cands(struct skk_cand_array *src_ca,
146
struct skk_cand_array *dst_ca, int src_nth, int dst_nth);
147
static void merge_purged_cand_to_dst_array(struct skk_cand_array *src_ca,
148
struct skk_cand_array *dst_ca, char *purged_cand);
150
/* skkserv connection */
151
#define SKK_SERVICENAME "skkserv"
152
#define SKK_SERVER_HOST "localhost"
153
/* Length of the char buffer declared in search_line_from_server(). */
#define SKK_SERV_BUFSIZ 1024
155
/*
 * Descriptor passed to read() in search_line_from_server().
 * Initialized to -1 (presumably "not connected" -- confirm against open_skkserv()).
 */
static int skkservsock = -1;
156
static FILE *rserv, *wserv;
157
static char *SKKServerHost = NULL;
159
static int open_skkserv(int portnum);
160
static void close_skkserv(void);
163
calc_line_len(const char *s)
166
for (i = 0; s[i] != '\n'; i++);
171
is_okuri(const char *line_str)
174
/* find first white space */
175
b = strchr(line_str, ' ');
178
/* check previous character */
186
find_first_line(struct dic_info *di)
191
while (off < di->size && s[off] == ';') {
192
int l = calc_line_len(&s[off]);
199
find_border(struct dic_info *di)
203
while (off < di->size) {
204
int l = calc_line_len(&s[off]);
209
if (!is_okuri(&s[off]))
213
/* Every entry is okuri-ari; this is not expected to happen. */
217
static struct dic_info *
218
open_dic(const char *fn, uim_bool use_skkserv, int skkserv_portnum)
226
if (!(di = (struct dic_info *)malloc(sizeof(struct dic_info))))
229
di->skkserv_portnum = skkserv_portnum;
231
di->skkserv_ok = open_skkserv(skkserv_portnum);
234
fd = open(fn, O_RDONLY);
236
if (fstat(fd, &st) != -1) {
237
addr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
238
if (addr != MAP_FAILED) {
246
di->addr = mmap_done ? addr : NULL;
247
di->size = mmap_done ? st.st_size : 0;
248
di->first = mmap_done ? find_first_line(di) : 0;
249
di->border = mmap_done ? find_border(di) : 0;
251
di->head.next = NULL;
252
di->personal_dic_timestamp = 0;
253
di->cache_modified = 0;
260
find_line(struct dic_info *di, int off)
262
char *ptr = di->addr;
263
while (off > 0 && (ptr[off] != '\n' || ptr[off + 1] == ';'))
273
extract_line_index(struct dic_info *di, int off, char *buf, int len)
275
const char *p = find_line(di, off);
280
for (i = 0; i < len && p[i] != ' '; i++)
288
do_search_line(struct dic_info *di, const char *s, int min,
293
int idx = (min + max) / 2;
296
if (abs(max - min) < 4)
299
r = extract_line_index(di, idx, buf, 256);
309
return do_search_line(di, s, idx, max, d);
311
return do_search_line(di, s, min, idx, d);
316
/* This function name is temporary. I want a better name. */
318
first_space(char *str)
320
while (*str && (*str != ' '))
326
/* Returns a pointer to the next '/' or to the terminating '\0' */
328
next_cand_slash(char *str)
331
int open_bracket = 0;
333
while (*str && (*str != '/' || open_bracket == 1)) {
334
if (*str == '[' && i == 0)
337
if (open_bracket == 1 && *str == ']' && *(str + 1) == '/')
346
next_slash_in_bracket(char *str)
348
while (*str && *str != '/')
355
okuri_in_bracket(char *str)
363
term = next_slash_in_bracket(p);
369
nth_candidate(char *str, int nth)
374
str = first_space(str);
375
for (i = 0; i <= nth; i++) {
376
str = next_cand_slash(str);
380
* we don't need sanity check here since argument nth is limited
392
term = next_cand_slash(p);
398
skk_dic_open(uim_lisp fn_, uim_lisp use_skkserv_, uim_lisp skkserv_portnum_)
400
const char *fn = uim_scm_refer_c_str(fn_);
401
uim_bool use_skkserv = uim_scm_c_bool(use_skkserv_);
402
int skkserv_portnum = uim_scm_c_int(skkserv_portnum_);
405
skk_dic = open_dic(fn, use_skkserv, skkserv_portnum);
411
free_skk_line(struct skk_line *sl)
418
for (i = 0; i < sl->nr_cand_array; i++) {
419
struct skk_cand_array *ca = &sl->cands[i];
420
for (j = 0; j < ca->nr_cands; j++)
429
static struct skk_cand_array *
430
find_candidate_array_from_line(struct skk_line *sl, const char *okuri,
431
int create_if_notfound)
434
struct skk_cand_array *ca;
436
if (!okuri || !strlen(okuri))
437
return &sl->cands[0];
439
for (i = 1; i < sl->nr_cand_array; i++) {
440
if (okuri && !strcmp(okuri, sl->cands[i].okuri))
441
return &sl->cands[i];
444
if (!create_if_notfound)
445
return &sl->cands[0];
449
sl->cands = realloc(sl->cands,
450
sizeof(struct skk_cand_array) * sl->nr_cand_array);
451
ca = &sl->cands[sl->nr_cand_array - 1];
455
ca->nr_real_cands = 0;
456
ca->okuri = strdup(okuri);
462
push_back_candidate_to_array(struct skk_cand_array *ca, const char *cand)
466
ca->cands = realloc(ca->cands, sizeof(char *) * ca->nr_cands);
468
ca->cands = malloc(sizeof(char *));
469
ca->cands[ca->nr_cands - 1] = strdup(cand);
473
merge_base_candidates_to_array(struct skk_line *sl,
474
struct skk_cand_array *dst_ca)
477
struct skk_cand_array *src_ca;
482
src_ca = &sl->cands[0];
483
if (src_ca == dst_ca)
486
for (i = 0; i < src_ca->nr_cands; i++) {
488
int src_purged_cand_index = -1;
489
int dst_purged_cand_index = -1;
491
if (i < src_ca->nr_real_cands && is_purged_cand(src_ca->cands[i]))
492
src_purged_cand_index = i;
494
for (j = 0; j < dst_ca->nr_cands; j++) {
495
if (dst_purged_cand_index == -1 && is_purged_cand(dst_ca->cands[j]))
496
dst_purged_cand_index = j;
497
if (!strcmp(src_ca->cands[i], dst_ca->cands[j])) {
502
if (src_purged_cand_index != -1 && dst_purged_cand_index != -1)
503
merge_purged_cands(src_ca, dst_ca, src_purged_cand_index,
504
dst_purged_cand_index);
505
else if (src_purged_cand_index != -1 && dst_purged_cand_index == -1)
506
merge_purged_cand_to_dst_array(src_ca, dst_ca,
507
src_ca->cands[src_purged_cand_index]);
510
* Just adding words subsequent to real_cands
511
* (push_back_candidate_to_array) is enough.
513
else if (src_purged_cand_index == -1 && dst_purged_cand_index != -1)
514
merge_word_to_dst_cand_array_with_purged_words(dst_ca,
515
src_ca, src_ca->cands[i]);
518
push_back_candidate_to_array(dst_ca, src_ca->cands[i]);
524
compose_line_parts(struct dic_info *di, struct skk_line *sl,
525
char *okuri, char *line)
529
struct skk_cand_array *ca = find_candidate_array_from_line(sl, okuri, 1);
533
tmp = nth_candidate(line, nth);
536
char *str = okuri_in_bracket(&tmp[1]);
537
tmp[0] = ' '; /* create first_space */
538
compose_line_parts(di, sl, str, &tmp[0]);
540
} else if (tmp[0] != ']') {
541
push_back_candidate_to_array(ca, tmp);
551
static struct skk_line *
552
alloc_skk_line(const char *word, char okuri_head)
555
sl = malloc(sizeof(struct skk_line));
557
sl->head = strdup(word);
558
sl->okuri_head = okuri_head;
559
sl->nr_cand_array = 1;
560
sl->cands = malloc(sizeof(struct skk_cand_array));
561
sl->cands[0].okuri = NULL;
562
sl->cands[0].cands = NULL;
563
sl->cands[0].nr_cands = 0;
564
sl->cands[0].nr_real_cands = 0;
565
sl->cands[0].is_used = 0;
566
sl->cands[0].line = sl;
570
static struct skk_line *
571
copy_skk_line(struct skk_line *p)
579
sl = malloc(sizeof(struct skk_line));
580
sl->need_save = p->need_save;
581
sl->head = strdup(p->head);
582
sl->okuri_head = p->okuri_head;
583
sl->nr_cand_array = p->nr_cand_array;
584
sl->cands = malloc(sizeof(struct skk_cand_array) * sl->nr_cand_array);
585
for (i = 0; i < sl->nr_cand_array; i++) {
586
struct skk_cand_array *ca = &sl->cands[i];
587
struct skk_cand_array *q = &p->cands[i];
589
ca->okuri = q->okuri ? strdup(q->okuri) : NULL;
590
ca->nr_cands = q->nr_cands;
591
ca->nr_real_cands = q->nr_real_cands;
592
ca->cands = malloc(sizeof(char *) * ca->nr_cands);
593
for (j = 0; j < ca->nr_cands; j++)
594
ca->cands[j] = strdup(q->cands[j]);
595
ca->is_used = q->is_used;
605
static struct skk_line *
606
compose_line(struct dic_info *di, const char *word, char okuri_head, char *entry)
610
sl = alloc_skk_line(word, okuri_head);
612
compose_line_parts(di, sl, NULL, entry);
618
add_line_to_cache_head(struct dic_info *di, struct skk_line *sl)
620
sl->next = di->head.next;
624
di->cache_modified = 1;
628
move_line_to_cache_head(struct dic_info *di, struct skk_line *sl)
630
struct skk_line *prev;
632
if (di->head.next == sl)
635
prev = di->head.next;
636
while (prev->next != sl) {
639
prev->next = sl->next;
640
sl->next = di->head.next;
643
di->cache_modified = 1;
648
add_line_to_cache_last(struct dic_info *di, struct skk_line *sl)
650
struct skk_line *prev;
652
if (di->head.next == NULL)
655
prev = di->head.next;
664
di->cache_modified = 1;
668
static struct skk_line *
669
search_line_from_server(struct dic_info *di, const char *s, char okuri_head)
674
char buf[SKK_SERV_BUFSIZ];
676
char *idx = alloca(strlen(s) + 2);
678
sprintf(idx, "%s%c", s, okuri_head);
680
fprintf(wserv, "1%s \n", idx);
682
if (ret != 0 && errno == EPIPE) {
683
di->skkserv_ok = open_skkserv(di->skkserv_portnum);
687
line = malloc(strlen(idx) + 2);
688
sprintf(line, "%s ", idx);
689
read(skkservsock, &r, 1);
690
if (r == '1') { /* succeeded */
692
ret = read(skkservsock, &r, 1);
694
fprintf(stderr, "skkserv connection closed\n");
699
line = realloc(line, strlen(line) + n + 1);
700
strncat(line, buf, n);
705
if (n == SKK_SERV_BUFSIZ - 1) {
706
line = realloc(line, strlen(line) + n + 2);
707
strncat(line, buf, n + 1);
713
sl = compose_line(di, s, okuri_head, line);
717
while (read(skkservsock, &r, 1) > 0 && r != '\n');
722
static struct skk_line *
723
search_line_from_file(struct dic_info *di, const char *s, char okuri_head)
729
char *idx = alloca(strlen(s) + 2);
735
sprintf(idx, "%s%c", s, okuri_head);
737
n = do_search_line(di, idx, di->first, di->border - 1, -1);
739
n = do_search_line(di, idx, di->border, di->size - 1, 1);
744
p = find_line(di, n);
745
len = calc_line_len(p);
746
line = malloc(len + 1);
748
strncat(line, p, len);
749
sl = compose_line(di, s, okuri_head, line);
754
static struct skk_line *
755
search_line_from_cache(struct dic_info *di, const char *s, char okuri_head)
762
/* search from cache */
763
for (sl = di->head.next; sl; sl = sl->next) {
764
if (!strcmp(sl->head, s) && sl->okuri_head == okuri_head)
771
static struct skk_cand_array *
772
find_cand_array(struct dic_info *di, const char *s,
773
char okuri_head, const char *okuri,
774
int create_if_not_found)
776
struct skk_line *sl, *sl_file;
777
struct skk_cand_array *ca;
783
sl = search_line_from_cache(di, s, okuri_head);
786
sl = search_line_from_server(di, s, okuri_head);
788
sl = search_line_from_file(di, s, okuri_head);
790
if (!create_if_not_found)
792
sl = alloc_skk_line(s, okuri_head);
795
add_line_to_cache_head(di, sl);
798
ca = find_candidate_array_from_line(sl, okuri, create_if_not_found);
801
merge_base_candidates_to_array(sl, ca);
805
sl_file = search_line_from_server(di, s, okuri_head);
807
sl_file = search_line_from_file(di, s, okuri_head);
808
merge_base_candidates_to_array(sl_file, ca);
809
free_skk_line(sl_file);
816
static struct skk_cand_array *
817
find_cand_array_lisp(uim_lisp head_, uim_lisp okuri_head_, uim_lisp okuri_,
818
int create_if_not_found)
822
const char *okuri = NULL;
823
struct skk_cand_array *ca;
825
hs = uim_scm_refer_c_str(head_);
826
if (okuri_ != uim_scm_null_list()) {
827
okuri = uim_scm_refer_c_str(okuri_);
829
if (okuri_head_ == uim_scm_null_list()) {
832
const char *os = uim_scm_refer_c_str(okuri_head_);
836
ca = find_cand_array(skk_dic, hs, o, okuri, create_if_not_found);
841
* purged_cand: /(skk-ignore-dic-word "foo" "bar" ...)/
842
* purged_words: {"foo", "bar", ..., NULL}
845
is_purged_cand(const char *str)
849
p = strstr(str, "(skk-ignore-dic-word ");
857
get_purged_words(const char *str)
862
const char *evaluated_word;
865
int len = 0, word_len;
868
p = strstr(str, "(skk-ignore-dic-word");
878
if (*p == '"' && p[-1] != '\\') {
885
char *orig = malloc(len + 1);
888
words = realloc(words, sizeof(char *) * nr);
890
words = malloc(sizeof(char *));
892
strncpy(orig, word, len);
895
/* need to eval word. siod dependent like \073 -> ';' */
896
UIM_EVAL_FSTRING1(NULL, "(string-append \"%s\")", orig);
897
return_val = uim_scm_return_value();
898
if (return_val == uim_scm_null_list()) {
899
words[nr - 1] = malloc(len + 1);
900
strncpy(words[nr - 1], orig, len);
901
words[nr - 1][len] = '\0';
903
evaluated_word = uim_scm_refer_c_str(return_val);
904
word_len = strlen(evaluated_word);
905
words[nr - 1] = malloc(word_len + 1);
906
strncpy(words[nr - 1], evaluated_word, word_len);
907
words[nr - 1][word_len] = '\0';
916
words = realloc(words, sizeof(char *) * (nr + 1));
923
nr_purged_words(char **p)
933
free_allocated_purged_words(char **p)
948
is_purged_only(struct skk_cand_array *ca)
953
if (ca->nr_real_cands > 1)
956
if ((purged_words = get_purged_words(ca->cands[0])) != NULL) {
957
int nr_purged = nr_purged_words(purged_words);
958
/* going to compare words beyond nr_real_cands */
959
for (i = ca->nr_real_cands; i < ca->nr_cands; i++) {
960
for (j = 0; j < nr_purged; j++) {
961
/* return false if there is any different candidate */
962
if (strcmp(ca->cands[i], purged_words[j])) {
963
free_allocated_purged_words(purged_words);
968
free_allocated_purged_words(purged_words);
975
match_to_discarding_index(int indices[], int n)
978
while (indices[i] != -1) {
987
skk_get_entry(uim_lisp head_, uim_lisp okuri_head_, uim_lisp okuri_)
989
struct skk_cand_array *ca;
990
ca = find_cand_array_lisp(head_, okuri_head_, okuri_, 0);
991
if (ca && ca->nr_cands > 0 && !is_purged_only(ca))
998
skk_store_replaced_numeric_str(uim_lisp head_)
1003
int prev_is_num = 0;
1004
int i, numlen = 0, start = 0;
1005
char *numstr = NULL;
1006
uim_lisp lst = uim_scm_null_list();
1008
str = uim_scm_refer_c_str(head_);
1011
for (i = 0; i < len; i++) {
1012
if (isdigit((unsigned char)str[i])) {
1013
if (prev_is_num == 0) {
1022
/* add number into list */
1024
numstr = malloc(numlen + 1);
1026
numstr = realloc(numstr, numlen + 1);
1027
strncpy(numstr, &str[start], numlen);
1028
numstr[numlen] = '\0';
1029
lst = uim_scm_cons(uim_scm_make_str(numstr), lst);
1036
* Add last number into list if string is ended with numeric
1041
numstr = malloc(numlen + 1);
1043
numstr = realloc(numstr, numlen + 1);
1044
strncpy(numstr, &str[start], numlen);
1045
numstr[numlen] = '\0';
1046
lst = uim_scm_cons(uim_scm_make_str(numstr), lst);
1050
return uim_scm_reverse(lst);
1053
static char *wide_num_list[] =
1054
{"��", "��", "��", "��", "��", "��", "��", "��", "��", "��"};
1055
static char *kanji_num_list[] =
1056
{"��", "��", "��", "��", "��", "��", "ϻ", "��", "Ȭ", "��"};
1057
static char *kanji_num_position_list[] =
1058
{NULL, "��", "ɴ", "��", "��", NULL, NULL, NULL, "��", NULL,
1059
NULL, NULL, "��", NULL, NULL, NULL, "��", NULL, NULL, NULL};
1060
static char *kanji_check_num_list[] =
1061
{"��", "��", "б", "��", "��", "��", "ϻ", "��", "Ȭ", "��"};
1062
static char *kanji_check_num_position_list[] =
1063
{NULL, "��", "ɴ", "��", "��", NULL, NULL, NULL, "��", NULL,
1064
NULL, NULL, "��", NULL, NULL, NULL, "��", NULL, NULL, NULL};
1067
numeric_wide_or_kanji_conv(const char *numstr, int method)
1072
len = strlen(numstr);
1073
mbstr = malloc(len * 2 + 1);
1075
for (i = 0; i < len; i++) {
1077
strcpy(&mbstr[i * 2], wide_num_list[numstr[i] - '0']);
1079
strcpy(&mbstr[i * 2], kanji_num_list[numstr[i] - '0']);
1081
mbstr[len * 2] = '\0';
1087
numeric_kanji_with_position_conv(const char *numstr)
1090
int i, j, len, mblen;
1092
int head_is_zero = 0;
1094
len = strlen(numstr);
1095
if (len > 20) /* too big number */
1096
return strdup(numstr);
1098
mbstr = malloc(len * 2 + 1);
1101
for (i = 0, j = 0; j < len; i++, j++) {
1102
position = len - j - 1;
1103
if (numstr[j] == '0') {
1106
/* check zero at the head */
1110
/* add the four-digit group unit (man = 10^4, oku = 10^8, chou = 10^12, kei = 10^16) for zero */
1111
if ((position >= 4) && ((position % 4) == 0) && !head_is_zero) {
1112
int use_position = 0;
1114
if (!((numstr[j - 1] == '0') && (numstr[j - 2] == '0') &&
1115
(numstr[j - 3] == '0')))
1117
} else if (j == 2) {
1118
if (!((numstr[j - 1] == '0') && (numstr[j - 2] == '0')))
1120
} else if (j == 1) {
1121
if (!(numstr[j - 1] == '0'))
1127
if (mblen > len * 2)
1128
mbstr = realloc(mbstr, mblen + 2);
1129
strcpy(&mbstr[i * 2], kanji_num_position_list[position]);
1134
if (head_is_zero == 1)
1137
/* replace numstr[j] with kanji number */
1138
if (numstr[j] == '1') {
1140
* use "��" only for the one at the place of ��, ��, ��, ��,
1143
if (((position % 4) == 0) ||
1145
((position % 4) == 3) &&
1146
(numstr[j + 1] == '0') &&
1147
(numstr[j + 2] == '0') &&
1148
(numstr[j + 3] == '0'))) {
1149
strcpy(&mbstr[i * 2], kanji_num_list[1]);
1155
strcpy(&mbstr[i * 2], kanji_num_list[numstr[j] - '0']);
1158
/* add juu (10), hyaku (100) or sen (1000) for a digit above the units place of its four-digit group */
1160
if ((position % 4) != 0) {
1163
if (mblen > len * 2)
1164
mbstr = realloc(mbstr, mblen + 2);
1165
strcpy(&mbstr[i * 2], kanji_num_position_list[position % 4]);
1170
if (kanji_num_position_list[position]) {
1173
if (mblen > len * 2)
1174
mbstr = realloc(mbstr, mblen + 2);
1175
strcpy(&mbstr[i * 2], kanji_num_position_list[position]);
1180
/* in case of zero */
1182
strcpy(&mbstr[0], kanji_num_list[0]);
1186
mbstr[mblen] = '\0';
1191
numeric_kanji_for_check_conv(const char *numstr)
1194
int i, j, len, mblen;
1196
int head_is_zero = 0;
1198
len = strlen(numstr);
1199
if (len > 20) /* too big number */
1200
return strdup(numstr);
1202
mbstr = malloc(len * 2 + 1);
1205
for (i = 0, j = 0; j < len; i++, j++) {
1206
position = len - j - 1;
1207
if (numstr[j] == '0') {
1210
/* check zero at the head */
1214
/* add the four-digit group unit (man = 10^4, oku = 10^8, chou = 10^12, kei = 10^16) for zero */
1215
if ((position >= 4) && ((position % 4) == 0) && !head_is_zero) {
1216
int use_position = 0;
1218
if (!((numstr[j - 1] == '0') && (numstr[j - 2] == '0') &&
1219
(numstr[j - 3] == '0')))
1221
} else if (j == 2) {
1222
if (!((numstr[j - 1] == '0') && (numstr[j - 2] == '0')))
1224
} else if (j == 1) {
1225
if (!((numstr[j - 1] == '0')))
1231
if (mblen > len * 2)
1232
mbstr = realloc(mbstr, mblen + 2);
1233
strcpy(&mbstr[i * 2], kanji_check_num_position_list[position]);
1238
if (head_is_zero == 1)
1241
/* replace numstr[j] with kanji number */
1242
strcpy(&mbstr[i * 2], kanji_check_num_list[numstr[j] - '0']);
1244
/* add juu (10), hyaku (100) or sen (1000) for a digit above the units place of its four-digit group */
1246
if ((position % 4) != 0) {
1249
if (mblen > len * 2)
1250
mbstr = realloc(mbstr, mblen + 2);
1251
strcpy(&mbstr[i * 2], kanji_check_num_position_list[position % 4]);
1256
if (kanji_check_num_position_list[position]) {
1259
if (mblen > len * 2)
1260
mbstr = realloc(mbstr, mblen + 2);
1261
strcpy(&mbstr[i * 2], kanji_check_num_position_list[position]);
1266
/* in case of zero */
1268
strcpy(&mbstr[0], kanji_check_num_list[0]);
1272
mbstr[mblen] = '\0';
1277
numeric_shogi_conv(const char *numstr)
1282
len = strlen(numstr);
1283
if (len != 2) /* allow two digit number only */
1284
return strdup(numstr);
1287
strcpy(&mbstr[0], wide_num_list[numstr[0] - '0']);
1288
strcpy(&mbstr[2], kanji_num_list[numstr[1] - '0']);
1294
/* returns string with malloc() */
1296
numeric_convert(const char *numstr, int method)
1301
* method #4 is already handled in skk_get_nth_candidate()
1305
ret = strdup(numstr);
1307
case 1: /* full-width digits */
1308
case 2: /* kanji numerals, without place units */
1309
ret = numeric_wide_or_kanji_conv(numstr, method);
1311
case 3: /* kanji numerals, with place units */
1312
ret = numeric_kanji_with_position_conv(numstr);
1314
case 5: /* check (cheque) notation */
1315
ret = numeric_kanji_for_check_conv(numstr);
1317
case 9: /* shogi notation */
1318
ret = numeric_shogi_conv(numstr);
1321
ret = strdup(numstr);
1328
skk_merge_replaced_numeric_str(uim_lisp str_, uim_lisp numlst_)
1331
int i, j, len, newlen;
1336
uim_lisp merged_str;
1338
if (str_ == uim_scm_null_list())
1339
return uim_scm_null_list();
1341
str = uim_scm_c_str(str_);
1345
for (i = 0, j = 0; j < len; i++, j++) {
1346
if (str[i] == '#') {
1347
method = str[i + 1] - '0';
1348
if (uim_scm_nullp(numlst_))
1351
numstr = uim_scm_refer_c_str(uim_scm_car(numlst_));
1353
convstr = numeric_convert(numstr, method);
1354
convlen = strlen(convstr);
1356
newlen = newlen - 2 + convlen;
1357
str = realloc(str, newlen + 1);
1358
memmove(&str[i + convlen], &str[i + 2], newlen - i - convlen + 1);
1359
memcpy(&str[i], convstr, convlen);
1360
i = i - 2 + convlen;
1362
numlst_ = uim_scm_cdr(numlst_);
1366
merged_str = uim_scm_make_str(str);
1372
skk_replace_numeric(uim_lisp head_)
1375
int prev_is_num = 0;
1376
int i, j, len, newlen;
1379
str = uim_scm_c_str(head_);
1383
for (i = 0, j = 0; j < len; i++, j++) {
1384
if (isdigit((unsigned char)str[i])) {
1385
if (prev_is_num == 0) {
1388
memmove(&str[i], &str[i + 1], newlen - i);
1397
result = uim_scm_make_str(str);
1403
find_numeric_conv_method4_mark(const char *cand, int *nth)
1410
p = strstr(cand, "#4");
1412
for (i = 0; i < len; i++) {
1413
if (cand[i] == '#' && isdigit((unsigned char)cand[i + 1])) {
1415
if (cand[i + 1] == '4')
1424
get_nth(int nth, uim_lisp lst_)
1427
/* nth is 1-based */
1428
for (i = 1; i < nth; i++) {
1429
if (uim_scm_nullp(lst_)) {
1430
return uim_scm_null_list();
1432
lst_ = uim_scm_cdr(lst_);
1434
return uim_scm_car(lst_);
1438
get_purged_cand_index(struct skk_cand_array *ca)
1445
for (i = 0; i < ca->nr_real_cands; i++) {
1446
if (is_purged_cand(ca->cands[i])) {
1455
get_ignoring_indices(struct skk_cand_array *ca, int indices[])
1458
int purged_cand_index;
1460
purged_cand_index= get_purged_cand_index(ca);
1462
if (purged_cand_index != -1) {
1463
char **purged_words = get_purged_words(ca->cands[purged_cand_index]);
1464
int nr_purged = nr_purged_words(purged_words);
1466
indices[k] = purged_cand_index;
1469
for (i = ca->nr_real_cands; i < ca->nr_cands; i++) {
1470
if (k >= IGNORING_WORD_MAX)
1472
for (j = 0; j < nr_purged; j++) {
1473
if (!strcmp(ca->cands[i], purged_words[j])) {
1480
free_allocated_purged_words(purged_words);
1488
skk_get_nth_candidate(uim_lisp nth_, uim_lisp head_, uim_lisp okuri_head_, uim_lisp okuri_, uim_lisp numlst_)
1491
struct skk_cand_array *ca, *subca;
1496
int method_place = 0;
1499
uim_lisp str_ = uim_scm_null_list();
1501
int ignoring_indices[IGNORING_WORD_MAX + 1];
1503
n = uim_scm_c_int(nth_);
1504
ca = find_cand_array_lisp(head_, okuri_head_, okuri_, 0);
1505
get_ignoring_indices(ca, ignoring_indices);
1508
/* handle #4 method of numeric conversion */
1509
if (!uim_scm_nullp(numlst_)) {
1510
for (i = 0; i < ca->nr_cands; i++) {
1511
if (match_to_discarding_index(ignoring_indices, i))
1514
if ((p = find_numeric_conv_method4_mark(ca->cands[i], &method_place))) {
1515
numstr = uim_scm_refer_c_str(get_nth(method_place, numlst_));
1516
subca = find_cand_array(skk_dic, numstr, 0, NULL, 0);
1518
for (j = 0; j < subca->nr_cands; j++) {
1520
cands = strdup(ca->cands[i]);
1521
sublen = strlen(subca->cands[j]);
1522
newlen = strlen(ca->cands[i]) - 2 + sublen;
1523
mark = p - ca->cands[i];
1525
cands = realloc(cands, newlen + 1);
1526
memmove(&cands[mark + sublen],
1528
newlen - mark - sublen + 1);
1529
memcpy(&cands[mark], subca->cands[j], sublen);
1531
str_ = uim_scm_make_str(cands);
1540
cands = ca->cands[i];
1547
for (i = 0; i < ca->nr_cands; i++) {
1548
if (match_to_discarding_index(ignoring_indices, i))
1551
cands = ca->cands[i];
1560
str_ = uim_scm_make_str(cands);
1565
skk_get_nr_candidates(uim_lisp head_, uim_lisp okuri_head_, uim_lisp okuri_, uim_lisp numlst_)
1567
struct skk_cand_array *ca, *subca;
1569
int i, nr_cands = 0;
1571
int method_place = 0;
1573
int ignoring_indices[IGNORING_WORD_MAX + 1];
1575
ca = find_cand_array_lisp(head_, okuri_head_, okuri_, 0);
1579
nr_cands -= get_ignoring_indices(ca, ignoring_indices);
1581
/* handle #4 method of numeric conversion */
1582
if (!uim_scm_nullp(numlst_)) {
1583
for (i = 0; i < n; i++) {
1584
if (match_to_discarding_index(ignoring_indices, i))
1587
if (find_numeric_conv_method4_mark(ca->cands[i], &method_place)) {
1588
numstr = uim_scm_refer_c_str(get_nth(method_place, numlst_));
1590
subca = find_cand_array(skk_dic, numstr, 0, NULL, 0);
1592
nr_cands += subca->nr_cands;
1597
return uim_scm_make_int(nr_cands);
1600
static struct skk_comp_array *
1601
make_comp_array_from_cache(struct dic_info *di, const char *s)
1603
struct skk_line *sl;
1604
struct skk_comp_array *ca;
1609
ca = malloc(sizeof(struct skk_comp_array));
1616
/* search from cache */
1617
for (sl = di->head.next; sl; sl = sl->next) {
1618
if (/* string 's' is part of sl->head */
1619
!strncmp(sl->head, s, strlen(s)) && strcmp(sl->head, s) &&
1620
/* and sl is okuri-nasi line */
1621
(sl->okuri_head == '\0')) {
1623
ca->comps = realloc(ca->comps, sizeof(char *) * ca->nr_comps);
1624
ca->comps[ca->nr_comps - 1] = strdup(sl->head);
1628
if (ca->nr_comps == 0) {
1632
ca->head = strdup(s);
1633
ca->next = skk_comp;
1639
static struct skk_comp_array *
1640
find_comp_array(struct dic_info *di, const char *s)
1642
struct skk_comp_array *ca;
1647
for (ca = skk_comp; ca; ca = ca->next) {
1648
if (!strcmp(ca->head, s))
1652
ca = make_comp_array_from_cache(di, s);
1658
static struct skk_comp_array *
1659
find_comp_array_lisp(uim_lisp head_)
1662
struct skk_comp_array *ca;
1664
hs = uim_scm_refer_c_str(head_);
1665
ca = find_comp_array(skk_dic, hs);
1670
skk_get_completion(uim_lisp head_)
1672
struct skk_comp_array *ca;
1673
ca = find_comp_array_lisp(head_);
1682
skk_get_nth_completion(uim_lisp nth_, uim_lisp head_)
1685
struct skk_comp_array *ca;
1688
ca = find_comp_array_lisp(head_);
1689
n = uim_scm_c_int(nth_);
1690
if (ca && ca->nr_comps > n) {
1692
return uim_scm_make_str(str);
1694
return uim_scm_null_list();
1698
skk_get_nr_completions(uim_lisp head_)
1701
struct skk_comp_array *ca;
1703
ca = find_comp_array_lisp(head_);
1707
return uim_scm_make_int(n);
1711
skk_clear_completions(uim_lisp head_)
1714
struct skk_comp_array *ca, *ca_prev;
1717
hs = uim_scm_refer_c_str(head_);
1718
for (ca = skk_comp; ca; ca = ca->next) {
1719
if (!strcmp(ca->head, hs)) {
1725
if (ca && ca->refcount == 0) {
1726
for (i = 0; i < ca->nr_comps; i++) {
1732
if (ca == skk_comp) {
1733
skk_comp = ca->next;
1737
while (ca_prev->next != ca) {
1738
ca_prev = ca_prev->next;
1740
ca_prev->next = ca->next;
1748
reorder_candidate(struct skk_cand_array *ca, const char *str)
1753
/* find index of the candidate */
1754
for (i = 0; i < ca->nr_cands; i++) {
1755
if (!strcmp(str, ca->cands[i])) {
1762
tmp = ca->cands[nth];
1764
for (i = nth; i > 0; i--)
1765
ca->cands[i] = ca->cands[i - 1];
1767
skk_dic->cache_modified = 1;
1770
if (nth >= ca->nr_real_cands)
1771
ca->nr_real_cands++;
1774
static void push_purged_word(struct skk_cand_array *ca, int nth, int append, char *word)
1776
char *cand = ca->cands[nth];
1777
int len, oldlen = strlen(cand);
1778
char *p = sanitize_word(word, NULL);
1784
/* check whether the word is already registered */
1785
char **purged_words = get_purged_words(cand);
1786
int nr_purged = nr_purged_words(purged_words);
1788
for (j = 0; j < nr_purged; j++) {
1789
if (!strcmp(purged_words[j], word)) {
1790
free_allocated_purged_words(purged_words);
1794
free_allocated_purged_words(purged_words);
1796
len = oldlen + strlen(p) + 3;
1797
cand = realloc(cand, len + 1);
1799
cand[oldlen - 1] = '\0';
1800
strcat(cand, " \"");
1802
strcat(cand, "\")");
1803
ca->cands[nth] = cand;
1804
skk_dic->cache_modified = 1;
1807
cand = realloc(cand, strlen("(skk-ignore-dic-word \"\")") + strlen(p) + 1);
1809
sprintf(cand, "(skk-ignore-dic-word \"%s\")", p);
1810
ca->cands[nth] = cand;
1811
skk_dic->cache_modified = 1;
1816
static void remove_candidate_from_array(struct skk_cand_array *ca, int nth)
1820
free(ca->cands[nth]);
1821
for (i = nth; i < ca->nr_cands - 1; i++)
1822
ca->cands[i] = ca->cands[i + 1];
1823
if (nth < ca->nr_real_cands)
1824
ca->nr_real_cands--;
1826
skk_dic->cache_modified = 1;
1830
merge_word_to_real_cand_array(struct skk_cand_array *ca, const char *word)
1835
push_back_candidate_to_array(ca, word);
1836
nth = ca->nr_cands - 1;
1838
/* move word at the end of real cand array */
1839
tmp = ca->cands[nth];
1840
if (nth >= ca->nr_real_cands) {
1841
for (i = nth; i > ca->nr_real_cands; i--)
1842
ca->cands[i] = ca->cands[i - 1];
1843
ca->cands[ca->nr_real_cands] = tmp;
1844
ca->nr_real_cands++;
1848
static int exist_in_purged_cand(struct skk_cand_array *ca,
1851
int i, purged_cand_index;
1852
char **purged_words;
1855
purged_cand_index = get_purged_cand_index(ca);
1856
if (purged_cand_index == -1)
1859
purged_words = get_purged_words(ca->cands[purged_cand_index]);
1860
nr_purged = nr_purged_words(purged_words);
1862
for (i = 0; i < nr_purged; i++) {
1863
if (!strcmp(purged_words[i], word)) {
1864
free_allocated_purged_words(purged_words);
1868
free_allocated_purged_words(purged_words);
1872
static int index_in_real_cands(struct skk_cand_array *ca, const char *str)
1875
for (i = 0; i < ca->nr_real_cands; i++) {
1876
if (!strcmp(ca->cands[i], str))
1883
/*
 * For each word listed in purged_cand, look for an equal string among the
 * real candidates of dst_ca and remove it with
 * remove_candidate_from_array().  Before that, each word is tested for
 * presence among the real candidates of src_ca.
 * NOTE(review): the action taken when the word is found in src_ca is not
 * shown here (presumably the word is skipped); confirm.
 */
remove_purged_words_from_dst_cand_array(struct skk_cand_array *src_ca,
1884
struct skk_cand_array *dst_ca, const char *purged_cand)
1886
char **purged_words;
1890
purged_words = get_purged_words(purged_cand);
1891
nr_words = nr_purged_words(purged_words);
1893
for (i = 0; i < nr_words; i++) {
1896
if (index_in_real_cands(src_ca, purged_words[i]) != -1)
1899
for (j = 0; j < dst_ca->nr_real_cands; j++) {
1900
if (!strcmp(purged_words[i], dst_ca->cands[j])) {
1906
remove_candidate_from_array(dst_ca, j);
1908
free_allocated_purged_words(purged_words);
1912
/*
 * Merge the purged-word list stored in src_ca->cands[src_nth] into the
 * purged entry dst_ca->cands[dst_nth].
 * Every source word is compared against the destination words.  Source
 * words are appended to the destination entry with push_purged_word() and
 * the words purged by the source are removed from the real candidates of
 * dst_ca.  Both temporary word lists are released at the end.
 * NOTE(review): the flag deciding whether a source word is appended is not
 * shown here; presumably only words missing from the destination list are
 * pushed.
 */
merge_purged_cands(struct skk_cand_array *src_ca, struct skk_cand_array *dst_ca,
1913
int src_nth, int dst_nth)
1915
char *src_cand = src_ca->cands[src_nth];
1916
char *dst_cand = dst_ca->cands[dst_nth];
1917
char **dst_purged_words, **src_purged_words;
1918
int nr_dst_purged_words, nr_src_purged_words;
1921
src_purged_words = get_purged_words(src_cand);
1922
dst_purged_words = get_purged_words(dst_cand);
1923
nr_src_purged_words = nr_purged_words(src_purged_words);
1924
nr_dst_purged_words = nr_purged_words(dst_purged_words);
1926
for (i = 0; i < nr_src_purged_words; i++) {
1928
for (j = 0; j < nr_dst_purged_words; j++) {
1929
if (!strcmp(src_purged_words[i], dst_purged_words[j])) {
1935
push_purged_word(dst_ca, dst_nth, 1, src_purged_words[i]);
1936
remove_purged_words_from_dst_cand_array(src_ca, dst_ca, src_ca->cands[src_nth]);
1939
free_allocated_purged_words(dst_purged_words);
1940
free_allocated_purged_words(src_purged_words);
1944
/*
 * Bring a purged entry from src_ca into dst_ca: first remove the words it
 * lists from the real candidates of dst_ca, then add purged_cand itself to
 * dst_ca as a real candidate.
 */
merge_purged_cand_to_dst_array(struct skk_cand_array *src_ca,
1945
struct skk_cand_array *dst_ca, char *purged_cand)
1947
remove_purged_words_from_dst_cand_array(src_ca, dst_ca, purged_cand);
1948
merge_word_to_real_cand_array(dst_ca, purged_cand);
1952
/*
 * Add src_cand to dst_ca as the last real candidate, taking purged words
 * into account.  The first test detects a word that is purged in dst_ca but
 * not purged in src_ca.  The remaining code appends the word and rotates
 * it to index nr_real_cands, the same steps merge_word_to_real_cand_array
 * performs.
 * NOTE(review): the statement guarded by the first test is not shown here
 * (presumably an early return); confirm.
 */
merge_word_to_dst_cand_array_with_purged_words(struct skk_cand_array *dst_ca,
1953
struct skk_cand_array *src_ca, const char *src_cand)
1958
if (exist_in_purged_cand(dst_ca, src_cand) && !exist_in_purged_cand(src_ca, src_cand))
1961
push_back_candidate_to_array(dst_ca, src_cand);
1962
nth = dst_ca->nr_cands - 1;
1964
/* move the word to the end of the real cand array */
1965
tmp = dst_ca->cands[nth];
1966
if (nth >= dst_ca->nr_real_cands) {
1967
for (i = nth; i > dst_ca->nr_real_cands; i--)
1968
dst_ca->cands[i] = dst_ca->cands[i - 1];
1969
dst_ca->cands[dst_ca->nr_real_cands] = tmp;
1970
dst_ca->nr_real_cands++;
1975
/*
 * Merge the real candidates of src_ca into dst_ca.
 * For each real candidate of src_ca the real candidates of dst_ca are
 * scanned, remembering the first purged entry of dst_ca and comparing for
 * an identical string.  A merge helper is then chosen by whether the source
 * candidate is a purged entry and whether dst_ca has one:
 *   both have one     : merge_purged_cands()
 *   only the source   : merge_purged_cand_to_dst_array()
 *   only dst_ca       : merge_word_to_dst_cand_array_with_purged_words()
 *   remaining case    : merge_word_to_real_cand_array()
 * Both loop bounds are snapshots of nr_real_cands taken before any merging.
 * NOTE(review): handling of an identical string found in dst_ca is not
 * shown here; presumably such duplicates are not merged again.
 */
merge_real_candidate_array(struct skk_cand_array *src_ca,
1976
struct skk_cand_array *dst_ca)
1979
int src_nr_real_cands = src_ca->nr_real_cands;
1980
int dst_nr_real_cands = dst_ca->nr_real_cands;
1982
/*
 * NOTE(review): both pointers were already dereferenced by the initializers
 * above, so this check comes too late to prevent a NULL dereference.
 */
if (!src_ca || !dst_ca)
1985
for (i = 0; i < src_nr_real_cands; i++) {
1987
int src_purged_cand_index = -1;
1988
int dst_purged_cand_index = -1;
1990
if (is_purged_cand(src_ca->cands[i]))
1991
src_purged_cand_index = i;
1993
for (j = 0; j < dst_nr_real_cands; j++) {
1994
if (dst_purged_cand_index == -1 && is_purged_cand(dst_ca->cands[j]))
1995
dst_purged_cand_index = j;
1996
if (!strcmp(src_ca->cands[i], dst_ca->cands[j]))
2002
if (src_purged_cand_index != -1 && dst_purged_cand_index != -1)
2003
merge_purged_cands(src_ca, dst_ca, src_purged_cand_index,
2004
dst_purged_cand_index);
2005
else if (src_purged_cand_index != -1 && dst_purged_cand_index == -1)
2006
merge_purged_cand_to_dst_array(src_ca, dst_ca,
2007
src_ca->cands[src_purged_cand_index]);
2008
else if (src_purged_cand_index == -1 && dst_purged_cand_index != -1)
2009
merge_word_to_dst_cand_array_with_purged_words(dst_ca, src_ca,
2012
merge_word_to_real_cand_array(dst_ca, src_ca->cands[i]);
2018
/*
 * Handler registered as skk-lib-commit-candidate.
 * Looks up the candidate array for head_/okuri_head_/okuri_, collects the
 * indices to skip with get_ignoring_indices(), walks the candidates while
 * stepping over those indices, and reorders the array with
 * reorder_candidate().
 * When numlst_ is not empty, candidates carrying a method-4 numeric
 * conversion mark also trigger a recursive commit on the sub array found
 * for the corresponding number string.
 * With a non-empty okuri_ the array is looked up again (last argument 1)
 * and reordered, and the base array sl->cands[0] is reordered as well.
 * Finally the line is flagged need_save and moved to the head of the cache.
 * NOTE(review): the selection of the committed string str is not shown
 * here.
 */
skk_commit_candidate(uim_lisp head_, uim_lisp okuri_head_,
2019
uim_lisp okuri_, uim_lisp nth_, uim_lisp numlst_)
2022
struct skk_cand_array *ca, *subca;
2027
int method_place = 0;
2029
int ignoring_indices[IGNORING_WORD_MAX + 1];
2031
nth = uim_scm_c_int(nth_);
2032
ca = find_cand_array_lisp(head_, okuri_head_, okuri_, 0);
2036
get_ignoring_indices(ca, ignoring_indices);
2038
/* handle #4 method of numeric conversion */
2039
if (!uim_scm_nullp(numlst_)) {
2040
for (i = 0; i < ca->nr_cands; i++) {
2041
if (match_to_discarding_index(ignoring_indices, i))
2044
if (find_numeric_conv_method4_mark(ca->cands[i], &method_place)) {
2045
/* the number string at method_place names the sub entry to look up */
numstr_ = get_nth(method_place, numlst_);
2046
numstr = uim_scm_refer_c_str(numstr_);
2047
subca = find_cand_array(skk_dic, numstr, 0, NULL, 0);
2049
for (j = 0; j < subca->nr_cands; j++) {
2052
/* reorder sub candidate */
2053
skk_commit_candidate(numstr_, uim_scm_null_list(), uim_scm_null_list(), uim_scm_make_int(j), uim_scm_null_list());
2072
for (i = 0; i < ca->nr_cands; i++) {
2073
if (match_to_discarding_index(ignoring_indices, i))
2084
reorder_candidate(ca, str);
2086
if (okuri_ != uim_scm_null_list()) {
2087
struct skk_line *sl;
2091
okuri = uim_scm_refer_c_str(okuri_);
2093
/* index 0 is the base array, so the okuri-specific arrays start at 1 */
for (i = 1; i < sl->nr_cand_array; i++) {
2094
if (!strcmp(okuri, sl->cands[i].okuri)) {
2100
ca = find_cand_array_lisp(head_, okuri_head_, okuri_, 1);
2101
reorder_candidate(ca, str);
2103
/* also reorder base candidate array */
2104
reorder_candidate(&sl->cands[0], str);
2108
ca->line->need_save = 1;
2109
move_line_to_cache_head(skk_dic, ca->line);
2114
/*
 * Mark the nth candidate of ca as purged.
 * A duplicate of the word is taken first.  If ca has no purged entry yet
 * (get_purged_cand_index() returns -1), push_purged_word(ca, nth, 0, str)
 * is applied to the nth slot itself; otherwise the word is appended to the
 * existing purged entry and the nth slot is removed.  The same word is
 * then looked up in the base array ca->line->cands[0] and purged there too
 * through a recursive call.
 * NOTE(review): the condition guarding the base-array step and the free()
 * of the strdup() copy are not shown here; confirm both.
 */
static void purge_candidate(struct skk_cand_array *ca, int nth)
2122
str = strdup(ca->cands[nth]);
2124
if ((i = get_purged_cand_index(ca)) == -1) {
2125
/* new purged cand in the array */
2126
push_purged_word(ca, nth, 0, str);
2128
/* append the word to the already existing purged cand and remove its own entry */
2129
push_purged_word(ca, i, 1, str);
2130
remove_candidate_from_array(ca, nth);
2135
/* also purge the word in the base cand array */
2136
int index = index_in_real_cands(&ca->line->cands[0], str);
2138
purge_candidate(&ca->line->cands[0], index);
2145
/*
 * Handler registered as skk-lib-purge-candidate.
 * Follows the same lookup steps as skk_commit_candidate: finds the
 * candidate array, collects the indices to skip, and walks method-4
 * numeric candidates when numlst_ is not empty (words in the sub array are
 * deliberately left alone, as the retained comment text says).  The final
 * loop walks the candidates while stepping over the skipped indices and
 * calls purge_candidate() only for an index below nr_real_cands, that is,
 * for a real candidate.
 */
skk_purge_candidate(uim_lisp head_, uim_lisp okuri_head_,
2146
uim_lisp okuri_, uim_lisp nth_, uim_lisp numlst_)
2148
int nth = uim_scm_c_int(nth_);
2149
struct skk_cand_array *ca, *subca;
2154
int method_place = 0;
2156
int ignoring_indices[IGNORING_WORD_MAX + 1];
2158
ca = find_cand_array_lisp(head_, okuri_head_, okuri_, 0);
2160
return uim_scm_f(); /* shouldn't happen */
2161
get_ignoring_indices(ca, ignoring_indices);
2163
/* handle #4 method of numeric conversion */
2164
if (!uim_scm_nullp(numlst_)) {
2165
for (i = 0; i < ca->nr_cands; i++) {
2166
if (match_to_discarding_index(ignoring_indices, i))
2169
if (find_numeric_conv_method4_mark(ca->cands[i], &method_place)) {
2170
numstr_ = get_nth(method_place, numlst_);
2171
numstr = uim_scm_refer_c_str(numstr_);
2172
subca = find_cand_array(skk_dic, numstr, 0, NULL, 0);
2174
for (j = 0; j < subca->nr_cands; j++) {
2178
* don't purge word in sub candidate array
2179
* skk_purge_candidate(numstr_, uim_scm_null_list(), uim_scm_null_list(), uim_scm_make_int(j), uim_scm_null_list());
2199
for (i = 0; i < ca->nr_cands; i++) {
2200
if (match_to_discarding_index(ignoring_indices, i))
2207
if (i < ca->nr_real_cands)
2208
purge_candidate(ca, i);
2214
/*
 * Register word in ca.  The existing candidates are searched for an equal
 * string, the word is appended with push_back_candidate_to_array(), then
 * reorder_candidate() is applied and the owning line is flagged need_save.
 * NOTE(review): the flag that skips the append when the word already
 * exists is not shown here; confirm.
 */
learn_word_to_cand_array(struct skk_cand_array *ca, const char *word)
2217
for (i = 0; i < ca->nr_cands; i++) {
2218
if (!strcmp(word, ca->cands[i])) {
2224
push_back_candidate_to_array(ca, word);
2226
reorder_candidate(ca, word);
2227
ca->line->need_save = 1;
2231
/*
 * Build a heap-allocated string that starts with a copy of prefix and is
 * extended while walking word one character at a time, growing the buffer
 * with realloc() at each step.
 * Some characters are replaced by escape text; the replacements visible
 * here are the octal escapes 057 (slash) and 073 (semicolon), the two
 * square brackets, backslash-n, backslash-r, a doubled backslash and an
 * escaped double quote.  Other characters take the single-character path,
 * and the result is finished with a double quote and a right parenthesis.
 * NOTE(review): which input character selects each replacement is decided
 * by case labels not shown here.
 * NOTE(review): every realloc() result is assigned straight back to str, so
 * the old buffer leaks and str becomes NULL if an allocation fails.
 */
quote_word(const char *word, const char *prefix)
2238
str = strdup(prefix);
2242
for (p = word; *p; p++) {
2247
str = realloc(str, len + strlen("\\057") + 1);
2248
strcat(str, "\\057");
2251
str = realloc(str, len + strlen("[") + 1);
2255
str = realloc(str, len + strlen("]") + 1);
2259
str = realloc(str, len + strlen("\\n") + 1);
2263
str = realloc(str, len + strlen("\\r") + 1);
2267
str = realloc(str, len + strlen("\\\\") + 1);
2268
strcat(str, "\\\\");
2271
str = realloc(str, len + strlen("\\073") + 1);
2272
strcat(str, "\\073");
2275
str = realloc(str, len + strlen("\\\"") + 1);
2276
strcat(str, "\\\"");
2279
/* room for one more character plus the terminating NUL */
str = realloc(str, len + 2);
2281
str[len + 1] = '\0';
2287
str = realloc(str, len + strlen("\")") + 1);
2295
/*
 * Validate str and hand it to quote_word() together with prefix.
 * NULL or empty input is tested for first, and the characters are then
 * scanned with an is_space_only flag that starts at 1.
 * NOTE(review): the values returned for rejected input, and the conditions
 * under which quote_word() is reached, are not shown here; confirm against
 * the callers.
 */
sanitize_word(const char *str, const char *prefix)
2298
int is_space_only = 1;
2300
if (!str || !strlen(str)) {
2303
for (p = str; *p; p++) {
2313
return quote_word(str, prefix);
2328
/*
 * Handler registered as skk-lib-learn-word.
 * The word is passed through sanitize_word() with the concat prefix and
 * learned into the candidate array found for head_/okuri_head_/okuri_
 * (lookup with last argument 1).  A second lookup with an empty okuri list
 * learns the same word into that array as well.
 * NOTE(review): the condition guarding the second lookup and the release
 * of the sanitized word are not shown here; confirm.
 */
skk_learn_word(uim_lisp head_, uim_lisp okuri_head_, uim_lisp okuri_, uim_lisp word_)
2330
struct skk_cand_array *ca;
2334
tmp = uim_scm_refer_c_str(word_);
2335
word = sanitize_word(tmp, "(concat \"");
2339
ca = find_cand_array_lisp(head_, okuri_head_, okuri_, 1);
2341
learn_word_to_cand_array(ca, word);
2344
tmp = uim_scm_refer_c_str(okuri_);
2346
ca = find_cand_array_lisp(head_, okuri_head_, uim_scm_null_list(), 1);
2348
learn_word_to_cand_array(ca, word);
2356
/*
 * Relink the list of cached lines that hangs off di->head; judging by the
 * name and the prev/next locals this reverses the list order.
 * NOTE(review): only the final assignment of di->head.next is shown here;
 * confirm the traversal.
 */
reverse_cache(struct dic_info *di)
2358
struct skk_line *sl, *prev, *next;
2368
di->head.next = prev;
2372
/*
 * Parse one line of a personal dictionary and add it to the cache of di.
 * A stack buffer sized for a copy of line is obtained with alloca(), and
 * the first space in it separates the heading word from the rest.  A
 * missing space, or a space in the first position, is tested for before
 * anything else.  When the heading starts with a non-ASCII byte and the
 * character just before the space is a lowercase ASCII letter, the entry is
 * treated as okuri-ari and that letter is passed to compose_line();
 * otherwise 0 is passed.  Every candidate array of the new line gets
 * nr_real_cands set to nr_cands before the line is put at the cache head.
 * NOTE(review): the copy into buf and the handling of a rejected line are
 * not shown here.
 */
parse_dic_line(struct dic_info *di, char *line)
2375
struct skk_line *sl;
2378
buf = alloca(strlen(line) + 1);
2380
sep = strchr(buf, ' ');
2382
if (!sep || (sep == buf))
2386
if (!skk_isascii(buf[0]) && skk_islower(sep[-1])) { /* okuri-ari entry */
2387
char okuri_head = sep[-1];
2389
sl = compose_line(di, buf, okuri_head, line);
2391
sl = compose_line(di, buf, 0, line);
2394
/* set nr_real_cands for the candidate array from the personal dictionary */
2395
for (i = 0; i < sl->nr_cand_array; i++)
2396
sl->cands[i].nr_real_cands = sl->cands[i].nr_cands;
2397
add_line_to_cache_head(di, sl);
2401
/*
 * Write the real candidates of ca to fp, each followed by a slash.
 * Two output forms are visible: one that opens with a left bracket and the
 * okuri string, and one that prints the candidates only.
 * NOTE(review): the test choosing between the two forms is not shown here
 * (presumably whether ca->okuri is set); confirm.
 */
write_out_array(FILE *fp, struct skk_cand_array *ca)
2405
fprintf(fp, "[%s/", ca->okuri);
2406
for (i = 0; i < ca->nr_real_cands; i++)
2407
fprintf(fp, "%s/", ca->cands[i]);
2410
for (i = 0; i < ca->nr_real_cands; i++)
2411
fprintf(fp, "%s/", ca->cands[i]);
2416
/*
 * Write one cache line to fp: the heading word first, then, when
 * okuri_head is set, that character followed by a space and a slash, and
 * finally the candidate arrays of the line through write_out_array().
 * NOTE(review): the output used when okuri_head is 0 and any per-array
 * filtering inside the loop are not shown here.
 */
write_out_line(FILE *fp, struct skk_line *sl)
2418
struct skk_cand_array *ca;
2421
fprintf(fp, "%s", sl->head);
2422
if (sl->okuri_head) {
2423
fprintf(fp, "%c /", sl->okuri_head);
2427
for (i = 0; i < sl->nr_cand_array; i++) {
2429
write_out_array(fp, ca);
2435
/*
 * Take an fcntl record lock on the companion file name.lock, creating the
 * file with owner read/write permission if needed.  Callers in this file
 * pass F_RDLCK or F_WRLCK as type.  The request is made with F_SETLKW,
 * which waits until the lock can be granted.
 * NOTE(review): the values returned on success and on failure, and the
 * release of lock_fn, are not shown here; callers hand the result to
 * close_lock().
 */
open_lock(const char *name, int type)
2441
lock_fn = malloc(sizeof(char) * (strlen(name) + strlen(".lock") + 1));
2442
if (lock_fn == NULL)
2444
sprintf(lock_fn, "%s.lock", name);
2446
fd = open(lock_fn, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
2453
fl.l_whence = SEEK_SET;
2456
if (fcntl(fd, F_SETLKW, &fl) == -1) {
2473
/*
 * NOTE(review): the header of the function owning these lines is not shown
 * here; they appear to be the unlock half paired with open_lock (called as
 * close_lock elsewhere in this file).  The visible code releases the record
 * lock on fd by submitting an F_UNLCK request with F_SETLKW.
 */
fl.l_type = F_UNLCK;
2474
fl.l_whence = SEEK_SET;
2478
fcntl(fd, F_SETLKW, &fl);
2483
/*
 * Load the personal dictionary file fn into the cache of di.
 * A read lock is taken with open_lock() and released with close_lock() on
 * every visible exit path.  The file is examined with stat() and opened
 * for reading; its modification time is stored in
 * di->personal_dic_timestamp.  Input is read in pieces of at most 4096
 * bytes.  A piece that ends in a newline, while err_flag is 0 and the
 * first character is not a semicolon, has the newline stripped and is
 * passed to parse_dic_line().
 * NOTE(review): the err_flag bookkeeping for over-long lines, the fclose()
 * call and the return values are only partly shown here.
 * NOTE(review): buf[len - 1] reads before the buffer if a piece has length
 * 0 (for example input starting with a NUL byte); confirm this cannot
 * occur.
 */
read_personal_dictionary(struct dic_info *di, const char *fn)
2487
char buf[4096]; /* XXX */
2494
lock_fd = open_lock(fn, F_RDLCK);
2496
if (stat(fn, &st) == -1) {
2497
close_lock(lock_fd);
2501
fp = fopen(fn, "r");
2503
close_lock(lock_fd);
2507
di->personal_dic_timestamp = st.st_mtime;
2509
while (fgets(buf, 4096, fp)) { /* XXX */
2510
int len = strlen(buf);
2511
if (buf[len - 1] == '\n') {
2512
if (err_flag == 0) {
2513
/* lines starting with a semicolon are comments and are not parsed */
if (buf[0] != ';') {
2514
buf[len - 1] = '\0';
2515
parse_dic_line(di, buf);
2518
/* erroneous line ends here */
2526
close_lock(lock_fd);
2532
/*
 * Handler registered as skk-lib-read-personal-dictionary: obtains the C
 * string of fn_ and loads that file into the global skk_dic, returning
 * whatever read_personal_dictionary() returns.
 */
skk_read_personal_dictionary(uim_lisp fn_)
2534
const char *fn = uim_scm_refer_c_str(fn_);
2535
return read_personal_dictionary(skk_dic, fn);
2538
/*
 * Append a deep copy of src_ca to the candidate arrays of sl.
 * sl->cands grows by one element; the new element copies is_used, nr_cands
 * and nr_real_cands, and duplicates every candidate string as well as the
 * okuri string.
 * NOTE(review): the results of realloc(), malloc() and strdup() are used
 * without a visible check, and no assignment of the line back-pointer of
 * the new element is shown here; confirm.
 */
static void push_back_candidate_array_to_sl(struct skk_line *sl,
2539
struct skk_cand_array *src_ca)
2542
struct skk_cand_array *ca;
2544
sl->nr_cand_array++;
2545
sl->cands = realloc(sl->cands,
2546
sizeof(struct skk_cand_array) * sl->nr_cand_array);
2547
/* ca is the freshly added last element */
ca = &sl->cands[sl->nr_cand_array - 1];
2548
ca->is_used = src_ca->is_used;
2549
ca->nr_cands = src_ca->nr_cands;
2550
ca->cands = malloc(sizeof(char *) * src_ca->nr_cands);
2551
for (i = 0; i < ca->nr_cands; i++)
2552
ca->cands[i] = strdup(src_ca->cands[i]);
2554
ca->nr_real_cands = src_ca->nr_real_cands;
2555
ca->okuri = strdup(src_ca->okuri);
2559
/*
 * Merge the candidate arrays of src_sl into dst_sl.
 * The base arrays (index 0) are always merged with
 * merge_real_candidate_array().  Each further array of src_sl is matched
 * against the arrays of dst_sl by okuri string; a match is merged the same
 * way, and push_back_candidate_array_to_sl() copies the source array into
 * dst_sl.
 * NOTE(review): the flag restricting that copy to arrays without a match
 * is not shown here; confirm.
 */
static void compare_and_merge_skk_line(struct skk_line *dst_sl,
2560
struct skk_line *src_sl)
2563
struct skk_cand_array *dst_ca, *src_ca;
2565
if (dst_sl == NULL || src_sl == NULL)
2568
src_ca = &src_sl->cands[0];
2569
dst_ca = &dst_sl->cands[0];
2571
* check all candidate array since purged words may exist.
2573
/* if (src_ca->nr_real_cands >= dst_ca->nr_real_cands) */
2574
merge_real_candidate_array(src_ca, dst_ca);
2576
for (i = 1; i < src_sl->nr_cand_array; i++) {
2578
src_ca = &src_sl->cands[i];
2580
for (j = 1; j < dst_sl->nr_cand_array; j++) {
2581
dst_ca = &dst_sl->cands[j];
2582
if (!strcmp(src_ca->okuri, dst_ca->okuri)) {
2584
/* if (src_ca->nr_real_cands >= dst_ca->nr_real_cands) */
2585
merge_real_candidate_array(src_ca, dst_ca);
2589
push_back_candidate_array_to_sl(dst_sl, src_ca);
2593
/* for merge sort */
2595
/*
 * Ordering function for cache lines: compares the heading words with
 * strcmp() and also has a fallback that returns the difference of the
 * okuri_head characters.
 * NOTE(review): the test between the two steps is not shown here
 * (presumably the fallback applies only to equal headings); confirm.
 */
compare_entry(struct skk_line *p, struct skk_line *q)
2598
ret = strcmp(p->head, q->head);
2603
return p->okuri_head - q->okuri_head;
2607
* Return lines with differential "midashi-go" between two personal
2608
* dictionary caches. Also merge candidate arrays for lines with the same
2609
* "midashi-go". p and q need to be sorted.
2611
/*
 * Walk the lists p and q side by side using compare_entry().
 * When the current line of p orders after that of q (cmp > 0), the line of
 * q is copied with copy_skk_line().  compare_and_merge_skk_line(p, q)
 * merges a line of q into p, presumably for lines that compare equal.  A
 * second copy_skk_line(q) call follows, presumably for lines of q left
 * over after the main loop.
 * NOTE(review): how the copies are chained, how p and q advance and how
 * *len is updated are not shown here; the caller adds *len to the cache
 * length.
 */
static struct skk_line *
2612
cache_line_diffs(struct skk_line *p, struct skk_line *q, int *len)
2614
struct skk_line *r, *s, head;
2617
for (r = &head; p && q; ) {
2618
cmp = compare_entry(p, q);
2621
} else if (cmp > 0) {
2622
s = copy_skk_line(q);
2628
compare_and_merge_skk_line(p, q);
2634
s = copy_skk_line(q);
2644
/* for merge sort */
2645
/*
 * Merge step of the list sort: while both p and q still have lines, the
 * choice between them is made with compare_entry(); afterwards whichever
 * list is still non-empty is linked onto the tail r.
 * NOTE(review): the linking inside the loop and the returned head are not
 * shown here.
 */
static struct skk_line *
2646
lmerge(struct skk_line *p, struct skk_line *q)
2648
struct skk_line *r, head;
2650
for (r = &head; p && q; ) {
2651
if (compare_entry(p, q) < 0) {
2661
r->next = (p ? p : q);
2666
/*
 * Recursive merge sort over a list of cache lines.  The loop advances r
 * two nodes per iteration, which is the usual way to locate the middle of
 * a list; two sub-lists are then sorted recursively and recombined with
 * lmerge().
 * NOTE(review): the base case, the movement of q and the cut between the
 * halves are not shown here.
 */
static struct skk_line *
2667
lsort(struct skk_line *p)
2669
struct skk_line *q, *r;
2673
for (r = q->next; r && (r = r->next) != NULL; r = r->next)
2678
p = lmerge(lsort(r), lsort(p));
2684
/*
 * Fold the current contents of the dictionary file fn into the in-memory
 * cache skk_dic.
 * Steps visible here: read fn into a temporary dic_info and sort its
 * lines; remember the present order of the live cache in an array of
 * pointers; sort the live cache; let cache_line_diffs() merge candidates
 * and return the lines that exist only in the file; restore the original
 * order of the live cache from the array; put the differing lines in front
 * of the cache, add diff_len to cache_len and mark the cache modified.
 * NOTE(review): the check of the first malloc() result and the release of
 * di and cache_array are not shown here; confirm.
 */
update_personal_dictionary_cache(const char *fn)
2686
struct dic_info *di;
2687
struct skk_line *sl, *tmp, *diff, **cache_array;
2688
int i, diff_len = 0;
2690
di = (struct dic_info *)malloc(sizeof(struct dic_info));
2693
di->head.next = NULL;
2694
read_personal_dictionary(di, fn);
2695
di->head.next = lsort(di->head.next);
2697
/* keep original sequence of cache */
2698
cache_array = (struct skk_line **)malloc(sizeof(struct skk_line *)
2699
* skk_dic->cache_len);
2700
if (cache_array == NULL)
2703
sl = skk_dic->head.next;
2705
cache_array[i] = sl;
2710
skk_dic->head.next = lsort(skk_dic->head.next);
2712
/* get differential lines and merge candidate */
2713
diff = cache_line_diffs(skk_dic->head.next, di->head.next, &diff_len);
2715
/* revert sequence of the cache */
2716
if (cache_array[0]) {
2717
sl = skk_dic->head.next = cache_array[0];
2718
for (i = 0; i < skk_dic->cache_len - 1; i++) {
2719
sl->next = cache_array[i + 1];
2725
/* add differential lines at the top of the cache */
2731
sl->next = skk_dic->head.next;
2732
skk_dic->head.next = diff;
2733
skk_dic->cache_len += diff_len;
2735
skk_dic->cache_modified = 1;
2748
/*
 * Handler registered as skk-lib-save-personal-dictionary.
 * A missing dictionary or an unmodified cache is tested for first.  If the
 * file on disk carries a modification time different from the recorded
 * one, update_personal_dictionary_cache() merges it in before saving.  The
 * cache lines are then written, under a write lock, to a temporary file
 * named fn.tmp, which is renamed over fn.  After a successful stat() the
 * recorded timestamp is refreshed and cache_modified is cleared, and the
 * lock is released.
 * NOTE(review): the error paths after fopen(), fclose() and rename(), and
 * the release of tmp_fn, are not shown here; confirm.
 */
skk_save_personal_dictionary(uim_lisp fn_)
2751
const char *fn = uim_scm_refer_c_str(fn_);
2752
char *tmp_fn = NULL;
2753
struct skk_line *sl;
2757
if (!skk_dic || skk_dic->cache_modified == 0)
2761
if (stat(fn, &st) != -1) {
2762
if (st.st_mtime != skk_dic->personal_dic_timestamp)
2763
update_personal_dictionary_cache(fn);
2766
lock_fd = open_lock(fn, F_WRLCK);
2767
/* 5 = four characters of the .tmp suffix plus the terminating NUL */
if (!(tmp_fn = malloc(strlen(fn) + 5)))
2770
sprintf(tmp_fn, "%s.tmp", fn);
2771
fp = fopen(tmp_fn, "w");
2779
for (sl = skk_dic->head.next; sl; sl = sl->next) {
2781
write_out_line(fp, sl);
2784
if (fclose(fp) != 0)
2787
if (rename(tmp_fn, fn) != 0)
2790
if (stat(fn, &st) != -1) {
2791
skk_dic->personal_dic_timestamp = st.st_mtime;
2792
skk_dic->cache_modified = 0;
2796
close_lock(lock_fd);
2802
/*
 * Handler registered as skk-lib-get-annotation.
 * An empty list is returned unchanged.  Otherwise the last semicolon of
 * the string is located; when it exists and is followed by at least one
 * character, res becomes a Scheme string holding the text after it.  The
 * other visible assignment sets res to an empty Scheme string.
 * NOTE(review): the final return of res is not shown here.
 */
skk_get_annotation(uim_lisp str_)
2804
const char *str, *sep;
2807
if (str_ == uim_scm_null_list())
2808
return uim_scm_null_list();
2810
str = uim_scm_refer_c_str(str_);
2811
sep = strrchr(str, ';');
2812
/* the pre-increment moves sep past the semicolon before the test */
if (sep && (*(++sep) != '\0')) {
2813
res = uim_scm_make_str(sep);
2815
res = uim_scm_make_str("");
2821
/*
 * Handler registered as skk-lib-remove-annotation.
 * An empty list is returned unchanged.  Otherwise the C string is obtained
 * with uim_scm_c_str() (presumably an owned copy, unlike the
 * uim_scm_refer_c_str() calls used elsewhere; confirm), its last semicolon
 * is located, and the result is built from str.
 * NOTE(review): the truncation at the semicolon, the release of str and
 * the final return are not shown here; confirm.
 */
skk_remove_annotation(uim_lisp str_)
2826
if (str_ == uim_scm_null_list())
2827
return uim_scm_null_list();
2829
str = uim_scm_c_str(str_);
2830
sep = strrchr(str, ';');
2831
if (sep && (*(sep + 1) != '\0')) {
2834
res = uim_scm_make_str(str);
2840
/*
 * Handler registered as skk-lib-eval-candidate.
 * An empty list is returned unchanged.  The candidate is searched for a
 * concat form; a closing parenthesis and a closing quote-parenthesis pair
 * are required, and candidates mentioning make-string are singled out.
 * The concat form is rewritten as a string-append expression, evaluated
 * through UIM_EVAL_FSTRING1, and the evaluated text is combined with the
 * part of the candidate that precedes the form to produce the Scheme
 * string cand_.
 * NOTE(review): the results for candidates failing the checks, the
 * computation of len before the first malloc(), the branch choosing
 * between the strncpy/strcat path and the plain strcpy path, and the
 * release of str are not shown here; confirm.
 * NOTE(review): the realloc() result is assigned straight back to str, so
 * the old buffer leaks if the call fails.
 */
skk_eval_candidate(uim_lisp str_)
2842
const char *cand, *evaluated_str;
2845
uim_lisp cand_, return_val;
2847
if (str_ == uim_scm_null_list())
2848
return uim_scm_null_list();
2850
cand = uim_scm_refer_c_str(str_);
2852
/* eval concat only for now */
2853
if ((p = strstr(cand, "(concat \"")) == NULL)
2856
/* check close paren */
2857
q = strrchr(p, ')');
2858
if (!q || (strstr(p, "\")") == NULL))
2861
/* ignore make-string */
2862
if (strstr(p, "make-string"))
2866
/* replace elisp's concat with string-append */
2867
str = malloc(len + strlen("string-append") - strlen("concat") + 1);
2868
strcpy(str, "(string-append");
2869
/* copy everything after the concat keyword up to and including q */
strncat(str, p + strlen("(concat"), q - (p + strlen("(concat")) + 1);
2871
/* XXX string expansion like \073 -> ';' is siod dependent */
2872
UIM_EVAL_FSTRING1(NULL, "%s", str);
2873
return_val = uim_scm_return_value();
2874
if (return_val == uim_scm_null_list()) {
2878
evaluated_str = uim_scm_refer_c_str(return_val);
2880
/* get evaluated candidate */
2881
len = p - cand + strlen(evaluated_str);
2882
if (len > strlen(str))
2883
str = realloc(str, len + 1);
2886
/* text before the concat form is kept in front of the evaluated text */
strncpy(str, cand, p - cand);
2887
str[p - cand] = '\0';
2888
strcat(str, evaluated_str);
2890
strcpy(str, evaluated_str);
2893
cand_ = uim_scm_make_str(str);
2900
/*
 * Plugin entry point: register the C functions of this file as Scheme
 * procedures under their skk-lib-* names.  For the handlers whose
 * parameter lists are visible in this file, the digit in
 * uim_scm_init_subr_N equals the number of uim_lisp parameters.
 */
uim_plugin_instance_init(void)
2902
uim_scm_init_subr_3("skk-lib-dic-open", skk_dic_open);
2903
uim_scm_init_subr_1("skk-lib-read-personal-dictionary", skk_read_personal_dictionary);
2904
uim_scm_init_subr_1("skk-lib-save-personal-dictionary", skk_save_personal_dictionary);
2905
uim_scm_init_subr_3("skk-lib-get-entry", skk_get_entry);
2906
uim_scm_init_subr_1("skk-lib-store-replaced-numstr", skk_store_replaced_numeric_str);
2907
uim_scm_init_subr_2("skk-lib-merge-replaced-numstr", skk_merge_replaced_numeric_str);
2908
uim_scm_init_subr_1("skk-lib-replace-numeric", skk_replace_numeric);
2909
uim_scm_init_subr_5("skk-lib-get-nth-candidate", skk_get_nth_candidate);
2910
uim_scm_init_subr_4("skk-lib-get-nr-candidates", skk_get_nr_candidates);
2911
uim_scm_init_subr_5("skk-lib-commit-candidate", skk_commit_candidate);
2912
uim_scm_init_subr_5("skk-lib-purge-candidate", skk_purge_candidate);
2913
uim_scm_init_subr_4("skk-lib-learn-word", skk_learn_word);
2914
uim_scm_init_subr_1("skk-lib-get-annotation", skk_get_annotation);
2915
uim_scm_init_subr_1("skk-lib-remove-annotation", skk_remove_annotation);
2916
uim_scm_init_subr_1("skk-lib-get-completion", skk_get_completion);
2917
uim_scm_init_subr_2("skk-lib-get-nth-completion", skk_get_nth_completion);
2918
uim_scm_init_subr_1("skk-lib-get-nr-completions", skk_get_nr_completions);
2919
uim_scm_init_subr_1("skk-lib-clear-completions", skk_clear_completions);
2920
uim_scm_init_subr_1("skk-lib-eval-candidate", skk_eval_candidate);
2924
/*
 * Plugin exit point.  Visible steps: unmap the dictionary mapping when
 * skk_dic->addr is set, start a walk over the cached lines from
 * skk_dic->head.next, and test skk_dic->skkserv_ok.
 * NOTE(review): the release of the cached lines, the action taken for
 * skkserv and any guard on skk_dic itself are not shown here; confirm.
 */
uim_plugin_instance_quit(void)
2926
struct skk_line *sl, *tmp;
2931
if (skk_dic->addr) {
2932
munmap(skk_dic->addr, skk_dic->size);
2934
sl = skk_dic->head.next;
2941
if (skk_dic->skkserv_ok)
2948
/* skkserv related */
2950
/*
 * Open a TCP connection to an skkserv host on port portnum.
 * SIGPIPE is set to be ignored.  The host name comes from SKKServerHost,
 * from the SKKSERVER environment variable, or from the compile-time
 * SKK_SERVER_HOST when that macro is defined.  A name starting with a
 * digit is parsed as four dotted decimal numbers with sscanf(); any other
 * name is resolved with gethostbyname().  After connect() the socket is
 * wrapped in two stdio streams, rserv for reading and wserv for writing.
 * NOTE(review): the conditions selecting the host name source, the error
 * returns and the final return value are not shown here; confirm.
 */
open_skkserv(int portnum)
2953
struct sockaddr_in hostaddr;
2954
struct hostent *entry;
2955
/* struct servent *serv; */
2956
struct protoent *proto;
2960
signal(SIGPIPE, SIG_IGN);
2962
/* serv = getservbyname(SKK_SERVICENAME, "tcp"); */
2963
memset((char*)&hostaddr, 0, sizeof(struct sockaddr_in));
2964
if ((proto = getprotobyname("tcp")) == NULL) {
2968
if ((sock = socket(AF_INET, SOCK_STREAM, proto->p_proto)) < 0) {
2973
hostname = SKKServerHost;
2974
else if ((hostname = getenv("SKKSERVER")) == NULL) {
2975
#ifdef SKK_SERVER_HOST
2976
hostname = SKK_SERVER_HOST;
2981
if ('0' <= *hostname && *hostname <= '9') {
2982
if (sscanf(hostname,"%d.%d.%d.%d", &a1, &a2, &a3, &a4) != 4) {
2985
/*
 * NOTE(review): the %d conversions imply a1..a4 are int, so a1 << 24
 * overflows a signed int when a1 is 128 or more, and the parsed values are
 * not range-checked in the lines shown here; confirm.
 */
a1 = (a1 << 24) | (a2 << 16) | (a3 << 8) | a4;
2986
hostaddr.sin_addr.s_addr = htonl(a1);
2988
if ((entry = gethostbyname(hostname)) == NULL) {
2991
memcpy(&hostaddr.sin_addr, entry->h_addr, entry->h_length);
2993
hostaddr.sin_family = AF_INET;
2994
/* hostaddr.sin_port = serv ? serv->s_port : htons(portnum); */
2995
hostaddr.sin_port = htons(portnum);
2996
if (connect(sock, (struct sockaddr *)&hostaddr, sizeof(struct sockaddr_in)) < 0) {
2999
fprintf(stderr, "SKKSERVER=%s\n", hostname);
3001
rserv = fdopen(sock, "r");
3002
wserv = fdopen(sock, "w");
3009
if (skkservsock >= 0) {
3010
fprintf(wserv, "0\n");