49
46
/*
 * Table size limits.
 * NOTE(review): presumably the maximum number of conjugation types
 * (TYPE_NUM) and conjugation forms (FORM_NUM) -- confirm against the
 * code that sizes its tables with these macros.
 */
#define TYPE_NUM 256
50
47
#define FORM_NUM 128
53
/*
 * Marker strings for beginning/end of sentence (BOS/EOS).
 * ESTR_* macros hold ASCII spellings; JSTR_* macros hold the non-ASCII
 * spelling of the same keyword.
 * NOTE(review): the JSTR_* bytes are presumably Japanese text in a legacy
 * encoding (e.g. EUC-JP) -- confirm the file encoding before editing them.
 */
#define ESTR_BOS "BOS"
54
#define JSTR_EOS "ʸ��"
55
#define ESTR_EOS "EOS"
56
/* NOTE(review): presumably the bunsetsu-segmentation marker -- confirm. */
#define JSTR_BKUGIRI "/"
57
#define ESTR_BOS_EOS "BOS/EOS"
60
/*
 * Names of the "base form" conjugation form.
 * Two macro families (*_BASE_FORM_STR* and *_BASE_FORM*) are defined with
 * identical values; the English spelling has two accepted variants
 * ("BASEFORM" and "STEMFORM").
 * NOTE(review): the duplicate families look like two revisions of this
 * header shown together -- confirm which names callers actually use.
 */
#define JSTR_BASE_FORM_STR "���ܷ�"
61
#define ESTR_BASE_FORM_STR1 "BASEFORM"
62
#define ESTR_BASE_FORM_STR2 "STEMFORM"
63
#define JSTR_BASE_FORM "���ܷ�"
64
#define ESTR_BASE_FORM1 "BASEFORM"
65
#define ESTR_BASE_FORM2 "STEMFORM"
68
/*
 * Keyword strings, defined as JSTR_* (non-ASCII) / ESTR_* (ASCII) pairs.
 * NOTE(review): presumably the field names recognised when parsing
 * dictionary entries (POS, LEX, READING, ...) -- confirm against the parser.
 * The JSTR_* literals must be kept byte-for-byte; they are matched at
 * runtime and their encoding is the source file's encoding.
 */
#define JSTR_DEF_POS_COST "�ǥե�����ʻ쥳����"
69
#define ESTR_DEF_POS_COST "DEF_POS_COST"
70
#define JSTR_MRPH "������"
71
#define ESTR_MRPH "MORPH"
72
#define JSTR_POS "�ʻ�"
73
#define ESTR_POS "POS"
74
#define JSTR_WORD "������"
75
#define ESTR_WORD "LEX"
76
#define JSTR_READING "�ɤ�"
77
#define ESTR_READING "READING"
78
#define JSTR_BASE "����"
79
#define ESTR_BASE "BASE"
80
#define JSTR_PRON "ȯ��"
81
#define ESTR_PRON "PRON"
82
#define JSTR_CTYPE "���ѷ�"
83
#define ESTR_CTYPE "CTYPE"
84
#define JSTR_CFORM "���ѷ�"
85
#define ESTR_CFORM "CFORM"
86
/* Two non-ASCII spellings share the single ASCII keyword "INFO". */
#define JSTR_INFO1 "�ղþ���"
87
#define JSTR_INFO2 "��̣����"
88
#define ESTR_INFO "INFO"
89
#define JSTR_COMPOUND "ʣ���"
90
#define ESTR_COMPOUND "COMPOUND"
91
#define JSTR_SEG "������"
92
#define ESTR_SEG "SEG"
93
/* NOTE(review): no ESTR_CONN_ATTR counterpart is visible here -- confirm. */
#define JSTR_CONN_ATTR "Ϣ��°��"
96
/*
 * Keyword strings, defined as JSTR_* (non-ASCII) / ESTR_* (ASCII) pairs.
 * NOTE(review): presumably the option names recognised in the runtime
 * configuration file (grammar file, costs, output format, ...) -- confirm
 * against the configuration reader.
 * The JSTR_* literals must be kept byte-for-byte; they are matched at
 * runtime and their encoding is the source file's encoding.
 */
#define JSTR_GRAM_FILE "ʸˡ�ե�����"
97
#define ESTR_GRAM_FILE "GRAMMAR"
98
/* Two non-ASCII spellings exist for "unknown word"; one ASCII spelling. */
#define JSTR_UNKNOWN_WORD1 "̤�θ�"
99
#define JSTR_UNKNOWN_WORD2 "̤�����"
100
#define ESTR_UNKNOWN_WORD "UNKNOWN"
101
/* Default non-ASCII spelling: alias for the first variant. */
#define JSTR_UNKNOWN_WORD JSTR_UNKNOWN_WORD1
102
#define JSTR_UNKNOWN_POS1 "̤�θ��ʻ�"
103
#define JSTR_UNKNOWN_POS2 "̤������ʻ�"
104
#define ESTR_UNKNOWN_POS "UNKNOWN_POS"
105
#define JSTR_SPACE_POS "�����ʻ�"
106
#define ESTR_SPACE_POS "SPACE_POS"
107
#define JSTR_ANNOTATION "����"
108
#define ESTR_ANNOTATION "ANNOTATION"
109
#define JSTR_POS_COST "�ʻ쥳����"
110
#define ESTR_POS_COST "POS_COST"
111
#define JSTR_CONN_WEIGHT "Ϣ�ܥ����ȽŤ�"
112
#define ESTR_CONN_WEIGHT "CONN_WEIGHT"
113
#define JSTR_MRPH_WEIGHT "�����ǥ����ȽŤ�"
114
#define ESTR_MRPH_WEIGHT "MORPH_WEIGHT"
115
#define JSTR_COST_WIDTH "��������"
116
#define ESTR_COST_WIDTH "COST_WIDTH"
117
#define JSTR_DEF_CONN_COST "̤���Ϣ�ܥ�����"
118
#define ESTR_DEF_CONN_COST "DEF_CONN_COST"
119
#define JSTR_COMPOSIT_POS "Ϣ���ʻ�"
120
#define ESTR_COMPOSIT_POS "COMPOSIT_POS"
121
#define JSTR_OUTPUT_COMPOUND "ʣ������"
122
#define ESTR_OUTPUT_COMPOUND "OUTPUT_COMPOUND"
123
/* The two dictionary-file keywords below have only an ASCII form here. */
#define ESTR_PAT_FILE "PATDIC" /* changed by Tatuo 960920 */
124
#define ESTR_SUF_FILE "SUFDIC"
125
#define JSTR_OUTPUT_FORMAT "���ϥե����ޥå�"
126
#define ESTR_OUTPUT_FORMAT "OUTPUT_FORMAT"
127
#define JSTR_LANG "����"
128
#define ESTR_LANG "LANG"
129
#define JSTR_DELIMITER "���ڤ�ʸ��"
130
#define ESTR_DELIMITER "DELIMITER"
131
#define JSTR_BOS_STR "BOSʸ����"
132
#define ESTR_BOS_STR "BOS_STRING"
133
#define JSTR_EOS_STR "EOSʸ����"
134
#define ESTR_EOS_STR "EOS_STRING"
136
49
/*
 * File names with the ".cha" suffix.
 * NOTE(review): presumably the connection-rule file, a temporary variant of
 * it (leading underscore), and the grammar definition file -- confirm where
 * each is opened.
 */
#define VCHA_CONNECT_FILE "connect.cha"
137
50
#define VCHA_CONNTMP_FILE "_connect.cha"
138
51
#define VCHA_GRAMMAR_FILE "grammar.cha"
204
114
/* this structure is used only in mkchadic */
206
typedef struct _mrph {
207
char midasi[MIDASI_LEN]; /* surface form */
208
char yomi[MIDASI_LEN]; /* Japanese reading */
116
typedef struct _lexicon_t {
117
char headword[MIDASI_LEN]; /* surface form */
119
char reading[MIDASI_LEN * 2]; /* Japanese reading *//* XXX ad hoc */
121
char pron[MIDASI_LEN * 2]; /* Japanese pronunciation *//* XXX ad hoc */
123
char *base; /* base form */
124
unsigned short pos; /* POS number */
125
unsigned char inf_type; /* Conjugation type number */
126
unsigned char inf_form; /* Conjugation form number */
209
128
char *info; /* semantic information */
210
char *base; /* base form */
211
char pron[MIDASI_LEN]; /* Japanese pronunciation */
212
unsigned short hinsi; /* POS number */
213
unsigned char ktype; /* Conjugation type number */
214
unsigned char kform; /* Conjugation form number */
216
130
short con_tbl; /* connection table number */
217
short length; /* the length of surface form */
218
131
unsigned short weight; /* cost for morpheme */
220
char is_undef; /* the unseen word or not */
223
135
/* POS information -- see also the comments (the end of this file) */
224
136
typedef struct _hinsi_t {
225
137
short *path; /* the path to top node */
226
138
short *daughter; /* the daughter node */
227
139
char *name; /* the name of POS (at the level) */
228
char *bkugiri; /* for bunsetsu segmentation */
229
140
short composit; /* for the COMPOSIT_POS */
230
141
char depth; /* the depth from top node */
231
142
char kt; /* have conjugation or not */
340
243
/*
 * Function prototypes.
 * These declarations use FILE and off_t, so <stdio.h> and a header that
 * declares off_t (e.g. <sys/types.h>) must be included before this point.
 * Parameter names are omitted in the declarations; see the definitions for
 * the meaning of each argument.
 */

/* Table / matrix / automaton functions. */
void cha_read_table(FILE*, int);
341
/*
 * NOTE(review): cha_check_table is declared twice below with different
 * parameter types (mrph_t* and lexicon_t*). C does not allow both; this
 * looks like two revisions of the header shown together -- keep only the
 * one matching the definition.
 */
int cha_check_table(mrph_t*); /* 970301 tatuo: return type changed void -> int (reason text is mis-encoded in this source; presumably "for compatibility" -- confirm) */
244
int cha_check_table(lexicon_t*); /* 970301 tatuo: return type changed void -> int (reason text is mis-encoded in this source; presumably "for compatibility" -- confirm) */
342
245
int cha_check_table_for_undef(int);
343
246
void cha_read_matrix(FILE*);
344
247
int cha_check_automaton(int, int, int, int*);
346
void cha_check_edrtable(mrph_t *, chasen_cell_t*);
347
void cha_check_edrtable_str(mrph_t*, char*); /* Unused. */
350
250
/* Name -> id lookups (POS, conjugation type, conjugation form), judging by the names -- confirm. */
int cha_get_nhinsi_str_id(char**);
352
252
int cha_get_type_id(char*);
353
253
int cha_get_form_id(char*, int);
356
/*
 * NOTE(review): presumably character-code conversion helpers (EUC <->
 * Shift_JIS, half-width -> full-width kana), judging by the names.
 * Ownership of the returned buffer is not visible here -- confirm.
 */
unsigned char *euc2sjis(char*);
357
unsigned char *sjis2euc(unsigned char*);
358
unsigned char *hankana2zenkana(unsigned char*);
361
256
/* Command-line option parsing. */
int cha_getopt(char**, char*, FILE*);
362
257
int cha_getopt_chasen(char**, FILE*);
365
/*
 * NOTE(review): presumably wrappers that memory-map a file (the _w variant
 * presumably writable) and unmap it again; the off_t value is presumably
 * the mapped size -- confirm against the definitions.
 */
off_t cha_mmap_file(char*, void**);
366
off_t cha_mmap_file_w(char*, void**);
367
void cha_munmap_file(void*, off_t);
369
259
#endif /* __CHADIC_H__ */