6
// First some base level utility routines
13
#include "nscore.h" // for mozalloc headers
23
// Default character encoding name and default keyboard string.
// SPELL_KEYSTRING lists the three QWERTY letter rows, separated by '|'.
24
#define SPELL_ENCODING "ISO8859-1"
25
#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
27
// Default morphological field tags.
// Every tag is a two-character code followed by ':' (three characters total).
28
#define MORPH_STEM "st:"
29
#define MORPH_ALLOMORPH "al:"
30
#define MORPH_POS "po:"
31
#define MORPH_DERI_PFX "dp:"
32
#define MORPH_INFL_PFX "ip:"
33
#define MORPH_TERM_PFX "tp:"
34
#define MORPH_DERI_SFX "ds:"
35
#define MORPH_INFL_SFX "is:"
36
#define MORPH_TERM_SFX "ts:"
37
#define MORPH_SURF_PFX "sp:"
38
#define MORPH_FREQ "fr:"
39
#define MORPH_PHON "ph:"
40
#define MORPH_HYPH "hy:"
41
#define MORPH_PART "pa:"
42
#define MORPH_FLAG "fl:"
43
#define MORPH_HENTRY "_H:"
44
// Length of a morphological field tag such as MORPH_STEM, excluding the
// terminating NUL. Computed with sizeof on the string literal so that it is a
// compile-time constant rather than a strlen() call at every use.
#define MORPH_TAG_LEN (sizeof(MORPH_STEM) - 1)
51
// Reserved flag values.
// DEFAULTFLAGS and FORBIDDENWORD are defined to the same value (65510).
// NOTE(review): confirm that the shared value is intentional.
#define DEFAULTFLAGS 65510
52
#define FORBIDDENWORD 65510
53
#define ONLYUPCASEFLAG 65511
55
// Convert srclen UTF-16 characters (w_char) from src to UTF-8 in dest.
// NOTE(review): size is presumably the capacity of dest - confirm against the definition.
56
LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
58
// Convert the UTF-8 string src to UTF-16 characters (w_char) in dest.
// NOTE(review): size is presumably the capacity of dest; the meaning of the
// int result is not visible in this header - confirm.
59
LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
62
// Sort a vector of 2-byte flags.
// NOTE(review): begin/end presumably bound the range to sort - confirm whether
// end is inclusive in the definition.
LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
64
// Binary search for flag in a vector of 2-byte flags.
// NOTE(review): presumably requires flags to be sorted; the meaning of right
// and of the int result is not visible in this header - confirm.
65
LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
67
// Remove the end-of-line character(s) from s.
68
LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
71
// NOTE(review): undocumented here; presumably duplicates s into newly
// allocated memory owned by the caller - confirm.
LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
73
// strcat for a limited-length destination string.
// NOTE(review): max presumably bounds the resulting length of dest - confirm
// the exact semantics in the definition.
74
LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
76
// Duplicate the reverse of the string s.
77
LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
79
// Parse *sptr into tokens separated by the character delim.
80
LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
81
// Parse *sptr into tokens separated by the character delim.
// NOTE(review): how this differs from mystrsep is not visible in this header.
82
LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
84
// NOTE(review): the earlier comment here ("parse into tokens with char
// delimiter") did not match this signature, which takes three strings and no
// delimiter character. Judging by the name this is presumably a string
// replace; the roles of the unnamed parameters must be confirmed against the
// definition.
85
LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
87
// Append s to the end of every line in lines.
88
LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
90
// Tokenize text into lines, splitting at breakchar; results go through lines.
// NOTE(review): presumably allocates *lines and returns the number of lines - confirm.
91
LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
93
// Tokenize text into lines at breakchar and remove duplicate lines, in place.
94
LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
95
// NOTE(review): undocumented variant of line_uniq taking char **; presumably
// it may replace *text - confirm against the definition.
LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
97
// Change every occurrence of oldc to newc in text, in place.
98
LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
101
// NOTE(review): undocumented here; presumably reverses the word in place.
// The meaning of the int result is not visible in this header - confirm.
LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
104
// NOTE(review): presumably the UTF-8 aware counterpart of reverseword - confirm.
LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
107
// NOTE(review): presumably removes duplicate strings from the n-element list
// and returns the new element count - confirm.
LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
109
// Free a list of n character arrays.
110
LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
112
// character encoding information
115
unsigned char clower;
116
unsigned char cupper;
119
// Unicode character encoding information
120
struct unicode_info {
122
unsigned short cupper;
123
unsigned short clower;
126
struct unicode_info2 {
128
unsigned short cupper;
129
unsigned short clower;
132
// NOTE(review): undocumented here; judging by the names, these set up and
// release a UTF character table. The meaning of the int result of
// initialize_utf_tbl is not visible in this header - confirm.
LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
133
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
134
// Case mapping of a 16-bit character code.
// NOTE(review): langnum presumably selects language-specific case rules - confirm.
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
135
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
136
// NOTE(review): presumably returns non-zero when c is an alphabetic character - confirm.
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
139
const char * enc_name;
140
struct cs_info * cs_table;
143
// language to encoding default map
147
const char * def_enc;
151
// NOTE(review): undocumented here; presumably looks up the cs_info table for
// the encoding named es - confirm.
LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
153
// NOTE(review): undocumented here; presumably returns the default encoding
// name for the language code lang - confirm.
LIBHUNSPELL_DLL_EXPORTED const char * get_default_enc(const char * lang);
155
// Get the language identifier for a language code.
156
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
158
// Get the characters of the given 8-bit encoding that have lower- and
// uppercase forms.
159
LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
161
// Convert the NUL-terminated string p to all capitals using the named encoding.
// NOTE(review): d is presumably the destination buffer - confirm.
162
LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
164
// Convert the NUL-terminated string p to all lowercase using the named encoding.
// NOTE(review): d is presumably the destination buffer - confirm.
165
LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
167
// Convert the NUL-terminated string p to have an initial capital using the
// named encoding.
// NOTE(review): d is presumably the destination buffer - confirm.
168
LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
170
// Convert the NUL-terminated string p to all capitals, using the csconv table.
171
LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
173
// Convert the NUL-terminated string p to all lowercase, using the csconv table.
174
LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
176
// Convert the NUL-terminated string p to have an initial capital, using the
// csconv table.
177
LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
179
// Convert the first nc characters of the w_char string u to lowercase.
// (u is an array of w_char, not a UTF-8 byte string.)
180
LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
182
// Convert the first nc characters of the w_char string u to capitals.
// (u is an array of w_char, not a UTF-8 byte string.)
183
LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
185
// Get the type of capitalization of q, using the given cs_info table.
// NOTE(review): nl is presumably the length of q - confirm.
186
LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
188
// Get the type of capitalization of the w_char string q (UTF-8 variant).
// NOTE(review): nl is presumably the number of w_char elements in q - confirm.
189
LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
191
// Strip all ignored characters from word; the ignored set is given as
// ignored_len 2-byte values in ignored_chars.
192
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
194
// Strip all ignored characters from word; the ignored set is given as the
// character string ignored_chars.
195
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
197
// NOTE(review): the parsing and morphological-field helpers below are
// undocumented in this header; every description is an inference from the
// names and signatures and must be confirmed against the definitions.

// Presumably parses a string value out of line into *out; ln is presumably a
// line number used for diagnostics.
LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
199
// Presumably parses a character array out of line into *out and, when utf8 is
// set, also into *out_utf16 / *out_utf16_len.
LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
200
int * out_utf16_len, int utf8, int ln);
202
// Presumably returns the length of the field starting at r.
LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
203
// Presumably copies the value of the field tagged var found in morph into dest.
LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
205
// Presumably compares two morphological descriptions s and t.
LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
207
// Presumably counts the suffix fields present in morph.
LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
209
// Conversion function for protected memory: store the pointer source at dest.
// NOTE(review): presumably writes the pointer's bytes into the buffer at dest - confirm.
210
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
212
// Conversion function for protected memory: read back a pointer stored at s.
// NOTE(review): presumably the inverse of store_pointer - confirm.
213
LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
216
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
221
else if (h->var & H_OPT_ALIASM)
222
ret = get_stored_pointer(&(h->word[0]) + h->blen + 1);
224
ret = &(h->word[0]) + h->blen + 1;
228
// NULL-free version for warning-free OOo build
229
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
234
else if (h->var & H_OPT_ALIASM)
235
ret = get_stored_pointer(&(h->word[0]) + h->blen + 1);
237
ret = &(h->word[0]) + h->blen + 1;
241
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
243
return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
246
// Equality test for two values that have .l and .h members: yields true only
// when both members match. The .l members are compared first and the .h
// members only if those are equal. Each argument may be evaluated twice, so
// do not pass expressions with side effects.
#define w_char_eq(a,b) (!(((a).l != (b).l) || ((a).h != (b).h)))