2
**********************************************************************
3
* Copyright (C) 2005-2009, International Business Machines
4
* Corporation and others. All Rights Reserved.
5
**********************************************************************
11
#include "unicode/uobject.h"
13
#if !UCONFIG_NO_CONVERSION
19
// Helper class for n-gram based parsing of input text; derives from UMemory.
// NOTE(review): braces, access specifiers and possibly other members are not
// visible in this view of the header — confirm against the complete file.
class NGramParser : public UMemory
25
// Read-only table of n-gram values (presumably set from the constructor's
// theNgramList argument — TODO confirm in the implementation).
const int32_t *ngramList;
26
// Read-only byte table (presumably set from the constructor's theCharMap
// argument — TODO confirm in the implementation).
const uint8_t *charMap;
32
// Constructs a parser from an n-gram table and a character map.
NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
36
* Binary search for value in table, which must have exactly 64 entries.
38
int32_t search(const int32_t *table, int32_t value);
40
// Takes a single n-gram value; returns nothing. Effect on parser state is not
// visible in this header.
void lookup(int32_t thisNgram);
41
// Takes a single byte value (passed as int32_t); returns nothing.
void addByte(int32_t b);
42
// Reads from the given InputText and returns an int32_t.
// NOTE(review): presumably the next byte or a negative end marker — confirm.
int32_t nextByte(InputText *det);
45
// Entry point that processes the given InputText and returns an int32_t result.
// NOTE(review): meaning of the result (e.g. a confidence value) is not shown here.
int32_t parse(InputText *det);
49
// Abstract base for the recognizers declared in this header; derives from
// CharsetRecognizer. getName() and match() are pure virtual here, so a
// concrete subclass has to provide both.
// NOTE(review): "sbcs" presumably stands for single-byte character set — confirm.
class CharsetRecog_sbcs : public CharsetRecognizer
57
virtual ~CharsetRecog_sbcs();
59
// Pure virtual: supplied by subclasses.
virtual const char *getName() const = 0;
61
// Pure virtual: supplied by subclasses.
virtual int32_t match(InputText *det) = 0;
63
// Non-virtual helper taking the input text together with an n-gram table and
// a character map. NOTE(review): presumably the shared implementation that
// subclass match() functions call with their own tables — confirm.
int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]);
66
// Intermediate class for the CharsetRecog_8859_1_* recognizers; derives from
// CharsetRecog_sbcs and declares getName(). No match() is declared in the
// visible lines, so it is left to the derived classes.
class CharsetRecog_8859_1 : public CharsetRecog_sbcs
69
virtual ~CharsetRecog_8859_1();
71
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
74
// Intermediate class for the CharsetRecog_8859_2_* recognizers; derives from
// CharsetRecog_sbcs and declares getName(). No match() is declared in the
// visible lines, so it is left to the derived classes.
class CharsetRecog_8859_2 : public CharsetRecog_sbcs
77
virtual ~CharsetRecog_8859_2();
79
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
82
// Intermediate class for CharsetRecog_8859_5_ru; derives from
// CharsetRecog_sbcs and declares getName(). No match() is declared in the
// visible lines, so it is left to the derived class.
class CharsetRecog_8859_5 : public CharsetRecog_sbcs
85
virtual ~CharsetRecog_8859_5();
87
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
90
// Intermediate class for CharsetRecog_8859_6_ar; derives from
// CharsetRecog_sbcs and declares getName(). No match() is declared in the
// visible lines, so it is left to the derived class.
class CharsetRecog_8859_6 : public CharsetRecog_sbcs
93
virtual ~CharsetRecog_8859_6();
95
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
98
// Intermediate class for CharsetRecog_8859_7_el; derives from
// CharsetRecog_sbcs and declares getName(). No match() is declared in the
// visible lines, so it is left to the derived class.
class CharsetRecog_8859_7 : public CharsetRecog_sbcs
101
virtual ~CharsetRecog_8859_7();
103
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
106
// Intermediate class for CharsetRecog_8859_8_I_he and CharsetRecog_8859_8_he;
// derives from CharsetRecog_sbcs and declares getName(). No match() is
// declared in the visible lines, so it is left to the derived classes.
class CharsetRecog_8859_8 : public CharsetRecog_sbcs
109
virtual ~CharsetRecog_8859_8();
111
// Explicitly marked virtual; CharsetRecog_8859_8_I_he declares its own getName().
virtual const char *getName() const;
114
// Intermediate class for CharsetRecog_8859_9_tr; derives from
// CharsetRecog_sbcs and declares getName(). No match() is declared in the
// visible lines, so it is left to the derived class.
class CharsetRecog_8859_9 : public CharsetRecog_sbcs
117
virtual ~CharsetRecog_8859_9();
119
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
122
// Recognizer derived from CharsetRecog_8859_1; adds getLanguage() and match().
// NOTE(review): the "_en" suffix presumably denotes English — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_1_en : public CharsetRecog_8859_1
125
virtual ~CharsetRecog_8859_1_en();
127
const char *getLanguage() const;
129
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
132
// Recognizer derived from CharsetRecog_8859_1; adds getLanguage() and match().
// NOTE(review): the "_da" suffix presumably denotes Danish — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_1_da : public CharsetRecog_8859_1
135
virtual ~CharsetRecog_8859_1_da();
137
const char *getLanguage() const;
139
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
142
// Recognizer derived from CharsetRecog_8859_1; adds getLanguage() and match().
// NOTE(review): the "_de" suffix presumably denotes German — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_1_de : public CharsetRecog_8859_1
145
virtual ~CharsetRecog_8859_1_de();
147
const char *getLanguage() const;
149
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
152
// Recognizer derived from CharsetRecog_8859_1; adds getLanguage() and match().
// NOTE(review): the "_es" suffix presumably denotes Spanish — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_1_es : public CharsetRecog_8859_1
155
virtual ~CharsetRecog_8859_1_es();
157
const char *getLanguage() const;
159
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
162
// Recognizer derived from CharsetRecog_8859_1; adds getLanguage() and match().
// NOTE(review): the "_fr" suffix presumably denotes French — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_1_fr : public CharsetRecog_8859_1
165
virtual ~CharsetRecog_8859_1_fr();
167
const char *getLanguage() const;
169
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
172
// Recognizer derived from CharsetRecog_8859_1; adds getLanguage() and match().
// NOTE(review): the "_it" suffix presumably denotes Italian — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_1_it : public CharsetRecog_8859_1
175
virtual ~CharsetRecog_8859_1_it();
177
const char *getLanguage() const;
179
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
182
// Recognizer derived from CharsetRecog_8859_1; adds getLanguage() and match().
// NOTE(review): the "_nl" suffix presumably denotes Dutch — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_1_nl : public CharsetRecog_8859_1
185
virtual ~CharsetRecog_8859_1_nl();
187
const char *getLanguage() const;
189
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
192
// Recognizer derived from CharsetRecog_8859_1; adds getLanguage() and match().
// NOTE(review): the "_no" suffix presumably denotes Norwegian — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_1_no : public CharsetRecog_8859_1
195
virtual ~CharsetRecog_8859_1_no();
197
const char *getLanguage() const;
199
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
202
// Recognizer derived from CharsetRecog_8859_1; adds getLanguage() and match().
// NOTE(review): the "_pt" suffix presumably denotes Portuguese — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_1_pt : public CharsetRecog_8859_1
205
virtual ~CharsetRecog_8859_1_pt();
207
const char *getLanguage() const;
209
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
212
// Recognizer derived from CharsetRecog_8859_1; adds getLanguage() and match().
// NOTE(review): the "_sv" suffix presumably denotes Swedish — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_1_sv : public CharsetRecog_8859_1
215
virtual ~CharsetRecog_8859_1_sv();
217
const char *getLanguage() const;
219
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
222
// Recognizer derived from CharsetRecog_8859_2; adds getLanguage() and match().
// NOTE(review): the "_cs" suffix presumably denotes Czech — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_2_cs : public CharsetRecog_8859_2
225
virtual ~CharsetRecog_8859_2_cs();
227
const char *getLanguage() const;
229
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
232
// Recognizer derived from CharsetRecog_8859_2; adds getLanguage() and match().
// NOTE(review): the "_hu" suffix presumably denotes Hungarian — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_2_hu : public CharsetRecog_8859_2
235
virtual ~CharsetRecog_8859_2_hu();
237
const char *getLanguage() const;
239
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
242
// Recognizer derived from CharsetRecog_8859_2; adds getLanguage() and match().
// NOTE(review): the "_pl" suffix presumably denotes Polish — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_2_pl : public CharsetRecog_8859_2
245
virtual ~CharsetRecog_8859_2_pl();
247
const char *getLanguage() const;
249
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
252
// Recognizer derived from CharsetRecog_8859_2; adds getLanguage() and match().
// NOTE(review): the "_ro" suffix presumably denotes Romanian — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_2_ro : public CharsetRecog_8859_2
255
virtual ~CharsetRecog_8859_2_ro();
257
const char *getLanguage() const;
259
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
262
// Recognizer derived from CharsetRecog_8859_5; adds getLanguage() and match().
// NOTE(review): the "_ru" suffix presumably denotes Russian — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
265
virtual ~CharsetRecog_8859_5_ru();
267
const char *getLanguage() const;
269
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
272
// Recognizer derived from CharsetRecog_8859_6; adds getLanguage() and match().
// NOTE(review): the "_ar" suffix presumably denotes Arabic — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
275
virtual ~CharsetRecog_8859_6_ar();
277
const char *getLanguage() const;
279
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
282
// Recognizer derived from CharsetRecog_8859_7; adds getLanguage() and match().
// NOTE(review): the "_el" suffix presumably denotes Greek — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
285
virtual ~CharsetRecog_8859_7_el();
287
const char *getLanguage() const;
289
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
292
// Recognizer derived from CharsetRecog_8859_8. Unlike the sibling
// CharsetRecog_8859_8_he, it also declares its own getName() in addition to
// getLanguage() and match().
// NOTE(review): presumably reports a distinct charset name (e.g. an "-I"
// variant) for Hebrew — confirm in the implementation file.
class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
295
virtual ~CharsetRecog_8859_8_I_he();
297
// Same signature as the virtual getName() declared in CharsetRecog_8859_8.
const char *getName() const;
299
const char *getLanguage() const;
301
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
304
// Recognizer derived from CharsetRecog_8859_8; adds getLanguage() and match()
// and declares no getName() of its own in the visible lines.
// NOTE(review): the "_he" suffix presumably denotes Hebrew — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
307
virtual ~CharsetRecog_8859_8_he ();
309
const char *getLanguage() const;
311
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
314
// Recognizer derived from CharsetRecog_8859_9; adds getLanguage() and match().
// NOTE(review): the "_tr" suffix presumably denotes Turkish — confirm against
// the string returned by getLanguage() in the implementation file.
class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
317
virtual ~CharsetRecog_8859_9_tr ();
319
const char *getLanguage() const;
321
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
324
// Recognizer derived directly from CharsetRecog_sbcs; declares getName(),
// getLanguage() and match() in a single class (no intermediate per-charset
// class as used for the 8859 families).
// NOTE(review): language handled is not visible here — see getLanguage().
class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
327
virtual ~CharsetRecog_windows_1256();
329
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
331
const char *getLanguage() const;
333
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
336
// Recognizer derived directly from CharsetRecog_sbcs; declares getName(),
// getLanguage() and match() in a single class (no intermediate per-charset
// class as used for the 8859 families).
// NOTE(review): language handled is not visible here — see getLanguage().
class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
339
virtual ~CharsetRecog_windows_1251();
341
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
343
const char *getLanguage() const;
345
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
349
// Recognizer derived directly from CharsetRecog_sbcs; declares getName(),
// getLanguage() and match() in a single class (no intermediate per-charset
// class as used for the 8859 families).
// NOTE(review): language handled is not visible here — see getLanguage().
class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
352
virtual ~CharsetRecog_KOI8_R();
354
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
356
const char *getLanguage() const;
358
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
361
// Common base of CharsetRecog_IBM424_he_rtl and CharsetRecog_IBM424_he_ltr;
// derives from CharsetRecog_sbcs. Only getLanguage() is declared in the
// visible lines; getName() and match() are declared by the two derived classes.
class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
364
virtual ~CharsetRecog_IBM424_he();
366
// Shared by both derived classes, which do not redeclare it.
const char *getLanguage() const;
369
// Recognizer derived from CharsetRecog_IBM424_he; declares getName() and match().
// NOTE(review): "_rtl" presumably means right-to-left text ordering — confirm.
class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
371
virtual ~CharsetRecog_IBM424_he_rtl();
373
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
375
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
378
// Recognizer derived from CharsetRecog_IBM424_he; declares getName() and match().
// NOTE(review): "_ltr" presumably means left-to-right text ordering — confirm.
class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
379
virtual ~CharsetRecog_IBM424_he_ltr();
381
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
383
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
386
// Common base of CharsetRecog_IBM420_ar_rtl and CharsetRecog_IBM420_ar_ltr;
// derives from CharsetRecog_sbcs. Besides getLanguage() it declares
// match setup/teardown hooks, two members holding a previous input buffer,
// and byte-buffer helper functions. getName() and match() are declared by
// the two derived classes.
// NOTE(review): access specifiers are not visible in this view.
class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
389
virtual ~CharsetRecog_IBM420_ar();
391
// Shared by both derived classes, which do not redeclare it.
const char *getLanguage() const;
394
// NOTE(review): matchInit/matchFinish presumably bracket a match() call,
// swapping the input bytes of textIn and restoring them afterwards using the
// prev_* members below — confirm in the implementation file.
void matchInit(InputText *textIn);
395
void matchFinish(InputText *textIn);
398
// Pointer to a byte buffer and its length.
// NOTE(review): presumably the original input saved by matchInit(); ownership
// and lifetime are not visible here.
uint8_t *prev_fInputBytes;
399
int32_t prev_fInputBytesLength;
402
// Predicate on a single byte value.
UBool isLamAlef(uint8_t b);
403
// Both helpers take a read-only input buffer with its length and return a
// mutable byte buffer; `length` is a non-const reference, so the callee can
// write to it (presumably the length of the returned buffer — TODO confirm).
// NOTE(review): who frees the returned buffer is not visible here.
uint8_t *unshapeLamAlef(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length);
404
uint8_t *unshape(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length);
407
// Recognizer derived from CharsetRecog_IBM420_ar; declares getName() and match().
// NOTE(review): "_rtl" presumably means right-to-left text ordering — confirm.
class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
409
virtual ~CharsetRecog_IBM420_ar_rtl();
411
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
413
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
416
// Recognizer derived from CharsetRecog_IBM420_ar; declares getName() and match().
// NOTE(review): "_ltr" presumably means left-to-right text ordering — confirm.
class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
417
virtual ~CharsetRecog_IBM420_ar_ltr();
419
// Same signature as the pure virtual getName() in CharsetRecog_sbcs.
const char *getName() const;
421
// Same signature as the pure virtual match() in CharsetRecog_sbcs.
int32_t match(InputText *textIn);
427
#endif /* __CSRSBCS_H */