~ubuntu-branches/debian/squeeze/sword/squeeze

« back to all changes in this revision

Viewing changes to src/modules/filters/utf8transliterator.cpp

  • Committer: Bazaar Package Importer
  • Author(s): Jonathan Marsden, Jonathan Marsden, Dmitrijs Ledkovs, Closed Bugs
  • Date: 2009-05-30 11:55:55 UTC
  • mfrom: (1.3.1 upstream) (6.1.1 experimental)
  • Revision ID: james.westby@ubuntu.com-20090530115555-r427zsn3amivdpfu
Tags: 1.6.0+dfsg-1
[ Jonathan Marsden ]
* New upstream release. (Closes: #507960) (LP: #320558)
* debian/patches/02_libver.diff:
  - Bump SONAME to 8 -- SWORD 1.6 is not backward compatible with 1.5.11.
* debian/patches/series:
  - Remove 10_diatheke.diff -- included in upstream source.
* debian/patches/:
  - Remove several old unused .diff files.
  - Add 11_regex_only_when_needed.diff to conditionally include regex lib.
  - Add 12_fix_compiler_warnings.diff to remove all compiler warnings.
  - Add 13_fix_osis2mod_compression_default.diff from upstream svn.
  - Add 14_closing_section_not_chapter.diff from upstream svn.
* debian/libsword7.*: 
  - Rename to libsword8.*
  - Change libsword7 to libsword8 within files.
* debian/rules: 
  - SONAME bump to 8.
  - Set library version check to >= 1.6
* debian/control:
  - Change libsword7 to libsword8.
  - Add libsword7 to Conflicts.
  - Fix case of sword to SWORD in package descriptions.
  - Bump Standards-Version to 3.8.1 (no changes needed).
  - Fix section for libsword-dbg to avoid lintian warning.
* debian/rules:
  - Add DFSG get-orig-source target.
* debian/copyright:
  - Fix various mistakes in initial attempt to document copyrights.

[ Dmitrijs Ledkovs ]
* debian/rules: Added utils.mk to use missing-files target and call it on
  each build.
* debian/libsword-dev.install: Added libsword.la, previously missing.
* debian/libsword7.install: Added missing libicu translit files.
* debian/control:
  - Updated all uses of SWORD version to 1.6
  - Added libsword-dbg package
* debian/watch: Fixed a small mistake which was resulting in extra "."
  in final version name.
* debian/rules: simplified manpage processing.
* debian/libsword8.lintian-overrides: added override for module
  installation directory.
* debian/copyright: Updated with information about every file.
  Closes: #513448 LP: #322638
* debian/diatheke.examples: moved examples here from the diatheke.install
* debian/rules:
  - enabled shell script based testsuite
  - added commented out cppunit testsuite
* debian/patches/40_missing_includes.diff: 
  - added several missing stdio.h includes to prevent FTBFS of testsuite.

[ Closed Bugs ]
* FTBFS on intrepid (LP: #305172)

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/******************************************************************************
 
2
 *
 
3
 * utf8transliterators - SWFilter descendant to transliterate between
 
4
 *                       ICU-supported scripts.
 
5
 *
 
6
 *
 
7
 * Copyright 2009 CrossWire Bible Society (http://www.crosswire.org)
 
8
 *      CrossWire Bible Society
 
9
 *      P. O. Box 2528
 
10
 *      Tempe, AZ  85280-2528
 
11
 *
 
12
 * This program is free software; you can redistribute it and/or modify it
 
13
 * under the terms of the GNU General Public License as published by the
 
14
 * Free Software Foundation version 2.
 
15
 *
 
16
 * This program is distributed in the hope that it will be useful, but
 
17
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 
19
 * General Public License for more details.
 
20
 *
 
21
 */
 
22
 
 
23
#ifdef _ICU_
 
24
 
 
25
#include <stdlib.h>
 
26
 
 
27
#include <utilstr.h>
 
28
 
 
29
#include <unicode/ucnv.h>
 
30
#include <unicode/uchar.h>
 
31
#include <utf8transliterator.h>
 
32
#include <swmodule.h>
 
33
 
 
34
#ifndef _ICUSWORD_
 
35
#include "unicode/resbund.h"
 
36
#endif
 
37
#include <swlog.h>
 
38
 
 
39
SWORD_NAMESPACE_START
 
40
 
 
41
const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
 
42
        "Off",
 
43
        "Latin",
 
44
        //        "IPA",
 
45
        //        "Basic Latin",
 
46
        //        "SBL",
 
47
        //        "TC",
 
48
        //        "Beta",
 
49
        //        "BGreek",
 
50
        //        "SERA",
 
51
        //        "Hugoye",
 
52
        //        "UNGEGN",
 
53
        //        "ISO",
 
54
        //        "ALA-LC",
 
55
        //        "BGN",
 
56
        //        "Greek",
 
57
        //        "Hebrew",
 
58
        //        "Cyrillic",
 
59
        //        "Arabic",
 
60
        //        "Syriac",
 
61
        //        "Katakana",
 
62
        //        "Hiragana",
 
63
        //        "Hangul",
 
64
        //        "Devanagari",
 
65
        //        "Tamil",
 
66
        //        "Bengali",
 
67
        //        "Gurmukhi",
 
68
        //        "Gujarati",
 
69
        //        "Oriya",
 
70
        //        "Telugu",
 
71
        //        "Kannada",
 
72
        //        "Malayalam",
 
73
        //        "Thai",
 
74
        //        "Georgian",
 
75
        //        "Armenian",
 
76
        //        "Ethiopic",
 
77
        //        "Gothic",
 
78
        //        "Ugaritic",
 
79
        //        "Coptic",
 
80
        //        "Linear B",
 
81
        //        "Cypriot",
 
82
        //        "Runic",
 
83
        //        "Ogham",
 
84
        //        "Thaana",
 
85
        //        "Glagolitic",
 
86
};
 
87
 
 
88
const char UTF8Transliterator::optName[] = "Transliteration";
 
89
const char UTF8Transliterator::optTip[] = "Transliterates between scripts";
 
90
 
 
91
SWTransMap UTF8Transliterator::transMap;
 
92
 
 
93
#ifndef _ICUSWORD_
 
94
 
 
95
const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs";
 
96
const char UTF8Transliterator::SW_RB_RULE[] = "Rule";
 
97
#ifdef SWICU_DATA
 
98
const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA;
 
99
#else
 
100
const char UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/";
 
101
#endif
 
102
 
 
103
class SWCharString {
 
104
 public:
 
105
    inline SWCharString(const UnicodeString& str);
 
106
    inline ~SWCharString();
 
107
    inline operator const char*() { return ptr; }
 
108
 private:
 
109
    char buf[128];
 
110
    char* ptr;
 
111
};
 
112
// Extracts str into a char buffer: the embedded buf when the UTF-16 length is
// below sizeof(buf), otherwise a heap array of length() + 8 chars.
SWCharString::SWCharString(const UnicodeString& str) {
    // TODO This isn't quite right -- we should probably do
    // preflighting here to determine the real length.
    const bool fitsInline = str.length() < (int32_t)sizeof(buf);
    ptr = fitsInline ? buf : new char[str.length() + 8];
    str.extract(0, 0x7FFFFFFF, ptr, "");
}
 
122
 
 
123
// Frees the heap array, if the constructor had to allocate one.
SWCharString::~SWCharString() {
    if (ptr == buf) {
        return;     // inline storage, nothing to free
    }
    delete[] ptr;
}
 
128
 
 
129
#endif // _ICUSWORD_
 
130
 
 
131
 
 
132
UTF8Transliterator::UTF8Transliterator() {
 
133
        option = 0;
 
134
        unsigned long i;
 
135
        for (i = 0; i < NUMTARGETSCRIPTS; i++) {
 
136
                options.push_back(optionstring[i]);
 
137
        }
 
138
#ifndef _ICUSWORD_
 
139
        utf8status = U_ZERO_ERROR;
 
140
        Load(utf8status);
 
141
#endif
 
142
}
 
143
 
 
144
 
 
145
// No explicit cleanup is performed here.
UTF8Transliterator::~UTF8Transliterator()
{
}
 
147
 
 
148
 
 
149
void UTF8Transliterator::Load(UErrorCode &status)
 
150
{
 
151
#ifndef _ICUSWORD_
 
152
        static const char translit_swordindex[] = "translit_swordindex";
 
153
        
 
154
        UResourceBundle *bundle = 0, *transIDs = 0, *colBund = 0;
 
155
        bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status);
 
156
        if (U_FAILURE(status)) {
 
157
                SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: no resource index to load");
 
158
                SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
 
159
                return;
 
160
        }
 
161
 
 
162
        transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status);
 
163
        //UParseError parseError;
 
164
 
 
165
        int32_t row, maxRows;
 
166
        if (U_SUCCESS(status)) {
 
167
                maxRows = ures_getSize(transIDs);
 
168
                for (row = 0; row < maxRows; row++) {
 
169
                        colBund = ures_getByIndex(transIDs, row, 0, &status);
 
170
 
 
171
                        if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
 
172
                                UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
 
173
                                UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
 
174
                                UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
 
175
                                SWLog::getSystemLog()->logDebug("ok so far");
 
176
 
 
177
                                 if (U_SUCCESS(status)) {
 
178
                                        switch (type) {
 
179
                                        case 0x66: // 'f'
 
180
                                        case 0x69: // 'i'
 
181
                                                // 'file' or 'internal';
 
182
                                                // row[2]=resource, row[3]=direction
 
183
                                                {
 
184
                                                        //UBool visible = (type == 0x0066 /*f*/);
 
185
                                                        UTransDirection dir =
 
186
                                                                (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
 
187
                                                                0x0046 /*F*/) ?
 
188
                                                                UTRANS_FORWARD : UTRANS_REVERSE;
 
189
                                                        //registry->put(id, resString, dir, visible);
 
190
                                                        SWLog::getSystemLog()->logDebug("instantiating %s ...", resString.getBuffer());
 
191
                                                        registerTrans(id, resString, dir, status);
 
192
                                                        SWLog::getSystemLog()->logDebug("done.");
 
193
                                                }
 
194
                                                break;
 
195
                                        case 0x61: // 'a'
 
196
                                                // 'alias'; row[2]=createInstance argument
 
197
                                                //registry->put(id, resString, TRUE);
 
198
                                                break;
 
199
                                        }
 
200
                                 }
 
201
                                 else SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get resString");
 
202
                        }
 
203
                        else SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get row");
 
204
                        ures_close(colBund);
 
205
                }
 
206
        }
 
207
        else
 
208
        {
 
209
                SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: no resource index to load");
 
210
                SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
 
211
        }
 
212
 
 
213
        ures_close(transIDs);
 
214
        ures_close(bundle);
 
215
 
 
216
#endif // _ICUSWORD_
 
217
}
 
218
 
 
219
void  UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource,
 
220
                UTransDirection dir, UErrorCode &status )
 
221
{
 
222
#ifndef _ICUSWORD_
 
223
                SWLog::getSystemLog()->logDebug("registering ID locally %s", ID.getBuffer());
 
224
                SWTransData swstuff;
 
225
                swstuff.resource = resource;
 
226
                swstuff.dir = dir;
 
227
                SWTransPair swpair;
 
228
                swpair.first = ID;
 
229
                swpair.second = swstuff;
 
230
                transMap.insert(swpair);
 
231
#endif
 
232
}
 
233
 
 
234
bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status )
 
235
{
 
236
#ifndef _ICUSWORD_
 
237
                Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status);
 
238
                if (!U_FAILURE(status))
 
239
                {
 
240
                        // already have it, clean up and return true
 
241
                        SWLog::getSystemLog()->logDebug("already have it %s", ID.getBuffer());
 
242
                        delete trans;
 
243
                        return true;
 
244
                }
 
245
                status = U_ZERO_ERROR;
 
246
        
 
247
        SWTransMap::iterator swelement;
 
248
        if ((swelement = transMap.find(ID)) != transMap.end())
 
249
        {
 
250
                SWLog::getSystemLog()->logDebug("found element in map");
 
251
                SWTransData swstuff = (*swelement).second;
 
252
                UParseError parseError;
 
253
                //UErrorCode status;
 
254
                //std::cout << "unregistering " << ID << std::endl;
 
255
                //Transliterator::unregister(ID);
 
256
                SWLog::getSystemLog()->logDebug("resource is %s", swstuff.resource.getBuffer());
 
257
 
 
258
                // Get the rules
 
259
                //std::cout << "importing: " << ID << ", " << resource << std::endl;
 
260
                SWCharString ch(swstuff.resource);
 
261
                UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status);
 
262
                const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status);
 
263
                ures_close(bundle);
 
264
                //parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD,
 
265
                //        parseError, status);
 
266
                if (U_FAILURE(status)) {
 
267
                        SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get rules");
 
268
                        SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
 
269
                        return false;
 
270
                }
 
271
 
 
272
                
 
273
                Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir,
 
274
                        parseError,status);
 
275
                if (U_FAILURE(status)) {
 
276
                        SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to create transliterator");
 
277
                        SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
 
278
                        SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: line %s", parseError.line);
 
279
                        SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: offset %d", parseError.offset);
 
280
                        SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: preContext %s", *parseError.preContext);
 
281
                        SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: postContext %s", *parseError.postContext);
 
282
                        SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: rules were");
 
283
//                      SWLog::getSystemLog()->logError((const char *)rules);
 
284
                        return false;
 
285
                }
 
286
 
 
287
                Transliterator::registerInstance(trans);
 
288
                return true;
 
289
                
 
290
                //Transliterator *trans = instantiateTrans(ID, swstuff.resource, swstuff.dir, parseError, status);
 
291
                //return trans;
 
292
        }
 
293
        else
 
294
        {
 
295
                return false;
 
296
        }
 
297
#else
 
298
return true;
 
299
#endif // _ICUSWORD_
 
300
}
 
301
 
 
302
bool UTF8Transliterator::addTrans(const char* newTrans, SWBuf* transList) {
 
303
#ifdef _ICUSWORD_
 
304
        UErrorCode status;
 
305
        if (checkTrans(UnicodeString(newTrans), status)) {
 
306
#endif
 
307
                *transList += newTrans;
 
308
                *transList += ";";
 
309
                return true;
 
310
#ifdef _ICUSWORD_
 
311
        }
 
312
        else {
 
313
                return false;
 
314
        }
 
315
#endif
 
316
}
 
317
 
 
318
Transliterator * UTF8Transliterator::createTrans(const UnicodeString& ID, UTransDirection dir, UErrorCode &status )
 
319
{
 
320
        Transliterator *trans = Transliterator::createInstance(ID,UTRANS_FORWARD,status);
 
321
        if (U_FAILURE(status)) {
 
322
                delete trans;
 
323
                return NULL;
 
324
        }
 
325
        else {
 
326
                return trans;
 
327
        }
 
328
}
 
329
 
 
330
void UTF8Transliterator::setOptionValue(const char *ival)
 
331
{
 
332
        unsigned char i = option = NUMTARGETSCRIPTS;
 
333
        while (i && stricmp(ival, optionstring[i])) {
 
334
                i--;
 
335
                option = i;
 
336
        }
 
337
}
 
338
 
 
339
const char *UTF8Transliterator::getOptionValue()
 
340
{
 
341
        return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0;
 
342
}
 
343
 
 
344
char UTF8Transliterator::processText(SWBuf &text, const SWKey *key, const SWModule *module)
 
345
{
 
346
        if (option) {   // if we want transliteration
 
347
                unsigned long i, j;
 
348
                UErrorCode err = U_ZERO_ERROR;
 
349
                UConverter * conv = NULL;
 
350
                conv = ucnv_open("UTF-8", &err);
 
351
                SWBuf ID;
 
352
 
 
353
                bool compat = false;
 
354
 
 
355
                // Convert UTF-8 string to UTF-16 (UChars)
 
356
                j = strlen(text);
 
357
                int32_t len = (j * 2) + 1;
 
358
                UChar *source = new UChar[len];
 
359
                err = U_ZERO_ERROR;
 
360
                len = ucnv_toUChars(conv, source, len, text, j, &err);
 
361
                source[len] = 0;
 
362
 
 
363
                // Figure out which scripts are used in the string
 
364
                unsigned char scripts[NUMSCRIPTS];
 
365
 
 
366
                for (i = 0; i < NUMSCRIPTS; i++) {
 
367
                        scripts[i] = false;
 
368
                }
 
369
 
 
370
                for (i = 0; i < (unsigned long)len; i++) {
 
371
                        j = ublock_getCode(source[i]);
 
372
                        scripts[SE_LATIN] = true;
 
373
                        switch (j) {
 
374
                        //case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break;
 
375
                        case UBLOCK_GREEK: scripts[SE_GREEK] = true; break;
 
376
                        case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break;
 
377
                        case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break;
 
378
                        case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break;
 
379
                        case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break;
 
380
                        case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break;
 
381
                        case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break;
 
382
                        case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break;
 
383
                        case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break;
 
384
                        case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break;
 
385
                        case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break;
 
386
                        case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break;
 
387
                        case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break;
 
388
                        case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break;
 
389
                        case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break;
 
390
                        case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break;
 
391
                        case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break;
 
392
                        case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break;
 
393
                        case UBLOCK_THAI: scripts[SE_THAI] = true; break;
 
394
                        case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break;
 
395
                        case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break;
 
396
                        case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break;
 
397
                        case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break;
 
398
                        case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;
 
399
//                      case UBLOCK_MEROITIC: scripts[SE_MEROITIC] = true; break;
 
400
                        case UBLOCK_LINEAR_B_SYLLABARY: scripts[SE_LINEARB] = true; break;
 
401
                        case UBLOCK_CYPRIOT_SYLLABARY: scripts[SE_CYPRIOT] = true; break;
 
402
                        case UBLOCK_RUNIC: scripts[SE_RUNIC] = true; break;
 
403
                        case UBLOCK_OGHAM: scripts[SE_OGHAM] = true; break;
 
404
                        case UBLOCK_THAANA: scripts[SE_THAANA] = true; break;
 
405
                        case UBLOCK_GLAGOLITIC: scripts[SE_GLAGOLITIC] = true; break;
 
406
//                      case UBLOCK_TENGWAR: scripts[SE_TENGWAR] = true; break;
 
407
//                      case UBLOCK_CIRTH: scripts[SE_CIRTH] = true; break;
 
408
                        case UBLOCK_CJK_RADICALS_SUPPLEMENT:
 
409
                        case UBLOCK_KANGXI_RADICALS:
 
410
                        case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS:
 
411
                        case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
 
412
                        case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
 
413
                        case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:
 
414
                                scripts[SE_HAN] = true;
 
415
                                break;
 
416
                        case UBLOCK_CJK_COMPATIBILITY:
 
417
                        case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS:
 
418
                        case UBLOCK_CJK_COMPATIBILITY_FORMS:
 
419
                                scripts[SE_HAN] = true;
 
420
                                compat = true;
 
421
                                break;
 
422
                        case UBLOCK_HANGUL_COMPATIBILITY_JAMO:
 
423
                                scripts[SE_HANGUL] = true;
 
424
                                compat = true;
 
425
                                break;
 
426
 
 
427
                        //default: scripts[SE_LATIN] = true;
 
428
                        }
 
429
                }
 
430
                scripts[option] = false; //turn off the reflexive transliteration
 
431
 
 
432
                //return if we have no transliteration to do for this text
 
433
                j = 0;
 
434
                for (i = 0; !j && i < NUMSCRIPTS; i++) {
 
435
                        if (scripts[i]) j++;
 
436
                }
 
437
                if (!j) {
 
438
                        ucnv_close(conv);
 
439
                        return 0;
 
440
                }
 
441
 
 
442
                if (compat) {
 
443
                        addTrans("NFKD", &ID);
 
444
                }
 
445
                else {
 
446
                        addTrans("NFD", &ID);
 
447
                }
 
448
 
 
449
                //Simple X to Latin transliterators
 
450
                if (scripts[SE_GREEK]) {
 
451
                        if (strnicmp (((SWModule*)module)->Lang(), "cop", 3)) {
 
452
                                if (option == SE_SBL)
 
453
                                        addTrans("Greek-Latin/SBL", &ID);
 
454
                                else if (option == SE_TC)
 
455
                                        addTrans("Greek-Latin/TC", &ID);
 
456
                                else if (option == SE_BETA)
 
457
                                        addTrans("Greek-Latin/Beta", &ID);
 
458
                                else if (option == SE_BGREEK)
 
459
                                        addTrans("Greek-Latin/BGreek", &ID);
 
460
                                else if (option == SE_UNGEGN)
 
461
                                        addTrans("Greek-Latin/UNGEGN", &ID);
 
462
                                else if (option == SE_ISO)
 
463
                                        addTrans("Greek-Latin/ISO", &ID);
 
464
                                else if (option == SE_ALALC)
 
465
                                        addTrans("Greek-Latin/ALALC", &ID);
 
466
                                else if (option == SE_BGN)
 
467
                                        addTrans("Greek-Latin/BGN", &ID);
 
468
                                else if (option == SE_IPA)
 
469
                                        addTrans("Greek-IPA/Ancient", &ID);
 
470
                                else {
 
471
                                        addTrans("Greek-Latin", &ID);
 
472
                                        scripts[SE_LATIN] = true;
 
473
                                }
 
474
                        }
 
475
                        else {
 
476
                                if (option == SE_SBL)
 
477
                                        addTrans("Coptic-Latin/SBL", &ID);
 
478
                                else if (option == SE_TC)
 
479
                                        addTrans("Coptic-Latin/TC", &ID);
 
480
                                else if (option == SE_BETA)
 
481
                                        addTrans("Coptic-Latin/Beta", &ID);
 
482
                                else if (option == SE_IPA)
 
483
                                        addTrans("Coptic-IPA", &ID);
 
484
                                else {
 
485
                                        addTrans("Coptic-Latin", &ID);
 
486
                                        scripts[SE_LATIN] = true;
 
487
                                }
 
488
                        }
 
489
                }
 
490
                if (scripts[SE_HEBREW]) {
 
491
                        if (option == SE_SBL)
 
492
                                addTrans("Hebrew-Latin/SBL", &ID);
 
493
                        else if (option == SE_TC)
 
494
                                addTrans("Hebrew-Latin/TC", &ID);
 
495
                        else if (option == SE_BETA)
 
496
                                addTrans("Hebrew-Latin/Beta", &ID);
 
497
                        else if (option == SE_UNGEGN)
 
498
                                addTrans("Hebrew-Latin/UNGEGN", &ID);
 
499
                        else if (option == SE_ALALC)
 
500
                                addTrans("Hebrew-Latin/ALALC", &ID);
 
501
                        else if (option == SE_SYRIAC)
 
502
                                addTrans("Hebrew-Syriac", &ID);
 
503
                        else {
 
504
                                addTrans("Hebrew-Latin", &ID);
 
505
                                scripts[SE_LATIN] = true;
 
506
                        }
 
507
                }
 
508
                if (scripts[SE_CYRILLIC]) {
 
509
                        if (option == SE_GLAGOLITIC)
 
510
                                addTrans("Cyrillic-Glagolitic", &ID);
 
511
                        else {
 
512
                                addTrans("Cyrillic-Latin", &ID);
 
513
                                scripts[SE_LATIN] = true;
 
514
                        }
 
515
                }
 
516
                if (scripts[SE_ARABIC]) {
 
517
                        addTrans("Arabic-Latin", &ID);
 
518
                        scripts[SE_LATIN] = true;
 
519
                }
 
520
                if (scripts[SE_SYRIAC]) {
 
521
                        if (option == SE_TC)
 
522
                                addTrans("Syriac-Latin/TC", &ID);
 
523
                        else if (option == SE_BETA)
 
524
                                addTrans("Syriac-Latin/Beta", &ID);
 
525
                        else if (option == SE_HUGOYE)
 
526
                                addTrans("Syriac-Latin/Hugoye", &ID);
 
527
                        else if (option == SE_HEBREW)
 
528
                                addTrans("Syriac-Hebrew", &ID);
 
529
                        else {
 
530
                                addTrans("Syriac-Latin", &ID);
 
531
                                scripts[SE_LATIN] = true;
 
532
                        }
 
533
                }
 
534
                if (scripts[SE_THAI]) {
 
535
                        addTrans("Thai-Latin", &ID);
 
536
                        scripts[SE_LATIN] = true;
 
537
                }
 
538
                if (scripts[SE_GEORGIAN]) {
 
539
                        if (option == SE_ISO)
 
540
                                addTrans("Georgian-Latin/ISO", &ID);
 
541
                        else if (option == SE_ALALC)
 
542
                                addTrans("Georgian-Latin/ALALC", &ID);
 
543
                        else if (option == SE_BGN)
 
544
                                addTrans("Georgian-Latin/BGN", &ID);
 
545
                        else if (option == SE_IPA)
 
546
                                addTrans("Georgian-IPA", &ID);
 
547
                        else {
 
548
                                addTrans("Georgian-Latin", &ID);
 
549
                                scripts[SE_LATIN] = true;
 
550
                        }
 
551
                }
 
552
                if (scripts[SE_ARMENIAN]) {
 
553
                        if (option == SE_ISO)
 
554
                                addTrans("Armenian-Latin/ISO", &ID);
 
555
                        else if (option == SE_ALALC)
 
556
                                addTrans("Armenian-Latin/ALALC", &ID);
 
557
                        else if (option == SE_BGN)
 
558
                                addTrans("Armenian-Latin/BGN", &ID);
 
559
                        else if (option == SE_IPA)
 
560
                                addTrans("Armenian-IPA", &ID);
 
561
                        else {
 
562
                                addTrans("Armenian-Latin", &ID);
 
563
                                scripts[SE_LATIN] = true;
 
564
                        }
 
565
                }
 
566
                if (scripts[SE_ETHIOPIC]) {
 
567
                        if (option == SE_UNGEGN)
 
568
                                addTrans("Ethiopic-Latin/UNGEGN", &ID);
 
569
                        else if (option == SE_ISO)
 
570
                                addTrans("Ethiopic-Latin/ISO", &ID);
 
571
                        else if (option == SE_ALALC)
 
572
                                addTrans("Ethiopic-Latin/ALALC", &ID);
 
573
                        else if (option == SE_SERA)
 
574
                                addTrans("Ethiopic-Latin/SERA", &ID);
 
575
                        else {
 
576
                                addTrans("Ethiopic-Latin", &ID);
 
577
                                scripts[SE_LATIN] = true;
 
578
                        }
 
579
                }
 
580
                if (scripts[SE_GOTHIC]) {
 
581
                        if (option == SE_BASICLATIN)
 
582
                                addTrans("Gothic-Latin/Basic", &ID);
 
583
                        else if (option == SE_IPA)
 
584
                                addTrans("Gothic-IPA", &ID);
 
585
                        else {
 
586
                                addTrans("Gothic-Latin", &ID);
 
587
                                scripts[SE_LATIN] = true;
 
588
                        }
 
589
                }
 
590
                if (scripts[SE_UGARITIC]) {
 
591
                        if (option == SE_SBL)
 
592
                                addTrans("Ugaritic-Latin/SBL", &ID);
 
593
                        else {
 
594
                                addTrans("Ugaritic-Latin", &ID);
 
595
                                scripts[SE_LATIN] = true;
 
596
                        }
 
597
                }
 
598
                if (scripts[SE_MEROITIC]) {
 
599
                        addTrans("Meroitic-Latin", &ID);
 
600
                        scripts[SE_LATIN] = true;
 
601
                }
 
602
                if (scripts[SE_LINEARB]) {
 
603
                        addTrans("LinearB-Latin", &ID);
 
604
                        scripts[SE_LATIN] = true;
 
605
                }
 
606
                if (scripts[SE_CYPRIOT]) {
 
607
                        addTrans("Cypriot-Latin", &ID);
 
608
                        scripts[SE_LATIN] = true;
 
609
                }
 
610
                if (scripts[SE_RUNIC]) {
 
611
                        addTrans("Runic-Latin", &ID);
 
612
                        scripts[SE_LATIN] = true;
 
613
                }
 
614
                if (scripts[SE_OGHAM]) {
 
615
                        addTrans("Ogham-Latin", &ID);
 
616
                        scripts[SE_LATIN] = true;
 
617
                }
 
618
                if (scripts[SE_THAANA]) {
 
619
                        if (option == SE_ALALC)
 
620
                                addTrans("Thaana-Latin/ALALC", &ID);
 
621
                        else if (option == SE_BGN)
 
622
                                addTrans("Thaana-Latin/BGN", &ID);
 
623
                        else {
 
624
                                addTrans("Thaana-Latin", &ID);
 
625
                                scripts[SE_LATIN] = true;
 
626
                        }
 
627
                }
 
628
                if (scripts[SE_GLAGOLITIC]) {
 
629
                        if (option == SE_ISO)
 
630
                                addTrans("Glagolitic-Latin/ISO", &ID);
 
631
                        else if (option == SE_ALALC)
 
632
                                addTrans("Glagolitic-Latin/ALALC", &ID);
 
633
                        else if (option == SE_ALALC)
 
634
                                addTrans("Glagolitic-Cyrillic", &ID);
 
635
                        else {
 
636
                                addTrans("Glagolitic-Latin", &ID);
 
637
                                scripts[SE_LATIN] = true;
 
638
                        }
 
639
                }
 
640
                if (scripts[SE_THAI]) {
 
641
                        addTrans("Thai-Latin", &ID);
 
642
                        scripts[SE_LATIN] = true;
 
643
                }
 
644
                if (scripts[SE_THAI]) {
 
645
                        addTrans("Thai-Latin", &ID);
 
646
                        scripts[SE_LATIN] = true;
 
647
                }
 
648
 
 
649
                if (scripts[SE_HAN]) {
 
650
                        if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) {
 
651
                                addTrans("Kanji-Romaji", &ID);
 
652
                        }
 
653
                        else {
 
654
                                addTrans("Han-Latin", &ID);
 
655
                        }
 
656
                        scripts[SE_LATIN] = true;
 
657
                }
 
658
 
 
659
                // Inter-Kana and Kana to Latin transliterators
 
660
                if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) {
 
661
                        addTrans("Katakana-Hiragana", &ID);
 
662
                        scripts[SE_HIRAGANA] = true;
 
663
                }
 
664
                else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) {
 
665
                        addTrans("Hiragana-Katakana", &ID);
 
666
                        scripts[SE_KATAKANA] = true;
 
667
                }
 
668
                else {
 
669
                        if (scripts[SE_KATAKANA]) {
 
670
                                addTrans("Katakana-Latin", &ID);
 
671
                                scripts[SE_LATIN] = true;
 
672
                        }
 
673
                        if (scripts[SE_HIRAGANA]) {
 
674
                                addTrans("Hiragana-Latin", &ID);
 
675
                                scripts[SE_LATIN] = true;
 
676
                        }
 
677
                }
 
678
 
 
679
                // Korean to Latin transliterators
 
680
                if (scripts[SE_HANGUL]) {
 
681
                        addTrans("Hangul-Latin", &ID);
 
682
                        scripts[SE_LATIN] = true;
 
683
                }
 
684
                if (scripts[SE_JAMO]) {
 
685
                        addTrans("Jamo-Latin", &ID);
 
686
                        scripts[SE_LATIN] = true;
 
687
                }
 
688
 
 
689
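                // Indic scripts: go straight to Latin unless the requested option lies in the
                // SE_DEVANAGARI..SE_MALAYALAM range, in which case pivot through InterIndic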
 
690
                if (option < SE_DEVANAGARI || option > SE_MALAYALAM) {
 
691
                        // Indic to Latin
 
692
                        if (scripts[SE_TAMIL]) {
 
693
                                addTrans("Tamil-Latin", &ID);
 
694
                                scripts[SE_LATIN] = true;
 
695
                        }
 
696
                        if (scripts[SE_BENGALI]) {
 
697
                                addTrans("Bengali-Latin", &ID);
 
698
                                scripts[SE_LATIN] = true;
 
699
                        }
 
700
                        if (scripts[SE_GURMUKHI]) {
 
701
                                addTrans("Gurmukhi-Latin", &ID);
 
702
                                scripts[SE_LATIN] = true;
 
703
                        }
 
704
                        if (scripts[SE_GUJARATI]) {
 
705
                                addTrans("Gujarati-Latin", &ID);
 
706
                                scripts[SE_LATIN] = true;
 
707
                        }
 
708
                        if (scripts[SE_ORIYA]) {
 
709
                                addTrans("Oriya-Latin", &ID);
 
710
                                scripts[SE_LATIN] = true;
 
711
                        }
 
712
                        if (scripts[SE_TELUGU]) {
 
713
                                addTrans("Telugu-Latin", &ID);
 
714
                                scripts[SE_LATIN] = true;
 
715
                        }
 
716
                        if (scripts[SE_KANNADA]) {
 
717
                                addTrans("Kannada-Latin", &ID);
 
718
                                scripts[SE_LATIN] = true;
 
719
                        }
 
720
                        if (scripts[SE_MALAYALAM]) {
 
721
                                addTrans("Malayalam-Latin", &ID);
 
722
                                scripts[SE_LATIN] = true;
 
723
                        }
 
724
                }
 
725
                else {
 
726
                        if (scripts[SE_LATIN]) {
 
727
                                addTrans("Latin-InterIndic", &ID);
 
728
                        }
 
729
                        if (scripts[SE_DEVANAGARI]) {
 
730
                                addTrans("Devanagari-InterIndic", &ID);
 
731
                        }
 
732
                        if (scripts[SE_TAMIL]) {
 
733
                                addTrans("Tamil-InterIndic", &ID);
 
734
                        }
 
735
                        if (scripts[SE_BENGALI]) {
 
736
                                addTrans("Bengali-InterIndic", &ID);
 
737
                        }
 
738
                        if (scripts[SE_GURMUKHI]) {
 
739
                                addTrans("Gurmurkhi-InterIndic", &ID);
 
740
                        }
 
741
                        if (scripts[SE_GUJARATI]) {
 
742
                                addTrans("Gujarati-InterIndic", &ID);
 
743
                        }
 
744
                        if (scripts[SE_ORIYA]) {
 
745
                                addTrans("Oriya-InterIndic", &ID);
 
746
                        }
 
747
                        if (scripts[SE_TELUGU]) {
 
748
                                addTrans("Telugu-InterIndic", &ID);
 
749
                        }
 
750
                        if (scripts[SE_KANNADA]) {
 
751
                                addTrans("Kannada-InterIndic", &ID);
 
752
                        }
 
753
                        if (scripts[SE_MALAYALAM]) {
 
754
                                addTrans("Malayalam-InterIndic", &ID);
 
755
                        }
 
756
 
 
757
                        switch(option) {
 
758
                        case SE_DEVANAGARI:
 
759
                                addTrans("InterIndic-Devanagari", &ID);
 
760
                                break;
 
761
                        case SE_TAMIL:
 
762
                                addTrans("InterIndic-Tamil", &ID);
 
763
                                break;
 
764
                        case SE_BENGALI:
 
765
                                addTrans("InterIndic-Bengali", &ID);
 
766
                                break;
 
767
                        case SE_GURMUKHI:
 
768
                                addTrans("InterIndic-Gurmukhi", &ID);
 
769
                                break;
 
770
                        case SE_GUJARATI:
 
771
                                addTrans("InterIndic-Gujarati", &ID);
 
772
                                break;
 
773
                        case SE_ORIYA:
 
774
                                addTrans("InterIndic-Oriya", &ID);
 
775
                                break;
 
776
                        case SE_TELUGU:
 
777
                                addTrans("InterIndic-Telugu", &ID);
 
778
                                break;
 
779
                        case SE_KANNADA:
 
780
                                addTrans("InterIndic-Kannada", &ID);
 
781
                                break;
 
782
                        case SE_MALAYALAM:
 
783
                                addTrans("InterIndic-Malayalam", &ID);
 
784
                                break;
 
785
                        default:
 
786
                                addTrans("InterIndic-Latin", &ID);
 
787
                                scripts[SE_LATIN] = true;
 
788
                                break;
 
789
                        }
 
790
                }
 
791
 
 
792
//              if (scripts[SE_TENGWAR]) {
 
793
//                      addTrans("Tengwar-Latin", &ID);
 
794
//                      scripts[SE_LATIN] = true;
 
795
//              }
 
796
//              if (scripts[SE_CIRTH]) {
 
797
//                      addTrans("Cirth-Latin", &ID);
 
798
//                      scripts[SE_LATIN] = true;
 
799
//              }
 
800
 
 
801
                if (scripts[SE_LATIN]) {
 
802
                switch (option) {
 
803
                        case SE_GREEK:
 
804
                                addTrans("Latin-Greek", &ID);
 
805
                                break;
 
806
                        case SE_HEBREW:
 
807
                                addTrans("Latin-Hebrew", &ID);
 
808
                                break;
 
809
                        case SE_CYRILLIC:
 
810
                                addTrans("Latin-Cyrillic", &ID);
 
811
                                break;
 
812
                        case SE_ARABIC:
 
813
                                addTrans("Latin-Arabic", &ID);
 
814
                                break;
 
815
                        case SE_SYRIAC:
 
816
                                addTrans("Latin-Syriac", &ID);
 
817
                                break;
 
818
                        case SE_THAI:
 
819
                                addTrans("Latin-Thai", &ID);
 
820
                                break;
 
821
                        case SE_GEORGIAN:
 
822
                                addTrans("Latin-Georgian", &ID);
 
823
                                break;
 
824
                        case SE_ARMENIAN:
 
825
                                addTrans("Latin-Armenian", &ID);
 
826
                                break;
 
827
                        case SE_ETHIOPIC:
 
828
                                addTrans("Latin-Ethiopic", &ID);
 
829
                                break;
 
830
                        case SE_GOTHIC:
 
831
                                addTrans("Latin-Gothic", &ID);
 
832
                                break;
 
833
                        case SE_UGARITIC:
 
834
                                addTrans("Latin-Ugaritic", &ID);
 
835
                                break;
 
836
                        case SE_COPTIC:
 
837
                                addTrans("Latin-Coptic", &ID);
 
838
                                break;
 
839
                        case SE_KATAKANA:
 
840
                                addTrans("Latin-Katakana", &ID);
 
841
                                break;
 
842
                        case SE_HIRAGANA:
 
843
                                addTrans("Latin-Hiragana", &ID);
 
844
                                break;
 
845
                        case SE_JAMO:
 
846
                                addTrans("Latin-Jamo", &ID);
 
847
                                break;
 
848
                        case SE_HANGUL:
 
849
                                addTrans("Latin-Hangul", &ID);
 
850
                                break;
 
851
                        case SE_MEROITIC:
 
852
                                addTrans("Latin-Meroitic", &ID);
 
853
                                break;
 
854
                        case SE_LINEARB:
 
855
                                addTrans("Latin-LinearB", &ID);
 
856
                                break;
 
857
                        case SE_CYPRIOT:
 
858
                                addTrans("Latin-Cypriot", &ID);
 
859
                                break;
 
860
                        case SE_RUNIC:
 
861
                                addTrans("Latin-Runic", &ID);
 
862
                                break;
 
863
                        case SE_OGHAM:
 
864
                                addTrans("Latin-Ogham", &ID);
 
865
                                break;
 
866
                        case SE_THAANA:
 
867
                                addTrans("Latin-Thaana", &ID);
 
868
                                break;
 
869
                        case SE_GLAGOLITIC:
 
870
                                addTrans("Latin-Glagolitic", &ID);
 
871
                                break;
 
872
//                        case SE_TENGWAR:
 
873
//                              addTrans("Latin-Tengwar", &ID);
 
874
//                                break;
 
875
//                        case SE_CIRTH:
 
876
//                              addTrans("Latin-Cirth", &ID);
 
877
//                                break;
 
878
                        }
 
879
                }
 
880
 
 
881
                if (option == SE_BASICLATIN) {
 
882
                        addTrans("Any-Latin1", &ID);
 
883
                }
 
884
 
 
885
                addTrans("NFC", &ID);
 
886
 
 
887
                err = U_ZERO_ERROR;
 
888
                Transliterator * trans = createTrans(UnicodeString(ID), UTRANS_FORWARD, err);
 
889
                if (trans && !U_FAILURE(err)) {
 
890
                        UnicodeString target = UnicodeString(source);
 
891
                        trans->transliterate(target);
 
892
                        text.setSize(text.size()*2);
 
893
                        len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target.getBuffer(), target.length(), &err);
 
894
                        text.setSize(len);
 
895
                        delete trans;
 
896
                }
 
897
                ucnv_close(conv);
 
898
        }
 
899
        return 0;
 
900
}
 
901
 
 
902
SWORD_NAMESPACE_END
 
903
#endif
 
904
 
 
905
 
 
906