1
/******************************************************************************
3
* utf8transliterators - SWFilter descendant to transliterate between
4
* ICU-supported scripts.
7
* Copyright 2009 CrossWire Bible Society (http://www.crosswire.org)
8
* CrossWire Bible Society
10
* Tempe, AZ 85280-2528
12
* This program is free software; you can redistribute it and/or modify it
13
* under the terms of the GNU General Public License as published by the
14
* Free Software Foundation version 2.
16
* This program is distributed in the hope that it will be useful, but
17
* WITHOUT ANY WARRANTY; without even the implied warranty of
18
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19
* General Public License for more details.
29
#include <unicode/ucnv.h>
30
#include <unicode/uchar.h>
31
#include <utf8transliterator.h>
35
#include "unicode/resbund.h"
41
// Names of the selectable target scripts: NUMTARGETSCRIPTS entries stored in
// 16-byte slots.  The constructor copies every entry into `options`, and
// setOptionValue()/getOptionValue() index this table with `option`.
// NOTE(review): the initializer entries are not visible in this excerpt.
const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
88
// Name and short description of this filter option.
const char UTF8Transliterator::optName[] = "Transliteration";
89
const char UTF8Transliterator::optTip[] = "Transliterates between scripts";
91
// Transliterators known to this class, keyed by ID: registerTrans() inserts
// entries and checkTrans() looks them up.
SWTransMap UTF8Transliterator::transMap;
95
// Resource key of the ID table fetched by Load().
const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs";
96
// Resource key under which checkTrans() reads a transliterator's rule text.
const char UTF8Transliterator::SW_RB_RULE[] = "Rule";
98
// Package path handed to ures_openDirect() by Load() and checkTrans().
// NOTE(review): two definitions are visible here; presumably a preprocessor
// conditional outside this excerpt selects between the build-supplied
// SWICU_DATA and the hard-coded fallback path -- confirm.
const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA;
100
const char UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/";
105
// Extracts the contents of a UnicodeString into a char buffer (defined out of
// line below).
inline SWCharString(const UnicodeString& str);
106
// Destructor (defined out of line below).
inline ~SWCharString();
107
// Implicit conversion that yields the stored `ptr`, so an instance can be
// passed wherever a const char* is expected.
inline operator const char*() { return ptr; }
112
// Copies `str` into a char buffer reachable through `ptr`.
// When the string's length does not fit in the member `buf`, a heap buffer of
// length + 8 bytes is allocated instead.
SWCharString::SWCharString(const UnicodeString& str) {
113
// TODO This isn't quite right -- we should probably do
114
// preflighting here to determine the real length.
115
// The size test uses the UnicodeString length, which is only an estimate of
// the number of bytes the extracted text needs (see the TODO above).
if (str.length() >= (int32_t)sizeof(buf)) {
116
ptr = new char[str.length() + 8];
120
// 0x7FFFFFFF (INT32_MAX) asks for everything from index 0 onward.
// NOTE(review): the empty codepage name presumably selects ICU's
// invariant-character conversion -- confirm against the UnicodeString docs.
str.extract(0, 0x7FFFFFFF, ptr, "");
123
// Destructor.  NOTE(review): the body is not visible in this excerpt;
// presumably it releases `ptr` when the constructor heap-allocated it -- confirm.
SWCharString::~SWCharString() {
132
// Constructs the filter: appends every entry of the optionstring table to
// `options` and resets the stored ICU status to U_ZERO_ERROR.
UTF8Transliterator::UTF8Transliterator() {
135
// One push per row of optionstring.
for (i = 0; i < NUMTARGETSCRIPTS; i++) {
136
options.push_back(optionstring[i]);
139
utf8status = U_ZERO_ERROR;
145
// Destructor.  NOTE(review): the body is not visible in this excerpt.
UTF8Transliterator::~UTF8Transliterator() {
149
// Loads the SWORD transliterator index and records every rule-based
// transliterator it lists.
//
// Opens the "translit_swordindex" bundle under SW_RESDATA, fetches the table
// stored under SW_RB_RULE_BASED_IDS and walks its rows.  A usable row has four
// columns: [0] transliterator ID, [1] type character, [2] resource string,
// [3] direction character.  Each such row is handed to registerTrans().
//
// @param status  ICU error code passed to every ICU call made here; failures
//                are reported through SWLog.
void UTF8Transliterator::Load(UErrorCode &status)
152
static const char translit_swordindex[] = "translit_swordindex";
154
UResourceBundle *bundle = 0, *transIDs = 0, *colBund = 0;
155
bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status);
156
if (U_FAILURE(status)) {
157
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: no resource index to load");
158
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
162
transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status);
163
//UParseError parseError;
165
int32_t row, maxRows;
166
if (U_SUCCESS(status)) {
167
maxRows = ures_getSize(transIDs);
168
for (row = 0; row < maxRows; row++) {
169
// NOTE(review): a null fill-in argument makes ICU allocate a new bundle
// for every row; verify that each one is closed (the matching ures_close
// calls are not visible in this excerpt).
colBund = ures_getByIndex(transIDs, row, 0, &status);
171
// Rows that do not have exactly four columns are rejected.
if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
172
UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
173
UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
174
UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
175
SWLog::getSystemLog()->logDebug("ok so far");
177
if (U_SUCCESS(status)) {
181
// 'file' or 'internal';
182
// row[2]=resource, row[3]=direction
184
//UBool visible = (type == 0x0066 /*f*/);
185
UTransDirection dir =
186
(ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
188
UTRANS_FORWARD : UTRANS_REVERSE;
189
//registry->put(id, resString, dir, visible);
190
// resString holds UTF-16 text, which a %s conversion cannot print; convert
// it to a char string through SWCharString before logging.
SWLog::getSystemLog()->logDebug("instantiating %s ...", static_cast<const char *>(SWCharString(resString)));
191
registerTrans(id, resString, dir, status);
192
SWLog::getSystemLog()->logDebug("done.");
196
// 'alias'; row[2]=createInstance argument
197
//registry->put(id, resString, TRUE);
201
else SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get resString");
203
else SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get row");
209
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: no resource index to load");
210
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
213
ures_close(transIDs);
219
// Records a transliterator description in the class-wide transMap so that
// checkTrans() can find it later by ID.
//
// @param ID        transliterator ID used as the map key (it is logged here
//                  and looked up by checkTrans()).
// @param resource  resource string stored with the entry.
// @param dir       transliteration direction for the entry.
// @param status    ICU error code; no visible line of this excerpt uses it.
void UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource,
220
UTransDirection dir, UErrorCode &status )
223
// ID holds UTF-16 text, which a %s conversion cannot print; convert it to a
// char string through SWCharString before logging.
SWLog::getSystemLog()->logDebug("registering ID locally %s", static_cast<const char *>(SWCharString(ID)));
225
swstuff.resource = resource;
229
swpair.second = swstuff;
230
transMap.insert(swpair);
234
// Makes sure a transliterator with the given ID can be instantiated.
//
// First asks ICU for the ID directly.  If ICU does not know it, the ID is
// looked up in transMap; when found, the rule text is read from the resource
// bundle named by the stored entry (key SW_RB_RULE), a transliterator is built
// from those rules with the stored direction, and it is registered with ICU.
//
// @param ID      transliterator ID to check.
// @param status  ICU error code; reset to U_ZERO_ERROR before the map lookup.
// @return NOTE(review): the return statements are not visible in this excerpt;
//         addTrans() treats a true result as "ID is usable".
bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status )
237
Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status);
238
if (!U_FAILURE(status))
240
// already have it, clean up and return true
241
// ID holds UTF-16 text, which a %s conversion cannot print; convert first.
SWLog::getSystemLog()->logDebug("already have it %s", static_cast<const char *>(SWCharString(ID)));
245
status = U_ZERO_ERROR;
247
SWTransMap::iterator swelement;
248
if ((swelement = transMap.find(ID)) != transMap.end())
250
SWLog::getSystemLog()->logDebug("found element in map");
251
SWTransData swstuff = (*swelement).second;
252
// Zero-initialised so the context strings logged below are always
// NUL-terminated, even if ICU never fills the struct in.
UParseError parseError = UParseError();
254
//std::cout << "unregistering " << ID << std::endl;
255
//Transliterator::unregister(ID);
256
SWLog::getSystemLog()->logDebug("resource is %s", static_cast<const char *>(SWCharString(swstuff.resource)));
259
//std::cout << "importing: " << ID << ", " << resource << std::endl;
260
SWCharString ch(swstuff.resource);
261
UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status);
262
const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status);
264
//parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD,
265
// parseError, status);
266
if (U_FAILURE(status)) {
267
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get rules");
268
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
273
Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir,
275
if (U_FAILURE(status)) {
276
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to create transliterator");
277
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
278
// parseError.line is an integer, so it must be printed with %d; %s would
// make the logger treat the number as a string pointer.
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: line %d", parseError.line);
279
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: offset %d", parseError.offset);
280
// preContext/postContext are UChar arrays; dereferencing them yields a single
// UTF-16 unit, not a char string.  Convert the whole array for %s.
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: preContext %s", static_cast<const char *>(SWCharString(UnicodeString(parseError.preContext))));
281
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: postContext %s", static_cast<const char *>(SWCharString(UnicodeString(parseError.postContext))));
282
SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: rules were");
283
// SWLog::getSystemLog()->logError((const char *)rules);
287
// Hand the new transliterator to ICU's registry so later createInstance()
// calls can resolve the ID.
Transliterator::registerInstance(trans);
290
//Transliterator *trans = instantiateTrans(ID, swstuff.resource, swstuff.dir, parseError, status);
302
// Appends the transliterator ID `newTrans` to the list held in `*transList`.
// The append is guarded by checkTrans() on the same ID.
// NOTE(review): the declaration of `status` and the return statements are not
// visible in this excerpt; ICU calls expect the status to start out as
// U_ZERO_ERROR -- confirm it is initialised before this call.
bool UTF8Transliterator::addTrans(const char* newTrans, SWBuf* transList) {
305
if (checkTrans(UnicodeString(newTrans), status)) {
307
*transList += newTrans;
318
// Creates an ICU transliterator for `ID`.
//
// @param ID      transliterator ID passed to Transliterator::createInstance().
// @param dir     direction in which the transliterator is created.
// @param status  ICU error code; checked after creation.
// @return NOTE(review): the return statements are not visible in this excerpt.
Transliterator * UTF8Transliterator::createTrans(const UnicodeString& ID, UTransDirection dir, UErrorCode &status )
320
// Honour the caller's direction; the parameter used to be ignored in favour
// of a hard-coded UTRANS_FORWARD.  The call visible in processText() passes
// UTRANS_FORWARD, so its behaviour is unchanged.
Transliterator *trans = Transliterator::createInstance(ID,dir,status);
321
if (U_FAILURE(status)) {
330
// Selects the target script whose name matches `ival`, compared
// case-insensitively against the optionstring table.
//
// @param ival  name of the requested target script.
void UTF8Transliterator::setOptionValue(const char *ival)
332
// Start at the last valid row.  optionstring has NUMTARGETSCRIPTS rows, so
// index NUMTARGETSCRIPTS itself is one past the end and must never be read.
unsigned char i = option = NUMTARGETSCRIPTS - 1;
333
// Scans downward; the `i &&` test stops at index 0 without comparing it.
// NOTE(review): the loop body is not visible in this excerpt; presumably it
// decrements i and mirrors it into `option` -- confirm.
while (i && stricmp(ival, optionstring[i])) {
339
// Returns the name of the currently selected target script, or 0 (null) when
// `option` is not a valid index into optionstring.
const char *UTF8Transliterator::getOptionValue()
341
return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0;
344
char UTF8Transliterator::processText(SWBuf &text, const SWKey *key, const SWModule *module)
346
if (option) { // if we want transliteration
348
UErrorCode err = U_ZERO_ERROR;
349
UConverter * conv = NULL;
350
conv = ucnv_open("UTF-8", &err);
355
// Convert UTF-8 string to UTF-16 (UChars)
357
int32_t len = (j * 2) + 1;
358
UChar *source = new UChar[len];
360
len = ucnv_toUChars(conv, source, len, text, j, &err);
363
// Figure out which scripts are used in the string
364
unsigned char scripts[NUMSCRIPTS];
366
for (i = 0; i < NUMSCRIPTS; i++) {
370
for (i = 0; i < (unsigned long)len; i++) {
371
j = ublock_getCode(source[i]);
372
scripts[SE_LATIN] = true;
374
//case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break;
375
case UBLOCK_GREEK: scripts[SE_GREEK] = true; break;
376
case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break;
377
case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break;
378
case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break;
379
case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break;
380
case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break;
381
case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break;
382
case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break;
383
case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break;
384
case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break;
385
case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break;
386
case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break;
387
case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break;
388
case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break;
389
case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break;
390
case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break;
391
case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break;
392
case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break;
393
case UBLOCK_THAI: scripts[SE_THAI] = true; break;
394
case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break;
395
case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break;
396
case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break;
397
case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break;
398
case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;
399
// case UBLOCK_MEROITIC: scripts[SE_MEROITIC] = true; break;
400
case UBLOCK_LINEAR_B_SYLLABARY: scripts[SE_LINEARB] = true; break;
401
case UBLOCK_CYPRIOT_SYLLABARY: scripts[SE_CYPRIOT] = true; break;
402
case UBLOCK_RUNIC: scripts[SE_RUNIC] = true; break;
403
case UBLOCK_OGHAM: scripts[SE_OGHAM] = true; break;
404
case UBLOCK_THAANA: scripts[SE_THAANA] = true; break;
405
case UBLOCK_GLAGOLITIC: scripts[SE_GLAGOLITIC] = true; break;
406
// case UBLOCK_TENGWAR: scripts[SE_TENGWAR] = true; break;
407
// case UBLOCK_CIRTH: scripts[SE_CIRTH] = true; break;
408
case UBLOCK_CJK_RADICALS_SUPPLEMENT:
409
case UBLOCK_KANGXI_RADICALS:
410
case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS:
411
case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
412
case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
413
case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:
414
scripts[SE_HAN] = true;
416
case UBLOCK_CJK_COMPATIBILITY:
417
case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS:
418
case UBLOCK_CJK_COMPATIBILITY_FORMS:
419
scripts[SE_HAN] = true;
422
case UBLOCK_HANGUL_COMPATIBILITY_JAMO:
423
scripts[SE_HANGUL] = true;
427
//default: scripts[SE_LATIN] = true;
430
scripts[option] = false; //turn off the reflexive transliteration
432
//return if we have no transliteration to do for this text
434
for (i = 0; !j && i < NUMSCRIPTS; i++) {
443
addTrans("NFKD", &ID);
446
addTrans("NFD", &ID);
449
//Simple X to Latin transliterators
450
if (scripts[SE_GREEK]) {
451
if (strnicmp (((SWModule*)module)->Lang(), "cop", 3)) {
452
if (option == SE_SBL)
453
addTrans("Greek-Latin/SBL", &ID);
454
else if (option == SE_TC)
455
addTrans("Greek-Latin/TC", &ID);
456
else if (option == SE_BETA)
457
addTrans("Greek-Latin/Beta", &ID);
458
else if (option == SE_BGREEK)
459
addTrans("Greek-Latin/BGreek", &ID);
460
else if (option == SE_UNGEGN)
461
addTrans("Greek-Latin/UNGEGN", &ID);
462
else if (option == SE_ISO)
463
addTrans("Greek-Latin/ISO", &ID);
464
else if (option == SE_ALALC)
465
addTrans("Greek-Latin/ALALC", &ID);
466
else if (option == SE_BGN)
467
addTrans("Greek-Latin/BGN", &ID);
468
else if (option == SE_IPA)
469
addTrans("Greek-IPA/Ancient", &ID);
471
addTrans("Greek-Latin", &ID);
472
scripts[SE_LATIN] = true;
476
if (option == SE_SBL)
477
addTrans("Coptic-Latin/SBL", &ID);
478
else if (option == SE_TC)
479
addTrans("Coptic-Latin/TC", &ID);
480
else if (option == SE_BETA)
481
addTrans("Coptic-Latin/Beta", &ID);
482
else if (option == SE_IPA)
483
addTrans("Coptic-IPA", &ID);
485
addTrans("Coptic-Latin", &ID);
486
scripts[SE_LATIN] = true;
490
if (scripts[SE_HEBREW]) {
491
if (option == SE_SBL)
492
addTrans("Hebrew-Latin/SBL", &ID);
493
else if (option == SE_TC)
494
addTrans("Hebrew-Latin/TC", &ID);
495
else if (option == SE_BETA)
496
addTrans("Hebrew-Latin/Beta", &ID);
497
else if (option == SE_UNGEGN)
498
addTrans("Hebrew-Latin/UNGEGN", &ID);
499
else if (option == SE_ALALC)
500
addTrans("Hebrew-Latin/ALALC", &ID);
501
else if (option == SE_SYRIAC)
502
addTrans("Hebrew-Syriac", &ID);
504
addTrans("Hebrew-Latin", &ID);
505
scripts[SE_LATIN] = true;
508
if (scripts[SE_CYRILLIC]) {
509
if (option == SE_GLAGOLITIC)
510
addTrans("Cyrillic-Glagolitic", &ID);
512
addTrans("Cyrillic-Latin", &ID);
513
scripts[SE_LATIN] = true;
516
if (scripts[SE_ARABIC]) {
517
addTrans("Arabic-Latin", &ID);
518
scripts[SE_LATIN] = true;
520
if (scripts[SE_SYRIAC]) {
522
addTrans("Syriac-Latin/TC", &ID);
523
else if (option == SE_BETA)
524
addTrans("Syriac-Latin/Beta", &ID);
525
else if (option == SE_HUGOYE)
526
addTrans("Syriac-Latin/Hugoye", &ID);
527
else if (option == SE_HEBREW)
528
addTrans("Syriac-Hebrew", &ID);
530
addTrans("Syriac-Latin", &ID);
531
scripts[SE_LATIN] = true;
534
if (scripts[SE_THAI]) {
535
addTrans("Thai-Latin", &ID);
536
scripts[SE_LATIN] = true;
538
if (scripts[SE_GEORGIAN]) {
539
if (option == SE_ISO)
540
addTrans("Georgian-Latin/ISO", &ID);
541
else if (option == SE_ALALC)
542
addTrans("Georgian-Latin/ALALC", &ID);
543
else if (option == SE_BGN)
544
addTrans("Georgian-Latin/BGN", &ID);
545
else if (option == SE_IPA)
546
addTrans("Georgian-IPA", &ID);
548
addTrans("Georgian-Latin", &ID);
549
scripts[SE_LATIN] = true;
552
if (scripts[SE_ARMENIAN]) {
553
if (option == SE_ISO)
554
addTrans("Armenian-Latin/ISO", &ID);
555
else if (option == SE_ALALC)
556
addTrans("Armenian-Latin/ALALC", &ID);
557
else if (option == SE_BGN)
558
addTrans("Armenian-Latin/BGN", &ID);
559
else if (option == SE_IPA)
560
addTrans("Armenian-IPA", &ID);
562
addTrans("Armenian-Latin", &ID);
563
scripts[SE_LATIN] = true;
566
if (scripts[SE_ETHIOPIC]) {
567
if (option == SE_UNGEGN)
568
addTrans("Ethiopic-Latin/UNGEGN", &ID);
569
else if (option == SE_ISO)
570
addTrans("Ethiopic-Latin/ISO", &ID);
571
else if (option == SE_ALALC)
572
addTrans("Ethiopic-Latin/ALALC", &ID);
573
else if (option == SE_SERA)
574
addTrans("Ethiopic-Latin/SERA", &ID);
576
addTrans("Ethiopic-Latin", &ID);
577
scripts[SE_LATIN] = true;
580
if (scripts[SE_GOTHIC]) {
581
if (option == SE_BASICLATIN)
582
addTrans("Gothic-Latin/Basic", &ID);
583
else if (option == SE_IPA)
584
addTrans("Gothic-IPA", &ID);
586
addTrans("Gothic-Latin", &ID);
587
scripts[SE_LATIN] = true;
590
if (scripts[SE_UGARITIC]) {
591
if (option == SE_SBL)
592
addTrans("Ugaritic-Latin/SBL", &ID);
594
addTrans("Ugaritic-Latin", &ID);
595
scripts[SE_LATIN] = true;
598
if (scripts[SE_MEROITIC]) {
599
addTrans("Meroitic-Latin", &ID);
600
scripts[SE_LATIN] = true;
602
if (scripts[SE_LINEARB]) {
603
addTrans("LinearB-Latin", &ID);
604
scripts[SE_LATIN] = true;
606
if (scripts[SE_CYPRIOT]) {
607
addTrans("Cypriot-Latin", &ID);
608
scripts[SE_LATIN] = true;
610
if (scripts[SE_RUNIC]) {
611
addTrans("Runic-Latin", &ID);
612
scripts[SE_LATIN] = true;
614
if (scripts[SE_OGHAM]) {
615
addTrans("Ogham-Latin", &ID);
616
scripts[SE_LATIN] = true;
618
if (scripts[SE_THAANA]) {
619
if (option == SE_ALALC)
620
addTrans("Thaana-Latin/ALALC", &ID);
621
else if (option == SE_BGN)
622
addTrans("Thaana-Latin/BGN", &ID);
624
addTrans("Thaana-Latin", &ID);
625
scripts[SE_LATIN] = true;
628
// Glagolitic source text: choose the transliterator that matches the
// requested target.
if (scripts[SE_GLAGOLITIC]) {
629
if (option == SE_ISO)
630
addTrans("Glagolitic-Latin/ISO", &ID);
631
else if (option == SE_ALALC)
632
addTrans("Glagolitic-Latin/ALALC", &ID);
633
// This branch used to repeat the SE_ALALC test and could never run.  A
// Cyrillic target is what "Glagolitic-Cyrillic" serves, mirroring the
// Cyrillic block's SE_GLAGOLITIC -> "Cyrillic-Glagolitic" case.
else if (option == SE_CYRILLIC)
634
addTrans("Glagolitic-Cyrillic", &ID);
636
addTrans("Glagolitic-Latin", &ID);
637
scripts[SE_LATIN] = true;
640
if (scripts[SE_THAI]) {
641
addTrans("Thai-Latin", &ID);
642
scripts[SE_LATIN] = true;
644
if (scripts[SE_THAI]) {
645
addTrans("Thai-Latin", &ID);
646
scripts[SE_LATIN] = true;
649
if (scripts[SE_HAN]) {
650
if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) {
651
addTrans("Kanji-Romaji", &ID);
654
addTrans("Han-Latin", &ID);
656
scripts[SE_LATIN] = true;
659
// Inter-Kana and Kana to Latin transliterators
660
if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) {
661
addTrans("Katakana-Hiragana", &ID);
662
scripts[SE_HIRAGANA] = true;
664
else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) {
665
addTrans("Hiragana-Katakana", &ID);
666
scripts[SE_KATAKANA] = true;
669
if (scripts[SE_KATAKANA]) {
670
addTrans("Katakana-Latin", &ID);
671
scripts[SE_LATIN] = true;
673
if (scripts[SE_HIRAGANA]) {
674
addTrans("Hiragana-Latin", &ID);
675
scripts[SE_LATIN] = true;
679
// Korean to Latin transliterators
680
if (scripts[SE_HANGUL]) {
681
addTrans("Hangul-Latin", &ID);
682
scripts[SE_LATIN] = true;
684
if (scripts[SE_JAMO]) {
685
addTrans("Jamo-Latin", &ID);
686
scripts[SE_LATIN] = true;
690
if (option < SE_DEVANAGARI || option > SE_MALAYALAM) {
692
if (scripts[SE_TAMIL]) {
693
addTrans("Tamil-Latin", &ID);
694
scripts[SE_LATIN] = true;
696
if (scripts[SE_BENGALI]) {
697
addTrans("Bengali-Latin", &ID);
698
scripts[SE_LATIN] = true;
700
if (scripts[SE_GURMUKHI]) {
701
addTrans("Gurmukhi-Latin", &ID);
702
scripts[SE_LATIN] = true;
704
if (scripts[SE_GUJARATI]) {
705
addTrans("Gujarati-Latin", &ID);
706
scripts[SE_LATIN] = true;
708
if (scripts[SE_ORIYA]) {
709
addTrans("Oriya-Latin", &ID);
710
scripts[SE_LATIN] = true;
712
if (scripts[SE_TELUGU]) {
713
addTrans("Telugu-Latin", &ID);
714
scripts[SE_LATIN] = true;
716
if (scripts[SE_KANNADA]) {
717
addTrans("Kannada-Latin", &ID);
718
scripts[SE_LATIN] = true;
720
if (scripts[SE_MALAYALAM]) {
721
addTrans("Malayalam-Latin", &ID);
722
scripts[SE_LATIN] = true;
726
if (scripts[SE_LATIN]) {
727
addTrans("Latin-InterIndic", &ID);
729
if (scripts[SE_DEVANAGARI]) {
730
addTrans("Devanagari-InterIndic", &ID);
732
if (scripts[SE_TAMIL]) {
733
addTrans("Tamil-InterIndic", &ID);
735
if (scripts[SE_BENGALI]) {
736
addTrans("Bengali-InterIndic", &ID);
738
// Gurmukhi source text goes through the InterIndic pivot.
if (scripts[SE_GURMUKHI]) {
739
// The script name is spelled "Gurmukhi", as in the "Gurmukhi-Latin" and
// "InterIndic-Gurmukhi" IDs used elsewhere in this function; the former
// "Gurmurkhi" spelling named a transliterator that does not exist.
addTrans("Gurmukhi-InterIndic", &ID);
741
if (scripts[SE_GUJARATI]) {
742
addTrans("Gujarati-InterIndic", &ID);
744
if (scripts[SE_ORIYA]) {
745
addTrans("Oriya-InterIndic", &ID);
747
if (scripts[SE_TELUGU]) {
748
addTrans("Telugu-InterIndic", &ID);
750
if (scripts[SE_KANNADA]) {
751
addTrans("Kannada-InterIndic", &ID);
753
if (scripts[SE_MALAYALAM]) {
754
addTrans("Malayalam-InterIndic", &ID);
759
addTrans("InterIndic-Devanagari", &ID);
762
addTrans("InterIndic-Tamil", &ID);
765
addTrans("InterIndic-Bengali", &ID);
768
addTrans("InterIndic-Gurmukhi", &ID);
771
addTrans("InterIndic-Gujarati", &ID);
774
addTrans("InterIndic-Oriya", &ID);
777
addTrans("InterIndic-Telugu", &ID);
780
addTrans("InterIndic-Kannada", &ID);
783
addTrans("InterIndic-Malayalam", &ID);
786
addTrans("InterIndic-Latin", &ID);
787
scripts[SE_LATIN] = true;
792
// if (scripts[SE_TENGWAR]) {
793
// addTrans("Tengwar-Latin", &ID);
794
// scripts[SE_LATIN] = true;
796
// if (scripts[SE_CIRTH]) {
797
// addTrans("Cirth-Latin", &ID);
798
// scripts[SE_LATIN] = true;
801
if (scripts[SE_LATIN]) {
804
addTrans("Latin-Greek", &ID);
807
addTrans("Latin-Hebrew", &ID);
810
addTrans("Latin-Cyrillic", &ID);
813
addTrans("Latin-Arabic", &ID);
816
addTrans("Latin-Syriac", &ID);
819
addTrans("Latin-Thai", &ID);
822
addTrans("Latin-Georgian", &ID);
825
addTrans("Latin-Armenian", &ID);
828
addTrans("Latin-Ethiopic", &ID);
831
addTrans("Latin-Gothic", &ID);
834
addTrans("Latin-Ugaritic", &ID);
837
addTrans("Latin-Coptic", &ID);
840
addTrans("Latin-Katakana", &ID);
843
addTrans("Latin-Hiragana", &ID);
846
addTrans("Latin-Jamo", &ID);
849
addTrans("Latin-Hangul", &ID);
852
addTrans("Latin-Meroitic", &ID);
855
addTrans("Latin-LinearB", &ID);
858
addTrans("Latin-Cypriot", &ID);
861
addTrans("Latin-Runic", &ID);
864
addTrans("Latin-Ogham", &ID);
867
addTrans("Latin-Thaana", &ID);
870
addTrans("Latin-Glagolitic", &ID);
873
// addTrans("Latin-Tengwar", &ID);
876
// addTrans("Latin-Cirth", &ID);
881
if (option == SE_BASICLATIN) {
882
addTrans("Any-Latin1", &ID);
885
addTrans("NFC", &ID);
888
Transliterator * trans = createTrans(UnicodeString(ID), UTRANS_FORWARD, err);
889
if (trans && !U_FAILURE(err)) {
890
UnicodeString target = UnicodeString(source);
891
trans->transliterate(target);
892
text.setSize(text.size()*2);
893
len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target.getBuffer(), target.length(), &err);