2
**********************************************************************
3
* Copyright (C) 2001, International Business Machines
4
* Corporation and others. All Rights Reserved.
5
**********************************************************************
6
* Date Name Description
7
* 11/10/99 aliu Creation.
8
**********************************************************************
11
#include "unicode/cpdtrans.h"
12
#include "unicode/dtfmtsym.h"
13
#include "unicode/hextouni.h"
14
#include "unicode/normlzr.h"
15
#include "unicode/nultrans.h"
16
#include "unicode/rbt.h"
17
#include "unicode/translit.h"
18
#include "unicode/ucnv.h"
19
#include "unicode/ucnv_err.h"
20
#include "unicode/uchar.h"
21
#include "unicode/unifilt.h"
22
#include "unicode/uniset.h"
23
#include "unicode/unitohex.h"
24
#include "unicode/utypes.h"
25
#include "unicode/ustring.h"
27
/***********************************************************************
29
HOW TO USE THIS TEST FILE
31
How I developed on two platforms
32
without losing (too much of) my mind
35
1. Add new tests by copying/pasting/changing existing tests. On Java,
36
any public void method named Test...() taking no parameters becomes
37
a test. On C++, you need to modify the header and add a line to
38
the runIndexedTest() dispatch method.
40
2. Make liberal use of the expect() method; it is your friend.
42
3. The tests in this file exactly match those in a sister file on the
43
other side. The two files are:
45
icu4j: src/com/ibm/test/translit/TransliteratorTest.java
46
icu4c: source/test/intltest/transtst.cpp
48
==> THIS IS THE IMPORTANT PART <==
50
When you add a test in this file, add it in TransliteratorTest.java
51
too. Give it the same name and put it in the same relative place.
52
This makes maintenance a lot simpler for any poor soul who ends up
53
trying to synchronize the tests between icu4j and icu4c.
55
4. If you MUST enter a test that is NOT paralleled in the sister file,
56
then add it in the special non-mirrored section. These are
65
Make sure you document the reason the test is here and not there.
70
***********************************************************************/
72
// Define character constants thusly to be EBCDIC-friendly
74
LEFT_BRACE=((UChar)0x007B), /*{*/
75
PIPE =((UChar)0x007C), /*|*/
76
ZERO =((UChar)0x0030), /*0*/
77
UPPER_A =((UChar)0x0041) /*A*/
80
TransliteratorTest::TransliteratorTest()
81
: DESERET_DEE((UChar32)0x10414),
82
DESERET_dee((UChar32)0x1043C)
87
TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
88
const char* &name, char* /*par*/) {
90
TESTCASE(0,TestInstantiation);
91
TESTCASE(1,TestSimpleRules);
92
TESTCASE(2,TestRuleBasedInverse);
93
TESTCASE(3,TestKeyboard);
94
TESTCASE(4,TestKeyboard2);
95
TESTCASE(5,TestKeyboard3);
96
TESTCASE(6,TestArabic);
97
TESTCASE(7,TestCompoundKana);
98
TESTCASE(8,TestCompoundHex);
99
TESTCASE(9,TestFiltering);
100
TESTCASE(10,TestInlineSet);
101
TESTCASE(11,TestPatternQuoting);
102
TESTCASE(12,TestJ277);
103
TESTCASE(13,TestJ243);
104
TESTCASE(14,TestJ329);
105
TESTCASE(15,TestSegments);
106
TESTCASE(16,TestCursorOffset);
107
TESTCASE(17,TestArbitraryVariableValues);
108
TESTCASE(18,TestPositionHandling);
109
TESTCASE(19,TestHiraganaKatakana);
110
TESTCASE(20,TestCopyJ476);
111
TESTCASE(21,TestAnchors);
112
TESTCASE(22,TestInterIndic);
113
TESTCASE(23,TestFilterIDs);
114
TESTCASE(24,TestCaseMap);
115
TESTCASE(25,TestNameMap);
116
TESTCASE(26,TestLiberalizedID);
117
TESTCASE(27,TestCreateInstance);
118
TESTCASE(28,TestNormalizationTransliterator);
119
TESTCASE(29,TestCompoundRBT);
120
TESTCASE(30,TestCompoundFilter);
121
TESTCASE(31,TestRemove);
122
TESTCASE(32,TestToRules);
123
TESTCASE(33,TestContext);
124
TESTCASE(34,TestSupplemental);
125
TESTCASE(35,TestQuantifier);
126
TESTCASE(36,TestSTV);
127
TESTCASE(37,TestCompoundInverse);
128
TESTCASE(38,TestNFDChainRBT);
129
TESTCASE(39,TestNullInverse);
130
TESTCASE(40,TestAliasInverseID);
131
TESTCASE(41,TestCompoundInverseID);
132
TESTCASE(42,TestUndefinedVariable);
133
TESTCASE(43,TestEmptyContext);
134
TESTCASE(44,TestCompoundFilterID);
135
TESTCASE(45,TestPropertySet);
136
TESTCASE(46,TestNewEngine);
137
TESTCASE(47,TestQuantifiedSegment);
138
TESTCASE(48,TestDevanagariLatinRT);
139
TESTCASE(49,TestTeluguLatinRT);
140
TESTCASE(50,TestCompoundLatinRT);
141
TESTCASE(51,TestSanskritLatinRT);
142
TESTCASE(52,TestLocaleInstantiation);
143
TESTCASE(53,TestTitleAccents);
144
TESTCASE(54,TestLocaleResource);
145
TESTCASE(55,TestParseError);
146
TESTCASE(56,TestOutputSet);
147
TESTCASE(57,TestVariableRange);
148
TESTCASE(58,TestInvalidPostContext);
149
TESTCASE(59,TestIDForms);
150
TESTCASE(60,TestToRulesMark);
151
TESTCASE(61,TestEscape);
152
TESTCASE(62,TestAnchorMasking);
153
TESTCASE(63,TestDisplayName);
154
TESTCASE(64,TestSpecialCases);
155
TESTCASE(65,TestIncrementalProgress);
156
TESTCASE(66,TestSurrogateCasing);
157
TESTCASE(67,TestFunction);
158
TESTCASE(68,TestInvalidBackRef);
160
default: name = ""; break;
165
* Make sure every system transliterator can be instantiated.
167
* ALSO test that the result of toRules() for each rule is a valid
168
* rule. Do this here so we don't have to have another test that
169
* instantiates everything as well.
171
void TransliteratorTest::TestInstantiation() {
172
int32_t n = Transliterator::countAvailableIDs();
174
for (int32_t i=0; i<n; ++i) {
175
UnicodeString id = Transliterator::getAvailableID(i);
176
if (id.length() < 1) {
177
errln(UnicodeString("FAIL: getAvailableID(") +
178
i + ") returned empty string");
181
UParseError parseError;
182
UErrorCode status = U_ZERO_ERROR;
183
Transliterator* t = Transliterator::createInstance(id,
184
UTRANS_FORWARD, parseError,status);
186
Transliterator::getDisplayName(id, name);
188
errln(UnicodeString("FAIL: Couldn't create ") + id +
189
/*", parse error " + parseError.code +*/
190
", line " + parseError.line +
191
", offset " + parseError.offset +
192
", pre-context " + prettify(parseError.preContext, TRUE) +
193
", post-context " +prettify(parseError.postContext,TRUE) +
194
", Error: " + u_errorName(status));
195
// When createInstance fails, it deletes the failing
196
// entry from the available ID list. We detect this
197
// here by looking for a change in countAvailableIDs.
198
int32_t nn = Transliterator::countAvailableIDs();
201
--i; // Compensate for deleted entry
204
logln(UnicodeString("OK: ") + name + " (" + id + ")");
208
t->toRules(rules, TRUE);
209
Transliterator *u = Transliterator::createFromRules("x",
210
rules, UTRANS_FORWARD, parseError,status);
212
errln(UnicodeString("FAIL: ") + id +
213
".createFromRules() => bad rules" +
214
/*", parse error " + parseError.code +*/
215
", line " + parseError.line +
216
", offset " + parseError.offset +
217
", context " + prettify(parseError.preContext, TRUE) +
218
", rules: " + prettify(rules, TRUE));
226
// Now test the failure path
227
UParseError parseError;
228
UErrorCode status = U_ZERO_ERROR;
229
UnicodeString id("<Not a valid Transliterator ID>");
230
Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
232
errln("FAIL: " + id + " returned a transliterator");
235
logln("OK: Bogus ID handled properly");
239
void TransliteratorTest::TestSimpleRules(void) {
240
/* Example: rules 1. ab>x|y
243
* []|eabcd start - no match, copy e to translated buffer
244
* [e]|abcd match rule 1 - copy output & adjust cursor
245
* [ex|y]cd match rule 2 - copy output & adjust cursor
246
* [exz]|d no match, copy d to transliterated buffer
249
expect(UnicodeString("ab>x|y;", "") +
253
/* Another set of rules:
265
expect(UnicodeString("ab>x|yzacw;") +
273
UErrorCode status = U_ZERO_ERROR;
274
RuleBasedTransliterator t(
276
UnicodeString("$dummy=").append((UChar)0xE100) +
278
"$vowel=[aeiouAEIOU];"
280
"$vowel } $lu > '!';"
286
if (U_FAILURE(status)) {
287
errln("FAIL: RBT constructor failed");
290
expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
294
* Test inline set syntax and set variable syntax.
296
void TransliteratorTest::TestInlineSet(void) {
297
expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
298
expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
300
expect(UnicodeString(
303
"$alphanumeric = [$digit $alpha];" // ***
304
"$special = [^$alphanumeric];" // ***
305
"$alphanumeric > '-';"
306
"$special > '*';", ""),
308
"thx-1138", "---*----");
312
* Create some inverses and confirm that they work. We have to be
313
* careful how we do this, since the inverses will not be true
314
* inverses -- we can't throw any random string at the composition
315
* of the transliterators and expect the identity function. F x
316
* F' != I. However, if we are careful about the input, we will
317
* get the expected results.
319
void TransliteratorTest::TestRuleBasedInverse(void) {
320
UnicodeString RULES =
321
UnicodeString("abc>zyx;") +
339
const char* DATA[] = {
340
// Careful here -- random strings will not work. If we keep
341
// the left side to the domain and the right side to the range
342
// we will be okay though (left, abc; right xyz).
344
"abcacab", "zyxxxyy",
348
int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
350
UErrorCode status = U_ZERO_ERROR;
351
RuleBasedTransliterator fwd("<ID>", RULES, status);
352
RuleBasedTransliterator rev("<ID>", RULES,
353
UTRANS_REVERSE, status);
354
if (U_FAILURE(status)) {
355
errln("FAIL: RBT constructor failed");
358
for (int32_t i=0; i<DATA_length; i+=2) {
359
expect(fwd, DATA[i], DATA[i+1]);
360
expect(rev, DATA[i+1], DATA[i]);
365
* Basic test of keyboard.
367
void TransliteratorTest::TestKeyboard(void) {
368
UErrorCode status = U_ZERO_ERROR;
369
RuleBasedTransliterator t("<ID>",
370
UnicodeString("psch>Y;")
375
if (U_FAILURE(status)) {
376
errln("FAIL: RBT constructor failed");
379
const char* DATA[] = {
387
0, "AycAY", // null means finishKeyboardTransliteration
390
keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
394
* Basic test of keyboard with cursor.
396
void TransliteratorTest::TestKeyboard2(void) {
397
UErrorCode status = U_ZERO_ERROR;
398
RuleBasedTransliterator t("<ID>",
399
UnicodeString("ych>Y;")
404
if (U_FAILURE(status)) {
405
errln("FAIL: RBT constructor failed");
408
const char* DATA[] = {
412
"s", "Aps", // modified for rollback - "Ay",
413
"c", "Apsc", // modified for rollback - "Ayc",
416
"s", "AycAps", // modified for rollback - "AycAy",
417
"c", "AycApsc", // modified for rollback - "AycAyc",
419
0, "AycAY", // null means finishKeyboardTransliteration
422
keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
426
* Test keyboard transliteration with back-replacement.
428
void TransliteratorTest::TestKeyboard3(void) {
429
// We want th>z but t>y. Furthermore, during keyboard
430
// transliteration we want t>y then yh>z if t, then h are
432
UnicodeString RULES("t>|y;"
435
const char* DATA[] = {
436
// Column 1: characters to add to buffer (as if typed)
437
// Column 2: expected appearance of buffer after
438
// keyboard transliteration.
441
"t", "abt", // modified for rollback - "aby",
443
"t", "abyct", // modified for rollback - "abycy",
445
0, "abycz", // null means finishKeyboardTransliteration
448
UErrorCode status = U_ZERO_ERROR;
449
RuleBasedTransliterator t("<ID>", RULES, status);
450
if (U_FAILURE(status)) {
451
errln("FAIL: RBT constructor failed");
454
keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
457
void TransliteratorTest::keyboardAux(const Transliterator& t,
458
const char* DATA[], int32_t DATA_length) {
459
UErrorCode status = U_ZERO_ERROR;
460
UTransPosition index={0, 0, 0, 0};
462
for (int32_t i=0; i<DATA_length; i+=2) {
468
t.transliterate(s, index, DATA[i], status);
471
t.finishTransliteration(s, index);
473
// Show the start index '{' and the cursor '|'
474
UnicodeString a, b, c;
475
s.extractBetween(0, index.contextStart, a);
476
s.extractBetween(index.contextStart, index.start, b);
477
s.extractBetween(index.start, s.length(), c);
479
append((UChar)LEFT_BRACE).
483
if (s == DATA[i+1] && U_SUCCESS(status)) {
486
errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
491
void TransliteratorTest::TestArabic(void) {
492
// Test disabled for 2.0 until new Arabic transliterator can be written.
494
// const char* DATA[] = {
495
// "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
496
// "\u0627\u0644\u0644\u063a\u0629\u0020"+
497
// "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
498
// "\u0628\u0628\u0646\u0638\u0645\u0020"+
499
// "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
500
// "\u062c\u0645\u064a\u0644\u0629",
504
// UChar ar_raw[] = {
505
// 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
506
// 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
507
// 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
508
// 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
509
// 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
510
// 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
512
// UnicodeString ar(ar_raw);
513
// UErrorCode status=U_ZERO_ERROR;
514
// UParseError parseError;
515
// Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
517
// errln("FAIL: createInstance failed");
520
// expect(*t, "Arabic", ar);
525
* Compose the Kana transliterator forward and reverse and try
526
* some strings that should come out unchanged.
528
void TransliteratorTest::TestCompoundKana(void) {
529
UParseError parseError;
530
UErrorCode status = U_ZERO_ERROR;
531
Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
533
errln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed");
535
expect(*t, "aaaaa", "aaaaa");
541
* Compose the hex transliterators forward and reverse.
543
void TransliteratorTest::TestCompoundHex(void) {
544
UParseError parseError;
545
UErrorCode status = U_ZERO_ERROR;
546
Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
547
Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
548
Transliterator* transab[] = { a, b };
549
Transliterator* transba[] = { b, a };
550
if (a == 0 || b == 0) {
551
errln("FAIL: construction failed");
556
// Do some basic tests of a
557
expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
558
// Do some basic tests of b
559
expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
561
Transliterator* ab = new CompoundTransliterator(transab, 2);
562
UnicodeString s("abcde", "");
565
UnicodeString str(s);
566
a->transliterate(str);
567
Transliterator* ba = new CompoundTransliterator(transba, 2);
568
expect(*ba, str, str);
577
* Used by TestFiltering().
579
class TestFilter : public UnicodeFilter {
580
virtual UnicodeFunctor* clone() const {
581
return new TestFilter(*this);
583
virtual UBool contains(UChar32 c) const {
584
return c != (UChar)0x0063 /*c*/;
589
* Do some basic tests of filtering.
591
void TransliteratorTest::TestFiltering(void) {
592
UParseError parseError;
593
UErrorCode status = U_ZERO_ERROR;
594
Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
596
errln("FAIL: createInstance(Any-Hex) failed");
599
hex->adoptFilter(new TestFilter());
600
UnicodeString s("abcde");
601
hex->transliterate(s);
602
UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
604
logln(UnicodeString("Ok: \"") + exp + "\"");
606
// A mismatch between the filtered result and the expected string is a
// test failure, so report it through errln() -- as every other "FAIL:"
// message in this file does -- rather than merely logging it.
errln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
614
void TransliteratorTest::TestAnchors(void) {
615
expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
618
expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
621
expect(UnicodeString("^ab > 01 ;"
629
expect(UnicodeString("$s = [z$] ;"
636
"abzababbabxzabxabx",
641
* Test pattern quoting and escape mechanisms.
643
void TransliteratorTest::TestPatternQuoting(void) {
645
// Each item is <rules>, <input>, <expected output>
646
const UnicodeString DATA[] = {
647
UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
648
UnicodeString(UChar(0x4E01)),
652
for (int32_t i=0; i<3; i+=3) {
653
logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
654
UErrorCode status = U_ZERO_ERROR;
655
RuleBasedTransliterator t("<ID>", DATA[i], status);
656
if (U_FAILURE(status)) {
657
errln("RBT constructor failed");
659
expect(t, DATA[i+1], DATA[i+2]);
665
* Regression test for bugs found in Greek transliteration.
667
void TransliteratorTest::TestJ277(void) {
668
UErrorCode status = U_ZERO_ERROR;
669
UParseError parseError;
670
Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
672
errln("FAIL: createInstance(Greek-Latin) returned NULL");
677
UChar upsilon = 0x3C5;
679
// UChar PHI = 0x3A6;
681
// UChar omega = 0x3C9;
682
// UChar omicron = 0x3BF;
683
// UChar epsilon = 0x3B5;
685
// sigma upsilon nu -> syn
687
syn.append(sigma).append(upsilon).append(nu);
688
expect(*gl, syn, "syn");
690
// sigma alpha upsilon nu -> saun
692
sayn.append(sigma).append(alpha).append(upsilon).append(nu);
693
expect(*gl, sayn, "saun");
695
// Again, using a smaller rule set
700
"$ypsilon = \\u03C5;"
701
"$vowel = [aeiouAEIOU$alpha$ypsilon];"
704
"u <> $vowel { $ypsilon;"
708
RuleBasedTransliterator mini("mini", rules, UTRANS_REVERSE, status);
709
if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
710
expect(mini, syn, "syn");
711
expect(mini, sayn, "saun");
713
// Transliterate the Greek locale data
715
DateFormatSymbols syms(el, status);
716
if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
718
const UnicodeString* data = syms.getMonths(count);
719
for (i=0; i<count; ++i) {
720
if (data[i].length() == 0) {
723
UnicodeString out(data[i]);
724
gl->transliterate(out);
726
if (data[i].length() >= 2 && out.length() >= 2 &&
727
u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
728
if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
733
logln(prettify(data[i] + " -> " + out));
735
errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
743
* Prefix, suffix support in hex transliterators
745
void TransliteratorTest::TestJ243(void) {
746
UErrorCode status = U_ZERO_ERROR;
749
// Test default Hex-Any, which should handle
750
// \u, \U, u+, and U+
751
HexToUnicodeTransliterator hex;
752
expect(hex, UnicodeString("\\u0041+\\U0042,u+0043uu+0044z", ""), "A+B,CuDz");
753
// Try a custom Hex-Unicode
754
// \uXXXX and &#xXXXX;
755
status = U_ZERO_ERROR;
756
HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), status);
757
expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
758
"abcd5fx0123");
759
// Try custom Any-Hex (default is tested elsewhere)
760
status = U_ZERO_ERROR;
761
UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), status);
762
expect(hex3, "012", "012");
767
* Parsers need better syntax error messages.
769
void TransliteratorTest::TestJ329(void) {
771
struct { UBool containsErrors; const char* rule; } DATA[] = {
772
{ FALSE, "a > b; c > d" },
773
{ TRUE, "a > b; no operator; c > d" },
775
int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
777
for (int32_t i=0; i<DATA_length; ++i) {
778
UErrorCode status = U_ZERO_ERROR;
779
UParseError parseError;
780
RuleBasedTransliterator rbt("<ID>",
786
UBool gotError = U_FAILURE(status);
787
UnicodeString desc(DATA[i].rule);
788
desc.append(gotError ? " -> error" : " -> no error");
790
desc = desc + ", ParseError code=" + u_errorName(status) +
791
" line=" + parseError.line +
792
" offset=" + parseError.offset +
793
" context=" + parseError.preContext;
795
if (gotError == DATA[i].containsErrors) {
796
logln(UnicodeString("Ok: ") + desc);
798
errln(UnicodeString("FAIL: ") + desc);
804
* Test segments and segment references.
806
void TransliteratorTest::TestSegments(void) {
808
// Each item is <rules>, <input>, <expected output>
809
UnicodeString DATA[] = {
810
"([a-z]) '.' ([0-9]) > $2 '-' $1",
815
"(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
819
int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
821
for (int32_t i=0; i<DATA_length; i+=3) {
822
logln("Pattern: " + prettify(DATA[i]));
823
UErrorCode status = U_ZERO_ERROR;
824
RuleBasedTransliterator t("ID", DATA[i], status);
825
if (U_FAILURE(status)) {
826
errln("FAIL: RBT constructor");
828
expect(t, DATA[i+1], DATA[i+2]);
834
* Test cursor positioning outside of the key
836
void TransliteratorTest::TestCursorOffset(void) {
838
// Each item is <rules>, <input>, <expected output>
839
UnicodeString DATA[] = {
840
"pre {alpha} post > | @ ALPHA ;"
842
"pre {beta} post > BETA @@ | ;"
845
"prealphapost prebetapost",
847
"prbetaxyz preBETApost",
849
int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
851
for (int32_t i=0; i<DATA_length; i+=3) {
852
logln("Pattern: " + prettify(DATA[i]));
853
UErrorCode status = U_ZERO_ERROR;
854
RuleBasedTransliterator t("<ID>", DATA[i], status);
855
if (U_FAILURE(status)) {
856
errln("FAIL: RBT constructor");
858
expect(t, DATA[i+1], DATA[i+2]);
864
* Test zero length and > 1 char length variable values. Test
865
* use of variable refs in UnicodeSets.
867
void TransliteratorTest::TestArbitraryVariableValues(void) {
869
// Each item is <rules>, <input>, <expected output>
870
UnicodeString DATA[] = {
888
int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
890
for (int32_t i=0; i<DATA_length; i+=3) {
891
logln("Pattern: " + prettify(DATA[i]));
892
UErrorCode status = U_ZERO_ERROR;
893
RuleBasedTransliterator t("<ID>", DATA[i], status);
894
if (U_FAILURE(status)) {
895
errln("FAIL: RBT constructor");
897
expect(t, DATA[i+1], DATA[i+2]);
903
* Confirm that the contextStart, contextLimit, start, and limit
904
* behave correctly. J474.
906
void TransliteratorTest::TestPositionHandling(void) {
908
// Each item is <rules>, <input>, <expected output>
909
const char* DATA[] = {
910
"a{t} > SS ; {t}b > UU ; {t} > TT ;",
911
"xtat txtb", // pos 0,9,0,9
914
"a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
915
"xtat txtb", // pos 2,9,3,8
918
"a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
919
"xtat txtb", // pos 3,8,3,8
923
// Array of 4n positions -- these go with the DATA array
924
// They are: contextStart, contextLimit, start, limit
931
int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
932
for (int32_t i=0; i<n; i++) {
933
UErrorCode status = U_ZERO_ERROR;
934
Transliterator *t = new RuleBasedTransliterator("<ID>",
936
if (U_FAILURE(status)) {
938
errln("FAIL: RBT constructor");
942
pos.contextStart= POS[4*i];
943
pos.contextLimit = POS[4*i+1];
944
pos.start = POS[4*i+2];
945
pos.limit = POS[4*i+3];
946
UnicodeString rsource(DATA[3*i+1]);
947
t->transliterate(rsource, pos, status);
948
if (U_FAILURE(status)) {
950
errln("FAIL: transliterate");
953
t->finishTransliteration(rsource, pos);
963
* Test the Hiragana-Katakana transliterator.
965
void TransliteratorTest::TestHiraganaKatakana(void) {
966
UParseError parseError;
967
UErrorCode status = U_ZERO_ERROR;
968
Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
969
Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
970
if (hk == 0 || kh == 0) {
971
errln("FAIL: createInstance failed");
978
// Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
979
const char* DATA[] = {
981
"\\u3042\\u3090\\u3099\\u3092\\u3050",
982
"\\u30A2\\u30F8\\u30F2\\u30B0",
985
"\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
986
"\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
988
int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
990
for (int32_t i=0; i<DATA_length; i+=3) {
991
UnicodeString h = CharsToUnicodeString(DATA[i+1]);
992
UnicodeString k = CharsToUnicodeString(DATA[i+2]);
994
case 0x68: //'h': // Hiragana-Katakana
997
case 0x6B: //'k': // Katakana-Hiragana
1000
case 0x62: //'b': // both
1011
* Test cloning / copy constructor of RBT.
1013
void TransliteratorTest::TestCopyJ476(void) {
1014
// The real test here is what happens when the destructors are
1015
// called. So we let one object get destructed, and check to
1016
// see that its copy still works.
1017
RuleBasedTransliterator *t2 = 0;
1019
UErrorCode status = U_ZERO_ERROR;
1020
RuleBasedTransliterator t1("t1", "a>A;b>B;", status);
1021
if (U_FAILURE(status)) {
1022
errln("FAIL: RBT constructor");
1025
t2 = new RuleBasedTransliterator(t1);
1026
expect(t1, "abc", "ABc");
1028
expect(*t2, "abc", "ABc");
1033
* Test inter-Indic transliterators. These are composed.
1034
* ICU4C Jitterbug 483.
1036
void TransliteratorTest::TestInterIndic(void) {
1037
UnicodeString ID("Devanagari-Gujarati", "");
1038
UErrorCode status = U_ZERO_ERROR;
1039
UParseError parseError;
1040
Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1042
errln("FAIL: createInstance(" + ID + ") returned NULL");
1045
UnicodeString id = dg->getID();
1047
errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1049
UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1050
UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1051
expect(*dg, dev, guj);
1056
* Test filter syntax in IDs. (J918)
1058
void TransliteratorTest::TestFilterIDs(void) {
1059
// Array of 4n strings:
1060
// <id>, <inverse id>, <input>, <expected output>
1061
const char* DATA[] = {
1062
"[aeiou]Any-Hex", // ID
1063
"[aeiou]Hex-Any", // expected inverse ID
1065
"q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1067
"[aeiou]Any-Hex;[^5]Hex-Any",
1068
"[^5]Any-Hex;[aeiou]Hex-Any",
1077
enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1079
for (int i=0; i<DATA_length; i+=4) {
1080
UnicodeString ID(DATA[i], "");
1081
UnicodeString uID(DATA[i+1], "");
1082
UnicodeString data2(DATA[i+2], "");
1083
UnicodeString data3(DATA[i+3], "");
1084
UParseError parseError;
1085
UErrorCode status = U_ZERO_ERROR;
1086
Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1088
errln("FAIL: createInstance(" + ID + ") returned NULL");
1091
expect(*t, data2, data3);
1094
if (ID != t->getID()) {
1095
errln("FAIL: createInstance(" + ID + ").getID() => " +
1099
// Check the inverse
1100
Transliterator *u = t->createInverse(status);
1102
errln("FAIL: " + ID + ".createInverse() returned NULL");
1103
} else if (u->getID() != uID) {
1104
errln("FAIL: " + ID + ".createInverse().getID() => " +
1105
u->getID() + ", expected " + uID);
1114
* Test the case mapping transliterators.
1116
void TransliteratorTest::TestCaseMap(void) {
1117
UParseError parseError;
1118
UErrorCode status = U_ZERO_ERROR;
1119
Transliterator* toUpper =
1120
Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1121
Transliterator* toLower =
1122
Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1123
Transliterator* toTitle =
1124
Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1125
if (toUpper==0 || toLower==0 || toTitle==0) {
1126
errln("FAIL: createInstance returned NULL");
1133
expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1134
"THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1135
expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1136
"the quick brown foX jumped over the lazY dogs.");
1137
expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1138
"The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1146
* Test the name mapping transliterators.
1148
void TransliteratorTest::TestNameMap(void) {
1149
UParseError parseError;
1150
UErrorCode status = U_ZERO_ERROR;
1151
Transliterator* uni2name =
1152
Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1153
Transliterator* name2uni =
1154
Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1155
if (uni2name==0 || name2uni==0) {
1156
errln("FAIL: createInstance returned NULL");
1162
expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1163
CharsToUnicodeString("{NO-BREAK SPACE}abc{CJK UNIFIED IDEOGRAPH-4E01}{MICRO SIGN}{GUJARATI SIGN CANDRABINDU}{REPLACEMENT CHARACTER}{END OF TRANSMISSION}{HORIZONTAL TABULATION}{<control-0081>}{<noncharacter-FFFF>}"));
1164
expect(*name2uni, "{ NO-BREAK SPACE}abc{ CJK UNIFIED IDEOGRAPH-4E01 }{x{MICRO SIGN}{GUJARATI SIGN CANDRABINDU}{REPLACEMENT CHARACTER}{END OF TRANSMISSION}{HORIZONTAL TABULATION}{<control-0081>}{<noncharacter-FFFF>}{<control-0004>}{",
1165
CharsToUnicodeString("\\u00A0abc\\u4E01{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004{"));
1172
* Test liberalized ID syntax. 1006c
1174
void TransliteratorTest::TestLiberalizedID(void) {
1175
// Some test cases have an expected getID() value of NULL. This
1176
// means I have disabled the test case for now. This stuff is
1177
// still under development, and I haven't decided whether to make
1178
// getID() return canonical case yet. It will all get rewritten
1179
// with the move to Source-Target/Variant IDs anyway. [aliu]
1180
const char* DATA[] = {
1181
"latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1182
" Null ", "Null", "whitespace",
1183
" Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1184
" null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1186
const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1187
UParseError parseError;
1188
UErrorCode status= U_ZERO_ERROR;
1189
for (int32_t i=0; i<DATA_length; i+=3) {
1190
Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1192
errln(UnicodeString("FAIL: ") + DATA[i+2] +
1193
" cannot create ID \"" + DATA[i] + "\"");
1197
exp = UnicodeString(DATA[i+1], "");
1199
// Don't worry about getID() if the expected char*
1200
// is NULL -- see above.
1201
if (exp.length() == 0 || exp == t->getID()) {
1202
logln(UnicodeString("Ok: ") + DATA[i+2] +
1203
" create ID \"" + DATA[i] + "\" => \"" +
1206
errln(UnicodeString("FAIL: ") + DATA[i+2] +
1207
" create ID \"" + DATA[i] + "\" => \"" +
1208
t->getID() + "\", exp \"" + exp + "\"");
1215
/* test for Jitterbug 912 */
1216
void TransliteratorTest::TestCreateInstance(){
1218
UErrorCode status = U_ZERO_ERROR;
1219
Transliterator* myTrans = Transliterator::createInstance(UnicodeString("Latin-Hangul"),UTRANS_REVERSE,err,status);
1221
errln("FAIL: createInstance failed");
1224
UnicodeString newID =myTrans->getID();
1225
if(newID!=UnicodeString("Hangul-Latin")){
1226
errln(UnicodeString("Test for Jitterbug 912 Transliterator::createInstance(id,UTRANS_REVERSE) failed"));
1232
* Test the normalization transliterator.
1234
// Checks the normalization transliterators against two tables whose rows are
// {input, decomposed form, composed form}: "NFD" and "NFC" are run over
// CANON, "NFKD" and "NFKC" over COMPAT. A final check runs the compound ID
// "NFD; [x]Remove".
void TransliteratorTest::TestNormalizationTransliterator() {
1235
// THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1236
// PLEASE KEEP THEM IN SYNC WITH BasicTest.
1237
const char* CANON[] = {
1238
// Input Decomposed Composed
1239
"cat", "cat", "cat" ,
1240
"\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1242
"\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1243
"D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1245
"\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1246
"\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1247
"D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1249
"\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1250
"D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1252
"\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1253
"\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1254
"\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1256
"\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1257
"\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1259
"\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1260
"\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1262
"Henry IV", "Henry IV", "Henry IV" ,
1263
"Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1265
"\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1266
"\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1267
"\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1268
"\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1269
"\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1271
"A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1275
// Compatibility table; same three-column layout as CANON. Unlike CANON, the
// ffi ligature and the Roman numeral IV are expected to be expanded here.
const char* COMPAT[] = {
1276
// Input Decomposed Composed
1277
"\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1279
"\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1280
"\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1282
"Henry IV", "Henry IV", "Henry IV" ,
1283
"Henry \\u2163", "Henry IV", "Henry IV" ,
1285
"\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1286
"\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1288
"\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1293
UParseError parseError;
1294
UErrorCode status = U_ZERO_ERROR;
1295
Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1296
Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1298
errln("FAIL: createInstance failed");
1303
// Rows are consumed three entries at a time; the loop stops at the first
// null entry, so the table must be null-terminated.
for (i=0; CANON[i]; i+=3) {
1304
UnicodeString in = CharsToUnicodeString(CANON[i]);
1305
UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1306
UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1307
expect(*NFD, in, expd);
1308
expect(*NFC, in, expc);
1313
Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1314
Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1315
if (!NFKD || !NFKC) {
1316
errln("FAIL: createInstance failed");
1321
// Same walk as above, over the compatibility table.
for (i=0; COMPAT[i]; i+=3) {
1322
UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1323
UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1324
UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1325
expect(*NFKD, in, expkd);
1326
expect(*NFKC, in, expkc);
1332
// Reset the error code before the compound-ID check so an earlier failure
// does not leak into it.
status = U_ZERO_ERROR;
1333
Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1337
errln("FAIL: createInstance failed");
1339
// Expected output: U+010D in decomposed form, with the x gone.
expect(*t, CharsToUnicodeString("\\u010dx"),
1340
CharsToUnicodeString("c\\u030C"));
1345
* Test compound RBT rules.
1347
// Exercises compound rule-based transliterators:
//   1. builds one from a rule string that contains "::" ID lines, runs it on
//      a sample sentence and compares toRules() output with the rule string;
//   2. creates "Greek-Latin; Latin-Cyrillic" by ID, checks toRules(), feeds
//      that output back through createFromRules() and checks toRules() again;
//   3. checks getID() of a "Foo(Bar)" style ID and of its inverse.
void TransliteratorTest::TestCompoundRBT(void) {
1348
// Careful with spacing and ';' here: Phrase this exactly
1349
// as toRules() is going to return it. If toRules() changes
1350
// with regard to spacing or ';', then adjust this string.
1351
UnicodeString rule("::Hex-Any;\n"
1355
"::[^t]Any-Upper;", "");
1356
UParseError parseError;
1357
UErrorCode status = U_ZERO_ERROR;
1358
Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1360
errln("FAIL: createFromRules failed");
1363
// The input starts with an escaped "C" (\u0043). In the expected output
// every t stays lowercase, matching the [^t] filter on Any-Upper.
expect(*t, "\\u0043at in the hat, bat on the mat",
1364
"C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1366
// TRUE requests the escaped form of the rules.
// NOTE(review): inferred from the escapedRules usage in TestToRules - confirm.
t->toRules(r, TRUE);
1368
logln((UnicodeString)"OK: toRules() => " + r);
1370
errln((UnicodeString)"FAIL: toRules() => " + r +
1371
", expected " + rule);
1376
// Part 2: a compound created from an ID rather than from rules.
t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1378
errln("FAIL: createInstance failed");
1381
UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1382
t->toRules(r, TRUE);
1384
errln((UnicodeString)"FAIL: toRules() => " + r +
1385
", expected " + exp);
1387
logln((UnicodeString)"OK: toRules() => " + r);
1391
// Round trip the result of toRules
1392
t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1394
errln("FAIL: createFromRules #2 failed");
1397
logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1400
// Test toRules again
1401
t->toRules(r, TRUE);
1403
errln((UnicodeString)"FAIL: toRules() => " + r +
1404
", expected " + exp);
1406
logln((UnicodeString)"OK: toRules() => " + r);
1411
// Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1412
// to what the regenerated ID will look like.
1413
UnicodeString id("Upper(Lower);(NFKC)", "");
1414
t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1416
errln("FAIL: createInstance #2 failed");
1419
if (t->getID() == id) {
1420
logln((UnicodeString)"OK: created " + id);
1422
errln((UnicodeString)"FAIL: createInstance(" + id +
1423
").getID() => " + t->getID());
1426
Transliterator *u = t->createInverse(status);
1428
errln("FAIL: createInverse failed");
1432
// Expected inverse ID: the components appear in reverse order and each
// Foo(Bar) pair is swapped.
exp = "NFKC();Lower(Upper)";
1433
if (u->getID() == exp) {
1434
logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1437
errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1445
* Compound filter semantics were originally not implemented
1446
* correctly. Originally, each component filter f(i) is replaced by
1447
* f'(i) = f(i) && g, where g is the filter for the compound
1452
* Suppose I have a transliterator X. Internally X is
1453
* "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1455
* The compound should convert all greek characters (through latin) to
1456
* cyrillic, then lowercase the result. The filter should say "don't
1457
* touch 'A' in the original". But because an intermediate result
1458
* happens to go through "A", the Greek Alpha gets hung up.
1460
// Creates the compound "Greek-Latin; Latin-Greek; Lower", installs the filter
// [^A] on it, and checks that "BA\u039A\u0391" becomes "\u03b2A\u03ba\u03b1":
// the literal A in the input stays as it is while the other three characters
// come out as lowercase Greek.
void TransliteratorTest::TestCompoundFilter(void) {
1461
UParseError parseError;
1462
UErrorCode status = U_ZERO_ERROR;
1463
Transliterator *t = Transliterator::createInstance
1464
("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1466
errln("FAIL: createInstance failed");
1469
// The filter set is built with the same status variable, so a bad pattern
// is caught by the U_FAILURE check that follows.
t->adoptFilter(new UnicodeSet("[^A]", status));
1470
if (U_FAILURE(status)) {
1471
errln("FAIL: UnicodeSet ct failed");
1476
// Only the 'A' at index 1 should remain unchanged
1478
CharsToUnicodeString("BA\\u039A\\u0391"),
1479
CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1483
// Creates the transliterator "Remove[abc]" and checks that every lowercase
// a, b and c is dropped from a sample sentence while all other characters,
// including the uppercase A, are kept.
void TransliteratorTest::TestRemove(void) {
1484
UParseError parseError;
1485
UErrorCode status = U_ZERO_ERROR;
1486
Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1488
errln("FAIL: createInstance failed");
1492
expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1496
// Table-driven check of Transliterator::toRules() and UnicodeSet::toPattern().
// DATA is read in groups of three: a kind marker, an input (rule string or
// set pattern) and the expected regenerated text. For the transliterator kind
// both the unescaped and the escaped toRules() output are compared; for the
// other kind the set is rebuilt from the pattern and toPattern() is compared.
void TransliteratorTest::TestToRules(void) {
1497
const char* RBT = "rbt";
1498
const char* SET = "set";
1499
static const char* DATA[] = {
1501
"$a=\\u4E61; [$a] > A;",
1505
"$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1506
"[[:Zs:][:Zl:]]{a} > A;",
1533
"$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1534
"[^[:Zs:]]{a} > A;",
1537
"$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1538
"[[a-z]-[:Zs:]]{a} > A;",
1541
"$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1542
"[[:Zs:]&[a-z]]{a} > A;",
1545
"$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1546
"[x[:Zs:]]{a} > A;",
1549
"$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1550
"$macron = \\u0304 ;"
1551
"$evowel = [aeiouyAEIOUY] ;"
1552
"$iotasub = \\u0345 ;"
1553
"($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1554
"([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1557
"([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1558
"([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1560
static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1562
for (int32_t d=0; d < DATA_length; d+=3) {
1563
// NOTE(review): this compares pointers, not string contents. It presumably
// relies on the kind column holding the RBT/SET pointers themselves; those
// rows are not visible in this chunk - confirm.
if (DATA[d] == RBT) {
1564
// Transliterator test
1565
UParseError parseError;
1566
UErrorCode status = U_ZERO_ERROR;
1567
Transliterator *t = Transliterator::createFromRules("ID",
1568
DATA[d+1], UTRANS_FORWARD, parseError, status);
1570
errln("FAIL: createFromRules failed");
1573
UnicodeString rules, escapedRules;
1574
t->toRules(rules, FALSE);
1575
t->toRules(escapedRules, TRUE);
1576
// The expected text is used twice: unescaped for the FALSE form and
// verbatim (escapes left in place) for the TRUE form.
UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1577
UnicodeString expEscapedRules(DATA[d+2]);
1578
if (rules == expRules) {
1579
logln((UnicodeString)"Ok: " + DATA[d+1] +
1582
errln((UnicodeString)"FAIL: " + DATA[d+1] +
1583
" => " + rules + ", exp " + expRules);
1585
if (escapedRules == expEscapedRules) {
1586
logln((UnicodeString)"Ok: " + DATA[d+1] +
1587
" => " + escapedRules);
1589
errln((UnicodeString)"FAIL: " + DATA[d+1] +
1590
" => " + escapedRules + ", exp " + expEscapedRules);
1596
// UnicodeSet branch: rebuild the set from the pattern and compare its
// regenerated pattern with the expected text.
UErrorCode status = U_ZERO_ERROR;
1597
UnicodeString pat(DATA[d+1]);
1598
UnicodeString expToPat(DATA[d+2]);
1599
UnicodeSet set(pat, status);
1600
if (U_FAILURE(status)) {
1601
errln("FAIL: UnicodeSet ct failed");
1604
// Adjust spacing etc. as necessary.
1605
UnicodeString toPat;
1606
set.toPattern(toPat);
1607
if (expToPat == toPat) {
1608
logln((UnicodeString)"Ok: " + pat +
1611
// NOTE(review): the message below prints pat as the expected value, while
// the comparison above is against expToPat - confirm which is intended.
errln((UnicodeString)"FAIL: " + pat +
1612
" => " + prettify(toPat, TRUE) +
1613
", exp " + prettify(pat, TRUE));
1619
// Runs two rule sets whose rules carry context (text outside the braces)
// through expect(). The remaining arguments of both calls are not visible in
// this chunk.
void TransliteratorTest::TestContext() {
1620
// Initializer order follows the abbreviations in the trailing comment;
// presumably contextStart, contextLimit, start, limit - confirm against the
// UTransPosition declaration.
UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1621
expect("de > x; {d}e > y;",
1626
expect("ab{c} > z;",
1631
// Exercises supplementary (above U+FFFF) code points: in rule variables and
// sets, in segment references, with cursor positioning, and in several
// built-in transliterators (the Any-Hex variants and a filtered Remove).
void TransliteratorTest::TestSupplemental() {
1633
expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1635
CharsToUnicodeString("ab\\U0001030Fx"),
1636
CharsToUnicodeString("\\U00010300bix"));
1638
// Swap adjacent pairs taken from two sets that each mix BMP and
// supplementary characters.
expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1639
"$b=[A-Z\\U00010400-\\U0001044D];"
1640
"($a)($b) > $2 $1;"),
1641
CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1642
CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1644
// The comment lines below trace the text step by step for the next
// expect() call; | marks the cursor.
// k|ax\\U00010300xm
1646
// k|a\\U00010400\\U00010300xm
1647
// ky|\\U00010400\\U00010300xm
1648
// ky\\U00010400|\\U00010300xm
1650
// ky\\U00010400|\\U00010300\\U00010400m
1651
// ky\\U00010400y|\\U00010400m
1652
expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1653
"$a {x} > | @ \\U00010400;"
1654
"{$a} [^\\u0000-\\uFFFF] > y;"),
1655
CharsToUnicodeString("kax\\U00010300xm"),
1656
CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1659
CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1660
"{GOTHIC LETTER AHSA}{TAG LATIN SMALL LETTER A}{NO-BREAK SPACE}");
1662
// The Any-Hex variants below are each fed the same four-character input and
// compared against the variant-specific escape syntax.
expectT("Any-Hex/Unicode",
1663
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1664
"U+10330U+10FF00U+E0061U+00A0");
1666
expectT("Any-Hex/C",
1667
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1668
"\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1670
expectT("Any-Hex/Perl",
1671
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1672
"\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1674
expectT("Any-Hex/Java",
1675
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1676
"\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1678
// NOTE(review): the expected strings of the two XML variants appear here as
// raw characters rather than as character references; this looks like an
// artifact of how the file was extracted - confirm against the repository.
expectT("Any-Hex/XML",
1679
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1680
"𐌰􏼀󠁡 ");
1682
expectT("Any-Hex/XML10",
1683
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1684
"𐌰􏼀󠁡 ");
1686
// A filter made only of supplementary characters: just U+E0061 is removed.
expectT("[\\U000E0000-\\U000E0FFF] Remove",
1687
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1688
CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1691
// Exercises the rule quantifiers +, * and ? in ante context, key and post
// context, combined with cursor offsets (@), segment references ($1), quoted
// strings and variable references. Several expect() calls have arguments
// that are not visible in this chunk.
void TransliteratorTest::TestQuantifier() {
1693
// Make sure @ in a quantified anteContext works
1694
expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1698
// Make sure @ in a quantified postContext works
1699
expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1703
// Make sure @ in a quantified postContext with seg ref works
1704
expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1708
// Make sure @ past ante context doesn't enter ante context
1709
UTransPosition pos = {0, 5, 3, 5};
1710
expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1715
// Make sure @ past post context doesn't pass limit
1716
UTransPosition pos2 = {0, 4, 0, 2};
1717
expect("{b} a+ > c @@ |; x > y; a > A;",
1722
// Make sure @ past post context doesn't enter post context
1723
expect("{b} a+ > c @@ |; x > y; a > A;",
1727
expect("(ab)? c > d;",
1731
// NOTE: The (ab)+ when referenced just yields a single "ab",
1732
// not the full sequence of them. This accords with perl behavior.
1733
expect("(ab)+ {x} > '(' $1 ')';",
1735
"x ab(ab) abab(ab)y");
1738
"ac abc abbc abbbc",
1741
expect("[abc]+ > x;",
1742
"qac abrc abbcs abtbbc",
1745
expect("q{(ab)+} > x;",
1746
"qa qab qaba qababc qaba",
1747
"qa qx qxa qxc qxa");
1749
expect("q(ab)* > x;",
1750
"qa qab qaba qababc",
1753
// NOTE: The (ab)* when referenced just yields a single "ab" (or nothing
1754
// when it matched zero times), not the full sequence. This accords with perl behavior.
1755
expect("q(ab)* > '(' $1 ')';",
1756
"qa qab qaba qababc",
1757
"()a (ab) (ab)a (ab)c");
1759
// 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1761
expect("'ab'+ > x;",
1765
// $foo+ and $foo* -- the quantifier should apply to the entire
1766
// variable reference
1767
expect("$var = ab; $var+ > x;",
1772
// Helper transliterator derived from NullTransliterator and constructed from
// an ID. TestSTV creates instances of it to exercise registration and
// unregistration. The constructor body and any further members are not
// visible in this chunk.
class TestTrans : public NullTransliterator {
1774
TestTrans(const UnicodeString& id) {
1780
* Test Source-Target/Variant.
1782
// Two parts:
//   1. Enumerates every available source, each source's targets and each
//      pair's variants, failing on a count outside 0..255 or on an empty
//      source or target name. An empty variant name is only logged.
//   2. Registers a TestTrans under each of three made-up IDs (source only,
//      source-target, source-target/variant), creates it by ID, then
//      unregisters it and tries to create it again.
void TransliteratorTest::TestSTV(void) {
1783
int32_t ns = Transliterator::countAvailableSources();
1784
if (ns < 0 || ns > 255) {
1785
errln((UnicodeString)"FAIL: Bad source count: " + ns);
1789
for (i=0; i<ns; ++i) {
1790
UnicodeString source;
1791
Transliterator::getAvailableSource(i, source);
1792
logln((UnicodeString)"" + i + ": " + source);
1793
if (source.length() == 0) {
1794
errln("FAIL: empty source");
1797
int32_t nt = Transliterator::countAvailableTargets(source);
1798
if (nt < 0 || nt > 255) {
1799
errln((UnicodeString)"FAIL: Bad target count: " + nt);
1802
for (int32_t j=0; j<nt; ++j) {
1803
UnicodeString target;
1804
Transliterator::getAvailableTarget(j, source, target);
1805
logln((UnicodeString)" " + j + ": " + target);
1806
if (target.length() == 0) {
1807
errln("FAIL: empty target");
1810
int32_t nv = Transliterator::countAvailableVariants(source, target);
1811
if (nv < 0 || nv > 255) {
1812
errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1815
for (int32_t k=0; k<nv; ++k) {
1816
UnicodeString variant;
1817
Transliterator::getAvailableVariant(k, source, target, variant);
1818
// Unlike sources and targets, an empty variant is not reported as an error.
if (variant.length() == 0) {
1819
logln((UnicodeString)" " + k + ": <empty>");
1821
logln((UnicodeString)" " + k + ": " + variant);
1827
// Test registration
1828
const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1829
for (i=0; i<3; ++i) {
1830
Transliterator *t = new TestTrans(IDS[i]);
1832
errln("FAIL: out of memory");
1835
if (t->getID() != IDS[i]) {
1836
errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
1840
// After registration, t is reassigned to whatever createInstance() returns
// for the same ID.
Transliterator::registerInstance(t);
1841
UErrorCode status = U_ZERO_ERROR;
1842
t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
1844
errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
1847
logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
1851
// Once unregistered, creating the same ID again is reported through the
// "Unregistration failed" message below; its guard is not visible here.
Transliterator::unregister(IDS[i]);
1852
t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
1854
errln((UnicodeString)"FAIL: Unregistration failed for ID " +
1862
* Test inverse of Greek-Latin; Title()
1864
// Creates "Greek-Latin; Title()" in the REVERSE direction and checks that the
// resulting ID is "(Title);Latin-Greek": component order is reversed and
// Title() has become (Title).
void TransliteratorTest::TestCompoundInverse(void) {
1865
UParseError parseError;
1866
UErrorCode status = U_ZERO_ERROR;
1867
Transliterator *t = Transliterator::createInstance
1868
("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
1870
errln("FAIL: createInstance");
1873
UnicodeString exp("(Title);Latin-Greek");
1874
if (t->getID() == exp) {
1875
logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
1878
errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
1879
t->getID() + "\", expected \"" + exp + "\"");
1885
* Test NFD chaining with RBT
1887
// Builds a rule-based transliterator whose rules begin with "::NFD" followed
// by two ordinary rules, and checks that "aa" is converted to "Q", i.e. that
// the rule "aa > Q" wins over "a > q" after the NFD step.
void TransliteratorTest::TestNFDChainRBT() {
1889
UErrorCode ec = U_ZERO_ERROR;
1890
// pe is the parse-error output argument; its declaration is not visible in
// this chunk.
Transliterator* t = Transliterator::createFromRules(
1891
"TEST", "::NFD; aa > Q; a > q;",
1892
UTRANS_FORWARD, pe, ec);
1893
if (t == NULL || U_FAILURE(ec)) {
1894
errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
1897
expect(*t, "aa", "Q");
1900
// TEMPORARY TESTS -- BEING DEBUGGED
1901
//=- UnicodeString s, s2;
1902
//=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
1903
//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
1904
//=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
1905
//=- expect(*t, s, s2);
1908
//=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
1909
//=- expect(*t, s2, s);
1912
//=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
1913
//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
1914
//=- expect(*t, s, s);
1917
// const char* source[] = {
1919
// "\\u015Br\\u012Bmad",
1920
// "bhagavadg\\u012Bt\\u0101",
1923
// "vi\\u1E63\\u0101da",
1925
// "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
1926
// "uv\\u0101cr\\u0325",
1928
// "rmk\\u1E63\\u0113t",
1929
// //"dharmak\\u1E63\\u0113tr\\u0113",
1931
// "kuruk\\u1E63\\u0113tr\\u0113",
1932
// "samav\\u0113t\\u0101",
1933
// "yuyutsava-\\u1E25",
1934
// "m\\u0101mak\\u0101-\\u1E25",
1935
// // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
1937
// "san\\u0304java",
1942
// const char* expected[] = {
1944
// "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
1945
// "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
1946
// "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
1947
// "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
1948
// "\\u0935\\u093f\\u0937\\u093e\\u0926",
1949
// "\\u092f\\u094b\\u0917",
1950
// "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
1951
// "\\u0909\\u0935\\u093E\\u091A\\u0943",
1954
// //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
1956
// "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
1957
// "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
1958
// "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
1959
// "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
1960
// // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
1961
// "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
1962
// "\\u0938\\u0902\\u091c\\u0935",
1966
// UErrorCode status = U_ZERO_ERROR;
1967
// UParseError parseError;
1968
// UnicodeString message;
1969
// Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
1970
// Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
1971
// if(U_FAILURE(status)){
1972
// errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
1973
// errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
1974
// delete latinToDevToLatin;
1975
// delete devToLatinToDev;
1978
// UnicodeString gotResult;
1979
// for(int i= 0; source[i] != 0; i++){
1980
// gotResult = source[i];
1981
// expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
1982
// expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
1984
// delete latinToDevToLatin;
1985
// delete devToLatinToDev;
1989
* Inverse of "Null" should be "Null". (J21)
1991
// Creates the "Null" transliterator, takes its inverse, and checks that the
// ID of the inverse is also "Null".
void TransliteratorTest::TestNullInverse() {
1993
UErrorCode ec = U_ZERO_ERROR;
1994
Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
1995
if (t == 0 || U_FAILURE(ec)) {
1996
errln("FAIL: createInstance");
1999
Transliterator *u = t->createInverse(ec);
2000
if (u == 0 || U_FAILURE(ec)) {
2001
errln("FAIL: createInverse");
2005
if (u->getID() != "Null") {
2006
errln("FAIL: Inverse of Null should be Null");
2013
* Check ID of inverse of alias. (J22)
2015
// Creates the transliterator for an alias ID ("Latin-Hangul"), takes its
// inverse, and reports a failure if the ID of the inverse is not the
// expected "Hangul-Latin". The comparison line itself is not visible in this
// chunk.
void TransliteratorTest::TestAliasInverseID() {
2016
UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2018
UErrorCode ec = U_ZERO_ERROR;
2019
Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2020
if (t == 0 || U_FAILURE(ec)) {
2021
errln("FAIL: createInstance");
2024
Transliterator *u = t->createInverse(ec);
2025
if (u == 0 || U_FAILURE(ec)) {
2026
errln("FAIL: createInverse");
2030
UnicodeString exp = "Hangul-Latin";
2031
UnicodeString got = u->getID();
2033
errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2034
", expected " + exp);
2041
* Test IDs of inverses of compound transliterators. (J20)
2043
// Creates the compound "Latin-Jamo;NFC(NFD)", takes its inverse, and reports
// a failure if the ID of the inverse is not "NFD(NFC);Jamo-Latin" (components
// reversed, each inverted). The comparison line itself is not visible in this
// chunk.
void TransliteratorTest::TestCompoundInverseID() {
2044
UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2046
UErrorCode ec = U_ZERO_ERROR;
2047
Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2048
if (t == 0 || U_FAILURE(ec)) {
2049
errln("FAIL: createInstance");
2052
Transliterator *u = t->createInverse(ec);
2053
if (u == 0 || U_FAILURE(ec)) {
2054
errln("FAIL: createInverse");
2058
UnicodeString exp = "NFD(NFC);Jamo-Latin";
2059
UnicodeString got = u->getID();
2061
errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2062
", expected " + exp);
2069
* Test undefined variable.
2072
// Negative test: the rule refers to $initial, which the rule string never
// defines. Construction is expected to set a failing error code; that case is
// logged as OK, and the other case is reported as a failure.
void TransliteratorTest::TestUndefinedVariable() {
2073
UnicodeString rule = "$initial } a <> \\u1161;";
2075
UErrorCode ec = U_ZERO_ERROR;
2076
// Constructs the RuleBasedTransliterator directly rather than through a
// Transliterator factory method.
Transliterator *t = new RuleBasedTransliterator("<ID>", rule, UTRANS_FORWARD, 0, pe, ec);
2078
if (U_FAILURE(ec)) {
2079
logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2083
errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2088
* Test empty context.
2090
// Checks that a rule with nothing outside the braces (no ante or post
// context) still converts every a to b in the sample text.
void TransliteratorTest::TestEmptyContext() {
2091
expect(" { a } > b;", "xay a ", "xby b ");
2095
* Test compound filter ID syntax
2097
// Table-driven check of filter syntax in compound IDs and in "::" rule
// headers. Each row has four columns (described inside the table). A row
// whose columns 2-4 are NULL must fail to build; any other row must build
// and, when a source string is given, map it to the expected result.
void TransliteratorTest::TestCompoundFilterID(void) {
2098
static const char* DATA[] = {
2099
// Col. 1 = ID or rule set (latter must start with #)
2101
// = columns > 1 are null if expect col. 1 to be illegal =
2103
// Col. 2 = direction, "F..." or "R..."
2104
// Col. 3 = source string
2105
// Col. 4 = exp result
2107
"[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2108
"Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2109
"[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2110
"[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2111
"#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2112
"#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2116
// Four entries per row; the loop stops at a null first column.
for (int32_t i=0; DATA[i]; i+=4) {
2117
UnicodeString id = CharsToUnicodeString(DATA[i]);
2118
// A missing direction column defaults to FORWARD.
UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2119
UTRANS_REVERSE : UTRANS_FORWARD;
2120
UnicodeString source;
2122
if (DATA[i+2] != NULL) {
2123
source = CharsToUnicodeString(DATA[i+2]);
2124
exp = CharsToUnicodeString(DATA[i+3]);
2126
// A row is expected to build exactly when its direction column is present.
UBool expOk = (DATA[i+1] != NULL);
2127
Transliterator* t = NULL;
2129
UErrorCode ec = U_ZERO_ERROR;
2130
// Column 1 starting with # is a rule set; anything else is an ID.
if (id.charAt(0) == 0x23/*#*/) {
2131
t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2133
t = Transliterator::createInstance(id, direction, pe, ec);
2135
UBool ok = (t != NULL && U_SUCCESS(ec));
2137
logln((UnicodeString)"Ok: " + id + " => " + (t!=0?t->getID():(UnicodeString)"NULL") + ", " +
2139
if (source.length() != 0) {
2140
expect(*t, source, exp);
2144
errln((UnicodeString)"FAIL: " + id + " => " + (t!=0?t->getID():(UnicodeString)"NULL") + ", " +
2151
* Test new property set syntax
2153
// Checks the \p{...} property syntax inside rules, and that a greedy ".+"
// segment is matched separately for each stretch of text between the \n and
// \r in the input (the expected output brackets each stretch on its own).
void TransliteratorTest::TestPropertySet() {
2154
expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx");
2155
expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2156
"[ a stitch ]\n[ in time ]\r[ saves 9]");
2160
* Test various failure points of the new 2.0 engine.
2162
// A collection of regression checks for the transliteration engine:
//   - "Latin-Hiragana" must leave Katakana untouched;
//   - a hand-built CompoundTransliterator with a global [:Ll:] filter must
//     turn "aAaA" into "bAbA";
//   - two rule sets using optional and repeated elements next to segments.
void TransliteratorTest::TestNewEngine() {
2164
UErrorCode ec = U_ZERO_ERROR;
2165
Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2166
if (t == 0 || U_FAILURE(ec)) {
2167
errln("FAIL: createInstance Latin-Hiragana");
2170
// Katakana should be untouched
2171
expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2172
CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2177
// This test will only work if Transliterator.ROLLBACK is
2178
// true. Otherwise, this test will fail, revealing a
2179
// limitation of global filters in incremental mode.
2181
// NOTE(review): the variables receiving the two createFromRules() results
// are on lines not visible in this chunk.
Transliterator::createFromRules("a", "a > A;", UTRANS_FORWARD, pe, ec);
2183
Transliterator::createFromRules("A", "A > b;", UTRANS_FORWARD, pe, ec);
2184
if (U_FAILURE(ec)) {
2190
// Three components; only the assignment of slot 1 (NFD) is visible here.
Transliterator* array[3];
2192
array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2194
if (U_FAILURE(ec)) {
2195
errln("FAIL: createInstance NFD");
2202
// The filter set is constructed with ec, so the check below catches a bad
// pattern.
t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2203
if (U_FAILURE(ec)) {
2204
errln("FAIL: UnicodeSet constructor");
2212
// Only the lowercase letters change; the uppercase A is left as it is.
expect(*t, "aAaA", "bAbA");
2219
expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2223
UnicodeString gr = CharsToUnicodeString(
2225
"$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2226
"$rough = \\u0314 ;"
2227
"($lcgvowel+ $ddot?) $rough > h | $1 ;"
2231
expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2235
* Test quantified segment behavior. We want:
2236
* ([abc])+ > x $1 x; applied to "cba" produces "xax"
2238
// Checks how a segment reference behaves when the quantifier is inside the
// segment versus around it, in key, ante context and post context, and then
// checks that toRules() reproduces a rule with a plain segment and a rule
// with a quantified segment.
void TransliteratorTest::TestQuantifiedSegment(void) {
2240
// Quantifier inside the segment: $1 is the whole run.
expect("([abc]+) > x $1 x;", "cba", "xcbax");
2242
// The tricky case; the quantifier is around the segment
2243
expect("([abc])+ > x $1 x;", "cba", "xax");
2245
// Tricky case in reverse direction
2246
expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2248
// Check post-context segment
2249
expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2251
// Test toRule/toPattern for non-quantified segment.
2252
// Careful with spacing here.
2253
UnicodeString r("([a-c]){q} > x $1 x;");
2255
UErrorCode ec = U_ZERO_ERROR;
2256
Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2257
if (U_FAILURE(ec)) {
2258
errln("FAIL: createFromRules");
2263
// rr receives the regenerated rules; its declaration and the comparison
// with r are on lines not visible in this chunk.
t->toRules(rr, TRUE);
2265
errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2267
logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2271
// Test toRule/toPattern for quantified segment.
2272
// Careful with spacing here.
2273
r = "([a-c])+{q} > x $1 x;";
2274
t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2275
if (U_FAILURE(ec)) {
2276
errln("FAIL: createFromRules");
2280
t->toRules(rr, TRUE);
2282
errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2284
logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2289
//======================================================================
2291
//======================================================================
2292
// Round-trip test between Latin and Devanagari. source[] holds romanized
// strings and expected[] the Devanagari text at the same index. Every pair is
// checked in both directions: source -> expected with "Latin-Devanagari" and
// expected -> source with "Devanagari-Latin".
void TransliteratorTest::TestDevanagariLatinRT(){
2293
// Both tables are declared with this size and the loop below runs over all
// MAX_LEN entries, so the two initializer lists must each supply that many.
const int MAX_LEN= 52;
2294
const char* const source[MAX_LEN] = {
2309
//"r\\u0323ya", // \u095c is not valid in Devanagari
2335
"\\u1E6Dh\\u1E6Dha",
2342
// Not roundtrippable --
2343
// \\u0939\\u094d\\u094d\\u092E - hma
2344
// \\u0939\\u094d\\u092E - hma
2345
// CharsToUnicodeString("hma"),
2350
"san\\u0304j\\u012Bb s\\u0113nagupta",
2351
"\\u0101nand vaddir\\u0101ju",
2355
// Devanagari counterparts; the trailing comment on each row gives the
// romanized form.
const char* const expected[MAX_LEN] = {
2356
"\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2357
"\\u0915\\u094D\\u0930", /* kra */
2358
"\\u0915\\u094D\\u0937", /* ks\\u0323a */
2359
"\\u0916\\u094D\\u0930", /* khra */
2360
"\\u0917\\u094D\\u0930", /* gra */
2361
"\\u0919\\u094D\\u0930", /* n\\u0307ra */
2362
"\\u091A\\u094D\\u0930", /* cra */
2363
"\\u091B\\u094D\\u0930", /* chra */
2364
"\\u091C\\u094D\\u091E", /* jn\\u0303a */
2365
"\\u091D\\u094D\\u0930", /* jhra */
2366
"\\u091E\\u094D\\u0930", /* n\\u0303ra */
2367
"\\u091F\\u094D\\u092F", /* t\\u0323ya */
2368
"\\u0920\\u094D\\u0930", /* t\\u0323hra */
2369
"\\u0921\\u094D\\u092F", /* d\\u0323ya */
2370
//"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2371
"\\u0922\\u094D\\u092F", /* d\\u0323hya */
2372
"\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2373
"\\u0923\\u094D\\u0930", /* n\\u0323ra */
2374
"\\u0924\\u094D\\u0924", /* tta */
2375
"\\u0925\\u094D\\u0930", /* thra */
2376
"\\u0926\\u094D\\u0926", /* dda */
2377
"\\u0927\\u094D\\u0930", /* dhra */
2378
"\\u0928\\u094D\\u0928", /* nna */
2379
"\\u092A\\u094D\\u0930", /* pra */
2380
"\\u092B\\u094D\\u0930", /* phra */
2381
"\\u092C\\u094D\\u0930", /* bra */
2382
"\\u092D\\u094D\\u0930", /* bhra */
2383
"\\u092E\\u094D\\u0930", /* mra */
2384
"\\u0929\\u094D\\u0930", /* n\\u0331ra */
2385
//"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2386
"\\u092F\\u094D\\u0930", /* yra */
2387
"\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2389
"\\u0935\\u094D\\u0930", /* vra */
2390
"\\u0936\\u094D\\u0930", /* s\\u0301ra */
2391
"\\u0937\\u094D\\u0930", /* s\\u0323ra */
2392
"\\u0938\\u094D\\u0930", /* sra */
2393
"\\u0939\\u094d\\u092E", /* hma */
2394
"\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2395
"\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2396
"\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2397
"\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2398
"\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2399
"\\u091F\\u094D\\u092F", /* t\\u0323ya */
2400
"\\u0920\\u094D\\u092F", /* t\\u0323hya */
2401
"\\u0921\\u094D\\u092F", /* d\\u0323ya */
2402
"\\u0922\\u094D\\u092F", /* d\\u0323hya */
2404
"\\u0939\\u094D\\u092F", /* hya */
2405
"\\u0936\\u0943", /* s\\u0301r\\u0325a */
2406
"\\u0936\\u094D\\u091A", /* s\\u0301ca */
2407
"\\u090d", /* e\\u0306 */
2408
"\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2409
"\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2413
UErrorCode status = U_ZERO_ERROR;
2414
UParseError parseError;
2415
UnicodeString message;
2416
Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2417
Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2418
// One status check covers both createInstance() calls; on failure the
// parse-error context is reported as well.
if(U_FAILURE(status)){
2419
errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2420
errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2423
UnicodeString gotResult;
2424
for(int i= 0; i<MAX_LEN; i++){
2425
// NOTE(review): gotResult is assigned here but not read in the visible code.
gotResult = source[i];
2426
expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2427
expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2433
// Round-trip test between Latin and Telugu.
// For every index i, source[i] is fed to the "Latin-Telugu" transliterator and
// compared with expected[i]; expected[i] is then fed to "Telugu-Latin" and
// compared with source[i]. Both strings go through CharsToUnicodeString() first.
void TransliteratorTest::TestTeluguLatinRT(){
2434
const int MAX_LEN=10;
2435
// Latin-script side of each pair (escaped text handed to CharsToUnicodeString()).
const char* const source[MAX_LEN] = {
2436
"raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2437
"\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2438
"r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2439
"san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2440
"san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2441
"amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2442
"ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2443
"\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2444
"\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2445
"m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2448
// Telugu-script side of each pair; expected[i] corresponds to source[i].
const char* const expected[MAX_LEN] = {
2449
"\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2450
"\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2451
"\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2452
"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2453
"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2454
"\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2455
"\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2456
"\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2457
"\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2458
"\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2461
UErrorCode status = U_ZERO_ERROR;
2462
UParseError parseError;
2463
UnicodeString message;
2464
// NOTE(review): the locals keep "Dev" names although the IDs requested here are
// Latin-Telugu and Telugu-Latin.
Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2465
Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2466
// Both factory calls share one status, so a single check covers either failure.
if(U_FAILURE(status)){
2467
errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2468
errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2471
UnicodeString gotResult;
2472
for(int i= 0; i<MAX_LEN; i++){
2473
// NOTE(review): gotResult is assigned but not read in the lines visible here.
gotResult = source[i];
2474
// Forward direction: Latin -> Telugu.
expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2475
// Reverse direction: Telugu -> Latin.
expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2481
// Round-trip test between Latin and Devanagari using Sanskrit words.
// source[i] is run through "Latin-Devanagari" and compared with expected[i];
// expected[i] is run through "Devanagari-Latin" and compared with source[i].
void TransliteratorTest::TestSanskritLatinRT(){
2482
const int MAX_LEN =16;
2483
// Latin-script inputs.
// NOTE(review): fewer source[] entries are visible here than expected[] entries
// below; confirm the two arrays line up index-for-index in the complete file.
const char* const source[MAX_LEN] = {
2484
"rmk\\u1E63\\u0113t",
2485
"\\u015Br\\u012Bmad",
2486
"bhagavadg\\u012Bt\\u0101",
2489
"vi\\u1E63\\u0101da",
2491
"dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2492
"uv\\u0101cr\\u0325",
2493
"dharmak\\u1E63\\u0113tr\\u0113",
2494
"kuruk\\u1E63\\u0113tr\\u0113",
2495
"samav\\u0113t\\u0101",
2497
"m\\u0101mak\\u0101\\u1E25",
2498
// "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2502
// Devanagari-script counterparts of source[].
const char* const expected[MAX_LEN] = {
2503
"\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2504
"\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2505
"\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2506
"\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2507
"\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2508
"\\u0935\\u093f\\u0937\\u093e\\u0926",
2509
"\\u092f\\u094b\\u0917",
2510
"\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2511
"\\u0909\\u0935\\u093E\\u091A\\u0943",
2512
"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2513
"\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2514
"\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2515
"\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2516
"\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2517
//"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2518
"\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2519
"\\u0938\\u0902\\u091c\\u0935",
2521
UErrorCode status = U_ZERO_ERROR;
2522
UParseError parseError;
2523
UnicodeString message;
2524
Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2525
Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2526
// One shared status covers both factory calls.
if(U_FAILURE(status)){
2527
errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2528
errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2531
UnicodeString gotResult;
2532
for(int i= 0; i<MAX_LEN; i++){
2533
// NOTE(review): gotResult is assigned but not read in the lines visible here.
gotResult = source[i];
2534
// Latin -> Devanagari.
expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2535
// Devanagari -> Latin.
expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2542
// Identity test for compound transliterators that chain a transform with its
// opposite direction ("A-B;B-A"): each input is expected to come back unchanged.
void TransliteratorTest::TestCompoundLatinRT(){
2543
// Latin-script inputs.
const char* const source[] = {
2544
"rmk\\u1E63\\u0113t",
2545
"\\u015Br\\u012Bmad",
2546
"bhagavadg\\u012Bt\\u0101",
2549
"vi\\u1E63\\u0101da",
2551
"dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2552
"uv\\u0101cr\\u0325",
2553
"dharmak\\u1E63\\u0113tr\\u0113",
2554
"kuruk\\u1E63\\u0113tr\\u0113",
2555
"samav\\u0113t\\u0101",
2557
"m\\u0101mak\\u0101\\u1E25",
2558
// "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2562
// The element count of source[] drives both the size of expected[] and the loop bound.
const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2563
// Devanagari-script inputs.
const char* const expected[MAX_LEN] = {
2564
"\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2565
"\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2566
"\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2567
"\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2568
"\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2569
"\\u0935\\u093f\\u0937\\u093e\\u0926",
2570
"\\u092f\\u094b\\u0917",
2571
"\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2572
"\\u0909\\u0935\\u093E\\u091A\\u0943",
2573
"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2574
"\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2575
"\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2576
"\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2577
"\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2578
// "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2579
"\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2580
"\\u0938\\u0902\\u091c\\u0935"
2582
// NOTE(review): expected[] is declared with MAX_LEN elements, so this comparison
// looks like it can never be true as written.
if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2583
errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2587
UErrorCode status = U_ZERO_ERROR;
2588
UParseError parseError;
2589
UnicodeString message;
2590
// Four compound IDs, each pairing a transform with the opposite direction.
Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2591
Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2592
Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2593
Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2595
if(U_FAILURE(status)){
2596
errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2597
errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2600
UnicodeString gotResult;
2601
for(int i= 0; i<MAX_LEN; i++){
2602
// NOTE(review): gotResult is assigned but not read in the lines visible here.
gotResult = source[i];
2603
// Input and expected output are the same string in every call below.
expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2604
expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2605
// NOTE(review): devToTelToDev is created and deleted, but no expect() call on it
// is visible in this loop.
expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2608
// Release the four transliterators created above.
delete(latinToDevToLatin);
2609
delete(devToLatinToDev);
2610
delete(devToTelToDev);
2611
delete(latinToTelToLatin);
2615
* Test instantiation from a locale.
2617
// Creates transliterators from IDs that contain locale names ("ru_RU-Latin",
// "en-el") and checks one single-character mapping for each.
void TransliteratorTest::TestLocaleInstantiation(void) {
2619
UErrorCode ec = U_ZERO_ERROR;
2620
// NOTE(review): `pe` is not declared in the lines visible here.
Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2621
if (U_FAILURE(ec)) {
2622
errln("FAIL: createInstance(ru_RU-Latin)");
2626
// U+0430 is expected to become "a".
expect(*t, CharsToUnicodeString("\\u0430"), "a");
2629
t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2630
if (U_FAILURE(ec)) {
2631
errln("FAIL: createInstance(en-el)");
2635
// "a" is expected to become U+03B1.
expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2640
* Test title case handling of accent (should ignore accents)
2642
// Runs the "Title" transliterator over text that has a combining accent (U+0300)
// after the first letter and an apostrophe inside a word.
void TransliteratorTest::TestTitleAccents(void) {
2644
UErrorCode ec = U_ZERO_ERROR;
2645
// NOTE(review): `pe` is not declared in the lines visible here.
Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2646
if (U_FAILURE(ec)) {
2647
errln("FAIL: createInstance(Title)");
2651
// Only the first letter of each space-separated word is upper-cased in the expected text.
expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2656
* Basic test of a locale resource based rule.
2658
// Table-driven test: each DATA row is (transliterator ID, input, expected output).
void TransliteratorTest::TestLocaleResource() {
2659
const char* DATA[] = {
2661
//"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2662
"Latin-el", "b", "\\u03bc\\u03c0",
2663
"Latin-Greek", "b", "\\u03B2",
2664
"Greek-Latin/UNGEGN", "\\u03B2", "v",
2665
"el-Latin", "\\u03B2", "v",
2666
"Greek-Latin", "\\u03B2", "b",
2668
const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2669
// Step by 3 because every row occupies three consecutive array slots.
for (int32_t i=0; i<DATA_length; i+=3) {
2671
// A fresh error code per row.
UErrorCode ec = U_ZERO_ERROR;
2672
Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2673
if (U_FAILURE(ec)) {
2674
errln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ")");
2678
// DATA[i+1] is the input, DATA[i+2] the expected output.
expect(*t, CharsToUnicodeString(DATA[i+1]),
2679
CharsToUnicodeString(DATA[i+2]));
2685
* Make sure parse errors reference the right line.
2687
// Builds a transliterator from `rule` and checks that the reported parse-error
// context contains the text "d << b".
// NOTE(review): the definitions of `rule` and `pe` are not in the lines visible here.
void TransliteratorTest::TestParseError() {
2692
UErrorCode ec = U_ZERO_ERROR;
2694
Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2696
if (U_FAILURE(ec)) {
2697
// Join pre- and post-context with '|' (code 124) between them.
UnicodeString err(pe.preContext);
2698
err.append((UChar)124/*|*/).append(pe.postContext);
2699
if (err.indexOf("d << b") >= 0) {
2700
logln("Ok: " + err);
2702
errln("FAIL: " + err);
2706
errln("FAIL: no syntax error");
2710
* Make sure sets on output are disallowed.
2712
// Tries to build a transliterator from a rule whose output side is a set
// variable ($set) and logs the parse-error context when creation fails.
void TransliteratorTest::TestOutputSet() {
2713
UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2714
UErrorCode ec = U_ZERO_ERROR;
2716
// NOTE(review): `pe` is not declared in the lines visible here.
Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2718
if (U_FAILURE(ec)) {
2719
// Join pre- and post-context with '|' (code 124) between them.
UnicodeString err(pe.preContext);
2720
err.append((UChar)124/*|*/).append(pe.postContext);
2721
logln("Ok: " + err);
2724
errln("FAIL: No syntax error");
2728
* Test the use variable range pragma, making sure that use of
2729
* variable range characters is detected and flagged as an error.
2731
// Tries to build a transliterator from rules that declare the variable range
// 0x70..0x72 and also contain "q" (0x71) as a literal; logs the parse-error
// context when creation fails.
void TransliteratorTest::TestVariableRange() {
2732
UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2733
UErrorCode ec = U_ZERO_ERROR;
2735
// NOTE(review): `pe` is not declared in the lines visible here.
Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2737
if (U_FAILURE(ec)) {
2738
// Join pre- and post-context with '|' (code 124) between them.
UnicodeString err(pe.preContext);
2739
err.append((UChar)124/*|*/).append(pe.postContext);
2740
logln("Ok: " + err);
2743
errln("FAIL: No syntax error");
2747
* Test invalid post context error handling
2749
// Builds a transliterator from the rule "a}b{c>d;" and, on failure, checks that
// the reported error context contains "a}b{c".
void TransliteratorTest::TestInvalidPostContext() {
2750
UnicodeString rule = "a}b{c>d;";
2751
UErrorCode ec = U_ZERO_ERROR;
2753
// NOTE(review): `pe` is not declared in the lines visible here.
Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2755
if (U_FAILURE(ec)) {
2756
// Join pre- and post-context with '|' (code 124) between them.
UnicodeString err(pe.preContext);
2757
err.append((UChar)124/*|*/).append(pe.postContext);
2758
if (err.indexOf("a}b{c") >= 0) {
2759
logln("Ok: " + err);
2761
errln("FAIL: " + err);
2765
errln("FAIL: No syntax error");
2769
* Test ID form variants
2771
// Table-driven test of ID spellings. Each DATA row is
// (ID to create, expected ID of the created object, expected ID of its inverse).
// A NULL third entry marks an ID whose creation is expected to fail.
void TransliteratorTest::TestIDForms() {
2772
const char* DATA[] = {
2774
"nfd", NULL, "NFC", // make sure case is ignored
2775
"Any-NFKD", NULL, "Any-NFKC",
2776
"Null", NULL, "Null",
2777
"-nfkc", "nfkc", "NFKD",
2778
"-nfkc/", "nfkc", "NFKD",
2779
"Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
2780
"Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
2781
"Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
2782
"Source-", NULL, NULL,
2783
"Source/Variant-", NULL, NULL,
2784
"Source-/Variant", NULL, NULL,
2785
"/Variant", NULL, NULL,
2786
"/Variant-", NULL, NULL,
2787
"-/Variant", NULL, NULL,
2792
const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
2794
// Step by 3: one row per iteration.
for (int32_t i=0; i<DATA_length; i+=3) {
2795
const char* ID = DATA[i];
2796
const char* expID = DATA[i+1];
2797
const char* expInvID = DATA[i+2];
2798
// The row describes a valid ID exactly when an inverse ID is given.
UBool expValid = (expInvID != NULL);
2799
// NOTE(review): the body of this branch is not visible here; presumably a NULL
// expID falls back to ID itself -- confirm.
if (expID == NULL) {
2803
UErrorCode ec = U_ZERO_ERROR;
2805
// NOTE(review): `t` is used below but its declaration is not visible; it is
// presumably initialised from this call -- confirm.
Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2806
if (U_FAILURE(ec)) {
2808
logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
2810
errln((UnicodeString)"FAIL: Couldn't create " + ID);
2815
Transliterator *u = t->createInverse(ec);
2816
if (U_FAILURE(ec)) {
2817
errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
2822
// Both the created ID and the inverse ID must match the table.
if (t->getID() == expID &&
2823
u->getID() == expInvID) {
2824
logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
2826
errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
2827
t->getID() + " x getInverse() => " + u->getID() +
2828
", expected " + expInvID);
2835
// NUL-terminated UChar strings used as find/replace operands by checkRules().
// 32 = space, 10 = line feed, 13 = carriage return; EMPTY is the empty string.
static const UChar SPACE[] = {32,0};
2836
static const UChar NEWLINE[] = {10,0};
2837
static const UChar RETURN[] = {13,0};
2838
static const UChar EMPTY[] = {0};
2840
// Compares the rules produced by t2.toRules() with testRulesForward after
// whitespace has been stripped from both sides.
// @param label             caller-supplied label (its use is not visible here)
// @param t2                transliterator whose generated rules are checked
// @param testRulesForward  rule text the generated rules should equal
void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
2841
const UnicodeString& testRulesForward) {
2842
UnicodeString rules2; t2.toRules(rules2, TRUE);
2843
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
2844
// Remove spaces, line feeds and carriage returns from the generated rules.
rules2.findAndReplace(SPACE, EMPTY);
2845
rules2.findAndReplace(NEWLINE, EMPTY);
2846
rules2.findAndReplace(RETURN, EMPTY);
2848
// Only spaces are removed from the reference text.
UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
2850
if (rules2 != testRules) {
2852
logln((UnicodeString)"GENERATED RULES: " + rules2);
2853
logln((UnicodeString)"SHOULD BE: " + testRulesForward);
2858
* Mark's toRules test.
2860
// Builds forward and reverse transliterators from one rule set, checks that
// a-acute and alpha-acute map to each other, then compares toRules() output
// with reference text via checkRules().
// NOTE(review): most lines of the three rule strings are not visible here.
void TransliteratorTest::TestToRulesMark() {
2861
const char* testRules =
2862
"::[[:Latin:][:Mark:]];"
2865
"a <> \\u03B1;" // alpha
2869
"::([[:Greek:][:Mark:]]);"
2871
const char* testRulesForward =
2872
"::[[:Latin:][:Mark:]];"
2880
const char* testRulesBackward =
2881
"::[[:Greek:][:Mark:]];"
2888
UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
2889
UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
2892
UErrorCode ec = U_ZERO_ERROR;
2893
// The same rule text is compiled once per direction.
Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);
2894
Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);
2896
if (U_FAILURE(ec)) {
2899
errln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
2903
expect(*t2, source, target);
2904
expect(*t3, target, source);
2906
checkRules("Failed toRules FORWARD", *t2, testRulesForward);
2907
checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);
2914
* Test Escape and Unescape transliterators.
2916
// Exercises "Hex-Any" and three "Any-Hex" variants (C, Java, Perl).
// NOTE(review): the declarations of t, pe and ec and the calls that consume the
// string arguments below are not visible here.
void TransliteratorTest::TestEscape() {
2922
t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
2923
if (U_FAILURE(ec)) {
2924
errln((UnicodeString)"FAIL: createInstance");
2927
"\\x{40}\\U000000312Q",
2933
t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
2934
if (U_FAILURE(ec)) {
2935
errln((UnicodeString)"FAIL: createInstance");
2938
// C variant: the supplementary character is written with an 8-digit \U escape.
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2939
"\\u0041\\U0010BEEF\\uFEED");
2944
t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
2945
if (U_FAILURE(ec)) {
2946
errln((UnicodeString)"FAIL: createInstance");
2949
// Java variant: the supplementary character is written as two 4-digit \u escapes.
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2950
"\\u0041\\uDBEF\\uDEEF\\uFEED");
2955
t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
2956
if (U_FAILURE(ec)) {
2957
errln((UnicodeString)"FAIL: createInstance");
2960
// Perl variant: every character is written as \x{...}.
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2961
"\\x{41}\\x{10BEEF}\\x{FEED}");
2967
// Builds a transliterator from rules in which an anchored rule ("^a > Q")
// precedes an unanchored rule for the same text ("a > q"), and reports the
// parse-error details if creation fails.
void TransliteratorTest::TestAnchorMasking(){
2968
UnicodeString rule ("^a > Q; a > q;");
2969
UErrorCode status= U_ZERO_ERROR;
2970
UParseError parseError;
2972
Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
2973
if(U_FAILURE(status)){
2974
// Failure message: line, offset, pre-context and the rule text itself.
errln(UnicodeString("FAIL: ") + "ID" +
2975
".createFromRules() => bad rules" +
2976
/*", parse error " + parseError.code +*/
2977
", line " + parseError.line +
2978
", offset " + parseError.offset +
2979
", context " + prettify(parseError.preContext, TRUE) +
2980
", rules: " + prettify(rule, TRUE));
2986
* Make sure display names of variants look reasonable.
2988
// Table-driven check of Transliterator::getDisplayName() in the en_US locale,
// for each ID and for the transliterator created from it with UTRANS_REVERSE.
void TransliteratorTest::TestDisplayName() {
2989
static const char* DATA[] = {
2990
// ID, forward name, reverse name
2991
// Update the text as necessary -- the important thing is
2992
// not the text itself, but how various cases are handled.
2995
"Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
2998
"Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3001
"NFC", "Any to NFC", "Any to NFD",
3004
int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3006
Locale US("en", "US");
3008
// Step by 3: one (ID, forward name, reverse name) row per iteration.
for (int32_t i=0; i<DATA_length; i+=3) {
3010
// NOTE(review): the declaration of `name` is not visible here.
Transliterator::getDisplayName(DATA[i], US, name);
3011
if (name != DATA[i+1]) {
3012
errln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3013
name + ", expected " + DATA[i+1]);
3015
logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3017
UErrorCode ec = U_ZERO_ERROR;
3019
// Create the reverse-direction transliterator and look up the name of its ID.
Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3020
if (U_FAILURE(ec)) {
3022
errln("FAIL: createInstance failed");
3025
name = Transliterator::getDisplayName(t->getID(), US, name);
3026
if (name != DATA[i+2]) {
3027
errln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3028
name + ", expected " + DATA[i+2]);
3030
logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3036
// Table-driven test of assorted transliterators.
// Step 1: register rule-based transliterators from registerRules (ID, rules pairs).
// Step 2: for each testCases row (ID, source, target) create the transliterator
//         and compare its output with the target; an empty target is computed
//         from the source for the normalization and case IDs.
// Step 3: unregister the IDs registered in step 1.
// Both tables are scanned until an entry of length 0 is reached.
void TransliteratorTest::TestSpecialCases(void) {
3037
const UnicodeString registerRules[] = {
3038
"Any-Dev1", "x > X; y > Y;",
3039
"Any-Dev2", "XY > Z",
3041
CharsToUnicodeString
3042
("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3046
static const UnicodeString testCases[] = {
3048
// should add more test cases
3049
// An empty third entry means the target is generated in the loop below.
"NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3050
"NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3051
"NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3052
"NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3055
"Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3056
"Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3058
// check for devanagari bug
3059
"nfd;Dev1;Dev2;nfc", "xy", "Z",
3061
// ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3062
"Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3063
CharsToUnicodeString("Ab'cd Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3065
//TODO: enable this test once Titlecase works right
3067
"Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3068
CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3070
"Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3071
CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3072
"Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3073
CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3075
"Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3076
"Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3079
"Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3080
CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3081
"Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3082
CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3083
"Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3084
CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3085
"Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3086
CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3088
// Upper: TAT\\u02B9\\u00C2NA
3089
// Lower: tat\\u02B9\\u00E2na
3090
// Title: Tat\\u02B9\\u00E2na
3091
"Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3092
CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3093
"Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3094
CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3095
"Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3096
CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3103
// Step 1: build and register each (ID, rules) pair.
for (i = 0; registerRules[i].length()!=0; i+=2) {
3104
UErrorCode status = U_ZERO_ERROR;
3106
Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3107
registerRules[i+1], UTRANS_FORWARD, pos, status);
3108
if (U_FAILURE(status)) {
3109
errln("Fails: Unable to create the transliterator from rules.");
3111
Transliterator::registerInstance(t);
3114
// Step 2: run every (ID, source, target) row.
for (i = 0; testCases[i].length()!=0; i+=3) {
3115
UErrorCode ec = U_ZERO_ERROR;
3117
const UnicodeString& name = testCases[i];
3118
Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3119
if (U_FAILURE(ec)) {
3120
errln((UnicodeString)"FAIL: Couldn't create " + name);
3124
const UnicodeString& id = t->getID();
3125
const UnicodeString& source = testCases[i+1];
3126
UnicodeString target;
3128
// Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3130
// An explicit target wins; otherwise choose the generator by case-insensitive ID match.
if (testCases[i+2].length() > 0) {
3131
target = testCases[i+2];
3132
} else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3133
Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3134
} else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3135
Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3136
} else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3137
Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3138
} else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3139
Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3140
} else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3142
// NOTE(review): target is presumably set from source on a line not visible here.
target.toLower(Locale::US);
3143
} else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3145
target.toUpper(Locale::US);
3147
if (U_FAILURE(ec)) {
3148
errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3152
expect(*t, source, target);
3155
// Step 3: remove the IDs registered in step 1.
for (i = 0; registerRules[i].length()!=0; i+=2) {
3156
Transliterator::unregister(registerRules[i]);
3160
/**
 * Format a code point as an escape sequence for diagnostic output.
 *
 * Code points up to U+FFFF are written as a backslash, 'u' and 4 hex digits;
 * larger ones as a backslash, 'U' and 8 hex digits, matching the \UXXXXXXXX
 * form used for supplementary characters elsewhere in this file.
 *
 * @param ch      code point to format
 * @param buffer  caller-supplied output buffer; must have room for the
 *                longest form (11 bytes including the terminating NUL)
 * @return buffer
 */
char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
    if (ch <= 0xFFFF) {
        // %x expects an unsigned int, so convert explicitly.
        sprintf(buffer, "\\u%04x", (unsigned int)ch);
    } else {
        // The 8-digit form takes an upper-case 'U'.
        sprintf(buffer, "\\U%08x", (unsigned int)ch);
    }
    return buffer;
}
3169
// Checks that u_totitle(), u_strToUpper() and u_strToLower() handle the
// DESERET_dee / DESERET_DEE strings, which the comments below describe as
// surrogates.
// NOTE(review): the declarations of dee, buffer and buffer2 are not visible here.
void TransliteratorTest::TestSurrogateCasing (void) {
3170
// check that casing handles surrogates
3171
// titlecase is currently defective
3175
// Read the first code point of DESERET_dee into dee.
UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3176
UnicodeString DEE(u_totitle(dee));
3177
if (DEE != DESERET_DEE) {
3178
err("Fails titlecase of surrogates");
3179
err(Char32ToEscapedChars(dee, buffer));
3181
errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3184
// Mixed-case input plus the all-lower and all-upper strings it should map to.
UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3185
UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3186
UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3187
UErrorCode status= U_ZERO_ERROR;
3189
// 20 is the capacity passed for buffer2.
u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3190
if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3191
errln("Fails: Can't uppercase surrogates.");
3194
// Reset the status before the second conversion.
status= U_ZERO_ERROR;
3195
u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3196
if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3197
errln("Fails: Can't lowercase surrogates.");
3201
// Helper: transliterates `result` in place with `t`.
// NOTE(review): `src` is not referenced in the lines visible here; presumably it
// is copied into `result` first -- confirm.
static void _trans(Transliterator& t, const UnicodeString& src,
3202
UnicodeString& result) {
3204
t.transliterate(result);
3207
static void _trans(const UnicodeString& id, const UnicodeString& src,
3208
UnicodeString& result, UErrorCode ec) {
3210
Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3211
if (U_SUCCESS(ec)) {
3212
_trans(*t, src, result);
3217
// Helper: scans `pairs` two entries at a time, stopping at the first entry of
// length 0, and compares `source` case-insensitively with the first entry of
// each pair.
// NOTE(review): the return statements are not visible here; presumably the
// second entry of the matching pair is returned, and `empty` when nothing
// matches -- confirm.
static const UnicodeString& _findMatch(const UnicodeString& source,
3218
const UnicodeString* pairs) {
3219
static const UnicodeString empty;
3220
for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3221
if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3228
// Check to see that incremental gets at least part way through a reasonable string.
3230
// Walks every available source/target/variant combination whose source has a
// sample text in `tests`, creates the transliterator, and runs
// CheckIncrementalAux() on it and on its inverse.
void TransliteratorTest::TestIncrementalProgress(void) {
3231
UErrorCode ec = U_ZERO_ERROR;
3232
UnicodeString latinTest = "The Quick Brown Fox.";
3233
// Derive Devanagari and Katakana sample texts from the Latin sample.
UnicodeString devaTest;
3234
_trans("Latin-Devanagari", latinTest, devaTest, ec);
3235
UnicodeString kataTest;
3236
_trans("Latin-Katakana", latinTest, kataTest, ec);
3237
if (U_FAILURE(ec)) {
3238
errln("FAIL: Internal error");
3241
// (source script name, sample text) pairs consulted through _findMatch().
static const UnicodeString tests[] = {
3244
"Halfwidth", latinTest,
3245
"Devanagari", devaTest,
3246
"Katakana", kataTest,
3250
UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3251
int32_t i = 0, j=0, k=0;
3252
int32_t sources = Transliterator::countAvailableSources();
3253
for (i = 0; i < sources; i++) {
3254
UnicodeString source;
3255
Transliterator::getAvailableSource(i, source);
3256
// This inner `test` shadows the one declared before the loop.
UnicodeString test = _findMatch(source, tests);
3257
// No sample text for this source: log it as skipped.
if (test.length() == 0) {
3258
logln((UnicodeString)"Skipping " + source + "-X");
3261
int32_t targets = Transliterator::countAvailableTargets(source);
3262
for (j = 0; j < targets; j++) {
3263
UnicodeString target;
3264
Transliterator::getAvailableTarget(j, source, target);
3265
int32_t variants = Transliterator::countAvailableVariants(source, target);
3266
for (k =0; k< variants; k++) {
3267
UnicodeString variant;
3269
UErrorCode status = U_ZERO_ERROR;
3271
Transliterator::getAvailableVariant(k, source, target, variant);
3272
// Full ID in "source-target/variant" form.
UnicodeString id = source + "-" + target + "/" + variant;
3274
Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3275
if (U_FAILURE(status)) {
3276
errln((UnicodeString)"FAIL: Could not create " + id);
3280
status = U_ZERO_ERROR;
3281
CheckIncrementalAux(t, test);
3284
// Transliterate the sample into `rev`, then check the inverse on that text.
_trans(*t, test, rev);
3285
Transliterator *inv = t->createInverse(status);
3286
if (U_FAILURE(status)) {
3287
errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3292
CheckIncrementalAux(inv, rev);
3300
// Runs one incremental transliteration of `input` with `t` and reports
// "No Progress" when pos.start is still 0 afterwards, then finishes the
// transliteration and reports "Incomplete" when pos.start != pos.limit.
// @param t      transliterator under test
// @param input  text to transliterate (copied; the argument is not modified)
void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3301
const UnicodeString& input) {
3302
UErrorCode ec = U_ZERO_ERROR;
3304
UnicodeString test = input;
3306
// Context and limit cover the whole input.
// NOTE(review): the declaration of `pos` and the initialisation of pos.start are
// not visible here.
pos.contextStart = 0;
3307
pos.contextLimit = input.length();
3309
pos.limit = input.length();
3311
t->transliterate(test, pos, ec);
3312
if (U_FAILURE(ec)) {
3313
errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3316
// NOTE(review): gotError is not read in the lines visible here.
UBool gotError = FALSE;
3318
// we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3320
if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3321
errln((UnicodeString)"No Progress, " +
3322
t->getID() + ": " + formatInput(test, input, pos));
3325
logln((UnicodeString)"PASS Progress, " +
3326
t->getID() + ": " + formatInput(test, input, pos));
3328
t->finishTransliteration(test, pos);
3329
// After finishing, start and limit must coincide.
if (pos.start != pos.limit) {
3330
errln((UnicodeString)"Incomplete, " +
3331
t->getID() + ": " + formatInput(test, input, pos));
3336
// Builds a transliterator from a rule that uses function calls (&Lower, &Hex,
// &Any-Lower) on a captured segment, compares toRules() output with the rule
// text, and checks the transliteration of a sample sentence.
// NOTE(review): the conditions guarding the errln/logln calls and the
// declarations of pe and r are not visible here.
void TransliteratorTest::TestFunction() {
3337
// Careful with spacing and ';' here: Phrase this exactly
3338
// as toRules() is going to return it. If toRules() changes
3339
// with regard to spacing or ';', then adjust this string.
3340
UnicodeString rule = // TODO clean up spacing
3341
"([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3344
UErrorCode ec = U_ZERO_ERROR;
3345
Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3347
errln("FAIL: createFromRules failed");
3352
t->toRules(r, TRUE);
3354
logln((UnicodeString)"OK: toRules() => " + r);
3356
errln((UnicodeString)"FAIL: toRules() => " + r +
3357
", expected " + rule);
3360
// In the expected text each upper-case letter is followed by
// "(<lower-case letter>=<its \u escape>)".
expect(*t, "The Quick Brown Fox",
3361
"T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
3366
// Tries to build transliterators from rules that reference $1 where no
// capturing segment is defined (". > $1;") and expects creation to fail.
// NOTE(review): the conditions guarding the first two errln calls and the
// declaration of pe are not visible here.
void TransliteratorTest::TestInvalidBackRef(void) {
3367
UnicodeString rule = ". > $1;";
3368
UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3370
UErrorCode ec = U_ZERO_ERROR;
3371
// Both calls share one error code.
Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3372
Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3375
errln("FAIL: createFromRules should have returned NULL");
3380
errln("FAIL: createFromRules should have returned NULL");
3384
// Success is the failing outcome for this test.
if (U_SUCCESS(ec)) {
3385
errln("FAIL: Ok: . > $1; => no error");
3387
logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3391
//======================================================================
3393
//======================================================================
3394
// Creates the transliterator named by `id` and checks that it maps `source`
// to `expectedResult`.
// @param id              transliterator ID passed to createInstance()
// @param source          input text
// @param expectedResult  text the transliteration should produce
void TransliteratorTest::expectT(const UnicodeString& id,
3395
const UnicodeString& source,
3396
const UnicodeString& expectedResult) {
3397
UErrorCode ec = U_ZERO_ERROR;
3399
// NOTE(review): `pe` is not declared in the lines visible here.
Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3400
if (U_FAILURE(ec)) {
3401
errln((UnicodeString)"FAIL: Could not create " + id);
3405
expect(*t, source, expectedResult);
3409
// Builds a RuleBasedTransliterator with the ID "<ID>" from `rules` and forwards
// to the Transliterator-based expect() overload.
// @param rules           rule text for the transliterator
// @param source          input text
// @param expectedResult  text the transliteration should produce
// @param pos             position argument passed through unchanged
void TransliteratorTest::expect(const UnicodeString& rules,
3410
const UnicodeString& source,
3411
const UnicodeString& expectedResult,
3412
UTransPosition *pos) {
3413
UErrorCode status = U_ZERO_ERROR;
3414
Transliterator *t = new RuleBasedTransliterator("<ID>", rules, status);
3415
if (U_FAILURE(status)) {
3416
errln("FAIL: Transliterator constructor failed");
3418
expect(*t, source, expectedResult, pos);
3423
// Two-way check: `t` must map source to expectedResult, and
// `reverseTransliterator` must map expectedResult back to source.
// @param t                      forward transliterator
// @param source                 input for the forward direction
// @param expectedResult         expected forward output and reverse input
// @param reverseTransliterator  transliterator for the reverse direction
void TransliteratorTest::expect(const Transliterator& t,
3424
const UnicodeString& source,
3425
const UnicodeString& expectedResult,
3426
const Transliterator& reverseTransliterator) {
3427
expect(t, source, expectedResult);
3428
expect(reverseTransliterator, expectedResult, source);
3431
// Core check: transliterates `source` with `t` in three ways and hands each
// outcome to expectAux() under a distinct tag:
//   ":String"      - one-shot transliterate() on a copy of the source,
//   ":Replaceable" - transliterate() followed by finishTransliteration(),
//   ":Keyboard"    - incremental transliteration, finished at the end.
// @param t               transliterator under test
// @param source          input text
// @param expectedResult  text every mode should produce
// @param pos             position used by the ":Replaceable" step
// NOTE(review): several lines of this function, including the declaration of
// `log` and the conditions around the incremental steps, are not visible here.
void TransliteratorTest::expect(const Transliterator& t,
3432
const UnicodeString& source,
3433
const UnicodeString& expectedResult,
3434
UTransPosition *pos) {
3436
// Mode 1: one-shot transliteration of a copy.
UnicodeString result(source);
3437
t.transliterate(result);
3438
expectAux(t.getID() + ":String", source, result, expectedResult);
3441
UTransPosition index={0, 0, 0, 0};
3446
// Mode 2: transliterate, then finish using the caller-supplied position.
UnicodeString rsource(source);
3448
t.transliterate(rsource);
3450
// Do it all at once -- below we do it incrementally
3451
t.finishTransliteration(rsource, *pos);
3453
expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
3455
// Test keyboard (incremental) transliteration -- this result
3456
// must be the same after we finalize (see below).
3461
formatInput(log, rsource, index);
3463
UErrorCode status = U_ZERO_ERROR;
3464
t.transliterate(rsource, index, status);
3465
formatInput(log, rsource, index);
3467
// Feed the source one code unit at a time, logging the state after each step.
for (int32_t i=0; i<source.length(); ++i) {
3471
log.append(source.charAt(i)).append(" -> ");
3472
UErrorCode status = U_ZERO_ERROR;
3473
t.transliterate(rsource, index, source.charAt(i), status);
3474
formatInput(log, rsource, index);
3478
// As a final step in keyboard transliteration, we must call
3479
// transliterate to finish off any pending partial matches that
3480
// were waiting for more input.
3481
t.finishTransliteration(rsource, index);
3482
log.append(" => ").append(rsource);
3484
expectAux(t.getID() + ":Keyboard", log,
3485
rsource == expectedResult,
3491
* @param appendTo result is appended to this param.
3492
* @param input the string being transliterated
3493
* @param pos the index struct
3495
/**
 * Appends a printable picture of `input` annotated with the four
 * offsets in `pos`.  When the offsets are consistent, i.e.
 *   0 <= contextStart <= start <= limit <= contextLimit <= input.length(),
 * the text is emitted as aaa{bbb|ccc|ddd}eee.  Otherwise a message
 * beginning "INVALID UTransPosition" that lists the four offsets is
 * appended instead.
 *
 * @param appendTo string that receives the formatted text
 * @param input    the text being transliterated
 * @param pos      the position struct to visualise
 *
 * NOTE(review): the bare numbers between statements are extraction
 * residue; the else keyword, the tail of the INVALID message and the
 * return statement are not visible in this view (presumably appendTo
 * is returned -- confirm against the full file).
 */
UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
3496
const UnicodeString& input,
3497
const UTransPosition& pos) {
3498
// Output a string of the form aaa{bbb|ccc|ddd}eee, where
3499
// the {} indicate the context start and limit, and the ||
3500
// indicate the start and limit.
3501
// Only slice the input when the offsets are ordered and in range.
if (0 <= pos.contextStart &&
3502
pos.contextStart <= pos.start &&
3503
pos.start <= pos.limit &&
3504
pos.limit <= pos.contextLimit &&
3505
pos.contextLimit <= input.length()) {
3507
// a: before the context, b: context start up to start, c: start up to
// limit, d: limit up to context limit, e: after the context.
UnicodeString a, b, c, d, e;
3508
input.extractBetween(0, pos.contextStart, a);
3509
input.extractBetween(pos.contextStart, pos.start, b);
3510
input.extractBetween(pos.start, pos.limit, c);
3511
input.extractBetween(pos.limit, pos.contextLimit, d);
3512
input.extractBetween(pos.contextLimit, input.length(), e);
3513
// Braces are written as numeric code units (123 and 125); PIPE is a
// constant defined elsewhere in this file.
appendTo.append(a).append((UChar)123/*{*/).append(b).
3514
append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
3515
append((UChar)125/*}*/).append(e);
3517
// Inconsistent offsets: report the raw values rather than slicing.
appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
3518
pos.contextStart + ", s=" + pos.start + ", l=" +
3519
pos.limit + ", cl=" + pos.contextLimit + "} on " +
3525
/**
 * Convenience overload: builds the summary "source -> result", computes
 * the pass flag as (result == expectedResult), and forwards both to the
 * summary/pass overload of expectAux().
 *
 * @param tag            label identifying the transliterator and mode
 * @param source         original input text
 * @param result         text actually produced
 * @param expectedResult text that should have been produced
 *
 * NOTE(review): the bare numbers between lines are extraction residue
 * and the final argument of the forwarded call is not visible in this
 * view.
 */
void TransliteratorTest::expectAux(const UnicodeString& tag,
3526
const UnicodeString& source,
3527
const UnicodeString& result,
3528
const UnicodeString& expectedResult) {
3529
expectAux(tag, source + " -> " + result,
3530
result == expectedResult,
3534
/**
 * Reports the outcome of a single expectation.  The visible code logs
 * "(tag) summary" through logln() and reports a message starting with
 * "FAIL: (tag) " and ending with ", expected <expectedResult>" through
 * errln(); the strings are passed through prettify() first.
 *
 * @param tag            label identifying the transliterator and mode
 * @param summary        description of what was transliterated
 * @param pass           whether the expectation held
 * @param expectedResult text that should have been produced
 *
 * NOTE(review): the bare numbers between statements are extraction
 * residue, and the branch that selects between logln() and errln() is
 * not visible in this view -- presumably logln() runs when pass is
 * true and errln() otherwise; confirm against the full file.
 */
void TransliteratorTest::expectAux(const UnicodeString& tag,
3535
const UnicodeString& summary, UBool pass,
3536
const UnicodeString& expectedResult) {
3538
logln(UnicodeString("(")+tag+") " + prettify(summary));
3540
errln(UnicodeString("FAIL: (")+tag+") "
3542
+ ", expected " + prettify(expectedResult));