~ubuntu-branches/ubuntu/gutsy/icu/gutsy-updates

CharsToUnicodeString("{NO-BREAK SPACE}abc{CJK UNIFIED IDEOGRAPH-4E01}{MICRO SIGN}{GUJARATI SIGN CANDRABINDU}{REPLACEMENT CHARACTER}{END OF TRANSMISSION}{HORIZONTAL TABULATION}{<control-0081>}{<noncharacter-FFFF>}"));

1164

expect(*name2uni, "{ NO-BREAK SPACE}abc{ CJK UNIFIED IDEOGRAPH-4E01 }{x{MICRO SIGN}{GUJARATI SIGN CANDRABINDU}{REPLACEMENT CHARACTER}{END OF TRANSMISSION}{HORIZONTAL TABULATION}{<control-0081>}{<noncharacter-FFFF>}{<control-0004>}{",

1165

CharsToUnicodeString("\\u00A0abc\\u4E01{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004{"));

1166

1167

delete uni2name;

1168

delete name2uni;

1169

}

1170

1171

/**

1172

* Test liberalized ID syntax. 1006c

1173

1174

void TransliteratorTest::TestLiberalizedID(void) {

1175

// Some test cases have an expected getID() value of NULL. This

1176

// means I have disabled the test case for now. This stuff is

1177

// still under development, and I haven't decided whether to make

1178

// getID() return canonical case yet. It will all get rewritten

1179

// with the move to Source-Target/Variant IDs anyway. [aliu]

1180

const char* DATA[] = {

1181

"latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",

1182

" Null ", "Null", "whitespace",

1183

" Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",

1184

" null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",

1185

};

1186

const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);

1187

UParseError parseError;

1188

UErrorCode status= U_ZERO_ERROR;

1189

for (int32_t i=0; i<DATA_length; i+=3) {

1190

Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);

1191

if (t == 0) {

1192

errln(UnicodeString("FAIL: ") + DATA[i+2] +

1193

" cannot create ID \"" + DATA[i] + "\"");

1194

} else {

1195

UnicodeString exp;

1196

if (DATA[i+1]) {

1197

exp = UnicodeString(DATA[i+1], "");

1198

}

1199

// Don't worry about getID() if the expected char*

1200

// is NULL -- see above.

1201

if (exp.length() == 0 || exp == t->getID()) {

1202

logln(UnicodeString("Ok: ") + DATA[i+2] +

1203

" create ID \"" + DATA[i] + "\" => \"" +

1204

exp + "\"");

1205

} else {

1206

errln(UnicodeString("FAIL: ") + DATA[i+2] +

1207

" create ID \"" + DATA[i] + "\" => \"" +

1208

t->getID() + "\", exp \"" + exp + "\"");

1209

}

1210

delete t;

1211

}

1212

}

1213

}

1214

1215

/* test for Jitterbug 912 */

1216

void TransliteratorTest::TestCreateInstance(){

1217

UParseError err;

1218

UErrorCode status = U_ZERO_ERROR;

1219

Transliterator* myTrans = Transliterator::createInstance(UnicodeString("Latin-Hangul"),UTRANS_REVERSE,err,status);

1220

if (myTrans == 0) {

1221

errln("FAIL: createInstance failed");

1222

return;

1223

}

1224

UnicodeString newID =myTrans->getID();

1225

if(newID!=UnicodeString("Hangul-Latin")){

1226

errln(UnicodeString("Test for Jitterbug 912 Transliterator::createInstance(id,UTRANS_REVERSE) failed"));

1227

}

1228

delete myTrans;

1229

}

1230

1231

/**

1232

* Test the normalization transliterator.

1233

1234

void TransliteratorTest::TestNormalizationTransliterator() {

1235

// THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest

1236

// PLEASE KEEP THEM IN SYNC WITH BasicTest.

1237

const char* CANON[] = {

1238

// Input Decomposed Composed

1239

"cat", "cat", "cat" ,

1240

"\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,

1241

1242

"\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above

1243

"D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above

1244

1245

"\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above

1246

"\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below

1247

"D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above

1248

1249

"\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above

1250

"D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below

1251

1252

"\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave

1253

"\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave

1254

"\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron

1255

1256

"\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign

1257

"\\u00c5", "A\\u030a", "\\u00c5" , // A-ring

1258

1259

"\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0

1260

"\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0

1261

1262

"Henry IV", "Henry IV", "Henry IV" ,

1263

"Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,

1264

1265

"\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)

1266

"\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten

1267

"\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten

1268

"\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten

1269

"\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten

1270

1271

"A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,

1272

0 // end

1273

};

1274

1275

const char* COMPAT[] = {

1276

// Input Decomposed Composed

1277

"\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed

1278

1279

"\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0

1280

"\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i

1281

1282

"Henry IV", "Henry IV", "Henry IV" ,

1283

"Henry \\u2163", "Henry IV", "Henry IV" ,

1284

1285

"\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)

1286

"\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten

1287

1288

"\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten

1289

0 // end

1290

};

1291

1292

int32_t i;

1293

UParseError parseError;

1294

UErrorCode status = U_ZERO_ERROR;

1295

Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);

1296

Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);

1297

if (!NFD || !NFC) {

1298

errln("FAIL: createInstance failed");

1299

delete NFD;

1300

delete NFC;

1301

return;

1302

}

1303

for (i=0; CANON[i]; i+=3) {

1304

UnicodeString in = CharsToUnicodeString(CANON[i]);

1305

UnicodeString expd = CharsToUnicodeString(CANON[i+1]);

1306

UnicodeString expc = CharsToUnicodeString(CANON[i+2]);

1307

expect(*NFD, in, expd);

1308

expect(*NFC, in, expc);

1309

}

1310

delete NFD;

1311

delete NFC;

1312

1313

Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);

1314

Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);

1315

if (!NFKD || !NFKC) {

1316

errln("FAIL: createInstance failed");

1317

delete NFKD;

1318

delete NFKC;

1319

return;

1320

}

1321

for (i=0; COMPAT[i]; i+=3) {

1322

UnicodeString in = CharsToUnicodeString(COMPAT[i]);

1323

UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);

1324

UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);

1325

expect(*NFKD, in, expkd);

1326

expect(*NFKC, in, expkc);

1327

}

1328

delete NFKD;

1329

delete NFKC;

1330

1331

UParseError pe;

1332

status = U_ZERO_ERROR;

1333

Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",

1334

UTRANS_FORWARD,

1335

pe, status);

1336

if (t == 0) {

1337

errln("FAIL: createInstance failed");

1338

}

1339

expect(*t, CharsToUnicodeString("\\u010dx"),

1340

CharsToUnicodeString("c\\u030C"));

1341

delete t;

1342

}

1343

1344

/**

1345

* Test compound RBT rules.

1346

1347

void TransliteratorTest::TestCompoundRBT(void) {

1348

// Careful with spacing and ';' here: Phrase this exactly

1349

// as toRules() is going to return it. If toRules() changes

1350

// with regard to spacing or ';', then adjust this string.

1351

UnicodeString rule("::Hex-Any;\n"

1352

"::Any-Lower;\n"

1353

"a > '.A.';\n"

1354

"b > '.B.';\n"

1355

"::[^t]Any-Upper;", "");

1356

UParseError parseError;

1357

UErrorCode status = U_ZERO_ERROR;

1358

Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);

1359

if (t == 0) {

1360

errln("FAIL: createFromRules failed");

1361

return;

1362

}

1363

expect(*t, "\\u0043at in the hat, bat on the mat",

1364

"C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");

1365

UnicodeString r;

1366

t->toRules(r, TRUE);

1367

if (r == rule) {

1368

logln((UnicodeString)"OK: toRules() => " + r);

1369

} else {

1370

errln((UnicodeString)"FAIL: toRules() => " + r +

1371

", expected " + rule);

1372

}

1373

delete t;

1374

1375

// Now test toRules

1376

t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);

1377

if (t == 0) {

1378

errln("FAIL: createInstance failed");

1379

return;

1380

}

1381

UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");

1382

t->toRules(r, TRUE);

1383

if (r != exp) {

1384

errln((UnicodeString)"FAIL: toRules() => " + r +

1385

", expected " + exp);

1386

} else {

1387

logln((UnicodeString)"OK: toRules() => " + r);

1388

}

1389

delete t;

1390

1391

// Round trip the result of toRules

1392

t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);

1393

if (t == 0) {

1394

errln("FAIL: createFromRules #2 failed");

1395

return;

1396

} else {

1397

logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");

1398

}

1399

1400

// Test toRules again

1401

t->toRules(r, TRUE);

1402

if (r != exp) {

1403

errln((UnicodeString)"FAIL: toRules() => " + r +

1404

", expected " + exp);

1405

} else {

1406

logln((UnicodeString)"OK: toRules() => " + r);

1407

}

1408

1409

delete t;

1410

1411

// Test Foo(Bar) IDs. Careful with spacing in id; make it conform

1412

// to what the regenerated ID will look like.

1413

UnicodeString id("Upper(Lower);(NFKC)", "");

1414

t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);

1415

if (t == 0) {

1416

errln("FAIL: createInstance #2 failed");

1417

return;

1418

}

1419

if (t->getID() == id) {

1420

logln((UnicodeString)"OK: created " + id);

1421

} else {

1422

errln((UnicodeString)"FAIL: createInstance(" + id +

1423

").getID() => " + t->getID());

1424

}

1425

1426

Transliterator *u = t->createInverse(status);

1427

if (u == 0) {

1428

errln("FAIL: createInverse failed");

1429

delete t;

1430

return;

1431

}

1432

exp = "NFKC();Lower(Upper)";

1433

if (u->getID() == exp) {

1434

logln((UnicodeString)"OK: createInverse(" + id + ") => " +

1435

u->getID());

1436

} else {

1437

errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +

1438

u->getID());

1439

}

1440

delete t;

1441

delete u;

1442

}

1443

1444

/**

1445

* Compound filter semantics were orginially not implemented

1446

* correctly. Originally, each component filter f(i) is replaced by

1447

* f'(i) = f(i) && g, where g is the filter for the compound

1448

* transliterator.

1449

1450

* From Mark:

1451

1452

* Suppose and I have a transliterator X. Internally X is

1453

* "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].

1454

1455

* The compound should convert all greek characters (through latin) to

1456

* cyrillic, then lowercase the result. The filter should say "don't

1457

* touch 'A' in the original". But because an intermediate result

1458

* happens to go through "A", the Greek Alpha gets hung up.

1459

1460

void TransliteratorTest::TestCompoundFilter(void) {

1461

UParseError parseError;

1462

UErrorCode status = U_ZERO_ERROR;

1463

Transliterator *t = Transliterator::createInstance

1464

("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);

1465

if (t == 0) {

1466

errln("FAIL: createInstance failed");

1467

return;

1468

}

1469

t->adoptFilter(new UnicodeSet("[^A]", status));

1470

if (U_FAILURE(status)) {

1471

errln("FAIL: UnicodeSet ct failed");

1472

delete t;

1473

return;

1474

}

1475

1476

// Only the 'A' at index 1 should remain unchanged

1477

expect(*t,

1478

CharsToUnicodeString("BA\\u039A\\u0391"),

1479

CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));

1480

delete t;

1481

}

1482

1483

void TransliteratorTest::TestRemove(void) {

1484

UParseError parseError;

1485

UErrorCode status = U_ZERO_ERROR;

1486

Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);

1487

if (t == 0) {

1488

errln("FAIL: createInstance failed");

1489

return;

1490

}

1491

1492

expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");

1493

delete t;

1494

}

1495

1496

void TransliteratorTest::TestToRules(void) {

1497

const char* RBT = "rbt";

1498

const char* SET = "set";

1499

static const char* DATA[] = {

1500

RBT,

1501

"$a=\\u4E61; [$a] > A;",

1502

"[\\u4E61] > A;",

1503

1504

RBT,

1505

"$white=[[:Zs:][:Zl:]]; $white{a} > A;",

1506

"[[:Zs:][:Zl:]]{a} > A;",

1507

1508

SET,

1509

"[[:Zs:][:Zl:]]",

1510

"[[:Zs:][:Zl:]]",

1511

1512

SET,

1513

"[:Ps:]",

1514

"[:Ps:]",

1515

1516

SET,

1517

"[:L:]",

1518

"[:L:]",

1519

1520

SET,

1521

"[[:L:]-[A]]",

1522

"[[:L:]-[A]]",

1523

1524

SET,

1525

"[~[:Lu:][:Ll:]]",

1526

"[~[:Lu:][:Ll:]]",

1527

1528

SET,

1529

"[~[a-z]]",

1530

"[~[a-z]]",

1531

1532

RBT,

1533

"$white=[:Zs:]; $black=[^$white]; $black{a} > A;",

1534

"[^[:Zs:]]{a} > A;",

1535

1536

RBT,

1537

"$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",

1538

"[[a-z]-[:Zs:]]{a} > A;",

1539

1540

RBT,

1541

"$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",

1542

"[[:Zs:]&[a-z]]{a} > A;",

1543

1544

RBT,

1545

"$a=[:Zs:]; $b=[x$a]; $b{a} > A;",

1546

"[x[:Zs:]]{a} > A;",

1547

1548

RBT,

1549

"$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"

1550

"$macron = \\u0304 ;"

1551

"$evowel = [aeiouyAEIOUY] ;"

1552

"$iotasub = \\u0345 ;"

1553

"($evowel $macron $accentMinus *) i > | $1 $iotasub ;",

1554

"([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",

1555

1556

RBT,

1557

"([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",

1558

"([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",

1559

};

1560

static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));

1561

1562

for (int32_t d=0; d < DATA_length; d+=3) {

1563

if (DATA[d] == RBT) {

1564

// Transliterator test

1565

UParseError parseError;

1566

UErrorCode status = U_ZERO_ERROR;

1567

Transliterator *t = Transliterator::createFromRules("ID",

1568

DATA[d+1], UTRANS_FORWARD, parseError, status);

1569

if (t == 0) {

1570

errln("FAIL: createFromRules failed");

1571

return;

1572

}

1573

UnicodeString rules, escapedRules;

1574

t->toRules(rules, FALSE);

1575

t->toRules(escapedRules, TRUE);

1576

UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);

1577

UnicodeString expEscapedRules(DATA[d+2]);

1578

if (rules == expRules) {

1579

logln((UnicodeString)"Ok: " + DATA[d+1] +

1580

" => " + rules);

1581

} else {

1582

errln((UnicodeString)"FAIL: " + DATA[d+1] +

1583

" => " + rules + ", exp " + expRules);

1584

}

1585

if (escapedRules == expEscapedRules) {

1586

logln((UnicodeString)"Ok: " + DATA[d+1] +

1587

" => " + escapedRules);

1588

} else {

1589

errln((UnicodeString)"FAIL: " + DATA[d+1] +

1590

" => " + escapedRules + ", exp " + expEscapedRules);

1591

}

1592

delete t;

1593

1594

} else {

1595

// UnicodeSet test

1596

UErrorCode status = U_ZERO_ERROR;

1597

UnicodeString pat(DATA[d+1]);

1598

UnicodeString expToPat(DATA[d+2]);

1599

UnicodeSet set(pat, status);

1600

if (U_FAILURE(status)) {

1601

errln("FAIL: UnicodeSet ct failed");

1602

return;

1603

}

1604

// Adjust spacing etc. as necessary.

1605

UnicodeString toPat;

1606

set.toPattern(toPat);

1607

if (expToPat == toPat) {

1608

logln((UnicodeString)"Ok: " + pat +

1609

" => " + toPat);

1610

} else {

1611

errln((UnicodeString)"FAIL: " + pat +

1612

" => " + prettify(toPat, TRUE) +

1613

", exp " + prettify(pat, TRUE));

1614

}

1615

}

1616

}

1617

}

1618

1619

void TransliteratorTest::TestContext() {

1620

UTransPosition pos = {0, 2, 0, 1}; // cs cl s l

1621

expect("de > x; {d}e > y;",

1622

"de",

1623

"ye",

1624

&pos);

1625

1626

expect("ab{c} > z;",

1627

"xadabdabcy",

1628

"xadabdabzy");

1629

}

1630

1631

void TransliteratorTest::TestSupplemental() {

1632

1633

expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"

1634

"a > $a; $s > i;"),

1635

CharsToUnicodeString("ab\\U0001030Fx"),

1636

CharsToUnicodeString("\\U00010300bix"));

1637

1638

expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"

1639

"$b=[A-Z\\U00010400-\\U0001044D];"

1640

"($a)($b) > $2 $1;"),

1641

CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),

1642

CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));

1643

1644

// k|ax\\U00010300xm

1645

1646

// k|a\\U00010400\\U00010300xm

1647

// ky|\\U00010400\\U00010300xm

1648

// ky\\U00010400|\\U00010300xm

1649

1650

// ky\\U00010400|\\U00010300\\U00010400m

1651

// ky\\U00010400y|\\U00010400m

1652

expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"

1653

"$a {x} > | @ \\U00010400;"

1654

"{$a} [^\\u0000-\\uFFFF] > y;"),

1655

CharsToUnicodeString("kax\\U00010300xm"),

1656

CharsToUnicodeString("ky\\U00010400y\\U00010400m"));

1657

1658

expectT("Any-Name",

1659

CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),

1660

"{GOTHIC LETTER AHSA}{TAG LATIN SMALL LETTER A}{NO-BREAK SPACE}");

1661

1662

expectT("Any-Hex/Unicode",

1663

CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),

1664

"U+10330U+10FF00U+E0061U+00A0");

1665

1666

expectT("Any-Hex/C",

1667

CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),

1668

"\\U00010330\\U0010FF00\\U000E0061\\u00A0");

1669

1670

expectT("Any-Hex/Perl",

1671

CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),

1672

"\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");

1673

1674

expectT("Any-Hex/Java",

1675

CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),

1676

"\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");

1677

1678

expectT("Any-Hex/XML",

1679

CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),

1680

"𐌰􏼀󠁡 ");

1681

1682

expectT("Any-Hex/XML10",

1683

CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),

1684

"𐌰􏼀󠁡 ");

1685

1686

expectT("[\\U000E0000-\\U000E0FFF] Remove",

1687

CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),

1688

CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));

1689

}

1690

1691

void TransliteratorTest::TestQuantifier() {

1692

1693

// Make sure @ in a quantified anteContext works

1694

expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",

1695

"AAAAAb",

1696

"aaa(aac)");

1697

1698

// Make sure @ in a quantified postContext works

1699

expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",

1700

"baaaaa",

1701

"caa(aaa)");

1702

1703

// Make sure @ in a quantified postContext with seg ref works

1704

expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",

1705

"baaaaa",

1706

"baa(aaa)");

1707

1708

// Make sure @ past ante context doesn't enter ante context

1709

UTransPosition pos = {0, 5, 3, 5};

1710

expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",

1711

"xxxab",

1712

"xxx(ac)",

1713

&pos);

1714

1715

// Make sure @ past post context doesn't pass limit

1716

UTransPosition pos2 = {0, 4, 0, 2};

1717

expect("{b} a+ > c @@ |; x > y; a > A;",

1718

"baxx",

1719

"caxx",

1720

&pos2);

1721

1722

// Make sure @ past post context doesn't enter post context

1723

expect("{b} a+ > c @@ |; x > y; a > A;",

1724

"baxx",

1725

"cayy");

1726

1727

expect("(ab)? c > d;",

1728

"c abc ababc",

1729

"d d abd");

1730

1731

// NOTE: The (ab)+ when referenced just yields a single "ab",

1732

// not the full sequence of them. This accords with perl behavior.

1733

expect("(ab)+ {x} > '(' $1 ')';",

1734

"x abx ababxy",

1735

"x ab(ab) abab(ab)y");

1736

1737

expect("b+ > x;",

1738

"ac abc abbc abbbc",

1739

"ac axc axc axc");

1740

1741

expect("[abc]+ > x;",

1742

"qac abrc abbcs abtbbc",

1743

"qx xrx xs xtx");

1744

1745

expect("q{(ab)+} > x;",

1746

"qa qab qaba qababc qaba",

1747

"qa qx qxa qxc qxa");

1748

1749

expect("q(ab)* > x;",

1750

"qa qab qaba qababc",

1751

"xa x xa xc");

1752

1753

// NOTE: The (ab)+ when referenced just yields a single "ab",

1754

// not the full sequence of them. This accords with perl behavior.

1755

expect("q(ab)* > '(' $1 ')';",

1756

"qa qab qaba qababc",

1757

"()a (ab) (ab)a (ab)c");

1758

1759

// 'foo'+ and 'foo'* -- the quantifier should apply to the entire

1760

// quoted string

1761

expect("'ab'+ > x;",

1762

"bb ab ababb",

1763

"bb x xb");

1764

1765

// $foo+ and $foo* -- the quantifier should apply to the entire

1766

// variable reference

1767

expect("$var = ab; $var+ > x;",

1768

"bb ab ababb",

1769

"bb x xb");

1770

}

1771

1772

class TestTrans : public NullTransliterator {

1773

public:

1774

TestTrans(const UnicodeString& id) {

1775

setID(id);

1776

}

1777

};

1778

1779

/**

1780

* Test Source-Target/Variant.

1781

1782

void TransliteratorTest::TestSTV(void) {

1783

int32_t ns = Transliterator::countAvailableSources();

1784

if (ns < 0 || ns > 255) {

1785

errln((UnicodeString)"FAIL: Bad source count: " + ns);

1786

return;

1787

}

1788

int32_t i;

1789

for (i=0; i<ns; ++i) {

1790

UnicodeString source;

1791

Transliterator::getAvailableSource(i, source);

1792

logln((UnicodeString)"" + i + ": " + source);

1793

if (source.length() == 0) {

1794

errln("FAIL: empty source");

1795

continue;

1796

}

1797

int32_t nt = Transliterator::countAvailableTargets(source);

1798

if (nt < 0 || nt > 255) {

1799

errln((UnicodeString)"FAIL: Bad target count: " + nt);

1800

continue;

1801

}

1802

for (int32_t j=0; j<nt; ++j) {

1803

UnicodeString target;

1804

Transliterator::getAvailableTarget(j, source, target);

1805

logln((UnicodeString)" " + j + ": " + target);

1806

if (target.length() == 0) {

1807

errln("FAIL: empty target");

1808

continue;

1809

}

1810

int32_t nv = Transliterator::countAvailableVariants(source, target);

1811

if (nv < 0 || nv > 255) {

1812

errln((UnicodeString)"FAIL: Bad variant count: " + nv);

1813

continue;

1814

}

1815

for (int32_t k=0; k<nv; ++k) {

1816

UnicodeString variant;

1817

Transliterator::getAvailableVariant(k, source, target, variant);

1818

if (variant.length() == 0) {

1819

logln((UnicodeString)" " + k + ": <empty>");

1820

} else {

1821

logln((UnicodeString)" " + k + ": " + variant);

1822

}

1823

}

1824

}

1825

}

1826

1827

// Test registration

1828

const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };

1829

for (i=0; i<3; ++i) {

1830

Transliterator *t = new TestTrans(IDS[i]);

1831

if (t == 0) {

1832

errln("FAIL: out of memory");

1833

return;

1834

}

1835

if (t->getID() != IDS[i]) {

1836

errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);

1837

delete t;

1838

return;

1839

}

1840

Transliterator::registerInstance(t);

1841

UErrorCode status = U_ZERO_ERROR;

1842

t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);

1843

if (t == NULL) {

1844

errln((UnicodeString)"FAIL: Registration/creation failed for ID " +

1845

IDS[i]);

1846

} else {

1847

logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +

1848

IDS[i]);

1849

delete t;

1850

}

1851

Transliterator::unregister(IDS[i]);

1852

t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);

1853

if (t != NULL) {

1854

errln((UnicodeString)"FAIL: Unregistration failed for ID " +

1855

IDS[i]);

1856

delete t;

1857

}

1858

}

1859

}

1860

1861

/**

1862

* Test inverse of Greek-Latin; Title()

1863

1864

void TransliteratorTest::TestCompoundInverse(void) {

1865

UParseError parseError;

1866

UErrorCode status = U_ZERO_ERROR;

1867

Transliterator *t = Transliterator::createInstance

1868

("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);

1869

if (t == 0) {

1870

errln("FAIL: createInstance");

1871

return;

1872

}

1873

UnicodeString exp("(Title);Latin-Greek");

1874

if (t->getID() == exp) {

1875

logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +

1876

t->getID());

1877

} else {

1878

errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +

1879

t->getID() + "\", expected \"" + exp + "\"");

1880

}

1881

delete t;

1882

}

1883

1884

/**

1885

* Test NFD chaining with RBT

1886

1887

void TransliteratorTest::TestNFDChainRBT() {

1888

UParseError pe;

1889

UErrorCode ec = U_ZERO_ERROR;

1890

Transliterator* t = Transliterator::createFromRules(

1891

"TEST", "::NFD; aa > Q; a > q;",

1892

UTRANS_FORWARD, pe, ec);

1893

if (t == NULL || U_FAILURE(ec)) {

1894

errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));

1895

return;

1896

}

1897

expect(*t, "aa", "Q");

1898

delete t;

1899

1900

// TEMPORARY TESTS -- BEING DEBUGGED

1901

//=- UnicodeString s, s2;

1902

//=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);

1903

//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");

1904

//=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");

1905

//=- expect(*t, s, s2);

1906

//=- delete t;

1907

//=-

1908

//=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);

1909

//=- expect(*t, s2, s);

1910

//=- delete t;

1911

//=-

1912

//=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);

1913

//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");

1914

//=- expect(*t, s, s);

1915

//=- delete t;

1916

1917

// const char* source[] = {

1918

// /*

1919

// "\\u015Br\\u012Bmad",

1920

// "bhagavadg\\u012Bt\\u0101",

1921

// "adhy\\u0101ya",

1922

// "arjuna",

1923

// "vi\\u1E63\\u0101da",

1924

// "y\\u014Dga",

1925

// "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",

1926

// "uv\\u0101cr\\u0325",

1927

// */

1928

// "rmk\\u1E63\\u0113t",

1929

// //"dharmak\\u1E63\\u0113tr\\u0113",

1930

// /*

1931

// "kuruk\\u1E63\\u0113tr\\u0113",

1932

// "samav\\u0113t\\u0101",

1933

// "yuyutsava-\\u1E25",

1934

// "m\\u0101mak\\u0101-\\u1E25",

1935

// // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",

1936

// "kimakurvata",

1937

// "san\\u0304java",

1938

// */

1939

1940

// 0

1941

// };

1942

// const char* expected[] = {

1943

// /*

1944

// "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",

1945

// "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",

1946

// "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",

1947

// "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",

1948

// "\\u0935\\u093f\\u0937\\u093e\\u0926",

1949

// "\\u092f\\u094b\\u0917",

1950

// "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",

1951

// "\\u0909\\u0935\\u093E\\u091A\\u0943",

1952

// */

1953

// "\\u0927",

1954

// //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",

1955

// /*

1956

// "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",

1957

// "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",

1958

// "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",

1959

// "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",

1960

// // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",

1961

// "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",

1962

// "\\u0938\\u0902\\u091c\\u0935",

1963

// */

1964

// 0

1965

// };

1966

// UErrorCode status = U_ZERO_ERROR;

1967

// UParseError parseError;

1968

// UnicodeString message;

1969

// Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);

1970

// Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);

1971

// if(U_FAILURE(status)){

1972

// errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));

1973

// errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );

1974

// delete latinToDevToLatin;

1975

// delete devToLatinToDev;

1976

// return;

1977

// }

1978

// UnicodeString gotResult;

1979

// for(int i= 0; source[i] != 0; i++){

1980

// gotResult = source[i];

1981

// expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));

1982

// expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));

1983

// }

1984

// delete latinToDevToLatin;

1985

// delete devToLatinToDev;

1986

}

1987

1988

/**

1989

* Inverse of "Null" should be "Null". (J21)

1990

1991

void TransliteratorTest::TestNullInverse() {

1992

UParseError pe;

1993

UErrorCode ec = U_ZERO_ERROR;

1994

Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);

1995

if (t == 0 || U_FAILURE(ec)) {

1996

errln("FAIL: createInstance");

1997

return;

1998

}

1999

Transliterator *u = t->createInverse(ec);

2000

if (u == 0 || U_FAILURE(ec)) {

2001

errln("FAIL: createInverse");

2002

delete t;

2003

return;

2004

}

2005

if (u->getID() != "Null") {

2006

errln("FAIL: Inverse of Null should be Null");

2007

}

2008

delete t;

2009

delete u;

2010

}

2011

2012

/**

2013

* Check ID of inverse of alias. (J22)

2014

2015

void TransliteratorTest::TestAliasInverseID() {

2016

UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse

2017

UParseError pe;

2018

UErrorCode ec = U_ZERO_ERROR;

2019

Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);

2020

if (t == 0 || U_FAILURE(ec)) {

2021

errln("FAIL: createInstance");

2022

return;

2023

}

2024

Transliterator *u = t->createInverse(ec);

2025

if (u == 0 || U_FAILURE(ec)) {

2026

errln("FAIL: createInverse");

2027

delete t;

2028

return;

2029

}

2030

UnicodeString exp = "Hangul-Latin";

2031

UnicodeString got = u->getID();

2032

if (got != exp) {

2033

errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +

2034

", expected " + exp);

2035

}

2036

delete t;

2037

delete u;

2038

}

2039

2040

/**

2041

* Test IDs of inverses of compound transliterators. (J20)

2042

2043

void TransliteratorTest::TestCompoundInverseID() {

2044

UnicodeString ID = "Latin-Jamo;NFC(NFD)";

2045

UParseError pe;

2046

UErrorCode ec = U_ZERO_ERROR;

2047

Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);

2048

if (t == 0 || U_FAILURE(ec)) {

2049

errln("FAIL: createInstance");

2050

return;

2051

}

2052

Transliterator *u = t->createInverse(ec);

2053

if (u == 0 || U_FAILURE(ec)) {

2054

errln("FAIL: createInverse");

2055

delete t;

2056

return;

2057

}

2058

UnicodeString exp = "NFD(NFC);Jamo-Latin";

2059

UnicodeString got = u->getID();

2060

if (got != exp) {

2061

errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +

2062

", expected " + exp);

2063

}

2064

delete t;

2065

delete u;

2066

}

2067

2068

/**

2069

* Test undefined variable.

2070

2071

2072

void TransliteratorTest::TestUndefinedVariable() {

2073

UnicodeString rule = "$initial } a <> \\u1161;";

2074

UParseError pe;

2075

UErrorCode ec = U_ZERO_ERROR;

2076

Transliterator *t = new RuleBasedTransliterator("<ID>", rule, UTRANS_FORWARD, 0, pe, ec);

2077

delete t;

2078

if (U_FAILURE(ec)) {

2079

logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +

2080

u_errorName(ec));

2081

return;

2082

}

2083

errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +

2084

u_errorName(ec));

2085

}

2086

2087

/**

2088

* Test empty context.

2089

2090

void TransliteratorTest::TestEmptyContext() {

2091

expect(" { a } > b;", "xay a ", "xby b ");

2092

}

2093

2094

/**

2095

* Test compound filter ID syntax

2096

2097

void TransliteratorTest::TestCompoundFilterID(void) {

2098

static const char* DATA[] = {

2099

// Col. 1 = ID or rule set (latter must start with #)

2100

2101

// = columns > 1 are null if expect col. 1 to be illegal =

2102

2103

// Col. 2 = direction, "F..." or "R..."

2104

// Col. 3 = source string

2105

// Col. 4 = exp result

2106

2107

"[abc]; [abc]", NULL, NULL, NULL, // multiple filters

2108

"Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter

2109

"[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",

2110

"[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",

2111

"#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",

2112

"#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",

2113

NULL,

2114

};

2115

2116

for (int32_t i=0; DATA[i]; i+=4) {

2117

UnicodeString id = CharsToUnicodeString(DATA[i]);

2118

UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?

2119

UTRANS_REVERSE : UTRANS_FORWARD;

2120

UnicodeString source;

2121

UnicodeString exp;

2122

if (DATA[i+2] != NULL) {

2123

source = CharsToUnicodeString(DATA[i+2]);

2124

exp = CharsToUnicodeString(DATA[i+3]);

2125

}

2126

UBool expOk = (DATA[i+1] != NULL);

2127

Transliterator* t = NULL;

2128

UParseError pe;

2129

UErrorCode ec = U_ZERO_ERROR;

2130

if (id.charAt(0) == 0x23/*#*/) {

2131

t = Transliterator::createFromRules("ID", id, direction, pe, ec);

2132

} else {

2133

t = Transliterator::createInstance(id, direction, pe, ec);

2134

}

2135

UBool ok = (t != NULL && U_SUCCESS(ec));

2136

if (ok == expOk) {

2137

logln((UnicodeString)"Ok: " + id + " => " + (t!=0?t->getID():(UnicodeString)"NULL") + ", " +

2138

u_errorName(ec));

2139

if (source.length() != 0) {

2140

expect(*t, source, exp);

2141

}

2142

delete t;

2143

} else {

2144

errln((UnicodeString)"FAIL: " + id + " => " + (t!=0?t->getID():(UnicodeString)"NULL") + ", " +

2145

u_errorName(ec));

2146

}

2147

}

2148

}

2149

2150

/**

2151

* Test new property set syntax

2152

2153

void TransliteratorTest::TestPropertySet() {

2154

expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx");

2155

expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",

2156

"[ a stitch ]\n[ in time ]\r[ saves 9]");

2157

}

2158

2159

/**

2160

* Test various failure points of the new 2.0 engine.

2161

2162

void TransliteratorTest::TestNewEngine() {

2163

UParseError pe;

2164

UErrorCode ec = U_ZERO_ERROR;

2165

Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);

2166

if (t == 0 || U_FAILURE(ec)) {

2167

errln("FAIL: createInstance Latin-Hiragana");

2168

return;

2169

}

2170

// Katakana should be untouched

2171

expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),

2172

CharsToUnicodeString("\\u3042\\u3042\\u30A2"));

2173

2174

delete t;

2175

2176

#if 1

2177

// This test will only work if Transliterator.ROLLBACK is

2178

// true. Otherwise, this test will fail, revealing a

2179

// limitation of global filters in incremental mode.

2180

Transliterator *a =

2181

Transliterator::createFromRules("a", "a > A;", UTRANS_FORWARD, pe, ec);

2182

Transliterator *A =

2183

Transliterator::createFromRules("A", "A > b;", UTRANS_FORWARD, pe, ec);

2184

if (U_FAILURE(ec)) {

2185

delete a;

2186

delete A;

2187

return;

2188

}

2189

2190

Transliterator* array[3];

2191

array[0] = a;

2192

array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);

2193

array[2] = A;

2194

if (U_FAILURE(ec)) {

2195

errln("FAIL: createInstance NFD");

2196

delete a;

2197

delete A;

2198

delete array[1];

2199

return;

2200

}

2201

2202

t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));

2203

if (U_FAILURE(ec)) {

2204

errln("FAIL: UnicodeSet constructor");

2205

delete a;

2206

delete A;

2207

delete array[1];

2208

delete t;

2209

return;

2210

}

2211

2212

expect(*t, "aAaA", "bAbA");

2213

delete a;

2214

delete A;

2215

delete array[1];

2216

delete t;

2217

#endif

2218

2219

expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",

2220

"a",

2221

"ax");

2222

2223

UnicodeString gr = CharsToUnicodeString(

2224

"$ddot = \\u0308 ;"

2225

"$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"

2226

"$rough = \\u0314 ;"

2227

"($lcgvowel+ $ddot?) $rough > h | $1 ;"

2228

"\\u03b1 <> a ;"

2229

"$rough <> h ;");

2230

2231

expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");

2232

}

2233

2234

/**

2235

* Test quantified segment behavior. We want:

2236

* ([abc])+ > x $1 x; applied to "cba" produces "xax"

2237

2238

void TransliteratorTest::TestQuantifiedSegment(void) {

2239

// The normal case

2240

expect("([abc]+) > x $1 x;", "cba", "xcbax");

2241

2242

// The tricky case; the quantifier is around the segment

2243

expect("([abc])+ > x $1 x;", "cba", "xax");

2244

2245

// Tricky case in reverse direction

2246

expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");

2247

2248

// Check post-context segment

2249

expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");

2250

2251

// Test toRule/toPattern for non-quantified segment.

2252

// Careful with spacing here.

2253

UnicodeString r("([a-c]){q} > x $1 x;");

2254

UParseError pe;

2255

UErrorCode ec = U_ZERO_ERROR;

2256

Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);

2257

if (U_FAILURE(ec)) {

2258

errln("FAIL: createFromRules");

2259

delete t;

2260

return;

2261

}

2262

UnicodeString rr;

2263

t->toRules(rr, TRUE);

2264

if (r != rr) {

2265

errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");

2266

} else {

2267

logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");

2268

}

2269

delete t;

2270

2271

// Test toRule/toPattern for quantified segment.

2272

// Careful with spacing here.

2273

r = "([a-c])+{q} > x $1 x;";

2274

t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);

2275

if (U_FAILURE(ec)) {

2276

errln("FAIL: createFromRules");

2277

delete t;

2278

return;

2279

}

2280

t->toRules(rr, TRUE);

2281

if (r != rr) {

2282

errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");

2283

} else {

2284

logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");

2285

}

2286

delete t;

2287

}

2288

2289

//======================================================================

2290

// Ram's tests

2291

//======================================================================

2292

void TransliteratorTest::TestDevanagariLatinRT(){

2293

const int MAX_LEN= 52;

2294

const char* const source[MAX_LEN] = {

2295

"bh\\u0101rata",

2296

"kra",

2297

"k\\u1E63a",

2298

"khra",

2299

"gra",

2300

"\\u1E45ra",

2301

"cra",

2302

"chra",

2303

"j\\u00F1a",

2304

"jhra",

2305

"\\u00F1ra",

2306

"\\u1E6Dya",

2307

"\\u1E6Dhra",

2308

"\\u1E0Dya",

2309

//"r\\u0323ya", // \u095c is not valid in Devanagari

2310

"\\u1E0Dhya",

2311

"\\u1E5Bhra",

2312

"\\u1E47ra",

2313

"tta",

2314

"thra",

2315

"dda",

2316

"dhra",

2317

"nna",

2318

"pra",

2319

"phra",

2320

"bra",

2321

"bhra",

2322

"mra",

2323

"\\u1E49ra",

2324

//"l\\u0331ra",

2325

"yra",

2326

"\\u1E8Fra",

2327

//"l-",

2328

"vra",

2329

"\\u015Bra",

2330

"\\u1E63ra",

2331

"sra",

2332

"hma",

2333

"\\u1E6D\\u1E6Da",

2334

"\\u1E6D\\u1E6Dha",

2335

"\\u1E6Dh\\u1E6Dha",

2336

"\\u1E0D\\u1E0Da",

2337

"\\u1E0D\\u1E0Dha",

2338

"\\u1E6Dya",

2339

"\\u1E6Dhya",

2340

"\\u1E0Dya",

2341

"\\u1E0Dhya",

2342

// Not roundtrippable --

2343

// \\u0939\\u094d\\u094d\\u092E - hma

2344

// \\u0939\\u094d\\u092E - hma

2345

// CharsToUnicodeString("hma"),

2346

"hya",

2347

"\\u015Br\\u0325",

2348

"\\u015Bca",

2349

"\\u0115",

2350

"san\\u0304j\\u012Bb s\\u0113nagupta",

2351

"\\u0101nand vaddir\\u0101ju",

2352

"\\u0101",

2353

"a"

2354

};

2355

const char* const expected[MAX_LEN] = {

2356

"\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */

2357

"\\u0915\\u094D\\u0930", /* kra */

2358

"\\u0915\\u094D\\u0937", /* ks\\u0323a */

2359

"\\u0916\\u094D\\u0930", /* khra */

2360

"\\u0917\\u094D\\u0930", /* gra */

2361

"\\u0919\\u094D\\u0930", /* n\\u0307ra */

2362

"\\u091A\\u094D\\u0930", /* cra */

2363

"\\u091B\\u094D\\u0930", /* chra */

2364

"\\u091C\\u094D\\u091E", /* jn\\u0303a */

2365

"\\u091D\\u094D\\u0930", /* jhra */

2366

"\\u091E\\u094D\\u0930", /* n\\u0303ra */

2367

"\\u091F\\u094D\\u092F", /* t\\u0323ya */

2368

"\\u0920\\u094D\\u0930", /* t\\u0323hra */

2369

"\\u0921\\u094D\\u092F", /* d\\u0323ya */

2370

//"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari

2371

"\\u0922\\u094D\\u092F", /* d\\u0323hya */

2372

"\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */

2373

"\\u0923\\u094D\\u0930", /* n\\u0323ra */

2374

"\\u0924\\u094D\\u0924", /* tta */

2375

"\\u0925\\u094D\\u0930", /* thra */

2376

"\\u0926\\u094D\\u0926", /* dda */

2377

"\\u0927\\u094D\\u0930", /* dhra */

2378

"\\u0928\\u094D\\u0928", /* nna */

2379

"\\u092A\\u094D\\u0930", /* pra */

2380

"\\u092B\\u094D\\u0930", /* phra */

2381

"\\u092C\\u094D\\u0930", /* bra */

2382

"\\u092D\\u094D\\u0930", /* bhra */

2383

"\\u092E\\u094D\\u0930", /* mra */

2384

"\\u0929\\u094D\\u0930", /* n\\u0331ra */

2385

//"\\u0934\\u094D\\u0930", /* l\\u0331ra */

2386

"\\u092F\\u094D\\u0930", /* yra */

2387

"\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */

2388

//"l-",

2389

"\\u0935\\u094D\\u0930", /* vra */

2390

"\\u0936\\u094D\\u0930", /* s\\u0301ra */

2391

"\\u0937\\u094D\\u0930", /* s\\u0323ra */

2392

"\\u0938\\u094D\\u0930", /* sra */

2393

"\\u0939\\u094d\\u092E", /* hma */

2394

"\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */

2395

"\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */

2396

"\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/

2397

"\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */

2398

"\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */

2399

"\\u091F\\u094D\\u092F", /* t\\u0323ya */

2400

"\\u0920\\u094D\\u092F", /* t\\u0323hya */

2401

"\\u0921\\u094D\\u092F", /* d\\u0323ya */

2402

"\\u0922\\u094D\\u092F", /* d\\u0323hya */

2403

// "hma", /* hma */

2404

"\\u0939\\u094D\\u092F", /* hya */

2405

"\\u0936\\u0943", /* s\\u0301r\\u0325a */

2406

"\\u0936\\u094D\\u091A", /* s\\u0301ca */

2407

"\\u090d", /* e\\u0306 */

2408

"\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",

2409

"\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",

2410

"\\u0906",

2411

"\\u0905",

2412

};

2413

UErrorCode status = U_ZERO_ERROR;

2414

UParseError parseError;

2415

UnicodeString message;

2416

Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);

2417

Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);

2418

if(U_FAILURE(status)){

2419

errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));

2420

errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );

2421

return;

2422

}

2423

UnicodeString gotResult;

2424

for(int i= 0; i<MAX_LEN; i++){

2425

gotResult = source[i];

2426

expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));

2427

expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));

2428

}

2429

delete latinToDev;

2430

delete devToLatin;

2431

}

2432

2433

void TransliteratorTest::TestTeluguLatinRT(){

2434

const int MAX_LEN=10;

2435

const char* const source[MAX_LEN] = {

2436

"raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */

2437

"\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */

2438

"r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */

2439

"san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */

2440

"san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */

2441

"amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */

2442

"ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */

2443

"\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */

2444

"\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */

2445

"m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */

2446

};

2447

2448

const char* const expected[MAX_LEN] = {

2449

"\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",

2450

"\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",

2451

"\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",

2452

"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",

2453

"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",

2454

"\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",

2455

"\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",

2456

"\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",

2457

"\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",

2458

"\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",

2459

};

2460

2461

UErrorCode status = U_ZERO_ERROR;

2462

UParseError parseError;

2463

UnicodeString message;

2464

Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);

2465

Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);

2466

if(U_FAILURE(status)){

2467

errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));

2468

errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );

2469

return;

2470

}

2471

UnicodeString gotResult;

2472

for(int i= 0; i<MAX_LEN; i++){

2473

gotResult = source[i];

2474

expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));

2475

expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));

2476

}

2477

delete latinToDev;

2478

delete devToLatin;

2479

}

2480

2481

void TransliteratorTest::TestSanskritLatinRT(){

2482

const int MAX_LEN =16;

2483

const char* const source[MAX_LEN] = {

2484

"rmk\\u1E63\\u0113t",

2485

"\\u015Br\\u012Bmad",

2486

"bhagavadg\\u012Bt\\u0101",

2487

"adhy\\u0101ya",

2488

"arjuna",

2489

"vi\\u1E63\\u0101da",

2490

"y\\u014Dga",

2491

"dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",

2492

"uv\\u0101cr\\u0325",

2493

"dharmak\\u1E63\\u0113tr\\u0113",

2494

"kuruk\\u1E63\\u0113tr\\u0113",

2495

"samav\\u0113t\\u0101",

2496

"yuyutsava\\u1E25",

2497

"m\\u0101mak\\u0101\\u1E25",

2498

// "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",

2499

"kimakurvata",

2500

"san\\u0304java",

2501

};

2502

const char* const expected[MAX_LEN] = {

2503

"\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",

2504

"\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",

2505

"\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",

2506

"\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",

2507

"\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",

2508

"\\u0935\\u093f\\u0937\\u093e\\u0926",

2509

"\\u092f\\u094b\\u0917",

2510

"\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",

2511

"\\u0909\\u0935\\u093E\\u091A\\u0943",

2512

"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",

2513

"\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",

2514

"\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",

2515

"\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",

2516

"\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",

2517

//"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",

2518

"\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",

2519

"\\u0938\\u0902\\u091c\\u0935",

2520

};

2521

UErrorCode status = U_ZERO_ERROR;

2522

UParseError parseError;

2523

UnicodeString message;

2524

Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);

2525

Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);

2526

if(U_FAILURE(status)){

2527

errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));

2528

errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );

2529

return;

2530

}

2531

UnicodeString gotResult;

2532

for(int i= 0; i<MAX_LEN; i++){

2533

gotResult = source[i];

2534

expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));

2535

expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));

2536

}

2537

delete latinToDev;

2538

delete devToLatin;

2539

}

2540

2541

2542

void TransliteratorTest::TestCompoundLatinRT(){

2543

const char* const source[] = {

2544

"rmk\\u1E63\\u0113t",

2545

"\\u015Br\\u012Bmad",

2546

"bhagavadg\\u012Bt\\u0101",

2547

"adhy\\u0101ya",

2548

"arjuna",

2549

"vi\\u1E63\\u0101da",

2550

"y\\u014Dga",

2551

"dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",

2552

"uv\\u0101cr\\u0325",

2553

"dharmak\\u1E63\\u0113tr\\u0113",

2554

"kuruk\\u1E63\\u0113tr\\u0113",

2555

"samav\\u0113t\\u0101",

2556

"yuyutsava\\u1E25",

2557

"m\\u0101mak\\u0101\\u1E25",

2558

// "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",

2559

"kimakurvata",

2560

"san\\u0304java"

2561

};

2562

const int MAX_LEN = sizeof(source)/sizeof(source[0]);

2563

const char* const expected[MAX_LEN] = {

2564

"\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",

2565

"\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",

2566

"\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",

2567

"\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",

2568

"\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",

2569

"\\u0935\\u093f\\u0937\\u093e\\u0926",

2570

"\\u092f\\u094b\\u0917",

2571

"\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",

2572

"\\u0909\\u0935\\u093E\\u091A\\u0943",

2573

"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",

2574

"\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",

2575

"\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",

2576

"\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",

2577

"\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",

2578

// "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",

2579

"\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",

2580

"\\u0938\\u0902\\u091c\\u0935"

2581

};

2582

if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {

2583

errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");

2584

return;

2585

}

2586

2587

UErrorCode status = U_ZERO_ERROR;

2588

UParseError parseError;

2589

UnicodeString message;

2590

Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);

2591

Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);

2592

Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);

2593

Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);

2594

2595

if(U_FAILURE(status)){

2596

errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));

2597

errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );

2598

return;

2599

}

2600

UnicodeString gotResult;

2601

for(int i= 0; i<MAX_LEN; i++){

2602

gotResult = source[i];

2603

expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));

2604

expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));

2605

expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));

2606

2607

}

2608

delete(latinToDevToLatin);

2609

delete(devToLatinToDev);

2610

delete(devToTelToDev);

2611

delete(latinToTelToLatin);

2612

}

2613

2614

/**

2615

* Test instantiation from a locale.

2616

2617

void TransliteratorTest::TestLocaleInstantiation(void) {

2618

UParseError pe;

2619

UErrorCode ec = U_ZERO_ERROR;

2620

Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);

2621

if (U_FAILURE(ec)) {

2622

errln("FAIL: createInstance(ru_RU-Latin)");

2623

delete t;

2624

return;

2625

}

2626

expect(*t, CharsToUnicodeString("\\u0430"), "a");

2627

delete t;

2628

2629

t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);

2630

if (U_FAILURE(ec)) {

2631

errln("FAIL: createInstance(en-el)");

2632

delete t;

2633

return;

2634

}

2635

expect(*t, "a", CharsToUnicodeString("\\u03B1"));

2636

delete t;

2637

}

2638

2639

/**

2640

* Test title case handling of accent (should ignore accents)

2641

2642

void TransliteratorTest::TestTitleAccents(void) {

2643

UParseError pe;

2644

UErrorCode ec = U_ZERO_ERROR;

2645

Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);

2646

if (U_FAILURE(ec)) {

2647

errln("FAIL: createInstance(Title)");

2648

delete t;

2649

return;

2650

}

2651

expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));

2652

delete t;

2653

}

2654

2655

/**

2656

* Basic test of a locale resource based rule.

2657

2658

void TransliteratorTest::TestLocaleResource() {

2659

const char* DATA[] = {

2660

// id from to

2661

//"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",

2662

"Latin-el", "b", "\\u03bc\\u03c0",

2663

"Latin-Greek", "b", "\\u03B2",

2664

"Greek-Latin/UNGEGN", "\\u03B2", "v",

2665

"el-Latin", "\\u03B2", "v",

2666

"Greek-Latin", "\\u03B2", "b",

2667

};

2668

const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);

2669

for (int32_t i=0; i<DATA_length; i+=3) {

2670

UParseError pe;

2671

UErrorCode ec = U_ZERO_ERROR;

2672

Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);

2673

if (U_FAILURE(ec)) {

2674

errln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ")");

2675

delete t;

2676

continue;

2677

}

2678

expect(*t, CharsToUnicodeString(DATA[i+1]),

2679

CharsToUnicodeString(DATA[i+2]));

2680

delete t;

2681

}

2682

}

2683

2684

/**

2685

* Make sure parse errors reference the right line.

2686

2687

void TransliteratorTest::TestParseError() {

2688

const char* rule =

2689

"a > b;\n"

2690

"# more stuff\n"

2691

"d << b;";

2692

UErrorCode ec = U_ZERO_ERROR;

2693

UParseError pe;

2694

Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);

2695

delete t;

2696

if (U_FAILURE(ec)) {

2697

UnicodeString err(pe.preContext);

2698

err.append((UChar)124/*|*/).append(pe.postContext);

2699

if (err.indexOf("d << b") >= 0) {

2700

logln("Ok: " + err);

2701

} else {

2702

errln("FAIL: " + err);

2703

}

2704

return;

2705

}

2706

errln("FAIL: no syntax error");

2707

}

2708

2709

/**

2710

* Make sure sets on output are disallowed.

2711

2712

void TransliteratorTest::TestOutputSet() {

2713

UnicodeString rule = "$set = [a-cm-n]; b > $set;";

2714

UErrorCode ec = U_ZERO_ERROR;

2715

UParseError pe;

2716

Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);

2717

delete t;

2718

if (U_FAILURE(ec)) {

2719

UnicodeString err(pe.preContext);

2720

err.append((UChar)124/*|*/).append(pe.postContext);

2721

logln("Ok: " + err);

2722

return;

2723

}

2724

errln("FAIL: No syntax error");

2725

}

2726

2727

/**

2728

* Test the use variable range pragma, making sure that use of

2729

* variable range characters is detected and flagged as an error.

2730

2731

void TransliteratorTest::TestVariableRange() {

2732

UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";

2733

UErrorCode ec = U_ZERO_ERROR;

2734

UParseError pe;

2735

Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);

2736

delete t;

2737

if (U_FAILURE(ec)) {

2738

UnicodeString err(pe.preContext);

2739

err.append((UChar)124/*|*/).append(pe.postContext);

2740

logln("Ok: " + err);

2741

return;

2742

}

2743

errln("FAIL: No syntax error");

2744

}

2745

2746

/**

2747

* Test invalid post context error handling

2748

2749

void TransliteratorTest::TestInvalidPostContext() {

2750

UnicodeString rule = "a}b{c>d;";

2751

UErrorCode ec = U_ZERO_ERROR;

2752

UParseError pe;

2753

Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);

2754

delete t;

2755

if (U_FAILURE(ec)) {

2756

UnicodeString err(pe.preContext);

2757

err.append((UChar)124/*|*/).append(pe.postContext);

2758

if (err.indexOf("a}b{c") >= 0) {

2759

logln("Ok: " + err);

2760

} else {

2761

errln("FAIL: " + err);

2762

}

2763

return;

2764

}

2765

errln("FAIL: No syntax error");

2766

}

2767

2768

/**

2769

* Test ID form variants

2770

2771

void TransliteratorTest::TestIDForms() {

2772

const char* DATA[] = {

2773

"NFC", NULL, "NFD",

2774

"nfd", NULL, "NFC", // make sure case is ignored

2775

"Any-NFKD", NULL, "Any-NFKC",

2776

"Null", NULL, "Null",

2777

"-nfkc", "nfkc", "NFKD",

2778

"-nfkc/", "nfkc", "NFKD",

2779

"Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",

2780

"Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",

2781

"Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",

2782

"Source-", NULL, NULL,

2783

"Source/Variant-", NULL, NULL,

2784

"Source-/Variant", NULL, NULL,

2785

"/Variant", NULL, NULL,

2786

"/Variant-", NULL, NULL,

2787

"-/Variant", NULL, NULL,

2788

"-/", NULL, NULL,

2789

"-", NULL, NULL,

2790

"/", NULL, NULL,

2791

};

2792

const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);

2793

2794

for (int32_t i=0; i<DATA_length; i+=3) {

2795

const char* ID = DATA[i];

2796

const char* expID = DATA[i+1];

2797

const char* expInvID = DATA[i+2];

2798

UBool expValid = (expInvID != NULL);

2799

if (expID == NULL) {

2800

expID = ID;

2801

}

2802

UParseError pe;

2803

UErrorCode ec = U_ZERO_ERROR;

2804

Transliterator *t =

2805

Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);

2806

if (U_FAILURE(ec)) {

2807

if (!expValid) {

2808

logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));

2809

} else {

2810

errln((UnicodeString)"FAIL: Couldn't create " + ID);

2811

}

2812

delete t;

2813

continue;

2814

}

2815

Transliterator *u = t->createInverse(ec);

2816

if (U_FAILURE(ec)) {

2817

errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);

2818

delete t;

2819

delete u;

2820

continue;

2821

}

2822

if (t->getID() == expID &&

2823

u->getID() == expInvID) {

2824

logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);

2825

} else {

2826

errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +

2827

t->getID() + " x getInverse() => " + u->getID() +

2828

", expected " + expInvID);

2829

}

2830

delete t;

2831

delete u;

2832

}

2833

}

2834

2835

// NUL-terminated UChar strings: one each for space, line feed and carriage
// return, plus the empty string.
static const UChar SPACE[]   = {0x20, 0};
static const UChar NEWLINE[] = {0x0A, 0};
static const UChar RETURN[]  = {0x0D, 0};
static const UChar EMPTY[]   = {0};

2839

2840

void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,

2841

const UnicodeString& testRulesForward) {

2842

UnicodeString rules2; t2.toRules(rules2, TRUE);

2843

//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");

2844

rules2.findAndReplace(SPACE, EMPTY);

2845

rules2.findAndReplace(NEWLINE, EMPTY);

2846

rules2.findAndReplace(RETURN, EMPTY);

2847

2848

UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);

2849

2850

if (rules2 != testRules) {

2851

errln(label);

2852

logln((UnicodeString)"GENERATED RULES: " + rules2);

2853

logln((UnicodeString)"SHOULD BE: " + testRulesForward);

2854

}

2855

}

2856

2857

/**

2858

* Mark's toRules test.

2859

2860

void TransliteratorTest::TestToRulesMark() {

2861

const char* testRules =

2862

"::[[:Latin:][:Mark:]];"

2863

"::NFKD (NFC);"

2864

"::Lower (Lower);"

2865

"a <> \\u03B1;" // alpha

2866

"::NFKC (NFD);"

2867

"::Upper (Lower);"

2868

"::Lower ();"

2869

"::([[:Greek:][:Mark:]]);"

2870

;

2871

const char* testRulesForward =

2872

"::[[:Latin:][:Mark:]];"

2873

"::NFKD(NFC);"

2874

"::Lower(Lower);"

2875

"a > \\u03B1;"

2876

"::NFKC(NFD);"

2877

"::Upper (Lower);"

2878

"::Lower ();"

2879

;

2880

const char* testRulesBackward =

2881

"::[[:Greek:][:Mark:]];"

2882

"::Lower (Upper);"

2883

"::NFD(NFKC);"

2884

"\\u03B1 > a;"

2885

"::Lower(Lower);"

2886

"::NFC(NFKD);"

2887

;

2888

UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute

2889

UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute

2890

2891

UParseError pe;

2892

UErrorCode ec = U_ZERO_ERROR;

2893

Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);

2894

Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);

2895

2896

if (U_FAILURE(ec)) {

2897

delete t2;

2898

delete t3;

2899

errln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));

2900

return;

2901

}

2902

2903

expect(*t2, source, target);

2904

expect(*t3, target, source);

2905

2906

checkRules("Failed toRules FORWARD", *t2, testRulesForward);

2907

checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);

2908

2909

delete t2;

2910

delete t3;

2911

}

2912

2913

/**

2914

* Test Escape and Unescape transliterators.

2915

2916

void TransliteratorTest::TestEscape() {

2917

UParseError pe;

2918

UErrorCode ec;

2919

Transliterator *t;

2920

2921

ec = U_ZERO_ERROR;

2922

t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);

2923

if (U_FAILURE(ec)) {

2924

errln((UnicodeString)"FAIL: createInstance");

2925

} else {

2926

expect(*t,

2927

"\\x{40}\\U000000312Q",

2928

"@12Q");

2929

}

2930

delete t;

2931

2932

ec = U_ZERO_ERROR;

2933

t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);

2934

if (U_FAILURE(ec)) {

2935

errln((UnicodeString)"FAIL: createInstance");

2936

} else {

2937

expect(*t,

2938

CharsToUnicodeString("A\\U0010BEEF\\uFEED"),

2939

"\\u0041\\U0010BEEF\\uFEED");

2940

}

2941

delete t;

2942

2943

ec = U_ZERO_ERROR;

2944

t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);

2945

if (U_FAILURE(ec)) {

2946

errln((UnicodeString)"FAIL: createInstance");

2947

} else {

2948

expect(*t,

2949

CharsToUnicodeString("A\\U0010BEEF\\uFEED"),

2950

"\\u0041\\uDBEF\\uDEEF\\uFEED");

2951

}

2952

delete t;

2953

2954

ec = U_ZERO_ERROR;

2955

t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);

2956

if (U_FAILURE(ec)) {

2957

errln((UnicodeString)"FAIL: createInstance");

2958

} else {

2959

expect(*t,

2960

CharsToUnicodeString("A\\U0010BEEF\\uFEED"),

2961

"\\x{41}\\x{10BEEF}\\x{FEED}");

2962

}

2963

delete t;

2964

}

2965

2966

2967

void TransliteratorTest::TestAnchorMasking(){

2968

UnicodeString rule ("^a > Q; a > q;");

2969

UErrorCode status= U_ZERO_ERROR;

2970

UParseError parseError;

2971

2972

Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);

2973

if(U_FAILURE(status)){

2974

errln(UnicodeString("FAIL: ") + "ID" +

2975

".createFromRules() => bad rules" +

2976

/*", parse error " + parseError.code +*/

2977

", line " + parseError.line +

2978

", offset " + parseError.offset +

2979

", context " + prettify(parseError.preContext, TRUE) +

2980

", rules: " + prettify(rule, TRUE));

2981

}

2982

delete t;

2983

}

2984

2985

/**

2986

* Make sure display names of variants look reasonable.

2987

2988

void TransliteratorTest::TestDisplayName() {

2989

static const char* DATA[] = {

2990

// ID, forward name, reverse name

2991

// Update the text as necessary -- the important thing is

2992

// not the text itself, but how various cases are handled.

2993

2994

// Basic test

2995

"Any-Hex", "Any to Hex Escape", "Hex Escape to Any",

2996

2997

// Variants

2998

"Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",

2999

3000

// Target-only IDs

3001

"NFC", "Any to NFC", "Any to NFD",

3002

};

3003

3004

int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);

3005

3006

Locale US("en", "US");

3007

3008

for (int32_t i=0; i<DATA_length; i+=3) {

3009

UnicodeString name;

3010

Transliterator::getDisplayName(DATA[i], US, name);

3011

if (name != DATA[i+1]) {

3012

errln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +

3013

name + ", expected " + DATA[i+1]);

3014

} else {

3015

logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);

3016

}

3017

UErrorCode ec = U_ZERO_ERROR;

3018

UParseError pe;

3019

Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);

3020

if (U_FAILURE(ec)) {

3021

delete t;

3022

errln("FAIL: createInstance failed");

3023

continue;

3024

}

3025

name = Transliterator::getDisplayName(t->getID(), US, name);

3026

if (name != DATA[i+2]) {

3027

errln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +

3028

name + ", expected " + DATA[i+2]);

3029

} else {

3030

logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);

3031

}

3032

delete t;

3033

}

3034

}

3035

3036

void TransliteratorTest::TestSpecialCases(void) {

3037

const UnicodeString registerRules[] = {

3038

"Any-Dev1", "x > X; y > Y;",

3039

"Any-Dev2", "XY > Z",

3040

"Greek-Latin/FAKE",

3041

CharsToUnicodeString

3042

("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),

3043

"" // END MARKER

3044

};

3045

3046

static const UnicodeString testCases[] = {

3047

// NORMALIZATION

3048

// should add more test cases

3049

"NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",

3050

"NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",

3051

"NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",

3052

"NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",

3053

3054

// mp -> b BUG

3055

"Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",

3056

"Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",

3057

3058

// check for devanagari bug

3059

"nfd;Dev1;Dev2;nfc", "xy", "Z",

3060

3061

// ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE

3062

"Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,

3063

CharsToUnicodeString("Ab'cd Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,

3064

3065

//TODO: enable this test once Titlecase works right

3066

3067

"Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,

3068

CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,

3069

3070

"Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,

3071

CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,

3072

"Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,

3073

CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,

3074

3075

"Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",

3076

"Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",

3077

3078

// FORMS OF S

3079

"Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),

3080

CharsToUnicodeString("s ss s\\u0331s\\u0331") ,

3081

"Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),

3082

CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,

3083

"Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),

3084

CharsToUnicodeString("s ss s\\u0331s\\u0331") ,

3085

"Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),

3086

CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),

3087

// Tatiana bug

3088

// Upper: TAT\\u02B9\\u00C2NA

3089

// Lower: tat\\u02B9\\u00E2na

3090

// Title: Tat\\u02B9\\u00E2na

3091

"Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),

3092

CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),

3093

"Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),

3094

CharsToUnicodeString("tat\\u02B9\\u00E2na"),

3095

"Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),

3096

CharsToUnicodeString("Tat\\u02B9\\u00E2na"),

3097

3098

"" // END MARKER

3099

};

3100

3101

UParseError pos;

3102

int32_t i;

3103

for (i = 0; registerRules[i].length()!=0; i+=2) {

3104

UErrorCode status = U_ZERO_ERROR;

3105

3106

Transliterator *t = Transliterator::createFromRules(registerRules[0+i],

3107

registerRules[i+1], UTRANS_FORWARD, pos, status);

3108

if (U_FAILURE(status)) {

3109

errln("Fails: Unable to create the transliterator from rules.");

3110

} else {

3111

Transliterator::registerInstance(t);

3112

}

3113

}

3114

for (i = 0; testCases[i].length()!=0; i+=3) {

3115

UErrorCode ec = U_ZERO_ERROR;

3116

UParseError pe;

3117

const UnicodeString& name = testCases[i];

3118

Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);

3119

if (U_FAILURE(ec)) {

3120

errln((UnicodeString)"FAIL: Couldn't create " + name);

3121

delete t;

3122

continue;

3123

}

3124

const UnicodeString& id = t->getID();

3125

const UnicodeString& source = testCases[i+1];

3126

UnicodeString target;

3127

3128

// Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)

3129

3130

if (testCases[i+2].length() > 0) {

3131

target = testCases[i+2];

3132

} else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {

3133

Normalizer::normalize(source, UNORM_NFD, 0, target, ec);

3134

} else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {

3135

Normalizer::normalize(source, UNORM_NFC, 0, target, ec);

3136

} else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {

3137

Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);

3138

} else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {

3139

Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);

3140

} else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {

3141

target = source;

3142

target.toLower(Locale::US);

3143

} else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {

3144

target = source;

3145

target.toUpper(Locale::US);

3146

}

3147

if (U_FAILURE(ec)) {

3148

errln((UnicodeString)"FAIL: Internal error normalizing " + source);

3149

continue;

3150

}

3151

3152

expect(*t, source, target);

3153

delete t;

3154

}

3155

for (i = 0; registerRules[i].length()!=0; i+=2) {

3156

Transliterator::unregister(registerRules[i]);

3157

}

3158

}

3159

3160

/**
 * Writes an escaped form of a code point into a caller-supplied buffer.
 *
 * Code points up to U+FFFF are written as "\u" plus 4 hex digits; larger
 * ones as "\U" plus 8 hex digits, so the text matches the 8-digit escape
 * spelling used elsewhere in this file (e.g. "\\U0010BEEF").
 *
 * @param ch     the code point to format
 * @param buffer destination; must have room for at least 11 chars
 *               (2 prefix chars + 8 digits + terminating NUL)
 * @return buffer
 */
char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
    // %x expects an unsigned argument, so convert explicitly.
    const unsigned int value = static_cast<unsigned int>(ch);
    if (ch <= 0xFFFF) {
        sprintf(buffer, "\\u%04x", value);
    } else {
        sprintf(buffer, "\\U%08x", value);
    }
    return buffer;
}

3168

3169

void TransliteratorTest::TestSurrogateCasing (void) {

3170

// check that casing handles surrogates

3171

// titlecase is currently defective

3172

char buffer[20];

3173

UChar buffer2[20];

3174

UChar32 dee;

3175

UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);

3176

UnicodeString DEE(u_totitle(dee));

3177

if (DEE != DESERET_DEE) {

3178

err("Fails titlecase of surrogates");

3179

err(Char32ToEscapedChars(dee, buffer));

3180

err(", ");

3181

errln(Char32ToEscapedChars(DEE.char32At(0), buffer));

3182

}

3183

3184

UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;

3185

UnicodeString deedeeTest = DESERET_dee + DESERET_dee;

3186

UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;

3187

UErrorCode status= U_ZERO_ERROR;

3188

3189

u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);

3190

if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {

3191

errln("Fails: Can't uppercase surrogates.");

3192

}

3193

3194

status= U_ZERO_ERROR;

3195

u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);

3196

if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {

3197

errln("Fails: Can't lowercase surrogates.");

3198

}

3199

}

3200

3201

static void _trans(Transliterator& t, const UnicodeString& src,

3202

UnicodeString& result) {

3203

result = src;

3204

t.transliterate(result);

3205

}

3206

3207

static void _trans(const UnicodeString& id, const UnicodeString& src,

3208

UnicodeString& result, UErrorCode ec) {

3209

UParseError pe;

3210

Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);

3211

if (U_SUCCESS(ec)) {

3212

_trans(*t, src, result);

3213

}

3214

delete t;

3215

}

3216

3217

/**
 * Looks up source (case-insensitively) in a flat key/value table.
 *
 * @param source key to look for
 * @param pairs  array laid out as key0, value0, key1, value1, ... and
 *               terminated by an empty key
 * @return the value paired with the first matching key, or a reference to
 *         an empty string when no key matches
 */
static const UnicodeString& _findMatch(const UnicodeString& source,
                                       const UnicodeString* pairs) {
    static const UnicodeString empty;
    const UnicodeString* entry = pairs;
    while (entry->length() > 0) {
        if (source.caseCompare(entry[0], U_FOLD_CASE_DEFAULT) == 0) {
            return entry[1];
        }
        entry += 2; // step over key and value
    }
    return empty;
}

3227

3228

// Check to see that incremental gets at least part way through a reasonable string.

3229

3230

void TransliteratorTest::TestIncrementalProgress(void) {

3231

UErrorCode ec = U_ZERO_ERROR;

3232

UnicodeString latinTest = "The Quick Brown Fox.";

3233

UnicodeString devaTest;

3234

_trans("Latin-Devanagari", latinTest, devaTest, ec);

3235

UnicodeString kataTest;

3236

_trans("Latin-Katakana", latinTest, kataTest, ec);

3237

if (U_FAILURE(ec)) {

3238

errln("FAIL: Internal error");

3239

return;

3240

}

3241

static const UnicodeString tests[] = {

3242

"Any", latinTest,

3243

"Latin", latinTest,

3244

"Halfwidth", latinTest,

3245

"Devanagari", devaTest,

3246

"Katakana", kataTest,

3247

"" // END MARKER

3248

};

3249

3250

UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");

3251

int32_t i = 0, j=0, k=0;

3252

int32_t sources = Transliterator::countAvailableSources();

3253

for (i = 0; i < sources; i++) {

3254

UnicodeString source;

3255

Transliterator::getAvailableSource(i, source);

3256

UnicodeString test = _findMatch(source, tests);

3257

if (test.length() == 0) {

3258

logln((UnicodeString)"Skipping " + source + "-X");

3259

continue;

3260

}

3261

int32_t targets = Transliterator::countAvailableTargets(source);

3262

for (j = 0; j < targets; j++) {

3263

UnicodeString target;

3264

Transliterator::getAvailableTarget(j, source, target);

3265

int32_t variants = Transliterator::countAvailableVariants(source, target);

3266

for (k =0; k< variants; k++) {

3267

UnicodeString variant;

3268

UParseError err;

3269

UErrorCode status = U_ZERO_ERROR;

3270

3271

Transliterator::getAvailableVariant(k, source, target, variant);

3272

UnicodeString id = source + "-" + target + "/" + variant;

3273

3274

Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);

3275

if (U_FAILURE(status)) {

3276

errln((UnicodeString)"FAIL: Could not create " + id);

3277

delete t;

3278

continue;

3279

}

3280

status = U_ZERO_ERROR;

3281

CheckIncrementalAux(t, test);

3282

3283

UnicodeString rev;

3284

_trans(*t, test, rev);

3285

Transliterator *inv = t->createInverse(status);

3286

if (U_FAILURE(status)) {

3287

errln((UnicodeString)"FAIL: Could not create inverse of " + id);

3288

delete t;

3289

delete inv;

3290

continue;

3291

}

3292

CheckIncrementalAux(inv, rev);

3293

delete t;

3294

delete inv;

3295

}

3296

}

3297

}

3298

}

3299

3300

void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,

3301

const UnicodeString& input) {

3302

UErrorCode ec = U_ZERO_ERROR;

3303

UTransPosition pos;

3304

UnicodeString test = input;

3305

3306

pos.contextStart = 0;

3307

pos.contextLimit = input.length();

3308

pos.start = 0;

3309

pos.limit = input.length();

3310

3311

t->transliterate(test, pos, ec);

3312

if (U_FAILURE(ec)) {

3313

errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));

3314

return;

3315

}

3316

UBool gotError = FALSE;

3317

3318

// we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?

3319

3320

if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {

3321

errln((UnicodeString)"No Progress, " +

3322

t->getID() + ": " + formatInput(test, input, pos));

3323

gotError = TRUE;

3324

} else {

3325

logln((UnicodeString)"PASS Progress, " +

3326

t->getID() + ": " + formatInput(test, input, pos));

3327

}

3328

t->finishTransliteration(test, pos);

3329

if (pos.start != pos.limit) {

3330

errln((UnicodeString)"Incomplete, " +

3331

t->getID() + ": " + formatInput(test, input, pos));

3332

gotError = TRUE;

3333

}

3334

}

3335

3336

void TransliteratorTest::TestFunction() {

3337

// Careful with spacing and ';' here: Phrase this exactly

3338

// as toRules() is going to return it. If toRules() changes

3339

// with regard to spacing or ';', then adjust this string.

3340

UnicodeString rule = // TODO clean up spacing

3341

"([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";

3342

3343

UParseError pe;

3344

UErrorCode ec = U_ZERO_ERROR;

3345

Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);

3346

if (t == NULL) {

3347

errln("FAIL: createFromRules failed");

3348

return;

3349

}

3350

3351

UnicodeString r;

3352

t->toRules(r, TRUE);

3353

if (r == rule) {

3354

logln((UnicodeString)"OK: toRules() => " + r);

3355

} else {

3356

errln((UnicodeString)"FAIL: toRules() => " + r +

3357

", expected " + rule);

3358

}

3359

3360

expect(*t, "The Quick Brown Fox",

3361

"T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");

3362

3363

delete t;

3364

}

3365

3366

void TransliteratorTest::TestInvalidBackRef(void) {

3367

UnicodeString rule = ". > $1;";

3368

UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");

3369

UParseError pe;

3370

UErrorCode ec = U_ZERO_ERROR;

3371

Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);

3372

Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);

3373

3374

if (t != NULL) {

3375

errln("FAIL: createFromRules should have returned NULL");

3376

delete t;

3377

}

3378

3379

if (t2 != NULL) {

3380

errln("FAIL: createFromRules should have returned NULL");

3381

delete t2;

3382

}

3383

3384

if (U_SUCCESS(ec)) {

3385

errln("FAIL: Ok: . > $1; => no error");

3386

} else {

3387

logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));

3388

}

3389

}

3390

3391

//======================================================================

3392

// Support methods

3393

//======================================================================

3394

void TransliteratorTest::expectT(const UnicodeString& id,

3395

const UnicodeString& source,

3396

const UnicodeString& expectedResult) {

3397

UErrorCode ec = U_ZERO_ERROR;

3398

UParseError pe;

3399

Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);

3400

if (U_FAILURE(ec)) {

3401

errln((UnicodeString)"FAIL: Could not create " + id);

3402

delete t;

3403

return;

3404

}

3405

expect(*t, source, expectedResult);

3406

delete t;

3407

}

3408

3409

void TransliteratorTest::expect(const UnicodeString& rules,

3410

const UnicodeString& source,

3411

const UnicodeString& expectedResult,

3412

UTransPosition *pos) {

3413

UErrorCode status = U_ZERO_ERROR;

3414

Transliterator *t = new RuleBasedTransliterator("<ID>", rules, status);

3415

if (U_FAILURE(status)) {

3416

errln("FAIL: Transliterator constructor failed");

3417

} else {

3418

expect(*t, source, expectedResult, pos);

3419

}

3420

delete t;

3421

}

3422

3423

void TransliteratorTest::expect(const Transliterator& t,

3424

const UnicodeString& source,

3425

const UnicodeString& expectedResult,

3426

const Transliterator& reverseTransliterator) {

3427

expect(t, source, expectedResult);

3428

expect(reverseTransliterator, expectedResult, source);

3429

}

3430

3431

void TransliteratorTest::expect(const Transliterator& t,

3432

const UnicodeString& source,

3433

const UnicodeString& expectedResult,

3434

UTransPosition *pos) {

3435

if (pos == 0) {

3436

UnicodeString result(source);

3437

t.transliterate(result);

3438

expectAux(t.getID() + ":String", source, result, expectedResult);

3439

}

3440

3441

UTransPosition index={0, 0, 0, 0};

3442

if (pos != 0) {

3443

index = *pos;

3444

}

3445

3446

UnicodeString rsource(source);

3447

if (pos == 0) {

3448

t.transliterate(rsource);

3449

} else {

3450

// Do it all at once -- below we do it incrementally

3451

t.finishTransliteration(rsource, *pos);

3452

}

3453

expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);

3454

3455

// Test keyboard (incremental) transliteration -- this result

3456

// must be the same after we finalize (see below).

3457

UnicodeString log;

3458

rsource.remove();

3459

if (pos != 0) {

3460

rsource = source;

3461

formatInput(log, rsource, index);

3462

log.append(" -> ");

3463

UErrorCode status = U_ZERO_ERROR;

3464

t.transliterate(rsource, index, status);

3465

formatInput(log, rsource, index);

3466

} else {

3467

for (int32_t i=0; i<source.length(); ++i) {

3468

if (i != 0) {

3469

log.append(" + ");

3470

}

3471

log.append(source.charAt(i)).append(" -> ");

3472

UErrorCode status = U_ZERO_ERROR;

3473

t.transliterate(rsource, index, source.charAt(i), status);

3474

formatInput(log, rsource, index);

3475

}

3476

}

3477

3478

// As a final step in keyboard transliteration, we must call

3479

// transliterate to finish off any pending partial matches that

3480

// were waiting for more input.

3481

t.finishTransliteration(rsource, index);

3482

log.append(" => ").append(rsource);

3483

3484

expectAux(t.getID() + ":Keyboard", log,

3485

rsource == expectedResult,

3486

expectedResult);

3487

}

3488

3489

3490

/**

3491

* @param appendTo result is appended to this param.

3492

* @param input the string being transliterated

3493

* @param pos the index struct

3494

3495

UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,

3496

const UnicodeString& input,

3497

const UTransPosition& pos) {

3498

// Output a string of the form aaa{bbb|ccc|ddd}eee, where

3499

// the {} indicate the context start and limit, and the ||

3500

// indicate the start and limit.

3501

if (0 <= pos.contextStart &&

3502

pos.contextStart <= pos.start &&

3503

pos.start <= pos.limit &&

3504

pos.limit <= pos.contextLimit &&

3505

pos.contextLimit <= input.length()) {

3506

3507

UnicodeString a, b, c, d, e;

3508

input.extractBetween(0, pos.contextStart, a);

3509

input.extractBetween(pos.contextStart, pos.start, b);

3510

input.extractBetween(pos.start, pos.limit, c);

3511

input.extractBetween(pos.limit, pos.contextLimit, d);

3512

input.extractBetween(pos.contextLimit, input.length(), e);

3513

appendTo.append(a).append((UChar)123/*{*/).append(b).

3514

append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).

3515

append((UChar)125/*}*/).append(e);

3516

} else {

3517

appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +

3518

pos.contextStart + ", s=" + pos.start + ", l=" +

3519

pos.limit + ", cl=" + pos.contextLimit + "} on " +

3520

input);

3521

}

3522

return appendTo;

3523

}

3524

3525

void TransliteratorTest::expectAux(const UnicodeString& tag,

3526

const UnicodeString& source,

3527

const UnicodeString& result,

3528

const UnicodeString& expectedResult) {

3529

expectAux(tag, source + " -> " + result,

3530

result == expectedResult,

3531

expectedResult);

3532

}

3533

3534

void TransliteratorTest::expectAux(const UnicodeString& tag,

3535

const UnicodeString& summary, UBool pass,

3536

const UnicodeString& expectedResult) {

3537

if (pass) {

3538

logln(UnicodeString("(")+tag+") " + prettify(summary));

3539

} else {

3540

errln(UnicodeString("FAIL: (")+tag+") "

3541

+ prettify(summary)

3542

+ ", expected " + prettify(expectedResult));

3543

}

3544

}

Older »