580
UnicodeStringTest::TestCaseConversion()
// Exercises UnicodeString::toLower(), toUpper() and foldCase() with the
// default mapping and with explicit locales (tr, de, el, lt, en), comparing
// each result against a hard-coded expected string and calling errln() with a
// descriptive message on mismatch.
//
// NOTE(review): the bare integers that sit on their own lines below appear to
// be line numbers of the original file carried over by text extraction, and
// several original lines (braces, some declarations, some case-mapping calls
// and `if` lines) are not present in this view. Comments here describe only
// what the visible lines show.
582
// Greek test data as UTF-16 code units. The visible part of the lowercase
// array uses U+03C2 (final sigma) immediately before the space (0x20) where
// the uppercase array has U+03A3; the other sigmas map to U+03C3.
// (Both initializers are cut off in this view.)
UChar uppercaseGreek[] =
583
{ 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
587
UChar lowercaseGreek[] =
588
{ 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
592
// NUL-terminated "istanbul, not constant<U+0131>nople!" -- note the dotless i
// (U+0131) inside "constantinople".
UChar lowercaseTurkish[] =
593
{ 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
594
0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
596
// NUL-terminated "TOPKAPI PALACE, <U+0130>STANBUL" -- dotted capital I
// (U+0130) starts "ISTANBUL".
UChar uppercaseTurkish[] =
597
{ 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
598
0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
600
UnicodeString expectedResult;
603
// Build the input: U+0130 followed by "STANBUL, NOT CONSTANTINOPLE!".
// (The declaration of test3 is not visible in this view.)
test3 += (UChar32)0x0130;
604
test3 += "STANBUL, NOT CONSTANTINOPLE!";
606
// Check 1: test4 starts as a copy of test3 and is compared with the plain
// ASCII lowercase text. NOTE(review): the toLower() call that this check's
// message refers to is presumably on a line missing here -- confirm.
UnicodeString test4(test3);
608
expectedResult = "istanbul, not constantinople!";
609
if (test4 != expectedResult)
610
errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
613
// Check 2: lowercase with the Turkish locale and compare against the
// lowercaseTurkish array (which contains dotless i, U+0131).
test4.toLower(Locale("tr", "TR"));
614
expectedResult = lowercaseTurkish;
615
if (test4 != expectedResult)
616
errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
619
// Append dotless i (U+0131) and " palace, istanbul" to test3.
// NOTE(review): the expected value below is "TOPKAPI PALACE, ISTANBUL", so
// test3 is presumably reset to a "topkap"-like prefix, copied into test4 and
// uppercased on lines not shown here -- confirm against the full file.
test3 += (UChar32)0x0131;
620
test3 += " palace, istanbul";
624
expectedResult = "TOPKAPI PALACE, ISTANBUL";
625
if (test4 != expectedResult)
626
errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
629
// Uppercase with the Turkish locale; expected text is uppercaseTurkish, which
// uses dotted capital I (U+0130) in the last word.
test4.toUpper(Locale("tr", "TR"));
630
expectedResult = uppercaseTurkish;
631
if (test4 != expectedResult)
632
errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
634
// German: input contains U+00FC and two U+00DF (sharp s); the expected
// uppercase form spells each sharp s as "SS", so the result is longer than the
// input. CharsToUnicodeString is presumably a test helper that interprets the
// \u escapes -- TODO confirm.
test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
636
test3.toUpper(Locale("de", "DE"));
637
expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
638
if (test3 != expectedResult)
639
errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");
641
// Greek: replace the whole of test4 with the uppercase data, lowercase it with
// the el_GR locale and compare with lowercaseGreek.
test4.replace(0, test4.length(), uppercaseGreek);
643
test4.toLower(Locale("el", "GR"));
644
expectedResult = lowercaseGreek;
645
if (test4 != expectedResult)
646
errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
648
// Greek, reverse direction: start from lowercaseGreek and expect
// uppercaseGreek. NOTE(review): the toUpper() call itself is not visible in
// this view.
test4.replace(0, test4.length(), lowercaseGreek);
651
expectedResult = uppercaseGreek;
652
if (test4 != expectedResult)
653
errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
655
// more string case mapping tests with the new implementation
659
// Input/expected pairs as UTF-16 code units (the declaration's type line is
// not visible here). Every array ends with the surrogate pair 0xd93f 0xdfff,
// which is identical in input and expected output, i.e. it must pass through
// unchanged.
// Lowercasing: root expects 0x49 ('I') -> 0x69 ('i'); Turkish expects
// 0x49 -> 0x131 (dotless i). The second U+03A3 becomes final sigma U+03C2.
beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
660
lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
661
lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
663
// Uppercasing: 0xdf expands to 0x53 0x53 ("SS") and each 0xfb03 expands to
// 0x46 0x46 0x49 ("FFI"), so the expected arrays are longer than the input.
// Root expects 0x69 ('i') -> 0x49 ('I'); Turkish expects 0x69 -> 0x130.
beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
664
upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
665
upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
667
// Two-unit input that grows to three units when uppercased (0xdf -> "SS").
beforeMiniUpper[]= { 0xdf, 0x61 },
668
miniUpper[]= { 0x53, 0x53, 0x41 };
672
/* lowercase with root locale */
673
// Each check below first compares lengths (array size in UChars) and then the
// contents against a UnicodeString wrapping the expected array.
// NOTE(review): UnicodeString(FALSE, ptr, len) is presumably ICU's read-only
// aliasing constructor with isTerminated=FALSE -- confirm against the ICU API
// docs. The lowercasing call for this first check is not visible in this view.
s=UnicodeString(FALSE, beforeLower, (int32_t)(sizeof(beforeLower)/U_SIZEOF_UCHAR));
675
if( s.length()!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) ||
676
s!=UnicodeString(FALSE, lowerRoot, s.length())
678
errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, (int32_t)(sizeof(lowerRoot)/U_SIZEOF_UCHAR)) + "\"");
681
/* lowercase with Turkish locale */
682
s=UnicodeString(FALSE, beforeLower, (int32_t)(sizeof(beforeLower)/U_SIZEOF_UCHAR));
683
// setCharAt(0, ...) writes back the value already at index 0, so the content
// is unchanged; presumably it is there to force s off the aliased array before
// the in-place mapping -- TODO confirm.
s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
684
if( s.length()!=(sizeof(lowerTurkish)/U_SIZEOF_UCHAR) ||
685
s!=UnicodeString(FALSE, lowerTurkish, s.length())
687
errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, (int32_t)(sizeof(lowerTurkish)/U_SIZEOF_UCHAR)) + "\"");
690
/* uppercase with root locale */
691
s=UnicodeString(FALSE, beforeUpper, (int32_t)(sizeof(beforeUpper)/U_SIZEOF_UCHAR));
692
// toUpper() is called without a locale argument here.
s.setCharAt(0, beforeUpper[0]).toUpper();
693
if( s.length()!=(sizeof(upperRoot)/U_SIZEOF_UCHAR) ||
694
s!=UnicodeString(FALSE, upperRoot, s.length())
696
errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, (int32_t)(sizeof(upperRoot)/U_SIZEOF_UCHAR)) + "\"");
699
/* uppercase with Turkish locale */
700
s=UnicodeString(FALSE, beforeUpper, (int32_t)(sizeof(beforeUpper)/U_SIZEOF_UCHAR));
701
// Unlike the neighbouring checks, this one maps the string directly, without
// a preceding setCharAt().
s.toUpper(Locale("tr"));
702
if( s.length()!=(sizeof(upperTurkish)/U_SIZEOF_UCHAR) ||
703
s!=UnicodeString(FALSE, upperTurkish, s.length())
705
errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, (int32_t)(sizeof(upperTurkish)/U_SIZEOF_UCHAR)) + "\"");
708
/* uppercase a short string with root locale */
709
s=UnicodeString(FALSE, beforeMiniUpper, (int32_t)(sizeof(beforeMiniUpper)/U_SIZEOF_UCHAR));
710
// The root locale is requested with an empty locale string.
s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
711
if( s.length()!=(sizeof(miniUpper)/U_SIZEOF_UCHAR) ||
712
s!=UnicodeString(FALSE, miniUpper, s.length())
714
errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, (int32_t)(sizeof(miniUpper)/U_SIZEOF_UCHAR)) + "\"");
718
// test some supplementary characters (>= Unicode 3.1)
723
// Input is U+1043C followed by U+10414. Lowercasing is expected to yield
// U+1043C twice, uppercasing U+10414 twice. The strings are written with
// escaped \U sequences and converted by unescape().
deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
724
deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
725
deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
726
// t is reassigned from the input before each mapping, so the two checks are
// independent of each other.
(t=deseretInput).toLower();
727
if(t!=deseretLower) {
728
errln("error lowercasing Deseret (plane 1) characters");
730
(t=deseretInput).toUpper();
731
if(t!=deseretUpper) {
732
errln("error uppercasing Deseret (plane 1) characters");
736
// test some more cases that looked like problems
741
// Mixed input: ASCII with an apostrophe, U+FB00, the four i variants
// (i, U+0131, I, U+0130), the three code points U+01C7/U+01C8/U+01C9, and the
// Deseret pair. Expected lowercase turns all three U+01Cx into U+01C9 and
// leaves U+FB00 as is; expected uppercase turns them into U+01C7 and spells
// U+FB00 as "FF".
ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
742
ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
743
ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
744
// Both mappings use the "en" locale. (The comparison lines guarding the two
// errln() calls are not visible in this view.)
(t=ljInput).toLower("en");
746
errln("error lowercasing LJ characters");
748
(t=ljInput).toUpper("en");
750
errln("error uppercasing LJ characters");
754
// Unicode 3.1.1 SpecialCasing tests
758
// sigmas preceded and/or followed by cased letters
760
// Four space-separated groups, each containing U+03A3 with combining marks
// around it. In the expected lowercase string only the third group
// ("i", U+00AD, sigma, U+0308, then a space) gets final sigma U+03C2; the
// other three get U+03C3. Uppercasing only changes the ASCII letters i and j.
sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
761
sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
762
sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
764
(t=sigmas).toLower();
766
errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
769
(t=sigmas).toUpper();
771
errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
774
// Turkish & Azerbaijani dotless i & dotted I
775
// remove dot above if there was a capital I before and there are no more accents above
777
// Same input lowercased twice: with "tr" a plain I is expected to become
// U+0131, with "de" (standing in for the default mapping, per the variable
// name dotsDefault) it is expected to become i.
dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
778
dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape(),
779
dotsDefault=UnicodeString("i i i i\\u0327 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
781
(t=dots).toLower("tr");
783
errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
786
(t=dots).toLower("de");
788
errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
792
// more Unicode 3.1.1 tests
796
// Lithuanian: dot above in uppercasing
798
// With "lt" the expected result drops U+0307 after i and after j + U+0327,
// but keeps it after a, after a space, and after j + U+0301; with "de" every
// U+0307 is kept.
dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
799
dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
800
dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
802
(t=dots).toUpper("lt");
803
if(t!=dotsLithuanian) {
804
errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
807
(t=dots).toUpper("de");
809
errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
812
// Lithuanian adds dot above to i in lowercasing if there are more above accents
814
// Expected "lt" output inserts U+0307 between the lowercased base letter and a
// following accent above, and decomposes U+00CC/U+00CD/U+0128 into
// i + U+0307 + accent; the default output maps them to U+00EC/U+00ED/U+0129.
// NOTE(review): the toLower("lt") / toLower("de") calls and their `if` lines
// are not visible in this view; only the error messages are.
i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
815
iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
816
iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
820
errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
825
errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
832
// Case folding data: s is the input, f the expected default folding
// (U+00DF -> "ss", U+00B5 -> U+03BC, U+FB03 -> "ffi", U+1040C -> U+10434,
// U+0131 -> "i"). The numeric arguments 35 and 23 equal the number of
// characters in the escaped literals as written, i.e. before unescape().
// (The default foldCase() call and its comparison are not visible here.)
s=UNICODE_STRING("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0131", 35).unescape(),
833
f=UNICODE_STRING("ass\\u03bcffi\\U00010434i", 23).unescape(),
838
errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
841
// alternate handling for dotted I/dotless i (U+0130, U+0131)
842
// With U_FOLD_CASE_EXCLUDE_SPECIAL_I the trailing U+0131 is expected to stay
// as is, so the last code unit of the expected string is patched from 'i' to
// U+0131 before folding again.
f.setCharAt(f.length()-1, 0x131);
843
(t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
845
errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + f + "\"");
851
578
UnicodeStringTest::TestSearching()
853
580
UnicodeString test1("test test ttest tetest testesteststt");