~ubuntu-branches/ubuntu/gutsy/icu/gutsy

« back to all changes in this revision

Viewing changes to source/test/cintltst/nucnvtst.c

  • Committer: Package Import Robot
  • Author(s): Matthias Klose
  • Date: 2005-05-21 22:44:31 UTC
  • mfrom: (2.1.1 warty)
  • Revision ID: package-import@ubuntu.com-20050521224431-r7rktfhnu1n4tf1g
Tags: 2.1-2.1
Rename icu-doc to icu21-doc. icu-doc is built by the icu28 package.

Show diffs side-by-side

added added

removed removed

Lines of Context:
66
66
                     const char* lang, 
67
67
                     char byteArr[],
68
68
                     int byteArrLen);
69
 
 
 
69
static void TestRoundTrippingAllUTF(void);
70
70
static void TestCoverageMBCS(void);
71
71
 
72
72
void addTestNewConvert(TestNode** root);
223
223
   addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
224
224
   addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
225
225
   addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
 
226
   addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
226
227
 
227
228
}
228
229
 
1107
1108
    {
1108
1109
 
1109
1110
        /* MBCS with three byte codepage data test4.ucm*/
1110
 
        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1111
 
        const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1112
 
        int32_t  totest4Offs[]        = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
 
1111
        static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
 
1112
        static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
 
1113
        static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1113
1114
 
1114
 
        const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1115
 
        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1116
 
        int32_t fromtest4Offs[]       = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
 
1115
        static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
 
1116
        static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
 
1117
        static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1117
1118
 
1118
1119
        /*from Unicode*/
1119
1120
        if(!testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1134
1135
 
1135
1136
}
1136
1137
 
 
1138
/**
 * Opens the converter called convName and verifies that ucnv_getType()
 * reports convType for it.
 *
 * @param convName  converter name handed to ucnv_open()
 * @param convType  the converter type that ucnv_getType() is expected to return
 *
 * Logs an error (and returns without further checks) when the converter
 * cannot be opened; logs an error when the reported type differs.
 */
static void TestConverterType(const char *convName, UConverterType convType) {
    UConverter* myConverter;
    UConverterType actualType;
    UErrorCode err = U_ZERO_ERROR;

    myConverter = ucnv_open(convName, &err);
    if (U_FAILURE(err)) {
        log_err("Failed to create an %s converter\n", convName);
        return;
    }

    /* Keep the value that was actually returned so that the failure message
     * reports what we got rather than what we expected. */
    actualType = ucnv_getType(myConverter);
    if (actualType != convType) {
        log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
            convName, (unsigned int)actualType);
    }
    else {
        log_verbose("ucnv_getType %s ok\n", convName);
    }
    ucnv_close(myConverter);
}
1137
1159
 
1138
1160
static void TestConverterTypesAndStarters()
1139
1161
{
1140
 
    UConverter* myConverter[3];
 
1162
    UConverter* myConverter;
1141
1163
    UErrorCode err = U_ZERO_ERROR;
1142
1164
    UBool mystarters[256];
1143
1165
 
1170
1192
        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1171
1193
 
1172
1194
 
1173
 
  log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
 
1195
    log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1174
1196
 
1175
 
    myConverter[0] = ucnv_open("ksc", &err);
 
1197
    myConverter = ucnv_open("ksc", &err);
1176
1198
    if (U_FAILURE(err)) {
1177
1199
      log_err("Failed to create an ibm-ksc converter\n");
1178
1200
      return;
1179
1201
    }
1180
1202
    else
1181
1203
    {
1182
 
        if (ucnv_getType(myConverter[0])!=UCNV_MBCS)
 
1204
        if (ucnv_getType(myConverter)!=UCNV_MBCS)
1183
1205
            log_err("ucnv_getType Failed for ibm-949\n");
1184
1206
        else
1185
1207
            log_verbose("ucnv_getType ibm-949 ok\n");
1186
1208
 
1187
 
        if(myConverter[0]!=NULL)
1188
 
            ucnv_getStarters(myConverter[0], mystarters, &err);
 
1209
        if(myConverter!=NULL)
 
1210
            ucnv_getStarters(myConverter, mystarters, &err);
1189
1211
 
1190
1212
        /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1191
1213
          log_err("Failed ucnv_getStarters for ksc\n");
1193
1215
          log_verbose("ucnv_getStarters ok\n");*/
1194
1216
 
1195
1217
    }
1196
 
 
1197
 
    myConverter[1] = ucnv_open("ibm-930", &err);
1198
 
    if (U_FAILURE(err)) {
1199
 
        log_err("Failed to create an ibm-930 converter\n");
1200
 
        return;
1201
 
    }
1202
 
    else
1203
 
    {
1204
 
        if (ucnv_getType(myConverter[1])!=UCNV_EBCDIC_STATEFUL)
1205
 
            log_err("ucnv_getType Failed for ibm-930\n");
1206
 
        else
1207
 
            log_verbose("ucnv_getType ibm-930 ok\n");
1208
 
    }
1209
 
 
1210
 
    myConverter[2] = ucnv_open("ibm-878", &err);
1211
 
    if (U_FAILURE(err)) {
1212
 
      log_err("Failed to create an ibm-815 converter\n");
1213
 
      return;
1214
 
    }
1215
 
    else
1216
 
      {
1217
 
        if (ucnv_getType(myConverter[2])!=UCNV_SBCS) log_err("ucnv_getType Failed for ibm-815\n");
1218
 
        else log_verbose("ucnv_getType ibm-815 ok\n");
1219
 
      }
1220
 
 
1221
 
 
1222
 
    ucnv_close(myConverter[0]);
1223
 
    ucnv_close(myConverter[1]);
1224
 
    ucnv_close(myConverter[2]);
 
1218
    ucnv_close(myConverter);
 
1219
 
 
1220
    TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
 
1221
    TestConverterType("ibm-878", UCNV_SBCS);
 
1222
    TestConverterType("iso-8859-1", UCNV_LATIN_1);
 
1223
    TestConverterType("ibm-1208", UCNV_UTF8);
 
1224
    TestConverterType("utf-8", UCNV_UTF8);
 
1225
    TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
 
1226
    TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
 
1227
    TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
 
1228
    TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
 
1229
    TestConverterType("iso-2022", UCNV_ISO_2022);
 
1230
    TestConverterType("hz", UCNV_HZ);
 
1231
    TestConverterType("scsu", UCNV_SCSU);
 
1232
    TestConverterType("x-iscii-de", UCNV_ISCII);
 
1233
    TestConverterType("ascii", UCNV_US_ASCII);
 
1234
    TestConverterType("utf-7", UCNV_UTF7);
1225
1235
}
1226
1236
 
1227
1237
static void
2344
2354
        uSourceLimit=uSource;
2345
2355
        do{
2346
2356
 
2347
 
            uSourceLimit = uSourceLimit+1;
 
2357
            if (uSourceLimit < sourceLimit) {
 
2358
                uSourceLimit = uSourceLimit+1;
 
2359
            }
2348
2360
            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2349
2361
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2350
2362
               errorCode=U_ZERO_ERROR;
2360
2372
 
2361
2373
        cSourceLimit =cBuf;
2362
2374
        do{
2363
 
            cSourceLimit =cSourceLimit+1;
 
2375
            if (cSourceLimit < cBuf + (cTarget - cBuf)) {
 
2376
                cSourceLimit = cSourceLimit+1;
 
2377
            }
2364
2378
            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2365
2379
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2366
2380
               errorCode=U_ZERO_ERROR;
2577
2591
        0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
2578
2592
        0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
2579
2593
        0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
2580
 
        0x0964,0x093C,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
 
2594
        0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
2581
2595
        0x096D,0x096E,0x096F,
2582
2596
        /* test Soft halant*/
2583
2597
        0x0915,0x094d, 0x200D,
2602
2616
        0x0C85, 0xC82, 0x0C83,
2603
2617
        /* test Abbr sign and Anudatta */
2604
2618
        0x0970, 0x952,
2605
 
 
2606
 
 
2607
 
       
 
2619
       /* 0x0958,
 
2620
        0x0959,
 
2621
        0x095A,
 
2622
        0x095B,
 
2623
        0x095C,
 
2624
        0x095D,
 
2625
        0x095E,
 
2626
        0x095F,*/
 
2627
        0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
 
2628
        0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
 
2629
        0x090C ,
 
2630
        0x0962,
 
2631
        0x0961 /* Vocallic LL 0xa6, 0xE9 */,
 
2632
        0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
 
2633
        0x0950 /* OM Symbol 0xa1, 0xE9,*/,
 
2634
        0x093D /* Avagraha  0xEA, 0xE9*/, 
 
2635
        0x0958,
 
2636
        0x0959,
 
2637
        0x095A,
 
2638
        0x095B,
 
2639
        0x095C,
 
2640
        0x095D,
 
2641
        0x095E,
2608
2642
 
2609
2643
      };
2610
2644
    static const unsigned char byteArr[]={
2643
2677
        /* kannada range */
2644
2678
        0xEF, 0x48,0xa4, 0xa2, 0xa3,
2645
2679
        /* anudatta and abbreviation sign */
2646
 
        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8
 
2680
        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
 
2681
 
 
2682
       
 
2683
        0xAA, 0xE9,/* RI + NUKTA 0x0960*/
 
2684
        
 
2685
        0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
 
2686
        
 
2687
        0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
 
2688
        
 
2689
        0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
 
2690
        
 
2691
        0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
 
2692
        
 
2693
        0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
 
2694
        
 
2695
        0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
 
2696
        
 
2697
        0xEA, 0xE9, /* Danda + Nukta 0x093D*/
 
2698
        
 
2699
        0xB3, 0xE9, /* Ka + NUKTA */
 
2700
        
 
2701
        0xB4, 0xE9, /* Kha + NUKTA */
 
2702
        
 
2703
        0xB5, 0xE9, /* Ga + NUKTA */
 
2704
 
 
2705
        0xBA, 0xE9,
 
2706
 
 
2707
        0xBF, 0xE9,
 
2708
        
 
2709
        0xC0, 0xE9,
 
2710
 
 
2711
        0xC9, 0xE9,
 
2712
 
2647
2713
    };
2648
2714
      
2649
2715
    TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
2738
2804
    const char *cTargetLimit;
2739
2805
    char *cBuf;
2740
2806
    UChar *uBuf,*test;
2741
 
    int32_t uBufSize = 120;
 
2807
    int32_t uBufSize = 120*10;
2742
2808
    UErrorCode errorCode=U_ZERO_ERROR;
2743
2809
    UConverter *cnv;
2744
 
    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
 
2810
    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
2745
2811
    int32_t* myOff= offsets;
2746
2812
    cnv=ucnv_open(conv, &errorCode);
2747
2813
    if(U_FAILURE(errorCode)) {
2749
2815
        return;
2750
2816
    }
2751
2817
 
2752
 
    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2753
 
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
 
2818
    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
 
2819
    cBuf =(char*)malloc(uBufSize * sizeof(char));
2754
2820
    uSource = (const UChar*)&in[0];
2755
2821
    uSourceLimit=uSource+len;
2756
2822
    cTarget = cBuf;
2757
 
    cTargetLimit = cBuf +uBufSize*5;
 
2823
    cTargetLimit = cBuf +uBufSize;
2758
2824
    uTarget = uBuf;
2759
 
    uTargetLimit = uBuf+ uBufSize*5;
 
2825
    uTargetLimit = uBuf+ uBufSize;
2760
2826
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
2761
2827
    if(U_FAILURE(errorCode)){
2762
2828
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2763
2829
        return;
2764
2830
    }
2765
 
    log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));
 
2831
    /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
2766
2832
    cSource = cBuf;
2767
2833
    cSourceLimit =cTarget;
2768
2834
    test =uBuf;
2776
2842
    uSource = (const UChar*)&in[0];
2777
2843
    while(uSource<uSourceLimit){
2778
2844
        if(*test!=*uSource){
2779
 
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
 
2845
            log_err("for codeapge %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
2780
2846
        }
2781
2847
        uSource++;
2782
2848
        test++;
2814
2880
    free(cBuf);
2815
2881
    free(offsets);
2816
2882
}
 
2883
static UChar
 
2884
_charAt(int32_t offset, void *context) {
 
2885
    return ((char*)context)[offset];
 
2886
}
 
2887
 
 
2888
static int32_t
 
2889
unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
 
2890
    int32_t srcIndex=0;
 
2891
    int32_t dstIndex=0;
 
2892
    if(U_FAILURE(*status)){
 
2893
        return 0;
 
2894
    }
 
2895
    if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
 
2896
        *status = U_ILLEGAL_ARGUMENT_ERROR;
 
2897
        return 0;
 
2898
    }
 
2899
    if(srcLen==-1){
 
2900
        srcLen = uprv_strlen(src);
 
2901
    }
 
2902
 
 
2903
    for (; srcIndex<srcLen; ) {
 
2904
        UChar32 c = src[srcIndex++];
 
2905
        if (c == 0x005C /*'\\'*/) {
 
2906
            c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
 
2907
            if (c == (UChar32)0xFFFFFFFF) {
 
2908
                *status=U_INVALID_CHAR_FOUND; /* return empty string */
 
2909
                break; /* invalid escape sequence */
 
2910
            }
 
2911
        }
 
2912
        if(dstIndex < dstLen){
 
2913
            if(c>0xFFFF){
 
2914
               dst[dstIndex++] = UTF16_LEAD(c);
 
2915
               if(dstIndex<dstLen){
 
2916
                    dst[dstIndex]=UTF16_TRAIL(c);
 
2917
               }else{
 
2918
                   *status=U_BUFFER_OVERFLOW_ERROR;
 
2919
               }
 
2920
            }else{
 
2921
                dst[dstIndex]=(UChar)c;
 
2922
            }
 
2923
 
 
2924
        }else{
 
2925
            *status = U_BUFFER_OVERFLOW_ERROR;
 
2926
        }
 
2927
        dstIndex++; /* for preflighting */
 
2928
    }
 
2929
    return dstIndex;
 
2930
}
 
2931
 
 
2932
static void
 
2933
TestFullRoundtrip(const char* cp){
 
2934
    UChar usource[10] ={0};
 
2935
    UChar nsrc[10] = {0};
 
2936
    uint32_t i=1;
 
2937
    int len=0;
 
2938
    nsrc[0]=0x0061;
 
2939
    /* Test codepoint 0 */
 
2940
    TestConv(usource,1,cp,"",NULL,0);
 
2941
    TestConv(usource,2,cp,"",NULL,0);
 
2942
    nsrc[2]=0x5555;
 
2943
    TestConv(nsrc,3,cp,"",NULL,0);
 
2944
 
 
2945
    for(;i<=0x10FFFF;i++){
 
2946
        if(i>=0xD800 && i<=0xDFFF){
 
2947
            continue;
 
2948
        }
 
2949
        if(i<=0xFFFF){
 
2950
            usource[0] =(UChar) i;
 
2951
            len=1;
 
2952
        }else{
 
2953
            usource[0]=UTF16_LEAD(i);
 
2954
            usource[1]=UTF16_TRAIL(i);
 
2955
            len=2;
 
2956
        }
 
2957
        /* Test only single code points */
 
2958
        TestConv(usource,u_strlen(usource),cp,"",NULL,0);
 
2959
        /* Test codepoint repeated twice */
 
2960
        u_strncat(usource,usource,len);
 
2961
        TestConv(usource,u_strlen(usource),cp,"",NULL,0);
 
2962
        /* Test codepoint repeated 3 times */
 
2963
        u_strncat(usource,usource,len);
 
2964
        TestConv(usource,u_strlen(usource),cp,"",NULL,0);
 
2965
        /* Test codepoint in between 2 codepoints */
 
2966
        nsrc[1]=0;
 
2967
        u_strncat(nsrc,usource,len);
 
2968
        nsrc[len+1]=0x5555;
 
2969
        TestConv(nsrc,len+2,cp,"",NULL,0);        
 
2970
        uprv_memset(usource,0,sizeof(UChar)*10);
 
2971
    }
 
2972
}
 
2973
 
 
2974
static void
 
2975
TestRoundTrippingAllUTF(void){
 
2976
    if(!QUICK){
 
2977
        log_verbose("Running exhaustive round trip test for SCSU\n");
 
2978
        TestFullRoundtrip("SCSU");
 
2979
        log_verbose("Running exhaustive round trip test for UTF-8\n");
 
2980
        TestFullRoundtrip("UTF-8");
 
2981
        log_verbose("Running exhaustive round trip test for UTF-16BE\n");
 
2982
        TestFullRoundtrip("UTF-16BE");
 
2983
        log_verbose("Running exhaustive round trip test for UTF-16LE\n");
 
2984
        TestFullRoundtrip("UTF-16LE");
 
2985
        log_verbose("Running exhaustive round trip test for UTF-32BE\n");
 
2986
        TestFullRoundtrip("UTF-32BE");
 
2987
        log_verbose("Running exhaustive round trip test for UTF-32LE\n");
 
2988
        TestFullRoundtrip("UTF-32LE");
 
2989
        log_verbose("Running exhaustive round trip test for UTF-7\n");
 
2990
        TestFullRoundtrip("UTF-7");
 
2991
        log_verbose("Running exhaustive round trip test for UTF-7\n");
 
2992
        TestFullRoundtrip("UTF-7,version=1");
 
2993
        /*#### TODO: Enable this test when BOCU-1 is available */
 
2994
        /*log_verbose("Running exhaustive round trip test for BOCU-1");*/
 
2995
        /*TestFullRoundtrip("BOCU-1");*/
 
2996
        log_verbose("Running exhaustive round trip test for GB18030\n");
 
2997
        TestFullRoundtrip("GB18030");
 
2998
    }
 
2999
}
 
3000
 
2817
3001
static void
2818
3002
TestSCSU() {
2819
3003
 
2820
 
   uint16_t germanUTF16[]={
 
3004
    static const uint16_t germanUTF16[]={
2821
3005
        0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
2822
3006
    };
2823
3007
 
2824
 
    uint8_t germanSCSU[]={
 
3008
    static const uint8_t germanSCSU[]={
2825
3009
        0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
2826
3010
    };
2827
3011
 
2828
 
    uint16_t russianUTF16[]={
 
3012
    static const uint16_t russianUTF16[]={
2829
3013
        0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
2830
3014
    };
2831
3015
 
2832
 
    uint8_t russianSCSU[]={
 
3016
    static const uint8_t russianSCSU[]={
2833
3017
        0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
2834
3018
    };
2835
3019
 
2836
 
    uint16_t japaneseUTF16[]={
 
3020
    static const uint16_t japaneseUTF16[]={
2837
3021
        0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
2838
3022
        0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
2839
3023
        0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
2853
3037
 
2854
3038
    /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
2855
3039
     it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
2856
 
    uint8_t japaneseSCSU[]={
 
3040
    static const uint8_t japaneseSCSU[]={
2857
3041
        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2858
3042
        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2859
3043
        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2868
3052
        0xcb, 0x82
2869
3053
    };
2870
3054
 
2871
 
    uint16_t allFeaturesUTF16[]={
 
3055
    static const uint16_t allFeaturesUTF16[]={
2872
3056
        0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
2873
3057
        0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
2874
3058
        0x01df, 0xf000, 0xdbff, 0xdfff
2877
3061
    /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
2878
3062
     * result here (34B vs. 35B)
2879
3063
     */
2880
 
    uint8_t allFeaturesSCSU[]={
 
3064
    static const uint8_t allFeaturesSCSU[]={
2881
3065
        0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
2882
3066
        0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
2883
3067
        0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
2913
3097
        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 
2914
3098
        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 
2915
3099
        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 
2916
 
        0xD869, 0xDED5, 
 
3100
        0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
 
3101
        0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
 
3102
 
2917
3103
 
2918
3104
        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
2919
3105
        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
2921
3107
        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
2922
3108
        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
2923
3109
    };
 
3110
    static const char *fTestCases [] = {
 
3111
          "\\ud800\\udc00", /* smallest surrogate*/
 
3112
          "\\ud8ff\\udcff",
 
3113
          "\\udBff\\udFff", /* largest surrogate pair*/
 
3114
          "\\ud834\\udc00",
 
3115
          "\\U0010FFFF",
 
3116
          "Hello \\u9292 \\u9192 World!",
 
3117
          "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
 
3118
          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
 
3119
  
 
3120
          "\\u0648\\u06c8", /* catch missing reset*/
 
3121
          "\\u0648\\u06c8",
 
3122
  
 
3123
          "\\u4444\\uE001", /* lowest quotable*/
 
3124
          "\\u4444\\uf2FF", /* highest quotable*/
 
3125
          "\\u4444\\uf188\\u4444",
 
3126
          "\\u4444\\uf188\\uf288",
 
3127
          "\\u4444\\uf188abc\\u0429\\uf288",
 
3128
          "\\u9292\\u2222",
 
3129
          "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
 
3130
          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
 
3131
          "Hello World!123456",
 
3132
          "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
 
3133
  
 
3134
          "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
 
3135
          "abc\\u4411d",      /* uses SQU*/
 
3136
          "abc\\u4411\\u4412d",/* uses SCU*/
 
3137
          "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
 
3138
          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
 
3139
          "\\u9292\\u2222",
 
3140
          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
 
3141
          "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
 
3142
          "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
 
3143
  
 
3144
          "", /* empty input*/
 
3145
          "\\u0000", /* smallest BMP character*/
 
3146
          "\\uFFFF", /* largest BMP character*/
 
3147
  
 
3148
          /* regression tests*/
 
3149
          "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
 
3150
          "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
 
3151
          "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
 
3152
          "\\u0041\\u00df\\u0401\\u015f",
 
3153
          "\\u9066\\u2123abc",
 
3154
          "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
 
3155
          "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
 
3156
    };
 
3157
    int i=0;
 
3158
    for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
 
3159
        const char* cSrc = fTestCases[i];
 
3160
        UErrorCode status = U_ZERO_ERROR;
 
3161
        int32_t cSrcLen,srcLen;
 
3162
        UChar* src;
 
3163
        /* UConverter* cnv = ucnv_open("SCSU",&status); */
 
3164
        cSrcLen= srcLen =  uprv_strlen(fTestCases[i]);
 
3165
        src = (UChar*) uprv_malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
 
3166
        srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
 
3167
        log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
 
3168
        TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
 
3169
        uprv_free(src);
 
3170
    }
2924
3171
    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
2925
3172
    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
2926
3173
    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
4512
4759
      ucnv_reset(utf8cnv);
4513
4760
      status = U_ZERO_ERROR;
4514
4761
      bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
4515
 
                                     rules, rules_length, &status);
 
4762
                                     rules, rules_length, &status);
4516
4763
      target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
4517
4764
      if(numNeeded!=0 && numNeeded!= bytes_needed){
4518
4765
          log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");