1
/********************************************************************
3
* Copyright (c) 1997-2001, International Business Machines Corporation and
4
* others. All Rights Reserved.
5
********************************************************************/
8
#include "unicode/unicode.h"
9
#include "unicode/ustring.h"
10
#include "unicode/uchar.h"
14
UnicodeTest::UnicodeTest()
18
UnicodeTest::~UnicodeTest()
22
// Test dispatcher for this suite. Sets `name` to the name of the test
// selected by `index` and, when `exec` is true, also runs that test.
// The fourth parameter is unused (its name is commented out).
// NOTE(review): the opening brace and the switch header are not visible in
// this view; the case labels below presumably switch on `index` -- confirm.
void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
24
// Log the suite banner only when a test is actually going to be executed.
if (exec) logln("TestSuite UnicodeTest: ");
26
// Each case always assigns the test name, and calls the test only if `exec`.
case 0: name = "TestUpperLower"; if (exec) TestUpperLower(); break;
27
case 1: name = "TestLetterNumber"; if (exec) TestLetterNumber(); break;
28
case 2: name = "TestMisc"; if (exec) TestMisc(); break;
29
case 3: name = "TestUnicodeData"; if (exec) TestUnicodeData(); break;
30
case 4: name = "TestCodeUnit"; if(exec) TestCodeUnit(); break;
31
case 5: name = "TestCodePoint"; if(exec) TestCodePoint(); break;
32
case 6: name = "TestCharLength"; if(exec) TestCharLength(); break;
33
case 7: name = "TestIdentifier"; if(exec) TestIdentifier(); break;
34
case 8: name = "TestScript"; if(exec) TestScript(); break;
35
// Any other index yields an empty name; per the original note this is the
// signal that terminates the caller's enumeration of tests.
default: name = ""; break; //needed to end loop
39
//====================================================
40
// private data used by the tests
41
//====================================================
43
// Not referenced anywhere else in the visible portion of this file.
const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD;
44
// Two-letter general-category tags packed back to back, with no separators.
// MakeProp() locates a tag in this string with strstr() and divides the
// offset by 2, so the order here must match the order of tagValues below.
const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
45
// Category constants indexed in parallel with tagStrings: entry k is the
// value for the k-th two-letter tag (the tag is repeated in each comment).
const int32_t tagValues[] =
47
/* Mn */ Unicode::NON_SPACING_MARK,
48
/* Mc */ Unicode::COMBINING_SPACING_MARK,
49
/* Me */ Unicode::ENCLOSING_MARK,
50
/* Nd */ Unicode::DECIMAL_DIGIT_NUMBER,
51
/* Nl */ Unicode::LETTER_NUMBER,
52
/* No */ Unicode::OTHER_NUMBER,
53
/* Zs */ Unicode::SPACE_SEPARATOR,
54
/* Zl */ Unicode::LINE_SEPARATOR,
55
/* Zp */ Unicode::PARAGRAPH_SEPARATOR,
56
/* Cc */ Unicode::CONTROL,
57
/* Cf */ Unicode::FORMAT,
58
/* Cs */ Unicode::SURROGATE,
59
/* Co */ Unicode::PRIVATE_USE,
60
/* Cn */ Unicode::UNASSIGNED,
61
/* Lu */ Unicode::UPPERCASE_LETTER,
62
/* Ll */ Unicode::LOWERCASE_LETTER,
63
/* Lt */ Unicode::TITLECASE_LETTER,
64
/* Lm */ Unicode::MODIFIER_LETTER,
65
/* Lo */ Unicode::OTHER_LETTER,
66
/* Pc */ Unicode::CONNECTOR_PUNCTUATION,
67
/* Pd */ Unicode::DASH_PUNCTUATION,
68
/* Ps */ Unicode::START_PUNCTUATION,
69
/* Pe */ Unicode::END_PUNCTUATION,
70
/* Po */ Unicode::OTHER_PUNCTUATION,
71
/* Sm */ Unicode::MATH_SYMBOL,
72
/* Sc */ Unicode::CURRENCY_SYMBOL,
73
/* Sk */ Unicode::MODIFIER_SYMBOL,
74
/* So */ Unicode::OTHER_SYMBOL,
75
/* Pi */ Unicode::INITIAL_PUNCTUATION,
76
/* Pf */ Unicode::FINAL_PUNCTUATION
78
const char dirStrings[][5] = {
100
//====================================================
101
// test toUpperCase() and toLowerCase()
102
//====================================================
103
// Exercises Unicode::toUpperCase()/toLowerCase() together with
// isLetter()/isUpperCase()/isLowerCase() on a fixed 21-character sample and
// on the range U+2100..U+2137.
// NOTE(review): the declaration of `i` and the braces are not visible here.
void UnicodeTest::TestUpperLower()
105
// Naming is by role, not content: upperTest holds the lowercase text (the
// input that gets upper-cased) and lowerTest holds the uppercase text.
U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
106
U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
109
U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
110
U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
112
//Checks LetterLike Symbols which were previously a source of confusion
113
//[Bertrand A. D. 02/04/98]
114
// Every code point in U+2100..U+2137 except the three excluded below must
// map to itself under both case conversions.
for (i=0x2100;i<0x2138;i++)
116
// U+2126, U+212A and U+212B are skipped -- presumably because they do have
// case mappings; confirm against the Unicode data.
if(i!=0x2126 && i!=0x212a && i!=0x212b)
118
if (i != Unicode::toLowerCase(i)) // itself
119
errln("Failed case conversion with itself: " + UCharToUnicodeString(i));
120
if (i != Unicode::toUpperCase(i))
121
errln("Failed case conversion with itself: " + UCharToUnicodeString(i));
125
// Per-position checks on the two samples. The checks form one else-if chain,
// so at most one failure is reported for each index.
for (i = 0; i < 21; i++) {
126
// logln((UnicodeString)"testing " + (int32_t)i + "...");
127
// Letters in the lowercase sample must be classified as lowercase ...
if (Unicode::isLetter(upperTest[i]) && !Unicode::isLowerCase(upperTest[i]))
128
errln("Failed isLowerCase test at " + UCharToUnicodeString(upperTest[i]));
129
// ... and letters in the uppercase sample as uppercase.
else if (Unicode::isLetter(lowerTest[i]) && !Unicode::isUpperCase(lowerTest[i]))
130
errln("Failed isUpperCase test at " + UCharToUnicodeString(lowerTest[i]));
131
// The two samples must be each other's case conversion.
else if (upperTest[i] != Unicode::toLowerCase(lowerTest[i]))
132
errln("Failed case conversion : " + UCharToUnicodeString(upperTest[i]) +
133
" to " + UCharToUnicodeString(lowerTest[i]));
134
else if (lowerTest[i] != Unicode::toUpperCase(upperTest[i]))
135
errln("Failed case conversion : " + UCharToUnicodeString(upperTest[i]) +
136
" to " + UCharToUnicodeString(lowerTest[i]));
137
// Converting to the case a character already has must be a no-op.
else if (upperTest[i] != Unicode::toLowerCase(upperTest[i])) // itself
138
errln("Failed case conversion with itself: " + UCharToUnicodeString(upperTest[i]));
139
else if (lowerTest[i] != Unicode::toUpperCase(lowerTest[i]))
140
errln("Failed case conversion with itself: " + UCharToUnicodeString(lowerTest[i]));
144
/* Tests Unicode::isLetter() and Unicode::isDigit():
 *  - U+0041..U+005A must be letters,
 *  - U+0660..U+0669 must not be letters,
 *  - U+0660..U+0669 must be digits.
 * Failure messages append the offending character via UCharToUnicodeString();
 * adding the integer `i` directly to a string literal would only offset the
 * literal's pointer instead of appending text.
 * NOTE(review): the declaration of `i` and the braces are not visible here. */
145
void UnicodeTest::TestLetterNumber()
149
for (i = 0x0041; i < 0x005B; i++) {
150
// logln((UnicodeString)"testing " + (int32_t)i + "...");
151
if (!Unicode::isLetter(i))
152
errln("Failed isLetter test at " + UCharToUnicodeString(i));
154
for (i = 0x0660; i < 0x066A; i++) {
155
// logln((UnicodeString)"testing " + (int32_t)i + "...");
156
if (Unicode::isLetter(i))
157
errln("Failed isLetter test with numbers at " + UCharToUnicodeString(i));
159
for (i = 0x0660; i < 0x066A; i++) {
160
// logln((UnicodeString)"testing " + (int32_t)i + "...");
161
if (!Unicode::isDigit(i))
162
errln("Failed isNumber test at " + UCharToUnicodeString(i));
166
/* Tests for isSpaceChar(), isWhitespace(), isDefined(), isBaseForm(),
 * getCellWidth(), isDigit() and digitValue().
 * Each loop walks a "positive" and a "negative" sample array in parallel and
 * reports one combined error if either element is misclassified.
 * NOTE(review): the declaration of `i`, the braces and the remainder of the
 * sampleCellWidth initializer are not visible in this view. */
167
void UnicodeTest::TestMisc()
169
const UChar sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
170
const UChar sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
171
const UChar sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
172
const UChar sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f};
173
const UChar sampleUndefined[] = {0xfff1, 0xfff7, 0xfa30};
174
const UChar sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
175
const UChar sampleBase[] = {0x0061, 0x0031, 0x03d2};
176
const UChar sampleNonBase[] = {0x002B, 0x0020, 0x203B};
177
// sampleChars is the input to getCellWidth(); sampleCellWidth holds the
// expected result for the same index.
const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00};
178
// sampleDigitValues[k] is the expected digitValue() of sampleDigits[k].
const UChar sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
179
const UChar sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
180
const int32_t sampleDigitValues[] = {0, 2, 3, 5};
181
const uint16_t sampleCellWidth[] = {Unicode::ZERO_WIDTH,
186
// isSpaceChar(): must hold for sampleSpaces, must not hold for sampleNonSpaces.
for (i = 0; i < 5; i++) {
187
// logln((UnicodeString)"testing " + (int32_t)i + "...");
188
if (!(Unicode::isSpaceChar(sampleSpaces[i])) ||
189
(Unicode::isSpaceChar(sampleNonSpaces[i])))
190
errln((UnicodeString)"Space char test error : " + (int32_t)sampleSpaces[i] +
191
" or " + (int32_t)sampleNonSpaces[i]);
193
// isWhitespace(): same pattern with the whitespace samples.
for (i = 0; i < 5; i++) {
194
// log_ln("Testing for isWhitespace and nonWhitespaces\n");
195
if (!(Unicode::isWhitespace(sampleWhiteSpaces[i])) ||
196
(Unicode::isWhitespace(sampleNonWhiteSpaces[i])))
198
errln((UnicodeString)"White Space char test error : " + (int32_t)sampleWhiteSpaces[i] +
199
"or" + (int32_t)sampleNonWhiteSpaces[i]);
202
// isDefined(): must be false for sampleUndefined and true for sampleDefined.
for (i = 0; i < 3; i++) {
203
// logln((UnicodeString)"testing " + (int32_t)i + "...");
204
if ((Unicode::isDefined(sampleUndefined[i])) ||
205
!(Unicode::isDefined(sampleDefined[i])))
206
errln((UnicodeString)"Undefined char test error : " +
207
(int32_t)sampleUndefined[i] + " or " + (int32_t)sampleDefined[i]);
209
// isBaseForm(): must be false for sampleNonBase and true for sampleBase.
for (i = 0; i < 3; i++) {
210
// logln((UnicodeString)"testing " + (int32_t)i + "...");
211
if ((Unicode::isBaseForm(sampleNonBase[i])) ||
212
!(Unicode::isBaseForm(sampleBase[i])))
213
errln((UnicodeString)"Non-baseform char test error : " +
214
(int32_t)sampleNonBase[i] + " or " + (int32_t)sampleBase[i]);
216
// getCellWidth(): result must equal the expected width at the same index.
for (i = 0; i < 4; i++) {
217
// logln((UnicodeString)"testing " + (int32_t)i + "...");
218
if (Unicode::getCellWidth(sampleChars[i]) != sampleCellWidth[i])
219
errln((UnicodeString)"Cell width char test error : " +
220
(int32_t)sampleChars[i]);
222
// Digits: an error is reported when a sample digit is recognized but has the
// wrong digitValue(), or when a non-digit sample is reported as a digit.
// Note that a sample digit for which isDigit() is false raises no error here.
for (i = 0; i < 4; i++) {
223
if ((Unicode::isDigit(sampleDigits[i]) &&
224
(Unicode::digitValue(sampleDigits[i])!= sampleDigitValues[i])) ||
225
(Unicode::isDigit(sampleNonDigits[i]))) {
226
errln((UnicodeString)"Digit char test error : " +
227
(int32_t)sampleDigits[i] + " or " + (int32_t)sampleNonDigits[i]);
232
/* Tests for isControl() and isPrintable().
 * Each loop walks a positive and a negative sample array in parallel:
 * the positive sample must satisfy the predicate and the negative sample
 * must not. The printable check compares samplePrintable against
 * sampleNonPrintable (testing samplePrintable on both sides of the `||`
 * would make the condition true for every input).
 * NOTE(review): this test is not listed in the visible cases of
 * runIndexedTest(); the declaration of `i` and the braces are not visible. */
233
void UnicodeTest::TestControlPrint()
235
const UChar sampleControl[] = {0x001b, 0x0097, 0x0082};
236
const UChar sampleNonControl[] = {0x61, 0x0031, 0x00e2};
237
const UChar samplePrintable[] = {0x0042, 0x005f, 0x2014};
238
const UChar sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
240
// isControl(): must hold for sampleControl, must not hold for sampleNonControl.
for (i = 0; i < 3; i++) {
241
// logln((UnicodeString)"testing " + (int32_t)i + "...");
242
if (!(Unicode::isControl(sampleControl[i])) ||
243
(Unicode::isControl(sampleNonControl[i])))
244
errln((UnicodeString)"Control char test error : " + (int32_t)sampleControl[i] +
245
" or " + (int32_t)sampleNonControl[i]);
247
// isPrintable(): must hold for samplePrintable, must not hold for
// sampleNonPrintable.
for (i = 0; i < 3; i++) {
248
// logln((UnicodeString)"testing " + (int32_t)i + "...");
249
if (!(Unicode::isPrintable(samplePrintable[i])) ||
250
(Unicode::isPrintable(sampleNonPrintable[i])))
251
errln((UnicodeString)"Printable char test error : " +
252
(int32_t)samplePrintable[i] + " or " + (int32_t)sampleNonPrintable[i]);
256
/* Tests for isJavaIdentifierStart(), isJavaIdentifierPart(),
 * isUnicodeIdentifierStart(), isUnicodeIdentifierPart() and
 * isIdentifierIgnorable().
 * For every predicate there is a 3-element array that must satisfy it and a
 * 3-element array that must not; each loop checks one such pair by index and
 * reports a single combined error naming both code points.
 * NOTE(review): the declaration of `i` and the braces are not visible here. */
258
void UnicodeTest::TestIdentifier()
260
const UChar sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
261
const UChar sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
262
const UChar sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
263
const UChar sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
264
const UChar sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
265
const UChar sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
266
const UChar sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
267
const UChar sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
268
const UChar sampleIDIgnore[] = {0x0006, 0x0010, 0x206b};
269
const UChar sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
272
// Java identifier start.
for (i = 0; i < 3; i++) {
273
// logln((UnicodeString)"testing " + (int32_t)i + "...");
274
if (!(Unicode::isJavaIdentifierStart(sampleJavaIDStart[i])) ||
275
(Unicode::isJavaIdentifierStart(sampleNonJavaIDStart[i])))
276
errln((UnicodeString)"Java ID Start char test error : " + (int32_t)sampleJavaIDStart[i] +
277
" or " + (int32_t)sampleNonJavaIDStart[i]);
279
// Java identifier part.
for (i = 0; i < 3; i++) {
280
// logln((UnicodeString)"testing " + (int32_t)i + "...");
281
if (!(Unicode::isJavaIdentifierPart(sampleJavaIDPart[i])) ||
282
(Unicode::isJavaIdentifierPart(sampleNonJavaIDPart[i])))
283
errln((UnicodeString)"Java ID Part char test error : " + (int32_t)sampleJavaIDPart[i] +
284
" or " + (int32_t)sampleNonJavaIDPart[i]);
286
// Unicode identifier start.
for (i = 0; i < 3; i++) {
287
// logln((UnicodeString)"testing " + (int32_t)i + "...");
288
if (!(Unicode::isUnicodeIdentifierStart(sampleUnicodeIDStart[i])) ||
289
(Unicode::isUnicodeIdentifierStart(sampleNonUnicodeIDStart[i])))
290
errln((UnicodeString)"Unicode ID Start char test error : " + (int32_t)sampleUnicodeIDStart[i] +
291
" or " + (int32_t)sampleNonUnicodeIDStart[i]);
293
// Unicode identifier part.
for (i = 0; i < 3; i++) {
294
// logln((UnicodeString)"testing " + (int32_t)i + "...");
295
if (!(Unicode::isUnicodeIdentifierPart(sampleUnicodeIDPart[i])) ||
296
(Unicode::isUnicodeIdentifierPart(sampleNonUnicodeIDPart[i])))
297
errln((UnicodeString)"Unicode ID Part char test error : " + (int32_t)sampleUnicodeIDPart[i] +
298
" or " + (int32_t)sampleNonUnicodeIDPart[i]);
300
// Identifier-ignorable characters.
for (i = 0; i < 3; i++) {
301
// logln((UnicodeString)"testing " + (int32_t)i + "...");
302
if (!(Unicode::isIdentifierIgnorable(sampleIDIgnore[i])) ||
303
(Unicode::isIdentifierIgnorable(sampleNonIDIgnore[i])))
304
errln((UnicodeString)"ID ignorable char test error : " + (int32_t)sampleIDIgnore[i] +
305
" or " + (int32_t)sampleNonIDIgnore[i]);
309
/* Per-line callback for parsing UnicodeData.txt: checks some of the
 * properties of the character described by the line against the Unicode API.
 *
 * context     the UnicodeTest instance, used for error reporting and for
 *             MakeProp()/MakeDir()
 * fields      start/limit pointer pairs, one per field of the line
 * fieldCount  number of fields found; must be 15
 * pErrorCode  in/out error code; set to U_PARSE_ERROR on every mismatch
 *
 * Fields checked: 0 (code point), 2 (general category), 3 (combining class),
 * 4 (BiDi category), 12/13/14 (upper/lower/title case mappings). For the case
 * mappings an empty field means the API must map the code point to itself.
 *
 * NOTE(review): the local declarations, closing braces and several statements
 * of this function are not visible in this view and are left untouched.
 * NOTE(review): several messages pass 32-bit values to %lu/%lx; that matches
 * only where long is 32 bits wide -- confirm for the target platforms. */
310
U_CAPI void U_CALLCONV
311
unicodeDataLineFn(void *context,
312
char *fields[][2], int32_t fieldCount,
313
UErrorCode *pErrorCode)
319
UnicodeTest *me=(UnicodeTest *)context;
321
if(U_FAILURE(*pErrorCode)) {
322
// Report the error code itself, not the address of the variable holding it.
me->errln("error: unicodeDataLineFn called with pErrorCode=%d\n", (int)*pErrorCode);
326
if(fieldCount != 15) {
327
me->errln("error in UnicodeData.txt: The data format changed. fieldCount=%lu and it should be 15\n", fieldCount);
328
*pErrorCode = U_PARSE_ERROR;
332
/* get the character code, field 0 */
333
c=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
334
// The field must be non-empty and consist entirely of hex digits.
if(end<=fields[0][0] || end!=fields[0][1]) {
335
// Pass the field text as the %s argument; concatenating it onto the format
// string would leave a literal "%s" in the output.
me->errln("error: syntax error in field 0 at %s\n", fields[0][0]);
336
*pErrorCode = U_PARSE_ERROR;
339
if((uint32_t)c>=0x110000) {
340
me->errln("error in UnicodeData.txt: code point %lu out of range\n", c);
341
*pErrorCode = U_PARSE_ERROR;
345
/* get general category, field 2 */
347
// MakeProp() yields the tag's index, which selects the expected constant.
type = (int8_t)tagValues[me->MakeProp(fields[2][0])];
348
if(Unicode::getType(c)!=type) {
349
me->errln("error: Unicode::getType(U+%04lx)==%u instead of %u\n", c, Unicode::getType(c), type);
350
*pErrorCode = U_PARSE_ERROR;
354
/* get canonical combining class, field 3 */
355
value=(uint32_t)uprv_strtoul(fields[3][0], &end, 10);
356
if(end<=fields[3][0] || end!=fields[3][1]) {
357
me->errln("error: syntax error in field 3 at code 0x%lx\n", c);
358
*pErrorCode = U_PARSE_ERROR;
362
me->errln("error in UnicodeData.txt: combining class %lu out of range\n", value);
363
*pErrorCode = U_PARSE_ERROR;
366
if(value!=Unicode::getCombiningClass(c)) {
367
me->errln("error: Unicode::getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, Unicode::getCombiningClass(c), value);
368
*pErrorCode = U_PARSE_ERROR;
372
/* get BiDi category, field 4 */
374
if(Unicode::characterDirection(c)!=me->MakeDir(fields[4][0])) {
375
me->errln("error: Unicode::characterDirection(U+%04lx)==%u instead of %u (%s)\n", c, Unicode::characterDirection(c), me->MakeDir(fields[4][0]), fields[4][0]);
376
*pErrorCode = U_PARSE_ERROR;
380
/* get uppercase mapping, field 12 */
381
// A non-empty field (start != limit) carries an explicit mapping.
if(fields[12][0]!=fields[12][1]) {
382
value=(uint32_t)uprv_strtoul(fields[12][0], &end, 16);
383
if(end!=fields[12][1]) {
384
me->errln("error: syntax error in field 12 at code 0x%lx\n", c);
385
*pErrorCode = U_PARSE_ERROR;
388
if((UChar32)value!=Unicode::toUpperCase(c)) {
389
me->errln("error: Unicode::toUpperCase(U+%04lx)==U+%04lx instead of U+%04lx\n", c, Unicode::toUpperCase(c), value);
390
*pErrorCode = U_PARSE_ERROR;
394
/* no case mapping: the API must map the code point to itself */
395
if(c!=Unicode::toUpperCase(c)) {
396
me->errln("error: U+%04lx does not have an uppercase mapping but Unicode::toUpperCase()==U+%04lx\n", c, Unicode::toUpperCase(c));
397
*pErrorCode = U_PARSE_ERROR;
402
/* get lowercase mapping, field 13 */
403
if(fields[13][0]!=fields[13][1]) {
404
value=(uint32_t)uprv_strtoul(fields[13][0], &end, 16);
405
if(end!=fields[13][1]) {
406
me->errln("error: syntax error in field 13 at code 0x%lx\n", c);
407
*pErrorCode = U_PARSE_ERROR;
410
if((UChar32)value!=Unicode::toLowerCase(c)) {
411
me->errln("error: Unicode::toLowerCase(U+%04lx)==U+%04lx instead of U+%04lx\n", c, Unicode::toLowerCase(c), value);
412
*pErrorCode = U_PARSE_ERROR;
416
/* no case mapping: the API must map the code point to itself */
417
if(c!=Unicode::toLowerCase(c)) {
418
me->errln("error: U+%04lx does not have a lowercase mapping but Unicode::toLowerCase()==U+%04lx\n", c, Unicode::toLowerCase(c));
419
*pErrorCode = U_PARSE_ERROR;
424
/* get titlecase mapping, field 14 */
425
if(fields[14][0]!=fields[14][1]) {
426
value=(uint32_t)uprv_strtoul(fields[14][0], &end, 16);
427
if(end!=fields[14][1]) {
428
me->errln("error: syntax error in field 14 at code 0x%lx\n", c);
429
*pErrorCode = U_PARSE_ERROR;
432
if((UChar32)value!=Unicode::toTitleCase(c)) {
433
me->errln("error: Unicode::toTitleCase(U+%04lx)==U+%04lx instead of U+%04lx\n", c, Unicode::toTitleCase(c), value);
434
*pErrorCode = U_PARSE_ERROR;
438
/* no case mapping: the API must map the code point to itself */
439
if(c!=Unicode::toTitleCase(c)) {
440
me->errln("error: U+%04lx does not have a titlecase mapping but Unicode::toTitleCase()==U+%04lx\n", c, Unicode::toTitleCase(c));
441
*pErrorCode = U_PARSE_ERROR;
447
/* Tests several properties:
 *  1. parses UnicodeData.txt with u_parseDelimitedFile(), checking every line
 *     through unicodeDataLineFn(); the file is looked up under the ICU data
 *     directory first and, on U_FILE_ACCESS_ERROR, at a fallback source path;
 *  2. looks up one character name with Unicode::getCharName();
 *  3. spot-checks Unicode::isMirrored() and Unicode::charMirror().
 * NOTE(review): the declarations of newPath, fields and buffer, the
 * #else/#endif lines and the closing parts of the two long conditions are not
 * visible in this view and are left untouched.
 * NOTE(review): the paths are built with unbounded strcpy()/strcat() into
 * fixed-size buffers (backupPath is 256 bytes); a long data directory would
 * overflow them. */
448
void UnicodeTest::TestUnicodeData()
451
char backupPath[256];
453
UErrorCode errorCode = U_ZERO_ERROR;
455
/* Look inside ICU_DATA first */
456
strcpy(newPath, u_getDataDirectory());
457
strcat(newPath, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
460
// As a fallback, try to guess where the source data was located
461
// at the time ICU was built, and look there.
462
# if defined (U_TOPSRCDIR)
463
strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");
465
strcpy(backupPath, u_getDataDirectory());
466
strcat(backupPath, ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
468
strcat(backupPath, U_FILE_SEP_STRING);
469
strcat(backupPath, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
471
// 15 ';'-separated fields per line; `this` is handed to the callback as context.
u_parseDelimitedFile(newPath, ';', fields, 15, unicodeDataLineFn, this, &errorCode);
473
// Only a missing/unreadable file triggers the fallback; other errors stand.
if(errorCode==U_FILE_ACCESS_ERROR) {
474
errorCode=U_ZERO_ERROR;
475
u_parseDelimitedFile(backupPath, ';', fields, 15, unicodeDataLineFn, this, &errorCode);
478
if(U_FAILURE(errorCode)) {
479
// Pass the error name as the %s argument; concatenating it onto the format
// string would leave a literal "%s" in the output.
errln("error parsing UnicodeData.txt: %s\n", u_errorName(errorCode));
483
// test Unicode::getCharName()
484
// a more thorough test of u_charName() is in cintltst/cucdtst.c
486
int32_t length=Unicode::getCharName(0x284, buffer, (int32_t)sizeof(buffer));
488
// use invariant-character conversion to Unicode
489
UnicodeString name(buffer, length, "");
490
if(name!=UNICODE_STRING("LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", 49)) {
491
errln("Unicode character name lookup failed\n");
494
// test Unicode::isMirrored() and charMirror()
495
// see also cintltst/cucdtest.c
496
// First row: code points expected to be mirrored; second row: expected not.
if(!(Unicode::isMirrored(0x28) && Unicode::isMirrored(0xbb) && Unicode::isMirrored(0x2045) && Unicode::isMirrored(0x232a) &&
497
!Unicode::isMirrored(0x27) && !Unicode::isMirrored(0x61) && !Unicode::isMirrored(0x284) && !Unicode::isMirrored(0x3400)
500
errln("Unicode::isMirrored() does not work correctly\n");
503
// First row: code points with a distinct mirror image; second row: code
// points expected to map to themselves.
if(!(Unicode::charMirror(0x3c)==0x3e && Unicode::charMirror(0x5d)==0x5b && Unicode::charMirror(0x208d)==0x208e && Unicode::charMirror(0x3017)==0x3016 &&
504
Unicode::charMirror(0x2e)==0x2e && Unicode::charMirror(0x6f3)==0x6f3 && Unicode::charMirror(0x301c)==0x301c && Unicode::charMirror(0xa4ab)==0xa4ab
507
errln("Unicode::charMirror() does not work correctly\n");
511
// Maps a general-category tag to its index in tagStrings/tagValues.
// `str` is searched for as a substring of tagStrings; on a match the byte
// offset divided by 2 is stored in `result`, otherwise an error is logged.
// NOTE(review): the declaration/initial value of `result` and the return
// statement are not visible in this view.
int32_t UnicodeTest::MakeProp(char* str)
514
const char* matchPosition;
516
// strstr() does a plain substring search, so `str` must be NUL-terminated.
matchPosition = strstr(tagStrings, str);
517
if (matchPosition == 0) errln((UnicodeString)"unrecognized type letter " + str);
518
// Tags are two characters wide, hence offset / 2 is the tag's index.
else result = ((matchPosition - tagStrings) / 2);
522
// Looks up `str` among the first 19 entries of dirStrings by exact string
// comparison.
// NOTE(review): the statements executed on a match and the return value are
// not visible in this view; presumably the matching index is returned --
// confirm. The bound 19 is hard-coded rather than derived from dirStrings.
int32_t UnicodeTest::MakeDir(char* str)
525
for (pos = 0; pos < 19; pos++) {
526
if (strcmp(str, dirStrings[pos]) == 0) {
532
/*Tests added by Madhu*/
534
/* Tests for isSingle(), isLead(), isTrail(), isSurrogate().
 * Three kinds of check are visible, one per class of code unit: a single
 * unit, a lead (first) surrogate and a trail (second) surrogate. Each check
 * requires the matching predicate to be true and the contradicting ones to
 * be false.
 * NOTE(review): the declarations of i, c and msg and the conditions that
 * select which check applies to which index are not visible in this view. */
535
void UnicodeTest::TestCodeUnit(){
536
// By value: four units outside the surrogate range, four in D800..DBFF, four
// in DC00..DFFF, and a final 0.
const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
540
for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
543
msg.append((UChar32)c);
544
logln((UnicodeString)"Testing code unit value of " + prettify(msg));
546
// Expected: single, and neither lead, trail nor surrogate.
if(!(Unicode::isSingle(c)) || (Unicode::isLead(c)) || (Unicode::isTrail(c)) ||(Unicode::isSurrogate(c))){
547
errln((UnicodeString)"ERROR:" + prettify(msg) + " is a single");
552
// Expected: lead surrogate, hence a surrogate and neither single nor trail.
if(!(Unicode::isLead(c)) || Unicode::isSingle(c) || Unicode::isTrail(c) || !(Unicode::isSurrogate(c))){
553
errln((UnicodeString)"ERROR:" + prettify(msg) + " is a first surrogate");
557
// Expected: trail surrogate, hence a surrogate and neither single nor lead.
if(!(Unicode::isTrail(c)) || Unicode::isSingle(c) || Unicode::isLead(c) || !(Unicode::isSurrogate(c))){
558
errln((UnicodeString)"ERROR:" + prettify(msg) + " is a second surrogate");
564
/* Tests for isSurrogate(), isUnicodeChar(), isError(), isValid().
 * The codePoint table is organized in four groups (see the comments inside
 * the initializer); the loop picks the expected predicate results from the
 * index range the entry falls into and reports each deviation separately.
 * NOTE(review): the table contents, the declarations of i and msg, and the
 * condition guarding the first group are not visible in this view. */
565
void UnicodeTest::TestCodePoint(){
566
const UChar32 codePoint[]={
567
//surrogate, notvalid(codepoint), not a UnicodeChar, not Error
574
//not a surrogate, valid, isUnicodeChar , not Error
587
//not a surrogate, not valid, isUnicodeChar, isError
590
//not a surrogate, not valid, not isUnicodeChar, isError
595
for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
596
UChar32 c=codePoint[i];
599
logln((UnicodeString)"Testing code Point value of " + prettify(msg));
601
// Group 1: surrogate, not valid, not a Unicode char, not an error.
if(!Unicode::isSurrogate(c)){
602
errln((UnicodeString)"ERROR: isSurrogate() failed for" + prettify(msg));
604
if(Unicode::isValid(c)){
605
errln((UnicodeString)"ERROR: isValid() failed for "+ prettify(msg));
607
if(Unicode::isUnicodeChar(c)){
608
errln((UnicodeString)"ERROR: isUnicodeChar() failed for "+ prettify(msg));
610
if(Unicode::isError(c)){
611
errln((UnicodeString)"ERROR: isError() failed for "+ prettify(msg));
613
// Group 2 (indices 6..17): not a surrogate, valid, a Unicode char, not an error.
}else if(i >=6 && i<18){
614
if(Unicode::isSurrogate(c)){
615
errln((UnicodeString)"ERROR: isSurrogate() failed for" + prettify(msg));
617
if(!Unicode::isValid(c)){
618
errln((UnicodeString)"ERROR: isValid() failed for "+ prettify(msg));
620
if(!Unicode::isUnicodeChar(c)){
621
errln((UnicodeString)"ERROR: isUnicodeChar() failed for "+ prettify(msg));
623
if(Unicode::isError(c)){
624
errln((UnicodeString)"ERROR: isError() failed for "+ prettify(msg));
626
// Group 3 (indices 18..19): not a surrogate, not valid, a Unicode char, an error.
}else if(i >=18 && i<20){
627
if(Unicode::isSurrogate(c)){
628
errln((UnicodeString)"ERROR: isSurrogate() failed for" + prettify(msg));
630
if(Unicode::isValid(c)){
631
errln((UnicodeString)"ERROR: isValid() failed for "+ prettify(msg));
633
if(!Unicode::isUnicodeChar(c)){
634
errln((UnicodeString)"ERROR: isUnicodeChar() failed for "+ prettify(msg));
636
if(!Unicode::isError(c)){
637
errln((UnicodeString)"ERROR: isError() failed for "+ prettify(msg));
640
// Group 4: not a surrogate, not valid, not a Unicode char, an error.
// NOTE(review): the lower bound 18 overlaps the previous branch; if this is
// still part of the same else-if chain it is only reached from index 20 on.
else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
641
if(Unicode::isSurrogate(c)){
642
errln((UnicodeString)"ERROR: isSurrogate() failed for" + prettify(msg));
644
if(Unicode::isValid(c)){
645
errln((UnicodeString)"ERROR: isValid() failed for "+ prettify(msg));
647
if(Unicode::isUnicodeChar(c)){
648
errln((UnicodeString)"ERROR: isUnicodeChar() failed for "+ prettify(msg));
650
if(!Unicode::isError(c)){
651
errln((UnicodeString)"ERROR: isError() failed for "+ prettify(msg));
658
// Tests Unicode::charLength() and Unicode::needMultipleUChar().
// The codepoint table is read as consecutive pairs: the expected number of
// code units at index i, followed by the code point at index i+1.
// NOTE(review): the table contents and the declarations of i, msg and
// multiple are not visible in this view.
void UnicodeTest::TestCharLength()
660
const int32_t codepoint[]={
675
// Step by two so that each iteration consumes one (expected length, code
// point) pair.
for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
676
UChar32 c=codepoint[i+1];
679
if(Unicode::charLength(c) != codepoint[i]){
680
errln((UnicodeString)"The no: of code units for" + prettify(msg)+
681
":- Expected: " + (int32_t)codepoint[i] + " Got: " + Unicode::charLength(c));
683
logln((UnicodeString)"The no: of code units for" + prettify(msg) + " is " + Unicode::charLength(c));
685
// A code point needs multiple UChars exactly when its expected length is not 1.
multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
686
if(Unicode::needMultipleUChar(c) != multiple){
687
errln("ERROR: Unicode::needMultipleUChar() failed for" + prettify(msg));
693
Various script value testing.
694
This makes sure that the Unicode::EUnicodeScript
695
and UCharScript enum values are the same.
697
void UnicodeTest::TestScript()
699
if ((int32_t)Unicode::kScriptCount != (int32_t)UBLOCK_COUNT) {
700
errln("ERROR: Unicode::EUnicodeScript is not the same size as UCharScript");
703
if ((int32_t)Unicode::kBasicLatin != (int32_t)U_BASIC_LATIN) {
704
errln("ERROR: Different Basic Latin values in EUnicodeScript and UCharScript");
707
if ((int32_t)Unicode::kHighSurrogate != (int32_t)U_HIGH_SURROGATES) {
708
errln("ERROR: Different High Surrogate values in EUnicodeScript and UCharScript");
711
if ((int32_t)Unicode::kLowSurrogate != (int32_t)U_LOW_SURROGATES) {
712
errln("ERROR: Different Low Surrogate values in EUnicodeScript and UCharScript");
715
if ((int32_t)Unicode::kCJKRadicalsSupplement != (int32_t)U_CJK_RADICALS_SUPPLEMENT) {
716
errln("ERROR: Different CJK Radicals Supplement values in EUnicodeScript and UCharScript");
719
if ((int32_t)Unicode::kGreek != (int32_t)U_GREEK) {
720
errln("ERROR: Different Greek values in EUnicodeScript and UCharScript");
723
if ((int32_t)Unicode::kThai != (int32_t)U_THAI) {
724
errln("ERROR: Diffe rent Thai values in EUnicodeScript and UCharScript");