1
/********************************************************************
3
* Copyright (c) 1997-2001, International Business Machines Corporation and
4
* others. All Rights Reserved.
5
********************************************************************/
6
/********************************************************************************
10
* Modification History:
11
* Date Name Description
12
* Madhu Katragadda Ported for C API
13
* 02/19/01 synwee Modified test case for new collation iterator
14
*********************************************************************************/
16
* Collation Iterator tests.
17
* (Let me reiterate my position...)
20
#include "unicode/utypes.h"
21
#include "unicode/ucol.h"
22
#include "unicode/uloc.h"
23
#include "unicode/uchar.h"
24
#include "unicode/ustring.h"
35
extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
37
void addCollIterTest(TestNode** root)
39
addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
40
addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
41
addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
42
addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
43
addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
44
addTest(root, &TestNormalizedUnicodeChar,
45
"tscoll/citertst/TestNormalizedUnicodeChar");
46
addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
47
addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
48
addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
49
addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
50
addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
51
addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
52
addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
53
addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
54
addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
57
/* The locales we support */
59
static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
61
static void TestBug672() {
62
UErrorCode status = U_ZERO_ERROR;
68
u_uastrcpy(pattern, "resume");
69
u_uastrcpy(text, "Time to resume updating my resume.");
71
for (i = 0; i < 3; ++ i) {
72
UCollator *coll = ucol_open(LOCALES[i], &status);
73
UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
75
UCollationElements *titer = ucol_openElements(coll, text, -1,
77
if (U_FAILURE(status)) {
78
log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
83
log_verbose("locale tested %s\n", LOCALES[i]);
85
while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
88
if (U_FAILURE(status)) {
89
log_err("ERROR: reversing collation iterator :%s\n",
95
ucol_setOffset(titer, u_strlen(pattern), &status);
96
if (U_FAILURE(status)) {
97
log_err("ERROR: setting offset in collator :%s\n",
101
result[i][0] = ucol_getOffset(titer);
102
log_verbose("Text iterator set to offset %d\n", result[i][0]);
105
ucol_previous(titer, &status);
106
result[i][1] = ucol_getOffset(titer);
107
log_verbose("Current offset %d after previous\n", result[i][1]);
109
/* Add one to index */
110
log_verbose("Adding one to current offset...\n");
111
ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
112
if (U_FAILURE(status)) {
113
log_err("ERROR: setting offset in collator :%s\n",
114
myErrorName(status));
117
result[i][2] = ucol_getOffset(titer);
118
log_verbose("Current offset in text = %d\n", result[i][2]);
119
ucol_closeElements(pitr);
120
ucol_closeElements(titer);
124
if (uprv_memcmp(result[0], result[1], 3) != 0 ||
125
uprv_memcmp(result[1], result[2], 3) != 0) {
126
log_err("ERROR: Different locales have different offsets at the same character\n");
132
/* Running this test with normalization enabled showed up a bug in the incremental
133
normalization code. */
134
static void TestBug672Normalize() {
135
UErrorCode status = U_ZERO_ERROR;
141
u_uastrcpy(pattern, "resume");
142
u_uastrcpy(text, "Time to resume updating my resume.");
144
for (i = 0; i < 3; ++ i) {
145
UCollator *coll = ucol_open(LOCALES[i], &status);
146
UCollationElements *pitr = NULL;
147
UCollationElements *titer = NULL;
149
ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
151
pitr = ucol_openElements(coll, pattern, -1, &status);
152
titer = ucol_openElements(coll, text, -1, &status);
153
if (U_FAILURE(status)) {
154
log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
155
myErrorName(status));
159
log_verbose("locale tested %s\n", LOCALES[i]);
161
while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
164
if (U_FAILURE(status)) {
165
log_err("ERROR: reversing collation iterator :%s\n",
166
myErrorName(status));
171
ucol_setOffset(titer, u_strlen(pattern), &status);
172
if (U_FAILURE(status)) {
173
log_err("ERROR: setting offset in collator :%s\n",
174
myErrorName(status));
177
result[i][0] = ucol_getOffset(titer);
178
log_verbose("Text iterator set to offset %d\n", result[i][0]);
181
ucol_previous(titer, &status);
182
result[i][1] = ucol_getOffset(titer);
183
log_verbose("Current offset %d after previous\n", result[i][1]);
185
/* Add one to index */
186
log_verbose("Adding one to current offset...\n");
187
ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
188
if (U_FAILURE(status)) {
189
log_err("ERROR: setting offset in collator :%s\n",
190
myErrorName(status));
193
result[i][2] = ucol_getOffset(titer);
194
log_verbose("Current offset in text = %d\n", result[i][2]);
195
ucol_closeElements(pitr);
196
ucol_closeElements(titer);
200
if (uprv_memcmp(result[0], result[1], 3) != 0 ||
201
uprv_memcmp(result[1], result[2], 3) != 0) {
202
log_err("ERROR: Different locales have different offsets at the same character\n");
210
* Test for CollationElementIterator previous and next for the whole set of
211
* unicode characters.
213
static void TestUnicodeChar()
217
UCollationElements *iter;
218
UErrorCode status = U_ZERO_ERROR;
222
en_us = ucol_open("en_US", &status);
223
if (U_FAILURE(status)){
224
log_err("ERROR: in creation of collation data using ucol_open()\n %s\n",
225
myErrorName(status));
229
for (codepoint = 1; codepoint < 0xFFFE;)
233
while (codepoint % 0xFF != 0)
235
if (u_isdefined(codepoint))
236
*(test ++) = codepoint;
240
if (u_isdefined(codepoint))
241
*(test ++) = codepoint;
243
if (codepoint != 0xFFFF)
247
iter=ucol_openElements(en_us, source, u_strlen(source), &status);
248
if(U_FAILURE(status)){
249
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
250
myErrorName(status));
254
/* A basic test to see if it's working at all */
255
log_verbose("codepoint testing %x\n", codepoint);
257
ucol_closeElements(iter);
259
/* null termination test */
260
iter=ucol_openElements(en_us, source, -1, &status);
261
if(U_FAILURE(status)){
262
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
263
myErrorName(status));
267
/* A basic test to see if it's working at all */
269
ucol_closeElements(iter);
276
* Test for CollationElementIterator previous and next for the whole set of
277
* unicode characters with normalization on.
279
static void TestNormalizedUnicodeChar()
283
UCollationElements *iter;
284
UErrorCode status = U_ZERO_ERROR;
288
/* thai should have normalization on */
289
th_th = ucol_open("th_TH", &status);
290
if (U_FAILURE(status)){
291
log_err("ERROR: in creation of thai collation using ucol_open()\n %s\n",
292
myErrorName(status));
296
for (codepoint = 1; codepoint < 0xFFFE;)
300
while (codepoint % 0xFF != 0)
302
if (u_isdefined(codepoint))
303
*(test ++) = codepoint;
307
if (u_isdefined(codepoint))
308
*(test ++) = codepoint;
310
if (codepoint != 0xFFFF)
314
iter=ucol_openElements(th_th, source, u_strlen(source), &status);
315
if(U_FAILURE(status)){
316
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
317
myErrorName(status));
323
ucol_closeElements(iter);
325
iter=ucol_openElements(th_th, source, -1, &status);
326
if(U_FAILURE(status)){
327
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
328
myErrorName(status));
334
ucol_closeElements(iter);
341
* Test the incremental normalization
343
static void TestNormalization()
345
UErrorCode status = U_ZERO_ERROR;
347
"&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
350
int rulelen = u_unescape(str, rule, 50);
352
const char *testdata[] =
353
{"\\u1ED9", "o\\u0323\\u0302",
354
"\\u0300\\u0315", "\\u0315\\u0300",
355
"A\\u0300\\u0315B", "A\\u0315\\u0300B",
356
"A\\u0316\\u0315B", "A\\u0315\\u0316B",
357
"\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
358
"A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
359
"\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
362
UCollationElements *iter;
364
coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
365
ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
366
if (U_FAILURE(status)){
367
log_err("ERROR: in creation of collator using ucol_openRules()\n %s\n",
368
myErrorName(status));
372
srclen = u_unescape(testdata[0], source, 10);
373
iter = ucol_openElements(coll, source, srclen - 1, &status);
375
ucol_closeElements(iter);
377
srclen = u_unescape(testdata[1], source, 10);
378
iter = ucol_openElements(coll, source, srclen - 1, &status);
380
ucol_closeElements(iter);
383
srclen = u_unescape(testdata[count], source, 10);
384
iter = ucol_openElements(coll, source, srclen - 1, &status);
386
if (U_FAILURE(status)){
387
log_err("ERROR: in creation of collator element iterator\n %s\n",
388
myErrorName(status));
392
ucol_closeElements(iter);
394
iter = ucol_openElements(coll, source, -1, &status);
396
if (U_FAILURE(status)){
397
log_err("ERROR: in creation of collator element iterator\n %s\n",
398
myErrorName(status));
402
ucol_closeElements(iter);
409
* Test for CollationElementIterator.previous()
411
* @bug 4108758 - Make sure it works with contracting characters
414
static void TestPrevious()
416
UCollator *coll=NULL;
419
UCollator *c1, *c2, *c3;
420
UCollationElements *iter;
421
UErrorCode status = U_ZERO_ERROR;
423
test1=(UChar*)malloc(sizeof(UChar) * 50);
424
test2=(UChar*)malloc(sizeof(UChar) * 50);
425
u_uastrcpy(test1, "What subset of all possible test cases?");
426
u_uastrcpy(test2, "has the highest probability of detecting");
427
coll = ucol_open("en_US", &status);
429
iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
430
log_verbose("English locale testing back and forth\n");
431
if(U_FAILURE(status)){
432
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
433
myErrorName(status));
437
/* A basic test to see if it's working at all */
439
ucol_closeElements(iter);
442
/* Test with a contracting character sequence */
443
u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
444
c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
446
log_verbose("Contraction rule testing back and forth with no normalization\n");
448
if (c1 == NULL || U_FAILURE(status))
450
log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
451
myErrorName(status));
454
source=(UChar*)malloc(sizeof(UChar) * 20);
455
u_uastrcpy(source, "abchdcba");
456
iter=ucol_openElements(c1, source, u_strlen(source), &status);
457
if(U_FAILURE(status)){
458
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
459
myErrorName(status));
463
ucol_closeElements(iter);
466
/* Test with an expanding character sequence */
467
u_uastrcpy(rule, "&a < b < c/abd < d");
468
c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
469
log_verbose("Expansion rule testing back and forth with no normalization\n");
470
if (c2 == NULL || U_FAILURE(status))
472
log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
473
myErrorName(status));
476
u_uastrcpy(source, "abcd");
477
iter=ucol_openElements(c2, source, u_strlen(source), &status);
478
if(U_FAILURE(status)){
479
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
480
myErrorName(status));
484
ucol_closeElements(iter);
487
u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
488
c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status);
489
log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
491
if (c3 == NULL || U_FAILURE(status))
493
log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
494
myErrorName(status));
497
u_uastrcpy(source, "abcdbchdc");
498
iter=ucol_openElements(c3, source, u_strlen(source), &status);
499
if(U_FAILURE(status)){
500
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
501
myErrorName(status));
505
ucol_closeElements(iter);
517
coll = ucol_open("th_TH", &status);
518
log_verbose("Thai locale testing back and forth with normalization\n");
519
iter=ucol_openElements(coll, source, u_strlen(source), &status);
520
if(U_FAILURE(status)){
521
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
522
myErrorName(status));
526
ucol_closeElements(iter);
536
coll = ucol_open("ja_JP", &status);
537
log_verbose("Japanese locale testing back and forth with normalization\n");
538
iter=ucol_openElements(coll, source, u_strlen(source), &status);
539
if(U_FAILURE(status)){
540
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
541
myErrorName(status));
545
ucol_closeElements(iter);
554
* Test for getOffset() and setOffset()
556
static void TestOffset()
558
UErrorCode status= U_ZERO_ERROR;
559
UCollator *en_us=NULL;
560
UCollationElements *iter, *pristine;
563
int32_t orderLength=0;
565
test1=(UChar*)malloc(sizeof(UChar) * 50);
566
test2=(UChar*)malloc(sizeof(UChar) * 50);
567
u_uastrcpy(test1, "What subset of all possible test cases?");
568
u_uastrcpy(test2, "has the highest probability of detecting");
569
en_us = ucol_open("en_US", &status);
570
log_verbose("Testing getOffset and setOffset for CollationElements\n");
571
iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
572
if(U_FAILURE(status)){
573
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
574
myErrorName(status));
578
/* Run all the way through the iterator, then get the offset */
580
orders = getOrders(iter, &orderLength);
582
offset = ucol_getOffset(iter);
584
if (offset != u_strlen(test1))
586
log_err("offset at end != length %d vs %d\n", offset,
590
/* Now set the offset back to the beginning and see if it works */
591
pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
592
if(U_FAILURE(status)){
593
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
594
myErrorName(status));
598
status = U_ZERO_ERROR;
600
ucol_setOffset(iter, 0, &status);
601
if (U_FAILURE(status))
603
log_err("setOffset failed. %s\n", myErrorName(status));
607
assertEqual(iter, pristine);
610
ucol_closeElements(pristine);
611
ucol_closeElements(iter);
614
/* testing offsets in normalization buffer */
620
ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
621
iter = ucol_openElements(en_us, test1, 4, &status);
622
if(U_FAILURE(status)){
623
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
624
myErrorName(status));
630
while (ucol_next(iter, &status) != UCOL_NULLORDER &&
634
if (ucol_getOffset(iter) != 1) {
635
log_err("ERROR: Offset of iteration should be 0\n");
639
if (ucol_getOffset(iter) != 4) {
640
log_err("ERROR: Offset of iteration should be 4\n");
644
if (ucol_getOffset(iter) != 3) {
645
log_err("ERROR: Offset of iteration should be 3\n");
653
while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
657
if (ucol_getOffset(iter) != 3) {
658
log_err("ERROR: Offset of iteration should be 3\n");
662
if (ucol_getOffset(iter) != 0) {
663
log_err("ERROR: Offset of iteration should be 0\n");
669
if(U_FAILURE(status)){
670
log_err("ERROR: in iterating collation elements %s\n",
671
myErrorName(status));
674
ucol_closeElements(iter);
683
static void TestSetText()
686
UErrorCode status = U_ZERO_ERROR;
687
UCollator *en_us=NULL;
688
UCollationElements *iter1, *iter2;
689
test1=(UChar*)malloc(sizeof(UChar) * 50);
690
test2=(UChar*)malloc(sizeof(UChar) * 50);
691
u_uastrcpy(test1, "What subset of all possible test cases?");
692
u_uastrcpy(test2, "has the highest probability of detecting");
693
en_us = ucol_open("en_US", &status);
694
log_verbose("testing setText for Collation elements\n");
695
iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
696
if(U_FAILURE(status)){
697
log_err("ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
698
myErrorName(status));
702
iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
703
if(U_FAILURE(status)){
704
log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
705
myErrorName(status));
710
/* Run through the second iterator just to exercise it */
711
c = ucol_next(iter2, &status);
714
while ( ++i < 10 && (c != UCOL_NULLORDER))
716
if (U_FAILURE(status))
718
log_err("iter2->next() returned an error. %s\n", myErrorName(status));
719
ucol_closeElements(iter2);
720
ucol_closeElements(iter1);
725
c = ucol_next(iter2, &status);
728
/* Now set it to point to the same string as the first iterator */
729
ucol_setText(iter2, test1, u_strlen(test1), &status);
730
if (U_FAILURE(status))
732
log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
736
assertEqual(iter1, iter2);
739
/* Now set it to point to a null string with fake length*/
740
ucol_setText(iter2, NULL, 2, &status);
741
if (U_FAILURE(status))
743
log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
747
if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
748
log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
752
ucol_closeElements(iter2);
753
ucol_closeElements(iter1);
761
static void backAndForth(UCollationElements *iter)
763
/* Run through the iterator forwards and stick it into an array */
765
UErrorCode status = U_ZERO_ERROR;
766
int32_t orderLength = 0;
768
orders= getOrders(iter, &orderLength);
771
/* Now go through it backwards and make sure we get the same values */
775
/* synwee : changed */
776
while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
778
if (o != orders[-- index])
784
while (index > 0 && orders[-- index] == 0)
787
if (o != orders[index])
789
log_err("Mismatch at index : 0x%x\n", index);
797
while (index != 0 && orders[index - 1] == 0) {
803
log_err("Didn't get back to beginning - index is %d\n", index);
807
if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER)
809
log_err("Error at %x\n", o);
812
if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
814
log_err("Error at %x\n", o);
823
* Test for getMaxExpansion()
825
static void TestMaxExpansion()
827
UErrorCode status = U_ZERO_ERROR;
828
UCollator *coll ;/*= ucol_open("en_US", &status);*/
830
UChar supplementary[2] = {0xD800, 0xDC00};
832
UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/
833
uint32_t temporder = 0;
836
u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
837
coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
838
UCOL_DEFAULT_STRENGTH,NULL, &status);
839
iter = ucol_openElements(coll, &ch, 1, &status);
841
while (ch < 0xFFFF && U_SUCCESS(status)) {
848
ucol_setText(iter, &ch, 1, &status);
849
order = ucol_previous(iter, &status);
851
/* thai management */
853
order = ucol_previous(iter, &status);
855
while (U_SUCCESS(status) &&
856
ucol_previous(iter, &status) != UCOL_NULLORDER) {
860
size = ucol_getMaxExpansion(iter, order);
861
if (U_FAILURE(status) || size < count) {
862
log_err("Failure at codepoint %d, maximum expansion count < %d\n",
867
/* testing for exact max expansion */
872
ucol_setText(iter, &ch, 1, &status);
873
order = ucol_previous(iter, &status);
874
size = ucol_getMaxExpansion(iter, order);
875
if (U_FAILURE(status) || size != 1) {
876
log_err("Failure at codepoint %d, maximum expansion count < %d\n",
883
ucol_setText(iter, &ch, 1, &status);
884
temporder = ucol_previous(iter, &status);
886
if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
887
log_err("Failure at codepoint %d, maximum expansion count != %d\n",
892
ucol_setText(iter, &ch, 1, &status);
893
temporder = ucol_previous(iter, &status);
895
if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
896
log_err("Failure at codepoint %d, maximum expansion count != %d\n",
900
ucol_setText(iter, supplementary, 2, &status);
901
sorder = ucol_previous(iter, &status);
903
if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
904
log_err("Failure at codepoint %d, maximum expansion count < %d\n",
911
ucol_setText(iter, &ch, 1, &status);
912
temporder = ucol_previous(iter, &status);
913
if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
914
log_err("Failure at codepoint %d, maximum expansion count > %d\n",
918
ucol_closeElements(iter);
921
/* testing special jamo &a<\u1160 */
933
coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
934
UCOL_DEFAULT_STRENGTH,NULL, &status);
935
iter = ucol_openElements(coll, &ch, 1, &status);
937
temporder = ucol_previous(iter, &status);
938
if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
939
log_err("Failure at codepoint %d, maximum expansion count > %d\n",
943
ucol_closeElements(iter);
949
* Return an integer array containing all of the collation orders
950
* returned by calls to next on the specified iterator
952
static int32_t* getOrders(UCollationElements *iter, int32_t *orderLength)
956
int32_t maxSize = 100;
959
int32_t *orders =(int32_t*)malloc(sizeof(int32_t) * maxSize);
960
status= U_ZERO_ERROR;
963
while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER)
968
temp = (int32_t*)malloc(sizeof(int32_t) * maxSize);
970
memcpy(temp, orders, size * sizeof(int32_t));
976
orders[size++] = order;
983
temp = (int32_t*)malloc(sizeof(int32_t) * size);
987
temp = (int32_t*)malloc(sizeof(int32_t) * size);
988
memcpy(temp, orders, size * sizeof(int32_t));
1000
static void assertEqual(UCollationElements *i1, UCollationElements *i2)
1004
UErrorCode status = U_ZERO_ERROR;
1008
c1 = ucol_next(i1, &status);
1009
c2 = ucol_next(i2, &status);
1013
log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count, c1, c2);
1019
while (c1 != UCOL_NULLORDER);
1023
* Testing iterators with extremely small buffers
1025
static void TestSmallBuffer()
1027
UErrorCode status = U_ZERO_ERROR;
1029
UCollationElements *testiter,
1032
int32_t *testorders,
1036
UChar str[] = {0x300, 0x31A, 0};
1038
creating a long string of decomposable characters,
1039
since by default the writable buffer is of size 256
1041
while (count < 500) {
1042
if ((count & 1) == 0) {
1043
teststr[count ++] = 0x300;
1046
teststr[count ++] = 0x31A;
1050
coll = ucol_open("th_TH", &status);
1051
testiter = ucol_openElements(coll, teststr, 500, &status);
1052
iter = ucol_openElements(coll, str, 2, &status);
1054
orders = getOrders(iter, &count);
1056
log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
1060
this will rearrange the string data to 250 characters of 0x300 first then
1061
250 characters of 0x031A
1063
testorders = getOrders(testiter, &count);
1066
log_err("Error decomposition does not give the right sized collation elements\n");
1069
while (count != 0) {
1070
/* UCA collation element for 0x0F76 */
1071
if ((count > 250 && testorders[-- count] != orders[1]) ||
1072
(count <= 250 && testorders[-- count] != orders[0])) {
1073
log_err("Error decomposition does not give the right collation element at %d count\n", count);
1081
ucol_reset(testiter);
1082
/* ensures that the writable buffer was cleared */
1083
if (testiter->iteratordata_.writableBuffer !=
1084
testiter->iteratordata_.stackWritableBuffer) {
1085
log_err("Error Writable buffer in collation element iterator not reset\n");
1088
/* ensures closing of elements done properly to clear writable buffer */
1089
ucol_next(testiter, &status);
1090
ucol_next(testiter, &status);
1091
ucol_closeElements(testiter);
1092
ucol_closeElements(iter);
1097
* Snippets of code from genuca
1099
static int32_t hex2num(char hex) {
1100
if(hex>='0' && hex <='9') {
1102
} else if(hex>='a' && hex<='f') {
1104
} else if(hex>='A' && hex<='F') {
1112
* Getting codepoints from a string
1113
* @param str character string containing codepoints separated by space and ended
1115
* @param codepoints array for storage, assuming size > 5
1116
* @return position at the end of the codepoint section
1118
static char * getCodePoints(char *str, UChar *codepoints) {
1119
char *pStartCP = str;
1120
char *pEndCP = str + 4;
1122
*codepoints = (UChar)((hex2num(*pStartCP) << 12) |
1123
(hex2num(*(pStartCP + 1)) << 8) |
1124
(hex2num(*(pStartCP + 2)) << 4) |
1125
(hex2num(*(pStartCP + 3))));
1127
while (*pEndCP != ';') {
1128
pStartCP = pEndCP + 1;
1129
*codepoints = (UChar)((hex2num(*pStartCP) << 12) |
1130
(hex2num(*(pStartCP + 1)) << 8) |
1131
(hex2num(*(pStartCP + 2)) << 4) |
1132
(hex2num(*(pStartCP + 3))));
1134
pEndCP = pStartCP + 4;
1141
* Snippets of code from genuca
1144
readElement(char **from, char *to, char separator, UErrorCode *status)
1146
if (U_SUCCESS(*status)) {
1149
while (**from != separator) {
1150
if (**from != ' ') {
1151
*(buffer+i++) = **from;
1165
* Snippets of code from genuca
1168
getSingleCEValue(char *primary, char *secondary, char *tertiary,
1171
if (U_SUCCESS(*status)) {
1173
char primsave = '\0';
1174
char secsave = '\0';
1175
char tersave = '\0';
1176
char *primend = primary+4;
1177
char *secend = secondary+2;
1178
char *terend = tertiary+2;
1183
if (uprv_strlen(primary) > 4) {
1184
primsave = *primend;
1188
if (uprv_strlen(secondary) > 2) {
1193
if (uprv_strlen(tertiary) > 2) {
1198
primvalue = (*primary!='\0')?strtoul(primary, &primend, 16):0;
1199
secvalue = (*secondary!='\0')?strtoul(secondary, &secend, 16):0;
1200
tervalue = (*tertiary!='\0')?strtoul(tertiary, &terend, 16):0;
1201
if(primvalue <= 0xFF) {
1205
value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1206
| ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1207
| (tervalue & UCOL_TERTIARYORDERMASK);
1209
if(primsave!='\0') {
1210
*primend = primsave;
1224
* Getting collation elements generated from a string
1225
* @param str character string containing collation elements contained in [] and
1226
* separated by space
1227
* @param ce array for storage, assuming size > 20
1228
* @param status error status
1229
* @return position at the end of the codepoint section
1231
static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1232
char *pStartCP = uprv_strchr(str, '[');
1236
char secondary[100];
1239
while (*pStartCP == '[') {
1240
uint32_t primarycount = 0;
1241
uint32_t secondarycount = 0;
1242
uint32_t tertiarycount = 0;
1244
pEndCP = strchr(pStartCP, ']');
1245
if(pEndCP == NULL) {
1250
primarycount = readElement(&pStartCP, primary, ',', status);
1251
secondarycount = readElement(&pStartCP, secondary, ',', status);
1252
tertiarycount = readElement(&pStartCP, tertiary, ']', status);
1254
/* I want to get the CEs entered right here, including continuation */
1255
ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1256
if (U_FAILURE(*status)) {
1260
while (2 * CEi < primarycount || CEi < secondarycount ||
1261
CEi < tertiarycount) {
1262
uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1263
if (2 * CEi < primarycount) {
1264
value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1265
value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1268
if (2 * CEi + 1 < primarycount) {
1269
value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1270
value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1273
if (CEi < secondarycount) {
1274
value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1275
value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1278
if (CEi < tertiarycount) {
1279
value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1280
value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1284
ces[count ++] = value;
1287
pStartCP = pEndCP + 1;
1294
* Getting the FractionalUCA.txt file stream
1296
static FileStream * getFractionalUCA(void)
1299
char backupPath[256];
1300
FileStream *result = NULL;
1302
/* Look inside ICU_DATA first */
1303
uprv_strcpy(newPath, u_getDataDirectory());
1304
uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1305
uprv_strcat(newPath, "FractionalUCA.txt");
1307
/* As a fallback, try to guess where the source data was located
1308
* at the time ICU was built, and look there.
1310
#if defined (U_TOPSRCDIR)
1311
strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");
1313
strcpy(backupPath, u_getDataDirectory());
1314
strcat(backupPath, ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1316
strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1318
result = T_FileStream_open(newPath, "rb");
1320
if (result == NULL) {
1321
result = T_FileStream_open(backupPath, "rb");
1322
if (result == NULL) {
1323
log_err("Failed to open either %s or %s\n", newPath, backupPath);
1330
* Testing the CEs returned by the iterator
1332
static void TestCEs() {
1333
FileStream *file = NULL;
1336
UChar codepoints[5];
1338
UErrorCode status = U_ZERO_ERROR;
1339
UCollator *coll = ucol_open("", &status);
1341
if (U_FAILURE(status)) {
1342
log_err("Error in opening root collator\n");
1346
file = getFractionalUCA();
1349
log_err("*** unable to open input FractionalUCA.txt file ***\n");
1353
while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1355
UCollationElements *iter;
1356
/* skip this line if it is empty or a comment or is a return value
1357
or start of some variable section */
1358
if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1359
line[0] == 0x000D || line[0] == '[') {
1363
str = getCodePoints(line, codepoints);
1364
getCEs(str, ces, &status);
1365
if (U_FAILURE(status)) {
1366
log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1369
iter = ucol_openElements(coll, codepoints, -1, &status);
1370
if (U_FAILURE(status)) {
1371
log_err("Error in opening collation elements\n");
1375
uint32_t ce = (uint32_t)ucol_next(iter, &status);
1376
if (ce == 0xFFFFFFFF) {
1379
if (ce != ces[count] || U_FAILURE(status)) {
1380
log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1383
if (ces[count] == 0) {
1388
ucol_closeElements(iter);
1391
T_FileStream_close(file);
1396
* Testing the discontiguous contractions
1398
/*
 * Test for discontiguous contractions.
 *
 * A collator is opened from a rule string that defines the contractions
 * AB, X+U+0300, ABC and X+U+0300+U+0315 after 'z'. For every entry of src[]
 * the unescaped string is set as the text of `iter`; the tgt[] entry with the
 * same index is unescaped and walked in space-separated (0x20) pieces, each
 * piece being set as the text of `resultiter`. The collation elements read
 * from `resultiter` must equal, in order, the elements read from `iter`;
 * any difference or failing status is reported through log_err.
 *
 * NOTE(review): a number of original lines (declarations of rule, str, tstr,
 * coll, count, size, s and ce, the closing braces, early returns and the loop
 * bookkeeping) are not visible in this listing; the description above covers
 * only the statements that are shown.
 */
static void TestDiscontiguos() {
1399
const char *rulestr =
1400
"&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1402
/* Unescape the rule into the UChar buffer `rule` (capacity passed: 50). */
int rulelen = u_unescape(rulestr, rule, 50);
1403
/* Escaped source strings; each is unescaped and given to `iter`. */
const char *src[] = {
1404
"ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1405
/* base character blocked */
1406
"XD\\u0300", "XD\\u0300\\u0315",
1407
/* non blocking combining character */
1408
"X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1409
/* blocking combining character */
1410
"X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1411
/* contraction prefix */
1412
"ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1413
"X\\u0300\\u031A\\u0315",
1414
/* ends not with a contraction character */
1415
"X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1416
"X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1418
/* Expected grouping for the src[] entry with the same index: spaces
   separate the pieces that are handed to `resultiter` one at a time. */
const char *tgt[] = {
1419
/* non blocking combining character */
1420
"A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1421
/* base character blocked */
1422
"X D \\u0300", "X D \\u0300\\u0315",
1423
/* non blocking combining character */
1424
"X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1425
/* blocking combining character */
1426
"X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1427
/* contraction prefix */
1428
"AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1429
"X\\u0300 \\u031A \\u0315",
1430
/* ends not with a contraction character */
1431
"X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1432
"X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1436
UErrorCode status = U_ZERO_ERROR;
1438
UCollationElements *iter;
1439
UCollationElements *resultiter;
1441
/* Build the collator from the rule; UCOL_OFF is passed as the third
   argument and NULL as the parse-error argument. */
coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1442
/* Both iterators are opened over the first UChar of the rule only; their
   text is replaced with ucol_setText before any element is read. */
iter = ucol_openElements(coll, rule, 1, &status);
1443
resultiter = ucol_openElements(coll, rule, 1, &status);
1445
/* One status check covers the collator and both iterator creations. */
if (U_FAILURE(status)) {
1446
log_err("Error opening collation rules\n");
1450
while (count < size) {
1453
/* Unescape the current source string (capacity passed: 20). */
int strLen = u_unescape(src[count], str, 20);
1456
ucol_setText(iter, str, strLen, &status);
1457
if (U_FAILURE(status)) {
1458
log_err("Error opening collation iterator\n");
1462
/* Unescape the expected, space-separated target (capacity passed: 20). */
u_unescape(tgt[count], tstr, 20);
1465
log_verbose("count %d\n", count);
1469
/* Locate the next space, i.e. the end of the current target piece. */
UChar *e = u_strchr(s, 0x20);
1473
/* Feed only the piece [s, e) to the reference iterator. */
ucol_setText(resultiter, s, e - s, &status);
1474
ce = ucol_next(resultiter, &status);
1475
if (U_FAILURE(status)) {
1476
log_err("Error manipulating collation iterator\n");
1479
/* Every element of the target piece must match the next element read
   from the source iterator. */
while (ce != UCOL_NULLORDER) {
1480
if (ce != (uint32_t)ucol_next(iter, &status) ||
1481
U_FAILURE(status)) {
1482
log_err("Discontiguos contraction test mismatch\n");
1485
ce = ucol_next(resultiter, &status);
1486
if (U_FAILURE(status)) {
1487
log_err("Error getting next collation element\n");
1500
/* Release both iterators. */
ucol_closeElements(resultiter);
1501
ucol_closeElements(iter);
1505
/*
 * Checks that iterating backwards over a long run of trail surrogates
 * reports a collation element buffer overflow.
 *
 * A collator is opened from the rule "&z < AB". The test string is 'A',
 * followed by UChars whose bytes are all 0xDC, with 'B' stored in the last
 * slot. A single ucol_previous call on that text is expected to return
 * UCOL_NULLORDER with status U_BUFFER_OVERFLOW_ERROR; anything else is
 * reported through log_err.
 *
 * NOTE(review): the declarations of rule and coll, the braces, the early
 * return and the last argument line of ucol_openElements are not visible in
 * this listing.
 */
static void TestCEBufferOverflow()
1507
/* One slot more than UCOL_EXPAND_CE_BUFFER_SIZE. */
UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1508
UErrorCode status = U_ZERO_ERROR;
1511
UCollationElements *iter;
1513
u_uastrcpy(rule, "&z < AB");
1514
coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1515
if (U_FAILURE(status)) {
1516
log_err("Rule based collator not created for testing ce buffer overflow\n");
1519
/* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1520
test. this will cause an overflow in getPrev */
1521
str[0] = 0x0041; /* 'A' */
1522
/*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1523
/* Byte-fill str[1] .. str[UCOL_EXPAND_CE_BUFFER_SIZE] with 0xDC. */
uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1524
/* The last filled slot is then overwritten with 'B'. */
str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */
1525
iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1527
/* Expect the backwards step to fail with a buffer overflow status. */
if (ucol_previous(iter, &status) != UCOL_NULLORDER ||
1528
status != U_BUFFER_OVERFLOW_ERROR) {
1529
log_err("CE buffer expected to overflow with long string of trail surrogates\n");
1531
ucol_closeElements(iter);
1536
* Byte bounds checks. Checks if each byte in data is between upper and lower
1539
/*
 * Byte bounds helper: compares a byte taken from `data` against the range
 * given by `lower` and `upper`.
 *
 * NOTE(review): only the extraction of the lowest byte and one comparison are
 * visible in this listing; the surrounding loop over the remaining bytes and
 * the return statements are not shown, so the exact return contract should be
 * confirmed against the full file.
 */
static UBool checkByteBounds(uint32_t data, char upper, char lower)
1543
/* Lowest byte of data, narrowed to plain char. */
char b = (char)(data & 0xFF);
1544
/* True when b lies outside the closed range [lower, upper].
   NOTE(review): the signedness of plain char is implementation-defined, so
   byte values of 0x80 and above may compare as negative here - confirm. */
if (b > upper || b < lower) {
1554
* Determines case of the string of codepoints.
1555
* If there are multiple codepoints, it has to be treated as a contraction.
1558
/*
 * Classifies the case of the UChar string s of length len and returns one of
 * UCOL_LOWER_CASE, UCOL_UPPER_CASE or UCOL_MIXED_CASE.
 *
 * Three flags (lower, upper, title) record which kinds of characters were
 * seen. From the visible decisions: only-lower or no flag at all gives
 * UCOL_LOWER_CASE; only-upper gives UCOL_UPPER_CASE; a title-case character
 * with len >= 2, or both lower and upper, gives UCOL_MIXED_CASE; otherwise
 * the result depends on u_isupper(s[0]).
 *
 * NOTE(review): the loop that sets the flags, the guard in front of the
 * first return, and the terminator of the commented-out normalization code
 * are not visible in this listing.
 */
static uint8_t getCase(const UChar *s, uint32_t len) {
1559
/* Flags for the kinds of cased characters found in s. */
UBool lower = FALSE;
1560
UBool upper = FALSE;
1561
UBool title = FALSE;
1562
UErrorCode status = U_ZERO_ERROR;
1564
/* Cursor into s. */
const UChar *ps = s;
1567
/* Early exit; presumably guarded by a condition that is not shown here. */
return UCOL_LOWER_CASE;
1585
/* Only lower-case characters, or no cased characters at all. */
if ((lower && !upper && !title) || (!lower && !upper && !title)){
1586
return UCOL_LOWER_CASE;
1588
/* Only upper-case characters. */
if (upper && !lower && !title) {
1589
return UCOL_UPPER_CASE;
1591
/* mix of cases here */
1592
/* len = unorm_normalize(s, len, UNORM_NFKD, 0, str, 256, &status);
1593
if (U_FAILURE(status)) {
1594
log_err("Error normalizing data string\n");
1595
return UCOL_LOWER_CASE;
1598
if ((title && len >= 2) || (lower && upper)) {
1599
return UCOL_MIXED_CASE;
1601
if (u_isupper(s[0])) {
1602
return UCOL_UPPER_CASE;
1604
return UCOL_LOWER_CASE;
1609
* Checking collation element validity given the boundary arguments.
1611
/*
 * Walks the collation elements that coll produces for codepoints (length
 * UChars) and reports, through log_err, any element that breaks one of the
 * structural checks below.
 *
 * coll         - collator used to open the element iterator
 * codepoints   - text to iterate over
 * length       - number of UChars passed to ucol_openElements
 * primarymax   - non-zero primary weights below this value are reported
 * secondarymax - secondary weights from 6 up to and including this value
 *                are reported
 *
 * NOTE(review): the return statements, the declaration of ce and the lines
 * between the two ucol_closeElements calls are not visible in this listing;
 * the two calls presumably belong to separate success and failure exits.
 */
static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
1612
int length, uint32_t primarymax,
1613
uint32_t secondarymax)
1615
UErrorCode status = U_ZERO_ERROR;
1616
UCollationElements *iter = ucol_openElements(coll, codepoints, length,
1621
UBool upper = FALSE;
1622
UBool lower = FALSE;
1625
if (U_FAILURE(status)) {
1626
log_err("Error creating iterator for testing validity\n");
1629
ce = ucol_next(iter, &status);
1631
/* Examine every collation element until the iterator is exhausted. */
while (ce != UCOL_NULLORDER) {
1633
/* Split the element into its three weights. */
uint32_t primary = UCOL_PRIMARYORDER(ce);
1634
uint32_t secondary = UCOL_SECONDARYORDER(ce);
1635
uint32_t tertiary = UCOL_TERTIARYORDER(ce);
1636
/* uint32_t scasebits = tertiary & 0xC0;*/
1638
/* A zero weight on one level combined with a non-zero weight on the
   level above it is reported; tertiary values of 0xC0 and above are
   exempt from the second test. */
if ((tertiary == 0 && secondary != 0) ||
1639
(tertiary < 0xC0 && secondary == 0 && primary != 0)) {
1640
/* n-1th level is not zero when the nth level is
1641
except for continuations, this is wrong */
1642
log_err("Lower level weight not 0 when high level weight is 0\n");
1646
/* checks if any byte is illegal ie = 01 02 03. */
1647
if (checkByteBounds(ce, 0x3, 0x1)) {
1648
log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
1652
/* Primary weight must be zero or at least primarymax, and must stay
   below 0xFF00 unless the element is a continuation. */
if ((primary != 0 && primary < primarymax) || (primary >= 0xFF00 && !isContinuation(ce))) {
1653
log_err("UCA primary weight out of bounds\n");
1656
/* case matching not done since data generated by ken */
1658
/* Secondary weights in the range 6 .. secondarymax are reported. */
if (secondary >= 6 && secondary <= secondarymax) {
1659
log_err("Secondary weight out of range\n");
1665
ce = ucol_next(iter, &status);
1667
ucol_closeElements(iter);
1670
ucol_closeElements(iter);
1674
/*
 * Runs checkCEValidity over three sets of input:
 *   1. the code point sequences listed in the FractionalUCA data file, using
 *      the en_US collator (bounds 5 and 86);
 *   2. single code units below 0xFFFF for which u_isdefined is true, with
 *      the same collator and bounds;
 *   3. the character sequences found by tokenizing the rules of each
 *      tailored locale in locale[] (bounds 4 and 85).
 *
 * NOTE(review): the declarations of line, count, current, chLen, exLen and
 * specs, the loop headers over the code units and over locale[], the braces
 * and the early returns are not visible in this listing.
 */
static void TestCEValidity()
1676
/* testing UCA collation elements */
1677
UErrorCode status = U_ZERO_ERROR;
1678
/* en_US has no tailorings */
1679
UCollator *coll = ucol_open("en_US", &status);
1680
/* tailored locales */
1681
char locale[][6] = {"fr_FR\0", "ko_KR\0", "sh_YU\0", "th_TH\0", "zh_CN\0"};
1682
FileStream *file = getFractionalUCA();
1684
/* Scratch buffer for one code point sequence: 5 UChars including the
   terminating zero. */
UChar codepoints[5];
1686
UParseError parseError;
1687
if (U_FAILURE(status)) {
1688
log_err("en_US collator creation failed\n");
1691
log_verbose("Testing UCA elements\n");
1693
log_err("Fractional UCA data can not be opened\n");
1697
/* Part 1: every data line of the FractionalUCA file. */
while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1698
/* Lines that are empty, comments, line ends or start with '[' are
   tested for here; presumably they are skipped. */
if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1699
line[0] == 0x000D || line[0] == '[') {
1703
getCodePoints(line, codepoints);
1704
checkCEValidity(coll, codepoints, u_strlen(codepoints), 5, 86);
1707
log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1709
/* Part 2: single code units; the initialisation and increment of
   codepoints[0] are not shown in this listing. */
while (codepoints[0] < 0xFFFF) {
1710
if (u_isdefined((UChar32)codepoints[0])) {
1711
checkCEValidity(coll, codepoints, 1, 5, 86);
1718
/* testing tailored collation elements */
1719
log_verbose("Testing tailored elements\n");
1721
const UChar *rules = NULL,
1723
UChar *rulesCopy = NULL;
1724
int32_t ruleLen = 0;
1726
uint32_t chOffset = 0;
1728
uint32_t exOffset = 0;
1730
uint32_t prefixOffset = 0;
1731
uint32_t prefixLen = 0;
1732
UBool startOfRules = TRUE;
1735
UColTokenParser src;
1736
uint32_t strength = 0;
1739
/* Part 3: open the collator of the current tailored locale. */
coll = ucol_open(locale[count], &status);
1740
if (U_FAILURE(status)) {
1741
log_err("%s collator creation failed\n", locale[count]);
1746
rules = ucol_getRules(coll, &ruleLen);
1749
/* Work on a private copy of the rules that has
   UCOL_TOK_EXTRA_RULE_SPACE_SIZE additional UChars after the rule text.
   NOTE(review): no check of the allocation result is visible here. */
rulesCopy = (UChar *)uprv_malloc((ruleLen +
1750
UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1751
uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1752
/* Point the token parser at the copy and at its extra space. */
src.source = src.current = rulesCopy;
1753
src.end = rulesCopy + ruleLen;
1754
src.extraCurrent = src.end;
1755
src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1757
/* Tokenize the rules; each parsed token's character run is validated. */
while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
1758
strength = src.parsedToken.strength;
1759
chOffset = src.parsedToken.charsOffset;
1760
chLen = src.parsedToken.charsLen;
1761
exOffset = src.parsedToken.extensionOffset;
1762
exLen = src.parsedToken.extensionLen;
1763
prefixOffset = src.parsedToken.prefixOffset;
1764
prefixLen = src.parsedToken.prefixLen;
1765
specs = src.parsedToken.flags;
1767
startOfRules = FALSE;
1768
/* Copy the token's characters into the scratch buffer and terminate.
   NOTE(review): no visible check that chLen is below 5, the size of
   codepoints; the copy also reads from rules rather than rulesCopy -
   confirm that both are intended. */
uprv_memcpy(codepoints, rules + chOffset,
1769
chLen * sizeof(UChar));
1770
codepoints[chLen] = 0;
1771
checkCEValidity(coll, codepoints, u_strlen(codepoints), 4, 85);
1773
uprv_free(rulesCopy);
1779
T_FileStream_close(file);
1782
/*
 * Reports an invalid sort key through log_err: first the code units of the
 * input in hexadecimal, then the bytes of the sort key in hexadecimal.
 *
 * codepoints - input text whose sort key was rejected
 * length     - number of UChars to print from codepoints
 * sortkey    - sort key bytes
 * sklen      - number of sort key bytes to print
 *
 * NOTE(review): the declaration of count and the statements that advance
 * codepoints, length and count are not visible in this listing.
 */
static void printSortKeyError(const UChar *codepoints, int length,
1783
uint8_t *sortkey, int sklen)
1786
log_err("Sortkey not valid for ");
1787
/* Dump the input code units. */
while (length > 0) {
1788
log_err("0x%04x ", *codepoints);
1792
log_err("\nSortkey : ");
1793
/* Dump the sort key bytes. */
while (count < sklen) {
1794
log_err("0x%02x ", sortkey[count]);
1801
* Checking sort key validity for all levels
1803
/*
 * Generates the sort key of codepoints at each strength listed in
 * strength[] and checks the bytes of the key: a byte of 2, or a byte of 3
 * after at least one 01 separator (except at the last strength index), is
 * reported, and the key must end right after its terminating zero with the
 * number of 01 separators equal to index + caselevel. Failures are reported
 * with printSortKeyError.
 *
 * The collator's case-level attribute and strength are modified by this
 * function.
 *
 * NOTE(review): the length parameter line, the declarations of caselevel,
 * index, count, count01 and sklen, the fifth strength value, the braces and
 * the return statements are not visible in this listing.
 */
static UBool checkSortKeyValidity(UCollator *coll,
1804
const UChar *codepoints,
1807
UErrorCode status = U_ZERO_ERROR;
1808
UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1809
UCOL_TERTIARY, UCOL_QUATERNARY,
1811
int strengthlen = 5;
1815
/* NOTE(review): with the bound "caselevel < 1" only caselevel 0 can enter
   the loop if caselevel starts at 0, which would leave the UCOL_ON setting
   below unused - confirm against the full file. */
while (caselevel < 1) {
1816
if (caselevel == 0) {
1817
ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1820
ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1823
/* Try every strength in turn. */
while (index < strengthlen) {
1826
/* NOTE(review): fixed 128-byte buffer; no visible handling of a returned
   sklen larger than 128 - confirm. */
uint8_t sortkey[128];
1829
ucol_setStrength(coll, strength[index]);
1830
sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1831
/* Scan the key up to its terminating zero byte. */
while (sortkey[count] != 0) {
1832
/* Byte 2 is always reported; byte 3 is reported once a 01 separator has
   been seen, unless this is the last strength index (4). */
if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && index != 4)) {
1833
printSortKeyError(codepoints, length, sortkey, sklen);
1836
/* 01 bytes are tested for here; presumably count01 is incremented. */
if (sortkey[count] == 1) {
1842
/* The zero byte must be the last byte of the key, and the separator
   count must match the strength index plus the case level. */
if (count + 1 != sklen || (count01 != index + caselevel)) {
1843
printSortKeyError(codepoints, length, sortkey, sklen);
1853
/*
 * Runs checkSortKeyValidity over three sets of input:
 *   1. the code point sequences listed in the FractionalUCA data file, using
 *      the en_US collator;
 *   2. single code units below 0xFFFF for which u_isdefined is true, with
 *      the same collator;
 *   3. the character sequences found by tokenizing the rules of each
 *      tailored locale in locale[].
 *
 * NOTE(review): the declarations of line, count, current, chLen, exLen and
 * specs, the loop headers over the code units and over locale[], the braces
 * and the early returns are not visible in this listing.
 */
static void TestSortKeyValidity(void)
1855
/* testing UCA collation elements */
1856
UErrorCode status = U_ZERO_ERROR;
1857
/* en_US has no tailorings */
1858
UCollator *coll = ucol_open("en_US", &status);
1859
/* tailored locales */
1860
char locale[][6] = {"fr_FR\0", "ko_KR\0", "sh_YU\0", "th_TH\0", "zh_CN\0"};
1861
FileStream *file = getFractionalUCA();
1863
/* Scratch buffer for one code point sequence: 5 UChars including the
   terminating zero. */
UChar codepoints[5];
1865
UParseError parseError;
1866
if (U_FAILURE(status)) {
1867
log_err("en_US collator creation failed\n");
1870
log_verbose("Testing UCA elements\n");
1872
log_err("Fractional UCA data can not be opened\n");
1876
/* Part 1: every data line of the FractionalUCA file. */
while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1877
/* Lines that are empty, comments, line ends or start with '[' are
   tested for here; presumably they are skipped. */
if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1878
line[0] == 0x000D || line[0] == '[') {
1882
getCodePoints(line, codepoints);
1883
checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1886
log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1889
/* Part 2: single code units; the initialisation and increment of
   codepoints[0] are not shown in this listing. */
while (codepoints[0] < 0xFFFF) {
1890
if (u_isdefined((UChar32)codepoints[0])) {
1891
checkSortKeyValidity(coll, codepoints, 1);
1898
/* testing tailored collation elements */
1899
log_verbose("Testing tailored elements\n");
1901
const UChar *rules = NULL,
1903
UChar *rulesCopy = NULL;
1904
int32_t ruleLen = 0;
1906
uint32_t chOffset = 0;
1908
uint32_t exOffset = 0;
1910
uint32_t prefixOffset = 0;
1911
uint32_t prefixLen = 0;
1912
UBool startOfRules = TRUE;
1915
UColTokenParser src;
1916
uint32_t strength = 0;
1919
/* Part 3: open the collator of the current tailored locale. */
coll = ucol_open(locale[count], &status);
1920
if (U_FAILURE(status)) {
1921
log_err("%s collator creation failed\n", locale[count]);
1926
rules = ucol_getRules(coll, &ruleLen);
1929
/* Work on a private copy of the rules that has
   UCOL_TOK_EXTRA_RULE_SPACE_SIZE additional UChars after the rule text.
   NOTE(review): no check of the allocation result is visible here. */
rulesCopy = (UChar *)uprv_malloc((ruleLen +
1930
UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1931
uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1932
/* Point the token parser at the copy and at its extra space. */
src.source = src.current = rulesCopy;
1933
src.end = rulesCopy + ruleLen;
1934
src.extraCurrent = src.end;
1935
src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1937
/* Tokenize the rules; each parsed token's character run is validated. */
while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
1938
strength = src.parsedToken.strength;
1939
chOffset = src.parsedToken.charsOffset;
1940
chLen = src.parsedToken.charsLen;
1941
exOffset = src.parsedToken.extensionOffset;
1942
exLen = src.parsedToken.extensionLen;
1943
prefixOffset = src.parsedToken.prefixOffset;
1944
prefixLen = src.parsedToken.prefixLen;
1945
specs = src.parsedToken.flags;
1947
startOfRules = FALSE;
1948
/* Copy the token's characters into the scratch buffer and terminate.
   NOTE(review): no visible check that chLen is below 5, the size of
   codepoints; the copy also reads from rules rather than rulesCopy -
   confirm that both are intended. */
uprv_memcpy(codepoints, rules + chOffset,
1949
chLen * sizeof(UChar));
1950
codepoints[chLen] = 0;
1951
checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1953
uprv_free(rulesCopy);
1959
T_FileStream_close(file);