1
/********************************************************************
3
* Copyright (c) 1997-2001, International Business Machines Corporation and
4
* others. All Rights Reserved.
5
********************************************************************/
9
* Modification History:
11
* Date Name Description
12
* 05/17/99 stephen Creation (ported from java)
13
* 09/24/99 stephen Added new test for data split on decompression.
14
*******************************************************************************
20
#include "unicode/scsu.h"
21
#include "unicode/ustring.h"
22
#include "unicode/utypes.h"
27
#ifdef ICU_SCSU_USE_DEPRECATES
33
/* Yields the smaller of a and b.  Each argument is parenthesized so that an
   operand containing a lower-precedence operator (e.g. MIN(x & 1, y)) is
   compared as a whole.  An argument may still be evaluated more than once,
   so do not pass expressions with side effects. */
#define MIN(a,b) ((a) < (b) ? (a) : (b))
39
/* Yields the larger of a and b.  Each argument is parenthesized so that an
   operand containing a lower-precedence operator (e.g. MAX(x & 3, y)) is
   compared as a whole.  An argument may still be evaluated more than once,
   so do not pass expressions with side effects. */
#define MAX(a,b) ((a) > (b) ? (a) : (b))
42
/* Compression modes */
43
#define SINGLEBYTEMODE 0
47
/* Single-byte mode tags */
49
/* 0x0C is a reserved value*/
50
#define SRESERVED 0x0C
81
/* Unicode mode tags */
101
#define UDEFINEX 0xF1
106
return c - 0x30 - (c >= 0x41 ? (c >= 0x61 ? 39 : 7) : 0);
110
unescape(const char *s)
115
retval = (UChar*) calloc(uprv_strlen(s) + 1, sizeof(UChar));
117
log_err("calloc error at line %d - memory error..\n", __LINE__);
118
return 0; /* flag an error */
127
value = digitvalue(*s++);
129
value += digitvalue(*s++);
131
value += digitvalue(*s++);
133
value += digitvalue(*s++);
135
*alias++ = (UChar)value;
147
printChars(const UChar *chars,
152
for(i = 0; i < len; i++) {
153
printf("%#x ", chars[i]);
161
printChars2(const UChar *chars,
166
for(i = 0; i < len; i++) {
167
if(chars[i] < 0x0020 || chars[i] > 0x007E)
168
printf("[%#x]", chars[i]);
170
printf("%c", chars[i]);
177
printBytes(const uint8_t *byteBuffer,
180
int32_t curByteIndex = 0;
181
int32_t byteBufferLimit = len;
182
int32_t mode = SINGLEBYTEMODE;
183
int32_t aByte = 0x00;
185
while(curByteIndex < byteBufferLimit) {
188
while(curByteIndex < byteBufferLimit && mode == SINGLEBYTEMODE) {
189
aByte = byteBuffer[curByteIndex++] & 0xFF;
192
printf("%#x ", aByte);
199
if(curByteIndex < byteBufferLimit)
200
printf("%#x ", byteBuffer[curByteIndex++]);
201
if(curByteIndex < byteBufferLimit)
202
printf("%#x ", byteBuffer[curByteIndex++]);
205
/* switch to Unicode mode*/
211
/* handle all quote tags*/
212
case SQUOTE0: case SQUOTE1: case SQUOTE2: case SQUOTE3:
213
case SQUOTE4: case SQUOTE5: case SQUOTE6: case SQUOTE7:
214
printf("SQUOTE%d ", aByte - SQUOTE0);
215
if(curByteIndex < byteBufferLimit)
216
printf("%#x ", byteBuffer[curByteIndex++]);
219
/* handle all switch tags*/
220
case SCHANGE0: case SCHANGE1: case SCHANGE2: case SCHANGE3:
221
case SCHANGE4: case SCHANGE5: case SCHANGE6: case SCHANGE7:
222
printf("SCHANGE%d ", aByte - SCHANGE0);
225
/* handle all define tags*/
226
case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3:
227
case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7:
228
printf("SDEFINE%d ", aByte - SDEFINE0);
229
if(curByteIndex < byteBufferLimit)
230
printf("%#x ", byteBuffer[curByteIndex++]);
233
/* handle define extended tag*/
236
if(curByteIndex < byteBufferLimit)
237
printf("%#x ", byteBuffer[curByteIndex++]);
238
if(curByteIndex < byteBufferLimit)
239
printf("%#x ", byteBuffer[curByteIndex++]);
247
while(curByteIndex < byteBufferLimit && mode == UNICODEMODE) {
249
aByte = byteBuffer[curByteIndex++] & 0xFF;
252
/* handle all define tags*/
253
case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3:
254
case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7:
255
printf("UDEFINE%d ", aByte - UDEFINE0);
256
if(curByteIndex < byteBufferLimit)
257
printf("%#x ", byteBuffer[curByteIndex++]);
258
mode = SINGLEBYTEMODE;
261
/* handle define extended tag*/
264
if(curByteIndex < byteBufferLimit)
265
printf("%#x ", byteBuffer[curByteIndex++]);
266
if(curByteIndex < byteBufferLimit)
267
printf("%#x ", byteBuffer[curByteIndex++]);
270
/* handle all switch tags*/
271
case UCHANGE0: case UCHANGE1: case UCHANGE2: case UCHANGE3:
272
case UCHANGE4: case UCHANGE5: case UCHANGE6: case UCHANGE7:
273
printf("UCHANGE%d ", aByte - UCHANGE0);
274
mode = SINGLEBYTEMODE;
280
if(curByteIndex < byteBufferLimit)
281
printf("%#x ", byteBuffer[curByteIndex++]);
282
if(curByteIndex < byteBufferLimit)
283
printf("%#x ", byteBuffer[curByteIndex++]);
287
printf("%#x ", aByte);
288
if(curByteIndex < byteBufferLimit)
289
printf("%#x ", byteBuffer[curByteIndex++]);
296
} /* end switch( mode )*/
303
printDiffs(const UChar *s1,
308
UBool result = FALSE;
313
puts("====================");
314
printf("Length doesn't match: expected %d, got %d\n", s1len, s2len);
316
printChars(s1, s1len);
318
printChars(s2, s2len);
322
len = (s1len < s2len ? s1len : s2len);
323
for(i = 0; i < len; ++i) {
326
puts("====================");
327
printf("First difference at char %d\n", i);
328
printf("Exp. char: %#x\n", s1[i]);
329
printf("Got char : %#x\n", s2[i]);
331
printChars(s1, s1len);
333
printChars(s2, s2len);
342
/* generate a run of characters in a "window" */
344
randomRun(UChar *target,
348
int32_t offset = (int32_t)(0xFFFF * (double)(rand()/(double)RAND_MAX));
351
/* don't overflow 16 bits*/
355
for(i = pos; i < pos + len; i++) {
356
target[i] = (UChar) (offset + (int32_t)(0x7F * (double)(rand()/(double)RAND_MAX)));
360
/* generate a string of characters, with simulated runs of characters */
362
randomChars(int32_t len)
368
result = (UChar*) calloc(len, sizeof(UChar));
370
log_err("calloc error at line %d.\n", __LINE__);
375
runLen = (int32_t)(30 * (double)(rand()/(double)RAND_MAX));
376
if(used + runLen >= len)
378
randomRun(result, used, runLen);
386
myTest(const UChar *chars,
389
UnicodeCompressor myCompressor;
391
/* compression variables */
392
uint8_t *myCompressed = 0;
393
uint8_t *myCTarget = 0;
394
int32_t myCTargetSize = MAX(512, 3*len);
395
const UChar *myCSource = chars;
397
/* decompression variables */
398
UChar *myDecompressed = 0;
399
UChar *myDTarget = 0;
400
int32_t myDTargetSize = MAX(2*len, 2);
401
const uint8_t *myDSource = 0;
403
/* variables for my compressor */
404
int32_t myByteCount = 0;
405
int32_t myCharCount = 0;
408
UErrorCode status = U_ZERO_ERROR;
411
/* allocate memory */
412
myCompressed = (uint8_t*) calloc(myCTargetSize, sizeof(uint8_t));
413
myDecompressed = (UChar*) calloc(myDTargetSize, sizeof(UChar));
415
if(myCompressed == 0 || myDecompressed == 0) {
416
log_err("calloc error at line %d.\n", __LINE__);
420
/* init compressor */
421
scsu_init(&myCompressor);
424
myCTarget = myCompressed;
425
scsu_compress(&myCompressor,
427
myCTarget + myCTargetSize,
432
if(U_FAILURE(status)) {
433
log_err("Failing status code at line %d.\n", __LINE__);
437
myByteCount = (myCTarget - myCompressed);
440
scsu_reset(&myCompressor);
443
myDTarget = myDecompressed;
444
myDSource = myCompressed;
445
scsu_decompress(&myCompressor,
447
myDTarget + myDTargetSize,
449
myDSource + myByteCount,
452
if(U_FAILURE(status)) {
453
log_err("Failing status code at line %d.\n", __LINE__);
457
myCharCount = (myDTarget - myDecompressed);
459
/* find differences */
460
if( printDiffs(chars, len, myDecompressed, myCharCount) == FALSE) {
461
/*printf("%d chars ===> %d bytes ===> %d chars (%f)\n", len,
462
myByteCount, myCharCount, (double)(myByteCount/(myCharCount*2.0)));*/
466
printBytes(myCompressed, myByteCount);
471
free(myDecompressed);
474
/* tweak these; COMPRESSIONBUFFERSIZE must not be less than 4, and
475
DECOMPRESSIONBUFFERSIZE must not be less than 2 */
476
/* capacity, in bytes, of the myCompressionBuffer array that myMultipassTest
   hands to scsu_compress as its output buffer */
#define COMPRESSIONBUFFERSIZE 4
477
/* capacity, in UChars, of the myDecompressionBuffer array that myMultipassTest
   hands to scsu_decompress as its output buffer */
#define DECOMPRESSIONBUFFERSIZE 2
480
myMultipassTest(const UChar *chars,
483
UnicodeCompressor myCompressor;
485
/* compression variables */
486
uint8_t myCompressionBuffer [COMPRESSIONBUFFERSIZE];
487
uint8_t *myCompressed = 0;
488
uint8_t *myCTarget = 0;
489
int32_t myCTargetSize = MAX(512, 3 * len);
490
const UChar *myCSource = chars;
491
const UChar *myCSourceAlias = 0;
493
/* decompression variables */
494
UChar myDecompressionBuffer [DECOMPRESSIONBUFFERSIZE];
495
UChar *myDecompressed = 0;
496
UChar *myDTarget = 0;
497
int32_t myDTargetSize = MAX(2 * len, 2);
498
const uint8_t *myDSource = 0;
499
const uint8_t *myDSourceAlias = 0;
502
int32_t totalCharsCompressed = 0;
503
int32_t totalBytesWritten = 0;
505
int32_t totalBytesDecompressed = 0;
506
int32_t totalCharsWritten = 0;
509
UErrorCode status = U_ZERO_ERROR;
511
/* allocate memory */
512
myCompressed = (uint8_t*) calloc(myCTargetSize, sizeof(uint8_t));
513
myDecompressed = (UChar*) calloc(myDTargetSize, sizeof(UChar));
515
if(myCompressed == 0 || myDecompressed == 0) {
516
log_err("calloc error at line %d.\n", __LINE__);
520
/* init compressor */
521
scsu_init(&myCompressor);
523
/* perform the compression in a loop */
525
status = U_ZERO_ERROR;
526
myCTarget = myCompressionBuffer;
527
myCSourceAlias = myCSource;
529
scsu_compress(&myCompressor,
531
myCTarget + COMPRESSIONBUFFERSIZE,
536
if(status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status)) {
537
log_err("Failing status code at line %d.\n", __LINE__);
541
/* copy the newly-compressed chunk to the target */
542
uprv_memcpy(myCompressed + totalBytesWritten,
544
sizeof(uint8_t) * (myCTarget - myCompressionBuffer));
546
/* printf("Compression pass complete. Compressed %d chars into %d bytes\n",
547
(myCSource - myCSourceAlias), (myCTarget - myCompressionBuffer));*/
549
/* update pointers */
550
totalCharsCompressed = (myCSource - chars);
552
totalBytesWritten += (myCTarget - myCompressionBuffer);
554
} while(status == U_BUFFER_OVERFLOW_ERROR/*totalCharsCompressed < len*/);
557
scsu_reset(&myCompressor);
559
/* set up decompression params */
560
myDSource = myCompressed;
562
/* perform the decompression in a loop */
564
status = U_ZERO_ERROR;
565
myDTarget = myDecompressionBuffer;
566
myDSourceAlias = myDSource;
568
scsu_decompress(&myCompressor,
570
myDTarget + DECOMPRESSIONBUFFERSIZE,
572
myCompressed + totalBytesWritten,
575
if(status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status)) {
576
log_err("Failing status code at line %d.\n", __LINE__);
580
/* copy the newly-decompressed chunk to the target */
581
uprv_memcpy(myDecompressed + totalCharsWritten,
582
myDecompressionBuffer,
583
sizeof(UChar) * (myDTarget - myDecompressionBuffer));
585
/* printf("Decompression pass complete. Decompressed %d bytes into %d chars\n",
586
(myDSource - myDSourceAlias), (myDTarget - myDecompressionBuffer));*/
588
/* update pointers */
589
totalBytesDecompressed = (myDSource - myCompressed);
591
totalCharsWritten += (myDTarget - myDecompressionBuffer);
593
} while(status == U_BUFFER_OVERFLOW_ERROR/*totalBytesDecompressed < totalBytesWritten*/);
595
/* find differences */
596
if( printDiffs(chars, len, myDecompressed, totalCharsWritten) == FALSE) {
597
/*printf("%d chars ===> %d bytes ===> %d chars (%f) (MP)\n", len,
598
totalBytesWritten, totalCharsWritten,
599
(double)(totalBytesWritten/(totalCharsWritten*2.0)));*/
603
printBytes(myCompressed, totalBytesWritten);
608
free(myDecompressed);
611
static const char *fTestCases [] = {
612
"Hello \\9292 \\9192 World!",
613
"Hell\\0429o \\9292 \\9192 W\\00e4rld!",
614
"Hell\\0429o \\9292 \\9292W\\00e4rld!",
616
"\\0648\\06c8", /* catch missing reset*/
619
"\\4444\\E001", /* lowest quotable*/
620
"\\4444\\f2FF", /* highest quotable*/
621
"\\4444\\f188\\4444",
622
"\\4444\\f188\\f288",
623
"\\4444\\f188abc\\0429\\f288",
625
"Hell\\0429\\04230o \\9292 \\9292W\\00e4\\0192rld!",
626
"Hell\\0429o \\9292 \\9292W\\00e4rld!",
627
"Hello World!123456",
628
"Hello W\\0081\\011f\\0082!", /* Latin 1 run*/
630
"abc\\0301\\0302", /* uses SQn for u301 u302*/
631
"abc\\4411d", /* uses SQU*/
632
"abc\\4411\\4412d",/* uses SCU*/
633
"abc\\0401\\0402\\047f\\00a5\\0405", /* uses SQn for ua5*/
634
"\\9191\\9191\\3041\\9191\\3041\\3041\\3000", /* SJIS like data*/
636
"\\9191\\9191\\3041\\9191\\3041\\3041\\3000",
637
"\\9999\\3051\\300c\\9999\\9999\\3060\\9999\\3065\\3065\\3065\\300c",
638
"\\3000\\266a\\30ea\\30f3\\30b4\\53ef\\611b\\3044\\3084\\53ef\\611b\\3044\\3084\\30ea\\30f3\\30b4\\3002",
641
"\\0000", /* smallest BMP character*/
642
"\\FFFF", /* largest BMP character*/
644
"\\d800\\dc00", /* smallest surrogate*/
645
"\\d8ff\\dcff", /* largest surrogate pair*/
647
/* regression tests*/
648
"\\6441\\b413\\a733\\f8fe\\eedb\\587f\\195f\\4899\\f23d\\49fd\\0aac\\5792\\fc22\\fc3c\\fc46\\00aa",
649
"\\00df\\01df\\f000\\dbff\\dfff\\000d\n\\0041\\00df\\0401\\015f\\00df\\01df\\f000\\dbff\\dfff",
650
"\\30f9\\8321\\05e5\\181c\\d72b\\2019\\99c9\\2f2f\\c10c\\82e1\\2c4d\\1ebc\\6013\\66dc\\bbde\\94a5\\4726\\74af\\3083\\55b9\\000c",
651
"\\0041\\00df\\0401\\015f",
653
"\\d266\\43d7\\\\e386\\c9c0\\4a6b\\9222\\901f\\7410\\a63f\\539b\\9596\\482e\\9d47\\cfe4\\7b71\\c280\\f26a\\982f\\862a\\4edd\\f513\\fda6\\869d\\2ee0\\a216\\3ff6\\3c70\\89c0\\9576\\d5ec\\bfda\\6cca\\5bb3\\bcea\\554c\\914e\\fa4a\\ede3\\2990\\d2f5\\2729\\5141\\0f26\\ccd8\\5413\\d196\\bbe2\\51b9\\9b48\\0dc8\\2195\\21a2\\21e9\\00e4\\9d92\\0bc0\\06c5",
654
"\\f95b\\2458\\2468\\0e20\\f51b\\e36e\\bfc1\\0080\\02dd\\f1b5\\0cf3\\6059\\7489",
659
static unsigned long gTotalChars;
663
/* Decompress the two segments */
665
segment_test(uint8_t *segment1,
670
UErrorCode status = U_ZERO_ERROR;
671
UnicodeCompressor myDecompressor;
673
const uint8_t *seg1 = segment1;
674
const uint8_t *seg2 = segment2;
676
int32_t charBufferCap = 2*(seg1Len + seg2Len);
677
UChar *charBuffer = (UChar*) malloc(sizeof(UChar) * charBufferCap);
679
UChar *target = charBuffer;
680
int32_t outCount = 0, count1 = 0, count2 = 0;
683
scsu_init(&myDecompressor);
685
scsu_decompress(&myDecompressor, &target, charBuffer + charBufferCap,
686
&seg1, segment1 + seg1Len, &status);
688
count1 = seg1 - segment1;
690
/* println("Segment 1 (" + segment1.length + " bytes) " +
691
"decompressed into " + count1 + " chars");
692
println("Bytes consumed: " + bytesRead[0]);
694
print("Got chars: ");
695
println(System.out, charBuffer, 0, count1);*/
697
/*s.append(charBuffer, 0, count1);*/
699
scsu_decompress(&myDecompressor, &target,
700
charBuffer + charBufferCap,
701
&seg2, segment2 + seg2Len, &status);
703
count2 = seg2 - segment2;
705
outCount = (target - charBuffer);
707
/* println("Segment 2 (" + segment2.length + " bytes) " +
708
"decompressed into " + count2 + " chars");
709
println("Bytes consumed: " + bytesRead[0]);
711
print("Got chars: ");
712
println(System.out, charBuffer, count1, count2);*/
714
/*s.append(charBuffer, count1, count2);*/
717
println(System.out, charBuffer, 0, count1 + count2);
718
println("====================");*/
720
charBuffer [ outCount ] = 0x0000;
732
/* multi-segment test data */
734
/* compressed segment breaking on a define window sequence */
736
uint8_t segment1a [] = { 0x42, 0x6f, 0x6f, 0x74, 0x68, 0x19 };
738
uint8_t segment1b [] = { 0x01, 0x2c, 0x20, 0x53, 0x2e };
739
/* expected result */
740
UChar result1 [] = { 0x0042, 0x006f, 0x006f, 0x0074, 0x0068,
741
0x002c, 0x0020, 0x0053, 0x002e, 0x0000 };
743
/* compressed segment breaking inside a single-byte-mode quote Unicode (SQU, 0x0E) sequence */
745
uint8_t segment2a [] = { 0x42, 0x6f, 0x6f, 0x74, 0x0e, 0x00 };
748
uint8_t segment2b [] = { 0x68, 0x2c, 0x20, 0x53, 0x2e };
749
/* expected result */
750
UChar result2 [] = { 0x0042, 0x006f, 0x006f, 0x0074, 0x0068,
751
0x002c, 0x0020, 0x0053, 0x002e, 0x0000 };
753
/* compressed segment breaking inside a Unicode-mode quote (UQU, 0xF0) sequence */
755
uint8_t segment3a [] = { 0x0f, 0xf0, 0x00 };
758
uint8_t segment3b [] = { 0x42 };
759
/* expected result */
760
UChar result3 [] = { 0x0042, 0x0000 };
763
chars = segment_test(segment1a, 6, segment1b, 5);
764
if(u_strcmp(chars, result1)) {
765
log_err("Failure in multisegment 1\n");
769
chars = segment_test(segment2a, 6, segment2b, 5);
770
if(u_strcmp(chars, result2)) {
771
log_err("Failure in multisegment 2\n");
775
chars = segment_test(segment3a, 3, segment3b, 1);
776
if(u_strcmp(chars, result3)) {
777
log_err("Failure in multisegment 3\n");
781
/* initialize char count */
784
/* initialize random number generator */
787
for(i = 0; fTestCases[i] != 0; i++) {
789
chars = unescape(fTestCases[i]);
791
return; /* memory error */
794
len = u_strlen(chars);
796
/*printChars2(chars, len);*/
799
myMultipassTest(chars, len);
806
/*puts("==============================");*/
809
len = (int32_t)(1000 * (double)(rand()/(double)RAND_MAX));
810
if(len == 0) /* 0-length malloc will fail */
812
chars = randomChars(len);
815
log_err("scsutest aborted.\n");
819
myMultipassTest(chars, len);
827
void addSCSUTest(TestNode** root);
830
addSCSUTest(TestNode** root)
832
#ifdef ICU_SCSU_USE_DEPRECATES
833
addTest(root, &TestSCSU, "scsutest/TestSCSU");