1
/**************************************************************************
3
* Copyright (C) 2000, International Business Machines
4
* Corporation and others. All Rights Reserved.
6
***************************************************************************
7
* file name: convsamp.c
8
* encoding: ASCII (7-bit)
10
* created on: 2000may30
11
* created by: Steven R. Loomis
13
* Sample code for the ICU conversion routines.
15
* Note: Nothing special is needed to build this sample. Link with
16
* the icu UC and icu I18N libraries.
18
* I use 'assert' for error checking, you probably will want
19
* something more flexible. '***BEGIN SAMPLE***' and
20
* '***END SAMPLE***' mark pieces suitable for stand alone
24
* Each test can define its own BUFFERSIZE
29
#include <ctype.h> /* for isspace, etc. */
32
#include <stdlib.h> /* malloc */
34
#include "unicode/utypes.h" /* Basic ICU data types */
35
#include "unicode/ucnv.h" /* C Converter API */
36
#include "unicode/convert.h" /* C++ Converter API */
37
#include "unicode/ustring.h" /* some more string fcns*/
38
#include "unicode/uchar.h" /* char names */
39
#include "unicode/uloc.h"
44
/* Some utility functions */
46
static const UChar kNone[] = { 0x0000 };
48
/*
 * U_ASSERT(x): check a UErrorCode.
 *
 * If 'x' is a failure code, flush stdout and stderr, print
 * "<expression> == <error name>" to stderr, and then trip assert().
 * Does nothing when 'x' is a success code.
 *
 * The body is wrapped in do { ... } while (0) so that "U_ASSERT(status);"
 * expands to exactly one statement and is safe inside an unbraced if/else.
 * Note: 'x' is evaluated more than once, so pass a plain variable, not an
 * expression with side effects.
 */
#define U_ASSERT(x) \
    do { \
        if (U_FAILURE(x)) { \
            fflush(stdout); \
            fflush(stderr); \
            fprintf(stderr, #x " == %s\n", u_errorName(x)); \
            assert(U_SUCCESS(x)); \
        } \
    } while (0)
50
/* Print a UChar if possible, in seven characters. */
51
void prettyPrintUChar(UChar c)
55
printf(" '%c' ", (char)(0x00FF&c));
56
} else if ( c > 0x007F ) {
58
UErrorCode status = U_ZERO_ERROR;
61
o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 1000, &status);
62
if(U_SUCCESS(status) && (o>0) ) {
66
o = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, 1000, &status);
67
if(U_SUCCESS(status) && (o>0)) {
76
switch((char)(c & 0x007F)) {
94
void printUChars(const char *name = "?",
95
const UChar *uch = kNone,
100
if( (len == -1) && (uch) ) {
104
printf("%5s: ", name);
105
for( i = 0; i <len; i++) {
110
printf("%5s: ", "uni");
111
for( i = 0; i <len; i++) {
112
printf("\\u%04X ", (int)uch[i]);
116
printf("%5s:", "ch");
117
for( i = 0; i <len; i++) {
118
prettyPrintUChar(uch[i]);
123
void printString(const char *name, const UnicodeString& string)
126
int32_t len = string.length();
127
uch = (UChar*)malloc(sizeof(UChar)*(len+1));
128
string.extract(0,len,uch,0);
130
printUChars(name, uch, -1);
134
void printBytes(const char *name = "?",
135
const char *uch = "",
140
if( (len == -1) && (uch) ) {
144
printf("%5s: ", name);
145
for( i = 0; i <len; i++) {
150
printf("%5s: ", "uni");
151
for( i = 0; i <len; i++) {
152
printf("\\x%02X ", 0x00FF & (int)uch[i]);
156
printf("%5s:", "ch");
157
for( i = 0; i <len; i++) {
158
if(isgraph(uch[i])) {
159
printf(" '%c' ", (char)uch[i]);
167
void printUChar(UChar32 ch32)
170
printf("ch: U+%06X\n", ch32);
173
UChar ch = (UChar)ch32;
174
printUChars("C", &ch, 1);
178
/*******************************************************************
179
Very simple C++ sample to convert the word 'Moscow' in Russian in Unicode,
180
followed by an exclamation mark (!) into the KOI8-R Russian code page.
182
This example first creates a UnicodeString out of the Unicode chars.
184
targetSize must be set to the amount of space available in the target
185
buffer. After UnicodeConverter::fromUnicodeString() is called,
186
targetSize will contain the number of bytes in target[] which were
187
used in the resulting codepage. In this case, there is a 1:1 mapping
188
between the input and output characters. The exclamation mark has the
189
same value in both KOI8-R and Unicode.
192
uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
193
ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL '!'
196
uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
201
UErrorCode convsample_01()
203
printf("\n\n==============================================\n"
204
"Sample 01: C++: simple Unicode -> koi8-r conversion\n");
207
// **************************** START SAMPLE *******************
208
// "Moscva!" in cyrillic letters, to be converted to the KOI8-R
209
// Russian code page.
210
UChar source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
211
0x0430, 0x0021, 0x0000 };
213
int32_t targetSize = sizeof(target);
214
UnicodeString myString(source);
215
UErrorCode status = U_ZERO_ERROR;
217
// set up the converter
218
UnicodeConverter conv("koi8-r", status);
222
conv.fromUnicodeString(target, targetSize, myString, status);
225
// ***************************** END SAMPLE ********************
228
printUChars("src", source);
230
printBytes("targ", target, targetSize);
236
/******************************************************
237
Similar sample to the preceding one. Converting FROM unicode
239
You must call ucnv_close to clean up the memory used by the
242
'len' returns the number of OUTPUT bytes resulting from the
246
UErrorCode convsample_02()
248
printf("\n\n==============================================\n"
249
"Sample 02: C: simple Unicode -> koi8-r conversion\n");
252
// **************************** START SAMPLE *******************
254
UChar source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
255
0x0430, 0x0021, 0x0000 };
257
UErrorCode status = U_ZERO_ERROR;
261
// set up the converter
262
conv = ucnv_open("koi8-r", &status);
263
assert(U_SUCCESS(status));
266
len = ucnv_fromUChars(conv, target, 100, source, -1, &status);
267
assert(U_SUCCESS(status));
269
// close the converter
272
// ***************************** END SAMPLE ********************
275
printUChars("src", source);
277
printBytes("targ", target, len);
283
UErrorCode convsample_03()
285
printf("\n\n==============================================\n"
286
"Sample 03: C: print out all converters\n");
291
// **************************** START SAMPLE *******************
292
count = ucnv_countAvailable();
293
printf("Available converters: %d\n", count);
297
printf("%s ", ucnv_getAvailableName(i));
300
// ***************************** END SAMPLE ********************
309
#define BUFFERSIZE 17 /* make it interesting :) */
312
Converting from a codepage to Unicode in bulk..
313
What is the best way to determine the buffer size?
315
The 'buffersize' is in bytes of input.
316
For a given converter, dividing this by the minimum char size
317
gives you the maximum number of Unicode characters that could be
318
expected for a given number of input bytes.
319
see: ucnv_getMinCharSize()
321
For example, a single byte codepage like 'Latin-3' has a
322
minimum char size of 1. (It takes at least 1 byte to represent
323
each Unicode char.) So the unicode buffer has the same number of
324
UChars as the input buffer has bytes.
326
In a strictly double byte codepage such as cp1362 (Windows
327
Korean), the minimum char size is 2. So, only half as many Unicode
328
chars as bytes are needed.
330
This work to calculate the buffer size is an optimization. Any
331
size of input and output buffer can be used, as long as the
332
program handles the following cases: If the input buffer is empty,
333
the source pointer will be equal to sourceLimit. If the output
334
buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
337
UErrorCode convsample_05()
339
printf("\n\n==============================================\n"
340
"Sample 05: C: count the number of letters in a UTF-8 document\n");
344
char inBuf[BUFFERSIZE];
346
const char *sourceLimit;
351
int32_t uBufSize = 0;
353
UErrorCode status = U_ZERO_ERROR;
354
uint32_t letters=0, total=0;
356
f = fopen("data01.txt", "r");
359
fprintf(stderr, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
360
return U_FILE_ACCESS_ERROR;
363
// **************************** START SAMPLE *******************
364
conv = ucnv_open("utf-8", &status);
365
assert(U_SUCCESS(status));
367
uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
368
printf("input bytes %d / min chars %d = %d UChars\n",
369
BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
370
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
373
// grab another buffer's worth
375
((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
377
// Convert bytes to unicode
379
sourceLimit = inBuf + count;
384
targetLimit = uBuf + uBufSize;
386
ucnv_toUnicode(conv, &target, targetLimit,
387
&source, sourceLimit, NULL,
388
feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
389
/* is true (when no more data will come) */
392
if(status == U_BUFFER_OVERFLOW_ERROR)
394
// simply ran out of space - we'll reset the target ptr the next
395
// time through the loop.
396
status = U_ZERO_ERROR;
400
// Check other errors here.
401
assert(U_SUCCESS(status));
402
// Break out of the loop (by force)
405
// Process the Unicode
406
// Todo: handle UTF-16/surrogates
408
for(p = uBuf; p<target; p++)
414
} while (source < sourceLimit); // while simply out of space
417
printf("%d letters out of %d total UChars.\n", letters, total);
419
// ***************************** END SAMPLE ********************
428
#define BUFFERSIZE 1024
435
UErrorCode convsample_06()
437
printf("\n\n==============================================\n"
438
"Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
442
char inBuf[BUFFERSIZE];
444
const char *sourceLimit;
446
int32_t uBufSize = 0;
448
UErrorCode status = U_ZERO_ERROR;
449
uint32_t letters=0, total=0;
452
UChar32 charCount = 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
459
f = fopen("data06.txt", "r");
462
fprintf(stderr, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
463
return U_FILE_ACCESS_ERROR;
466
info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
469
fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo)*charCount);
472
/* reset frequencies */
473
for(p=0;p<charCount;p++)
475
info[p].codepoint = p;
476
info[p].frequency = 0;
479
// **************************** START SAMPLE *******************
480
conv = ucnv_open("utf-8", &status);
481
assert(U_SUCCESS(status));
483
uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
484
printf("input bytes %d / min chars %d = %d UChars\n",
485
BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
486
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
489
// grab another buffer's worth
491
((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
493
// Convert bytes to unicode
495
sourceLimit = inBuf + count;
497
while(source < sourceLimit)
499
p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
500
if(U_FAILURE(status))
502
fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
503
status = U_ZERO_ERROR;
512
if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
515
if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
520
fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
521
return U_UNSUPPORTED_ERROR;
531
printf("%d letters out of %d total UChars.\n", letters, total);
532
printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);
534
// now, we could sort it..
536
// qsort(info, charCount, sizeof(info[0]), charfreq_compare);
538
for(p=0;p<charCount;p++)
540
if(info[p].frequency)
542
printf("% 5d U+%06X ", info[p].frequency, p);
545
prettyPrintUChar((UChar)p);
551
// ***************************** END SAMPLE ********************
560
/*******************************************************************
561
Very simple C++ sample to convert a string into Unicode from SJIS
563
This example creates a UnicodeString out of the chars.
566
UErrorCode convsample_11()
568
printf("\n\n==============================================\n"
569
"Sample 11: C++: simple sjis -> Unicode conversion\n");
572
// **************************** START SAMPLE *******************
574
char source[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
575
int32_t sourceSize = sizeof(source);
576
UnicodeString target;
577
UErrorCode status = U_ZERO_ERROR;
579
// set up the converter
580
UnicodeConverter conv("shift_jis", status);
581
assert(U_SUCCESS(status));
584
conv.toUnicodeString(target, source, sourceSize, status);
585
assert(U_SUCCESS(status));
587
// ***************************** END SAMPLE ********************
590
printBytes("src", source, sourceSize);
592
printString("targ", target );
599
/******************************************************
600
Similar sample to the preceding one.
601
You must call ucnv_close to clean up the memory used by the
604
'len' returns the number of OUTPUT bytes resulting from the
608
UErrorCode convsample_12()
610
printf("\n\n==============================================\n"
611
"Sample 12: C: simple sjis -> unicode conversion\n");
614
// **************************** START SAMPLE *******************
616
char source[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
618
UErrorCode status = U_ZERO_ERROR;
622
// set up the converter
623
conv = ucnv_open("shift_jis", &status);
624
assert(U_SUCCESS(status));
626
// convert to Unicode
627
// Note: we can use strlen, we know it's an 8 bit null terminated codepage
629
len = ucnv_toUChars(conv, target, 100, source, strlen(source), &status);
631
// close the converter
634
// ***************************** END SAMPLE ********************
637
printBytes("src", source, strlen(source) );
639
printUChars("targ", target, len);
645
/******************************************************************
646
C: Convert from codepage to Unicode one at a time.
649
UErrorCode convsample_13()
651
printf("\n\n==============================================\n"
652
"Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
655
const char sourceChars[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
656
// const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
657
const char *source, *sourceLimit;
659
UErrorCode status = U_ZERO_ERROR;
660
UConverter *conv = NULL;
664
srcCount = sizeof(sourceChars);
666
conv = ucnv_open("Big5", &status);
669
source = sourceChars;
670
sourceLimit = sourceChars + sizeof(sourceChars);
672
// **************************** START SAMPLE *******************
675
printBytes("src",source,sourceLimit-source);
677
while(source < sourceLimit)
680
target = ucnv_getNextUChar (conv,
685
// printBytes("src",source,sourceLimit-source);
692
// ************************** END SAMPLE *************************
694
printf("src=%d bytes, dst=%d uchars\n", srcCount, dstCount);
703
UBool convsample_20_didSubstitute(const char *source)
707
UConverter *conv = NULL;
708
UErrorCode status = U_ZERO_ERROR;
711
FromUFLAGContext context;
713
printf("\n\n==============================================\n"
714
"Sample 20: C: Test for substitution using callbacks\n");
716
/* print out the original source */
717
printBytes("src", source);
720
/* First, convert from UTF8 to unicode */
721
conv = ucnv_open("utf-8", &status);
724
len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
727
printUChars("uch", uchars, len);
730
/* Now, close the converter */
733
/* Now, convert to windows-1252 */
734
conv = ucnv_open("windows-1252", &status);
737
/* Converter starts out with the SUBSTITUTE callback set. */
739
/* initialize our callback */
740
context.subCallback = NULL;
741
context.subContext = NULL;
742
context.flag = FALSE;
744
/* Set our special callback */
745
ucnv_setFromUCallBack(conv,
746
UCNV_FROM_U_CALLBACK_FLAG,
748
&context.subCallback,
753
len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status);
758
/* print out the converted bytes */
759
printBytes("bytes", bytes, len2);
761
return context.flag; /* true if callback was called */
764
UErrorCode convsample_20()
766
const char *sample1 = "abc\xdf\xbf";
767
const char *sample2 = "abc_def";
770
if(convsample_20_didSubstitute(sample1))
772
printf("DID substitute.\n******\n");
776
printf("Did NOT substitute.\n*****\n");
779
if(convsample_20_didSubstitute(sample2))
781
printf("DID substitute.\n******\n");
785
printf("Did NOT substitute.\n*****\n");
791
// 40- C, cp37 -> UTF16 [data02.bin -> data40.utf16]
793
#define BUFFERSIZE 17 /* make it interesting :) */
795
UErrorCode convsample_40()
797
printf("\n\n==============================================\n"
798
"Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
803
char inBuf[BUFFERSIZE];
805
const char *sourceLimit;
809
int32_t uBufSize = 0;
810
UConverter *conv = NULL;
811
UErrorCode status = U_ZERO_ERROR;
812
uint32_t inbytes=0, total=0;
814
f = fopen("data02.bin", "rb");
817
fprintf(stderr, "Couldn't open file 'data02.bin' (cp37 data file).\n");
818
return U_FILE_ACCESS_ERROR;
821
out = fopen("data40.utf16", "wb");
824
fprintf(stderr, "Couldn't create file 'data40.utf16'.\n");
825
return U_FILE_ACCESS_ERROR;
828
// **************************** START SAMPLE *******************
829
conv = ucnv_openCCSID(37, UCNV_IBM, &status);
830
assert(U_SUCCESS(status));
832
uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
833
printf("input bytes %d / min chars %d = %d UChars\n",
834
BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
835
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
838
// grab another buffer's worth
840
((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
844
// Convert bytes to unicode
846
sourceLimit = inBuf + count;
851
targetLimit = uBuf + uBufSize;
853
ucnv_toUnicode( conv, &target, targetLimit,
854
&source, sourceLimit, NULL,
855
feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
856
/* is true (when no more data will come) */
859
if(status == U_BUFFER_OVERFLOW_ERROR)
861
// simply ran out of space - we'll reset the target ptr the next
862
// time through the loop.
863
status = U_ZERO_ERROR;
867
// Check other errors here.
868
assert(U_SUCCESS(status));
869
// Break out of the loop (by force)
872
// Process the Unicode
873
// Todo: handle UTF-16/surrogates
874
assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
875
(size_t)(target-uBuf));
876
total += (target-uBuf);
877
} while (source < sourceLimit); // while simply out of space
880
printf("%d bytes in, %d UChars out.\n", inbytes, total);
882
// ***************************** END SAMPLE ********************
893
// convsample_41(); // C++, cp37 -> UTF16 [data02.bin -> data41.utf16]
895
#define BUFFERSIZE 17 /* make it interesting :) */
897
UErrorCode convsample_41()
899
printf("\n\n==============================================\n"
900
"Sample 41: C++: convert data02.bin from cp37 to UTF16 [data41.utf16]\n");
905
char inBuf[BUFFERSIZE];
907
const char *sourceLimit;
911
int32_t uBufSize = 0;
912
UnicodeConverter *conv;
913
UErrorCode status = U_ZERO_ERROR;
914
uint32_t inbytes=0, total=0;
916
f = fopen("data02.bin", "rb");
919
fprintf(stderr, "Couldn't open file 'data02.bin' (cp37 data file).\n");
920
return U_FILE_ACCESS_ERROR;
923
out = fopen("data41.utf16", "wb");
926
fprintf(stderr, "Couldn't create file 'data41.utf16'.\n");
927
return U_FILE_ACCESS_ERROR;
930
// **************************** START SAMPLE *******************
931
conv = new UnicodeConverter(37, UCNV_IBM, status);
932
assert(U_SUCCESS(status));
934
uBufSize = (BUFFERSIZE/conv->getMinBytesPerChar());
936
printf("input bytes %d / min chars %d = %d UChars\n",
937
BUFFERSIZE, conv->getMinBytesPerChar(), uBufSize);
938
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
941
// grab another buffer's worth
943
((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
947
// Convert bytes to unicode
949
sourceLimit = inBuf + count;
954
targetLimit = uBuf + uBufSize;
956
conv->toUnicode( target, targetLimit,
957
source, sourceLimit, NULL,
958
feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
959
/* is true (when no more data will come) */
962
if(status == U_BUFFER_OVERFLOW_ERROR)
964
// simply ran out of space - we'll reset the target ptr the next
965
// time through the loop.
966
status = U_ZERO_ERROR;
970
// Check other errors here.
971
assert(U_SUCCESS(status));
972
// Break out of the loop (by force)
975
// Process the Unicode
976
// Todo: handle UTF-16/surrogates
977
assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
978
(size_t)(target-uBuf));
979
total += (target-uBuf);
981
fprintf(stderr, "srcLeft=%d, wrote %d, err %s\n",
982
sourceLimit - source, target-uBuf, u_errorName(status));
984
} while (source < sourceLimit); // while simply out of space
987
printf("%d bytes in, %d UChars out.\n", inbytes, total);
989
// ***************************** END SAMPLE ********************
1002
// 46- C, UTF16 -> latin2 [data41.utf16 -> data46.out]
1004
#define BUFFERSIZE 24 /* make it interesting :) */
1006
UErrorCode convsample_46()
1008
printf("\n\n==============================================\n"
1009
"Sample 46: C: convert data41.utf16 from UTF16 to latin2 [data46.out]\n");
1014
UChar inBuf[BUFFERSIZE];
1015
const UChar *source;
1016
const UChar *sourceLimit;
1021
int32_t bufSize = 0;
1022
UConverter *conv = NULL;
1023
UErrorCode status = U_ZERO_ERROR;
1024
uint32_t inchars=0, total=0;
1026
f = fopen("data41.utf16", "rb");
1029
fprintf(stderr, "Couldn't open file 'data41.utf16' (did you run convsample_41() ?)\n");
1030
return U_FILE_ACCESS_ERROR;
1033
out = fopen("data46.out", "wb");
1036
fprintf(stderr, "Couldn't create file 'data46.out'.\n");
1037
return U_FILE_ACCESS_ERROR;
1040
// **************************** START SAMPLE *******************
1041
conv = ucnv_open( "iso-8859-2", &status);
1042
assert(U_SUCCESS(status));
1044
bufSize = (BUFFERSIZE*ucnv_getMaxCharSize(conv));
1045
printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1046
BUFFERSIZE, ucnv_getMaxCharSize(conv), bufSize);
1047
buf = (char*)malloc(bufSize * sizeof(char));
1050
// grab another buffer's worth
1052
((count=fread(inBuf, sizeof(UChar), BUFFERSIZE , f)) > 0) )
1056
// Convert Unicode to bytes
1058
sourceLimit = inBuf + count;
1063
targetLimit = buf + bufSize;
1065
ucnv_fromUnicode( conv, &target, targetLimit,
1066
&source, sourceLimit, NULL,
1067
feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
1068
/* is true (when no more data will come) */
1071
if(status == U_BUFFER_OVERFLOW_ERROR)
1073
// simply ran out of space - we'll reset the target ptr the next
1074
// time through the loop.
1075
status = U_ZERO_ERROR;
1079
// Check other errors here.
1080
assert(U_SUCCESS(status));
1081
// Break out of the loop (by force)
1084
// Process the converted bytes
1085
assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) ==
1086
(size_t)(target-buf));
1087
total += (target-buf);
1088
} while (source < sourceLimit); // while simply out of space
1091
printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, inchars * sizeof(UChar), total);
1093
// ***************************** END SAMPLE ********************
1100
return U_ZERO_ERROR;
1104
#define BUFFERSIZE 219
1106
UErrorCode convsample_47()
1108
printf("\n\n==============================================\n"
1109
"Sample 47: C++: convert data40.utf16 from UTF16 to latin2 [data47.out]\n");
1114
UChar inBuf[BUFFERSIZE];
1115
const UChar *source;
1116
const UChar *sourceLimit;
1121
int32_t bufSize = 0;
1122
UnicodeConverter *conv = NULL;
1123
UErrorCode status = U_ZERO_ERROR;
1124
uint32_t inchars=0, total=0;
1126
f = fopen("data40.utf16", "rb");
1129
fprintf(stderr, "Couldn't open file 'data40.utf16' (Did you run convsample_40() ?)\n");
1130
return U_FILE_ACCESS_ERROR;
1133
out = fopen("data47.out", "wb");
1136
fprintf(stderr, "Couldn't create file 'data47.out'.\n");
1137
return U_FILE_ACCESS_ERROR;
1141
// **************************** START SAMPLE *******************
1142
conv = new UnicodeConverter( "iso-8859-2", status);
1143
assert(U_SUCCESS(status));
1145
bufSize = (BUFFERSIZE*conv->getMaxBytesPerChar());
1146
printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1147
BUFFERSIZE, conv->getMaxBytesPerChar(), bufSize);
1148
buf = (char*)malloc(bufSize * sizeof(char));
1151
// grab another buffer's worth
1153
((count=fread(inBuf, sizeof(UChar), BUFFERSIZE , f)) > 0) )
1157
// Convert Unicode to bytes
1159
sourceLimit = inBuf + count;
1164
targetLimit = buf + bufSize;
1166
conv->fromUnicode( target, targetLimit,
1167
source, sourceLimit, NULL,
1168
feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
1169
/* is true (when no more data will come) */
1172
if(status == U_BUFFER_OVERFLOW_ERROR)
1174
// simply ran out of space - we'll reset the target ptr the next
1175
// time through the loop.
1176
status = U_ZERO_ERROR;
1180
// Check other errors here.
1181
assert(U_SUCCESS(status));
1182
// Break out of the loop (by force)
1185
// Process the converted bytes
1186
assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) ==
1187
(size_t)(target-buf));
1188
total += (target-buf);
1189
} while (source < sourceLimit); // while simply out of space
1192
printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, inchars * sizeof(UChar), total);
1194
// ***************************** END SAMPLE ********************
1201
return U_ZERO_ERROR;
1211
printf("Default Converter=%s\n", ucnv_getDefaultName() );
1213
convsample_01(); // C++, u->koi8r, conv
1214
convsample_02(); // C , u->koi8r, conv
1215
convsample_03(); // C, iterate
1216
// convsample_04(); /* not written yet */
1217
convsample_05(); // C, utf8->u, getNextUChar
1218
convsample_06(); // C freq counter thingy
1219
convsample_11(); // C++, sjis->u, conv
1220
convsample_12(); // C, sjis->u, conv
1221
convsample_13(); // C, big5->u, getNextU
1223
convsample_20(); // C, callback
1225
convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
1226
convsample_41(); // C++, cp37 -> UTF16 [data02.bin -> data41.utf16]
1228
convsample_46(); // C, UTF16 -> latin2 [data41.utf16 -> data46.out]
1229
convsample_47(); // C++,UTF16 -> latin2 [data40.utf16 -> data47.out]