2
******************************************************************************
4
* Copyright (C) 2001-2003, International Business Machines
5
* Corporation and others. All Rights Reserved.
7
******************************************************************************
11
* Modification History:
13
* Date Name Description
14
* 9/10/2001 Ram Creation.
15
******************************************************************************
18
/*******************************************************************************
20
* u_strTo* and u_strFrom* APIs
22
*******************************************************************************
26
#include "unicode/putil.h"
27
#include "unicode/ucnv.h"
28
#include "unicode/ustring.h"
36
u_growAnyBufferFromStatic(void *context,
37
void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
38
int32_t length, int32_t size) {
40
void *newBuffer=uprv_malloc(reqCapacity*size);
43
uprv_memcpy(newBuffer, *pBuffer, length*size);
45
*pCapacity=reqCapacity;
50
/* release the old pBuffer if it was not statically allocated */
51
if(*pBuffer!=(void *)context) {
56
return (UBool)(newBuffer!=NULL);
59
#define _STACK_BUFFER_CAPACITY 1000
61
U_CAPI UChar* U_EXPORT2
62
u_strFromUTF32(UChar *dest,
67
UErrorCode *pErrorCode)
69
int32_t reqLength = 0;
71
UChar *pDestLimit =dest+destCapacity;
73
const uint32_t *pSrc = (const uint32_t *)src;
76
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
80
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
81
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
85
/* Check if the source is null terminated */
87
while(((ch=*pSrc)!=0) && (pDest < pDestLimit)){
91
}else if(ch<=0x10ffff){
92
*(pDest++)=UTF16_LEAD(ch);
94
*(pDest++)=UTF16_TRAIL(ch);
100
*pErrorCode = U_INVALID_CHAR_FOUND;
104
while((ch=*pSrc++) != 0){
105
reqLength+=UTF_CHAR_LENGTH(ch);
108
const uint32_t* pSrcLimit = ((const uint32_t*)pSrc) + srcLength;
109
while((pSrc < pSrcLimit) && (pDest < pDestLimit)){
112
*(pDest++)=(UChar)ch;
113
}else if(ch<=0x10FFFF){
114
*(pDest++)=UTF16_LEAD(ch);
115
if(pDest<pDestLimit){
116
*(pDest++)=UTF16_TRAIL(ch);
122
*pErrorCode = U_INVALID_CHAR_FOUND;
126
while(pSrc <pSrcLimit){
128
reqLength+=UTF_CHAR_LENGTH(ch);
132
reqLength += pDest - dest;
134
*pDestLength = reqLength;
137
/* Terminate the buffer */
138
u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
144
U_CAPI UChar32* U_EXPORT2
145
u_strToUTF32(UChar32 *dest,
146
int32_t destCapacity,
147
int32_t *pDestLength,
150
UErrorCode *pErrorCode)
152
const UChar* pSrc = src;
153
const UChar* pSrcLimit;
156
uint32_t *pDest = (uint32_t *)dest;
157
uint32_t *pDestLimit = pDest + destCapacity;
161
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
166
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
167
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
172
while((ch=*pSrc)!=0 && pDest!=pDestLimit) {
174
/*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
175
if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
177
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
181
while((ch=*pSrc++)!=0) {
182
if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
188
pSrcLimit = pSrc+srcLength;
189
while(pSrc<pSrcLimit && pDest<pDestLimit) {
191
if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
193
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
197
while(pSrc!=pSrcLimit) {
199
if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
206
reqLength+=(pDest - (uint32_t *)dest);
208
*pDestLength = reqLength;
211
/* Terminate the buffer */
212
u_terminateUChar32s(dest,destCapacity,reqLength,pErrorCode);
217
U_CAPI UChar* U_EXPORT2
218
u_strFromUTF8(UChar *dest,
219
int32_t destCapacity,
220
int32_t *pDestLength,
223
UErrorCode *pErrorCode){
226
UChar *pDestLimit = dest+destCapacity;
229
int32_t reqLength = 0;
230
uint8_t* pSrc = (uint8_t*) src;
233
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
237
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
238
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
243
srcLength = uprv_strlen((char*)pSrc);
246
while((index < srcLength)&&(pDest<pDestLimit)){
251
ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE);
253
*(pDest++)=(UChar)ch;
254
}else if(ch<=0x10ffff){
255
*(pDest++)=UTF16_LEAD(ch);
256
if(pDest<pDestLimit){
257
*(pDest++)=UTF16_TRAIL(ch);
263
*pErrorCode = U_INVALID_CHAR_FOUND;
268
/* do not fill the dest buffer, just count the UChars needed */
269
while(index < srcLength){
274
ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE);
275
reqLength+=UTF_CHAR_LENGTH(ch);
279
reqLength+=(pDest - dest);
282
*pDestLength = reqLength;
285
/* Terminate the buffer */
286
u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
291
U_INLINE static uint8_t *
292
_appendUTF8(uint8_t *pDest, UChar32 c) {
293
/* c<=0x7f is handled by the caller, here it is 0x80<=c<=0x10ffff */
295
*pDest++=(uint8_t)((c>>6)|0xc0);
296
*pDest++=(uint8_t)((c&0x3f)|0x80);
297
} else if((uint32_t)(c)<=0xffff) {
298
*pDest++=(uint8_t)((c>>12)|0xe0);
299
*pDest++=(uint8_t)(((c>>6)&0x3f)|0x80);
300
*pDest++=(uint8_t)(((c)&0x3f)|0x80);
301
} else /* if((uint32_t)(c)<=0x10ffff) */ {
302
*pDest++=(uint8_t)(((c)>>18)|0xf0);
303
*pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80);
304
*pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80);
305
*pDest++=(uint8_t)(((c)&0x3f)|0x80);
311
U_CAPI char* U_EXPORT2
312
u_strToUTF8(char *dest,
313
int32_t destCapacity,
314
int32_t *pDestLength,
317
UErrorCode *pErrorCode){
320
const UChar *pSrcLimit;
322
uint8_t *pDest = (uint8_t *)dest;
323
uint8_t *pDestLimit = pDest + destCapacity;
327
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
331
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
332
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
337
while((ch=*pSrc)!=0 && pDest!=pDestLimit) {
345
/*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
346
if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
348
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
350
reqLength += UTF8_CHAR_LENGTH(ch);
351
/* do we have enough room in destination? */
352
if(destCapacity< reqLength){
355
/* convert and append*/
356
pDest=_appendUTF8(pDest, ch);
358
while((ch=*pSrc++)!=0) {
359
if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
363
reqLength+=UTF8_CHAR_LENGTH(ch);
367
pSrcLimit = pSrc+srcLength;
368
while(pSrc<pSrcLimit && pDest<pDestLimit) {
376
if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
378
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
380
reqLength += UTF8_CHAR_LENGTH(ch);
381
/* do we have enough room in destination? */
382
if(destCapacity< reqLength){
385
/* convert and append*/
386
pDest=_appendUTF8(pDest, ch);
388
while(pSrc<pSrcLimit) {
390
if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
394
reqLength+=UTF8_CHAR_LENGTH(ch);
400
*pDestLength = reqLength;
403
/* Terminate the buffer */
404
u_terminateChars((char*)dest,destCapacity,reqLength,pErrorCode);
409
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
410
/* helper function */
412
_strToWCS(wchar_t *dest,
413
int32_t destCapacity,
414
int32_t *pDestLength,
417
UErrorCode *pErrorCode){
419
char stackBuffer [_STACK_BUFFER_CAPACITY];
420
char* tempBuf = stackBuffer;
421
int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
422
char* tempBufLimit = stackBuffer + tempBufCapacity;
423
UConverter* conv = NULL;
424
char* saveBuf = tempBuf;
425
wchar_t* intTarget=NULL;
426
int32_t intTargetCapacity=0;
427
int count=0,retVal=0;
429
const UChar *pSrcLimit =NULL;
430
const UChar *pSrc = src;
432
conv = u_getDefaultConverter(pErrorCode);
434
if(U_FAILURE(*pErrorCode)){
439
srcLength = u_strlen(pSrc);
442
pSrcLimit = pSrc + srcLength;
445
/* reset the error state */
446
*pErrorCode = U_ZERO_ERROR;
448
/* convert to chars using default converter */
449
ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
450
count =(tempBuf - saveBuf);
452
/* This should rarely occur */
453
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
456
/* we do not have enough room on the stack, so grow the buffer */
457
if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
458
(2*(pSrcLimit-pSrc)+100), count,sizeof(char))){
463
tempBufLimit = tempBuf + tempBufCapacity;
464
tempBuf = tempBuf + count;
471
if(U_FAILURE(*pErrorCode)){
475
/* done with conversion; null-terminate the char buffer */
476
if(count>=tempBufCapacity){
478
/* we do not have enough room on the stack, so grow the buffer */
479
if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
480
tempBufCapacity-count+1, count,sizeof(char))){
489
/* allocate more space than required
490
* here we assume that every char requires
491
* no more than 2 wchar_ts
493
intTargetCapacity = (count*2+1) /*for null termination */;
494
intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
499
int32_t remaining = intTargetCapacity;
500
wchar_t* pIntTarget=intTarget;
503
/* now convert the mbs to wcs */
506
/* we can call the system API since we are sure that
507
* there is at least 1 null in the input
509
retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
512
*pErrorCode = U_INVALID_CHAR_FOUND;
514
}else if(retVal== remaining){/* should never occur */
515
int numWritten = (pIntTarget-intTarget);
516
u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
521
pIntTarget = intTarget;
522
remaining=intTargetCapacity;
524
if(nulLen!=count){ /*there are embedded nulls*/
525
pIntTarget+=numWritten;
526
remaining-=numWritten;
531
/* we do not check for limit since tempBuf is null terminated */
532
while(tempBuf[nulLen++] != 0){
534
pIntTarget = pIntTarget + retVal+1;
535
remaining -=(retVal+1);
537
/* check if we have reached the source limit*/
543
count = (int32_t)(pIntTarget-intTarget);
545
if(0 < count && count <= destCapacity){
546
uprv_memcpy(dest,intTarget,count*sizeof(wchar_t));
550
*pDestLength = count;
553
/* free the allocated memory */
554
uprv_free(intTarget);
557
*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
560
/* are we still using stack buffer */
561
if(stackBuffer != saveBuf){
564
u_terminateWChars(dest,destCapacity,count,pErrorCode);
566
u_releaseDefaultConverter(conv);
572
U_CAPI wchar_t* U_EXPORT2
573
u_strToWCS(wchar_t *dest,
574
int32_t destCapacity,
575
int32_t *pDestLength,
578
UErrorCode *pErrorCode){
581
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
585
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
586
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
590
#ifdef U_WCHAR_IS_UTF16
591
/* wchar_t is UTF-16 just do a memcpy */
593
srcLength = u_strlen(src);
595
if(0 < srcLength && srcLength <= destCapacity){
596
uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
599
*pDestLength = srcLength;
602
u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
606
#elif defined U_WCHAR_IS_UTF32
608
return u_strToUTF32(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
612
return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
617
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
618
/* helper function */
620
_strFromWCS( UChar *dest,
621
int32_t destCapacity,
622
int32_t *pDestLength,
625
UErrorCode *pErrorCode){
627
int32_t retVal =0, count =0 ;
628
UConverter* conv = NULL;
629
UChar* pTarget = NULL;
630
UChar* pTargetLimit = NULL;
631
UChar* target = NULL;
633
UChar uStack [_STACK_BUFFER_CAPACITY];
635
wchar_t wStack[_STACK_BUFFER_CAPACITY];
636
wchar_t* pWStack = wStack;
639
char cStack[_STACK_BUFFER_CAPACITY];
640
int32_t cStackCap = _STACK_BUFFER_CAPACITY;
643
char* pCSrcLimit=NULL;
645
const wchar_t* pSrc = src;
646
const wchar_t* pSrcLimit = NULL;
649
/* if the wchar_t source is null terminated we can safely
650
* assume that there are no embedded nulls, this is a fast
651
* path for null terminated strings.
654
/* convert wchars to chars */
655
retVal = uprv_wcstombs(pCSrc,src, cStackCap);
658
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
660
}else if(retVal == cStackCap){
661
/* Should rarely occur */
662
u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
663
cStackCap*2,0,sizeof(char));
666
/* converted everything */
667
pCSrc = pCSrc+retVal;
673
/* here the source is not null terminated
674
* so it may have embedded nulls and we need to
675
* do some extra processing
677
int32_t remaining =cStackCap;
679
pSrcLimit = src + srcLength;
682
register int32_t nulLen = 0;
684
/* find nulls in the string */
685
while(nulLen<srcLength && pSrc[nulLen++]!=0){
688
if((pSrc+nulLen) < pSrcLimit){
689
/* check if we have enough room in pCSrc */
690
if(remaining < (nulLen * MB_CUR_MAX)){
691
/* should rarely occur */
692
int32_t len = (pCSrc-pCSave);
694
/* we do not have enough room so grow the buffer*/
695
u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
696
2*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
700
remaining = cStackCap-(pCSrc - pCSave);
703
/* we have found a null so convert the
704
* chunk from beginning of non-null char to null
706
retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
709
/* an error occurred bail out */
710
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
714
pCSrc += retVal+1 /* already null terminated */;
716
pSrc += nulLen; /* skip past the null */
717
srcLength-=nulLen; /* decrement the srcLength */
718
remaining -= (pCSrc-pCSave);
722
/* the source is not null terminated and we are
723
* at the end of source, so we copy the source to a temp buffer,
724
* null terminate it and convert wchar_ts to chars
726
if(nulLen > _STACK_BUFFER_CAPACITY){
727
/* Should rarely occur */
728
/* allocate a new buffer */
729
pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * nulLen);
731
*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
736
/* copy the contents to tempStack */
737
uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t));
740
/* null terminate the tempBuffer */
743
if(remaining < (nulLen * MB_CUR_MAX)){
744
/* Should rarely occur */
745
int32_t len = (pCSrc-pCSave);
747
/* we do not have enough room so grow the buffer*/
748
u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
749
cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
753
remaining = cStackCap-(pCSrc - pCSave);
755
/* convert to chars */
756
retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
760
srcLength-=nulLen; /* decrement the srcLength */
766
/* OK..now we have converted from wchar_ts to chars now
767
* convert chars to UChars
771
pTarget = target= dest;
772
pTargetLimit = dest + destCapacity;
774
conv= u_getDefaultConverter(pErrorCode);
776
if(U_FAILURE(*pErrorCode)|| conv==NULL){
782
*pErrorCode = U_ZERO_ERROR;
784
/* convert to stack buffer*/
785
ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
787
/* increment count to number written to stack */
788
count+= pTarget - target;
790
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
793
pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
804
u_terminateUChars(dest,destCapacity,count,pErrorCode);
808
if(cStack != pCSave){
812
if(wStack != pWStack){
816
u_releaseDefaultConverter(conv);
822
U_CAPI UChar* U_EXPORT2
823
u_strFromWCS(UChar *dest,
824
int32_t destCapacity,
825
int32_t *pDestLength,
828
UErrorCode *pErrorCode)
832
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
836
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
837
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
841
#ifdef U_WCHAR_IS_UTF16
842
/* wchar_t is UTF-16 just do a memcpy */
844
srcLength = u_strlen(src);
846
if(0 < srcLength && srcLength <= destCapacity){
847
uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
850
*pDestLength = srcLength;
853
u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
857
#elif defined U_WCHAR_IS_UTF32
859
return u_strFromUTF32(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
863
return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);