2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
19
* $Id: IconvGNUTransService.cpp 676954 2008-07-15 16:29:19Z dbertoni $
22
// ---------------------------------------------------------------------------
24
// ---------------------------------------------------------------------------
31
#elif HAVE_MACHINE_ENDIAN_H
32
#include <machine/endian.h>
35
#include <xercesc/util/XMLString.hpp>
36
#include <xercesc/util/XMLUniDefs.hpp>
37
#include <xercesc/util/XMLUni.hpp>
38
#include <xercesc/util/PlatformUtils.hpp>
39
#include <xercesc/util/TranscodingException.hpp>
40
#include <xercesc/util/Janitor.hpp>
41
#include "IconvGNUTransService.hpp"
43
XERCES_CPP_NAMESPACE_BEGIN
45
// ---------------------------------------------------------------------------
46
// Description of encoding schemas, supported by iconv()
47
// ---------------------------------------------------------------------------
48
typedef struct __IconvGNUEncoding {
49
const char* fSchema; // schema name
50
size_t fUChSize; // size of the character
51
unsigned int fUBO; // byte order, relative to the host
54
static const IconvGNUEncoding gIconvGNUEncodings[] = {
55
{ "UTF-16LE", 2, LITTLE_ENDIAN },
56
{ "UTF-16BE", 2, BIG_ENDIAN },
57
{ "UCS-2LE", 2, LITTLE_ENDIAN },
58
{ "UCS-2BE", 2, BIG_ENDIAN },
59
{ "UCS-2-INTERNAL", 2, BYTE_ORDER },
65
//--------------------------------------------------
66
// Macro-definitions to translate "native unicode"
67
// characters <-> XMLCh with different host byte order
68
// and encoding schemas.
70
# if BYTE_ORDER == LITTLE_ENDIAN
71
# define IXMLCh2WC16(x,w) \
72
*(w) = ((*(x)) >> 8) & 0xFF; \
73
*((w)+1) = (*(x)) & 0xFF
74
# define IWC162XMLCh(w,x) *(x) = ((*(w)) << 8) | (*((w)+1))
75
# define XMLCh2WC16(x,w) \
76
*(w) = (*(x)) & 0xFF; \
77
*((w)+1) = ((*(x)) >> 8) & 0xFF
78
# define WC162XMLCh(w,x) *(x) = ((*((w)+1)) << 8) | (*(w))
80
# define IXMLCh2WC32(x,w) \
81
*(w) = ((*(x)) >> 24) & 0xFF; \
82
*((w)+1) = ((*(x)) >> 16) & 0xFF; \
83
*((w)+2) = ((*(x)) >> 8) & 0xFF; \
84
*((w)+3) = (*(x)) & 0xFF
85
# define IWC322XMLCh(w,x) \
86
*(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) | \
87
((*((w)+2)) << 8) | (*((w)+3))
88
# define XMLCh2WC32(x,w) \
89
*((w)+3) = ((*(x)) >> 24) & 0xFF; \
90
*((w)+2) = ((*(x)) >> 16) & 0xFF; \
91
*((w)+1) = ((*(x)) >> 8) & 0xFF; \
93
# define WC322XMLCh(w,x) \
94
*(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) | \
95
((*((w)+1)) << 8) | (*(w))
97
# else /* BYTE_ORDER != LITTLE_ENDIAN */
99
# define XMLCh2WC16(x,w) \
100
*(w) = ((*(x)) >> 8) & 0xFF; \
101
*((w)+1) = (*(x)) & 0xFF
102
# define WC162XMLCh(w,x) *(x) = ((*(w)) << 8) | (*((w)+1))
103
# define IXMLCh2WC16(x,w) \
104
*(w) = (*(x)) & 0xFF; \
105
*((w)+1) = ((*(x)) >> 8) & 0xFF
106
# define IWC162XMLCh(w,x) *(x) = ((*((w)+1)) << 8) | (*(w))
108
# define XMLCh2WC32(x,w) \
109
*(w) = ((*(x)) >> 24) & 0xFF; \
110
*((w)+1) = ((*(x)) >> 16) & 0xFF; \
111
*((w)+2) = ((*(x)) >> 8) & 0xFF; \
112
*((w)+3) = (*(x)) & 0xFF
113
# define WC322XMLCh(w,x) \
114
*(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) | \
115
((*((w)+2)) << 8) | (*((w)+3))
116
# define IXMLCh2WC32(x,w) \
117
*((w)+3) = ((*(x)) >> 24) & 0xFF; \
118
*((w)+2) = ((*(x)) >> 16) & 0xFF; \
119
*((w)+1) = ((*(x)) >> 8) & 0xFF; \
121
# define IWC322XMLCh(w,x) \
122
*(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) | \
123
((*((w)+1)) << 8) | (*(w))
124
# endif /* BYTE_ORDER == LITTLE_ENDIAN */
133
// ---------------------------------------------------------------------------
135
// ---------------------------------------------------------------------------
136
static const unsigned int gTempBuffArraySize = 4096;
137
static const XMLCh gMyServiceId[] =
139
chLatin_I, chLatin_C, chLatin_o, chLatin_n, chLatin_v, chNull
143
// ---------------------------------------------------------------------------
145
// ---------------------------------------------------------------------------
146
static XMLSize_t getWideCharLength(const XMLCh* const src)
152
const XMLCh* pTmp = src;
159
//----------------------------------------------------------------------------
160
// There is an implementation of libiconv for FreeBSD (available through the
161
// ports collection). The following is a wrapper around the iconv().
162
//----------------------------------------------------------------------------
164
IconvGNUWrapper::IconvGNUWrapper (MemoryManager* manager)
165
: fUChSize(0), fUBO(LITTLE_ENDIAN),
166
fCDTo((iconv_t)-1), fCDFrom((iconv_t)-1), fMutex(manager)
170
IconvGNUWrapper::IconvGNUWrapper ( iconv_t cd_from,
174
MemoryManager* manager)
175
: fUChSize(uchsize), fUBO(ubo),
176
fCDTo(cd_to), fCDFrom(cd_from), fMutex(manager)
178
if (fCDFrom == (iconv_t) -1 || fCDTo == (iconv_t) -1) {
179
XMLPlatformUtils::panic (PanicHandler::Panic_NoTransService);
183
IconvGNUWrapper::~IconvGNUWrapper()
187
// Convert "native unicode" character into XMLCh
188
// Decodes the single "native unicode" character stored at mbc into *toRet.
// The unpacking path is selected from fUBO (byte order of the iconv schema)
// and fUChSize (bytes per character of that schema).
// NOTE(review): mbc is plain char; if char is signed on the target, the
// WC16/WC32 macros shift and OR sign-extended values for bytes >= 0x80 -
// confirm whether those paths can be reached with such bytes.
void IconvGNUWrapper::mbcToXMLCh (const char *mbc, XMLCh *toRet) const
190
// Schema byte order equals BYTE_ORDER.
if (fUBO == BYTE_ORDER) {
191
// Same character size as XMLCh: reinterpret the bytes directly.
if (fUChSize == sizeof(XMLCh))
192
*toRet = *((XMLCh*) mbc);
193
else if (fUChSize == 2) {
194
WC162XMLCh( mbc, toRet );
196
WC322XMLCh( mbc, toRet );
200
// I-prefixed macros are the byte-swapped variants of the unpack macros.
IWC162XMLCh( mbc, toRet );
202
IWC322XMLCh( mbc, toRet );
207
// Convert XMLCh into "native unicode" character
208
void IconvGNUWrapper::xmlChToMbc (XMLCh xch, char *mbc) const
210
if (fUBO == BYTE_ORDER) {
211
if (fUChSize == sizeof(XMLCh)) {
212
memcpy (mbc, &xch, fUChSize);
216
XMLCh2WC16( &xch, mbc );
218
XMLCh2WC32( &xch, mbc );
222
IXMLCh2WC16( &xch, mbc );
224
IXMLCh2WC32( &xch, mbc );
229
// Return uppercase equivalent for XMLCh
230
XMLCh IconvGNUWrapper::toUpper (const XMLCh ch)
235
char wcbuf[MAX_UCHSIZE * 2];
236
xmlChToMbc (ch, wcbuf);
240
size_t len = fUChSize;
241
char *pTmpArr = tmpArr;
244
if (::iconv (fCDTo, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1)
246
tmpArr[1] = toupper (*((unsigned char *)tmpArr));
252
if (::iconv (fCDFrom, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1)
254
mbcToXMLCh (wcbuf, (XMLCh*) &ch);
258
// Return lowercase equivalent for XMLCh
259
XMLCh IconvGNUWrapper::toLower (const XMLCh ch)
264
char wcbuf[MAX_UCHSIZE * 2];
265
xmlChToMbc (ch, wcbuf);
269
size_t len = fUChSize;
270
char *pTmpArr = tmpArr;
273
if (::iconv (fCDTo, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1)
275
tmpArr[1] = tolower (*((unsigned char*)tmpArr));
281
if (::iconv (fCDFrom, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1)
283
mbcToXMLCh (wcbuf, (XMLCh*) &ch);
287
// Fill array of XMLCh characters with data, supplied in the array
288
// of "native unicode" characters.
289
XMLCh* IconvGNUWrapper::mbsToXML
296
if (mbs_str == NULL || xml_str == NULL || cnt == 0)
298
if (fUBO == BYTE_ORDER) {
299
if (fUChSize == sizeof(XMLCh)) {
300
// null-transformation
301
memcpy (xml_str, mbs_str, fUChSize * cnt);
305
for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {
306
WC162XMLCh( mbs_str, xml_str + i);
309
for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {
310
WC322XMLCh( mbs_str, xml_str + i );
314
for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {
315
IWC162XMLCh( mbs_str, xml_str + i );
318
for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {
319
IWC322XMLCh( mbs_str, xml_str + i );
325
// Fill array of "native unicode" characters with data, supplied
326
// in the array of XMLCh characters.
327
char* IconvGNUWrapper::xmlToMbs
334
if (mbs_str == NULL || xml_str == NULL || cnt == 0)
336
char *toReturn = mbs_str;
337
if (fUBO == BYTE_ORDER) {
338
if (fUChSize == sizeof(XMLCh)) {
339
// null-transformation
340
memcpy (mbs_str, xml_str, fUChSize * cnt);
344
for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {
345
XMLCh2WC16( xml_str, mbs_str );
348
for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {
349
XMLCh2WC32( xml_str, mbs_str );
353
for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {
354
IXMLCh2WC16( xml_str, mbs_str );
357
for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {
358
IXMLCh2WC32( xml_str, mbs_str );
364
// Calls ::iconv with the fCDFrom conversion descriptor and returns iconv's
// result unchanged. fromLen and toPtr are forwarded as given, so iconv's
// updates to them are visible to the caller.
size_t IconvGNUWrapper::iconvFrom ( const char *fromPtr,
369
// ::iconv is handed a char** here, so the constness of fromPtr is cast away.
// tmpPtr addresses this function's own copy of fromPtr, so any advance of the
// input pointer made by iconv is not propagated back to the caller.
char ** tmpPtr = (char**)&fromPtr;
370
// &toLen is the address of the local toLen, so the remaining-output count
// written by iconv stays local to this call.
return ::iconv (fCDFrom, tmpPtr, fromLen, toPtr, &toLen);
373
// Calls ::iconv with the fCDTo conversion descriptor and returns iconv's
// result unchanged. fromLen and toPtr are forwarded as given, so iconv's
// updates to them are visible to the caller.
size_t IconvGNUWrapper::iconvTo ( const char *fromPtr,
378
// ::iconv is handed a char** here, so the constness of fromPtr is cast away.
// tmpPtr addresses this function's own copy of fromPtr, so any advance of the
// input pointer made by iconv is not propagated back to the caller.
char ** tmpPtr = (char**)&fromPtr;
379
// &toLen is the address of the local toLen, so the remaining-output count
// written by iconv stays local to this call.
return ::iconv (fCDTo, tmpPtr, fromLen, toPtr, &toLen);
383
// ---------------------------------------------------------------------------
384
// IconvGNUTransService: Constructors and Destructor
385
// ---------------------------------------------------------------------------
387
IconvGNUTransService::IconvGNUTransService(MemoryManager* manager)
388
: IconvGNUWrapper(manager), fUnicodeCP(0)
390
// Try to obtain local (host) characterset from the setlocale
391
// and through the environment. Do not call setlocale(LC_*, "")!
392
// Using an empty string instead of NULL, will modify the libc
395
char* fLocalCP = setlocale (LC_CTYPE, NULL);
396
if (fLocalCP == NULL || *fLocalCP == 0 ||
397
strcmp (fLocalCP, "C") == 0 ||
398
strcmp (fLocalCP, "POSIX") == 0) {
399
fLocalCP = getenv ("LC_ALL");
400
if (fLocalCP == NULL) {
401
fLocalCP = getenv ("LC_CTYPE");
402
if (fLocalCP == NULL)
403
fLocalCP = getenv ("LANG");
407
if (fLocalCP == NULL || *fLocalCP == 0 ||
408
strcmp (fLocalCP, "C") == 0 ||
409
strcmp (fLocalCP, "POSIX") == 0)
410
fLocalCP = "iso-8859-1"; // fallback locale
412
char *ptr = strchr (fLocalCP, '.');
414
fLocalCP = "iso-8859-1"; // fallback locale
419
// Select the native unicode characters encoding schema
420
const IconvGNUEncoding *eptr;
421
// first - try to use the schema with character size equal to XMLCh, and same endianness
422
for (eptr = gIconvGNUEncodings; eptr->fSchema; eptr++)
424
if (eptr->fUChSize != sizeof(XMLCh) || eptr->fUBO != BYTE_ORDER)
427
// try to create conversion descriptor
428
iconv_t cd_to = iconv_open(fLocalCP, eptr->fSchema);
429
if (cd_to == (iconv_t)-1)
431
iconv_t cd_from = iconv_open(eptr->fSchema, fLocalCP);
432
if (cd_from == (iconv_t)-1) {
438
setUChSize(eptr->fUChSize);
442
fUnicodeCP = eptr->fSchema;
445
if (fUnicodeCP == NULL)
446
// try to use any known schema
447
for (eptr = gIconvGNUEncodings; eptr->fSchema; eptr++)
449
// try to create conversion descriptor
450
iconv_t cd_to = iconv_open(fLocalCP, eptr->fSchema);
451
if (cd_to == (iconv_t)-1)
453
iconv_t cd_from = iconv_open(eptr->fSchema, fLocalCP);
454
if (cd_from == (iconv_t)-1) {
460
setUChSize(eptr->fUChSize);
464
fUnicodeCP = eptr->fSchema;
468
if (fUnicodeCP == NULL || cdTo() == (iconv_t)-1 || cdFrom() == (iconv_t)-1)
469
XMLPlatformUtils::panic (PanicHandler::Panic_NoTransService);
472
// Destructor: closes each iconv conversion descriptor that is still open and
// resets it to the (iconv_t)-1 sentinel.
IconvGNUTransService::~IconvGNUTransService()
474
// (iconv_t)-1 marks a descriptor that is not open; only open ones are closed.
if (cdTo() != (iconv_t) -1) {
475
iconv_close (cdTo());
476
setCDTo ((iconv_t)-1);
478
if (cdFrom() != (iconv_t) -1) {
479
iconv_close (cdFrom());
480
setCDFrom ((iconv_t)-1);
484
// ---------------------------------------------------------------------------
485
// IconvGNUTransService: The virtual transcoding service API
486
// ---------------------------------------------------------------------------
487
// Case-insensitive comparison of two null-terminated XMLCh strings.
// Each character is passed through toUpper() before comparison; the return
// value is the difference (c1 - c2) of the last pair of upper-cased
// characters examined.
int IconvGNUTransService::compareIString(const XMLCh* const comp1
488
, const XMLCh* const comp2)
490
const XMLCh* cptr1 = comp1;
491
const XMLCh* cptr2 = comp2;
493
// An XMLMutexLock on fMutex is taken before any toUpper() call in this function.
XMLMutexLock lockConverter(&fMutex);
495
XMLCh c1 = toUpper(*cptr1);
496
XMLCh c2 = toUpper(*cptr2);
497
// Walk both strings in step until either one reaches its terminator.
while ( (*cptr1 != 0) && (*cptr2 != 0) ) {
500
c1 = toUpper(*(++cptr1));
501
c2 = toUpper(*(++cptr2));
504
return (int) ( c1 - c2 );
508
int IconvGNUTransService::compareNIString(const XMLCh* const comp1
509
, const XMLCh* const comp2
510
, const XMLSize_t maxChars)
513
const XMLCh* cptr1 = comp1;
514
const XMLCh* cptr2 = comp2;
516
XMLMutexLock lockConverter(&fMutex);
518
while (true && maxChars)
520
XMLCh c1 = toUpper(*cptr1);
521
XMLCh c2 = toUpper(*cptr2);
524
return (int) (c1 - c2);
526
// If either ended, then both ended, so equal
527
if (!*cptr1 || !*cptr2)
533
// Bump the count of chars done. If it equals the count then we
534
// are equal for the requested count, so break out and return
545
const XMLCh* IconvGNUTransService::getId() const
550
XMLLCPTranscoder* IconvGNUTransService::makeNewLCPTranscoder(MemoryManager* manager)
552
return new (manager) IconvGNULCPTranscoder (cdFrom(), cdTo(), uChSize(), UBO(), manager);
555
bool IconvGNUTransService::supportsSrcOfs() const
560
// ---------------------------------------------------------------------------
561
// IconvGNUTransService: The protected virtual transcoding service API
562
// ---------------------------------------------------------------------------
564
IconvGNUTransService::makeNewXMLTranscoder
566
const XMLCh* const encodingName
567
, XMLTransService::Codes& resValue
568
, const XMLSize_t blockSize
569
, MemoryManager* const manager
572
resValue = XMLTransService::UnsupportedEncoding;
573
IconvGNUTranscoder *newTranscoder = NULL;
575
char *encLocal = XMLString::transcode(encodingName, manager);
576
ArrayJanitor<char> janBuf(encLocal, manager);
577
iconv_t cd_from, cd_to;
579
cd_from = iconv_open (fUnicodeCP, encLocal);
580
if (cd_from == (iconv_t)-1) {
581
resValue = XMLTransService::SupportFilesNotFound;
584
cd_to = iconv_open (encLocal, fUnicodeCP);
585
if (cd_to == (iconv_t)-1) {
586
resValue = XMLTransService::SupportFilesNotFound;
587
iconv_close (cd_from);
590
newTranscoder = new (manager) IconvGNUTranscoder (encodingName,
593
uChSize(), UBO(), manager);
595
resValue = XMLTransService::Ok;
596
return newTranscoder;
599
void IconvGNUTransService::upperCase(XMLCh* const toUpperCase)
601
XMLCh* outPtr = toUpperCase;
603
XMLMutexLock lockConverter(&fMutex);
607
*outPtr = toUpper(*outPtr);
612
void IconvGNUTransService::lowerCase(XMLCh* const toLowerCase)
614
XMLCh* outPtr = toLowerCase;
616
XMLMutexLock lockConverter(&fMutex);
620
*outPtr = toLower(*outPtr);
625
// ---------------------------------------------------------------------------
626
// IconvGNULCPTranscoder: The virtual transcoder API
627
// ---------------------------------------------------------------------------
628
XMLSize_t IconvGNULCPTranscoder::calcRequiredSize (const char* const srcText
629
, MemoryManager* const manager)
635
len = srcLen = strlen(srcText);
639
char tmpWideArr[gTempBuffArraySize];
642
XMLMutexLock lockConverter(&fMutex);
645
char *pTmpArr = tmpWideArr;
646
const char *ptr = srcText + srcLen - len;
647
size_t rc = iconvFrom(ptr, &len, &pTmpArr, gTempBuffArraySize);
648
if (rc == (size_t) -1 && errno != E2BIG) {
649
ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, manager);
652
rc = pTmpArr - (char *) tmpWideArr;
654
if (rc == 0 || len == 0)
657
return totalLen / uChSize();
661
XMLSize_t IconvGNULCPTranscoder::calcRequiredSize(const XMLCh* const srcText
662
, MemoryManager* const manager)
666
XMLSize_t wLent = getWideCharLength(srcText);
670
char tmpWBuff[gTempBuffArraySize];
673
ArrayJanitor<char> janBuf(wBufPtr, manager);
674
size_t len = wLent * uChSize();
675
if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
676
if (len > gTempBuffArraySize) {
677
wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len];
678
janBuf.reset(wBufPtr, manager);
682
xmlToMbs (srcText, wBuf, wLent);
684
wBuf = (char *) srcText;
686
char tmpBuff[gTempBuffArraySize];
688
char *srcEnd = wBuf + wLent * uChSize();
690
XMLMutexLock lockConverter(&fMutex);
693
char *pTmpArr = tmpBuff;
694
const char *ptr = srcEnd - len;
695
size_t rc = iconvTo(ptr, &len, &pTmpArr, gTempBuffArraySize);
696
if (rc == (size_t) -1 && errno != E2BIG) {
697
ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, manager);
700
rc = pTmpArr - tmpBuff;
702
if (rc == 0 || len == 0)
709
char* IconvGNULCPTranscoder::transcode(const XMLCh* const toTranscode,
710
MemoryManager* const manager)
717
retVal = (char*) manager->allocate(sizeof(char));//new char[1];
722
XMLSize_t wLent = getWideCharLength(toTranscode);
725
XMLSize_t neededLen = calcRequiredSize (toTranscode, manager);
728
// allocate output buffer
729
retVal = (char*) manager->allocate((neededLen + 1) * sizeof(char));//new char[neededLen + 1];
730
// prepare the original
731
char tmpWBuff[gTempBuffArraySize];
732
char *wideCharBuf = 0;
734
ArrayJanitor<char> janBuf(wBufPtr, manager);
735
size_t len = wLent * uChSize();
737
if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
738
if (len > gTempBuffArraySize) {
739
wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len];
740
janBuf.reset(wBufPtr, manager);
741
wideCharBuf = wBufPtr;
743
wideCharBuf = tmpWBuff;
744
xmlToMbs (toTranscode, wideCharBuf, wLent);
746
wideCharBuf = (char *) toTranscode;
748
// perform conversion
753
XMLMutexLock lockConverter(&fMutex);
754
rc = iconvTo(wideCharBuf, &len, &ptr, neededLen);
757
if (rc == (size_t)-1) {
760
retVal[neededLen] = 0;
766
bool IconvGNULCPTranscoder::transcode( const XMLCh* const toTranscode
768
, const XMLSize_t maxBytes
769
, MemoryManager* const manager)
771
// Watch for a couple of psycho corner cases
772
if (!toTranscode || !maxBytes) {
781
XMLSize_t wLent = getWideCharLength(toTranscode);
782
if (wLent > maxBytes)
785
// Fill the "unicode" string
786
char tmpWBuff[gTempBuffArraySize];
787
char *wideCharBuf = 0;
789
ArrayJanitor<char> janBuf(wBufPtr, manager);
790
size_t len = wLent * uChSize();
792
if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
793
if (len > gTempBuffArraySize) {
794
wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len];
795
janBuf.reset(wBufPtr, manager);
796
wideCharBuf = wBufPtr;
798
wideCharBuf = tmpWBuff;
799
xmlToMbs (toTranscode, wideCharBuf, wLent);
801
wideCharBuf = (char *) toTranscode;
803
// Ok, go ahead and try the transcoding. If it fails, then ...
808
XMLMutexLock lockConverter(&fMutex);
809
rc = iconvTo(wideCharBuf, &len, &ptr, maxBytes);
812
if (rc == (size_t)-1) {
822
XMLCh* IconvGNULCPTranscoder::transcode(const char* const toTranscode,
823
MemoryManager* const manager)
830
retVal = (XMLCh*) manager->allocate(sizeof(XMLCh));//new XMLCh[1];
835
XMLSize_t wLent = calcRequiredSize(toTranscode, manager);
837
retVal = (XMLCh*) manager->allocate(sizeof(XMLCh));//new XMLCh[1];
842
char tmpWBuff[gTempBuffArraySize];
843
char *wideCharBuf = 0;
845
ArrayJanitor<char> janBuf(wBufPtr, manager);
846
size_t len = wLent * uChSize();
848
retVal = (XMLCh*) manager->allocate((wLent + 1) * sizeof(XMLCh));//new XMLCh[wLent + 1];
849
if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
850
if (len > gTempBuffArraySize) {
851
wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len];
852
janBuf.reset(wBufPtr, manager);
853
wideCharBuf = wBufPtr;
855
wideCharBuf = tmpWBuff;
857
wideCharBuf = (char *) retVal;
859
size_t flen = strlen(toTranscode);
860
char *ptr = wideCharBuf;
864
XMLMutexLock lockConverter(&fMutex);
865
rc = iconvFrom(toTranscode, &flen, &ptr, len);
868
if (rc == (size_t) -1) {
871
if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER)
872
mbsToXML (wideCharBuf, retVal, wLent);
873
retVal[wLent] = 0x00;
879
bool IconvGNULCPTranscoder::transcode(const char* const toTranscode
880
, XMLCh* const toFill
881
, const XMLSize_t maxChars
882
, MemoryManager* const manager)
884
// Check for a couple of psycho corner cases
885
if (!toTranscode || !maxChars)
897
XMLSize_t wLent = calcRequiredSize(toTranscode);
898
if (wLent > maxChars)
901
char tmpWBuff[gTempBuffArraySize];
902
char *wideCharBuf = 0;
904
ArrayJanitor<char> janBuf(wBufPtr, manager);
905
size_t len = wLent * uChSize();
907
if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
908
if (len > gTempBuffArraySize) {
909
wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len];
910
janBuf.reset(wBufPtr, manager);
911
wideCharBuf = wBufPtr;
913
wideCharBuf = tmpWBuff;
915
wideCharBuf = (char *) toFill;
917
size_t flen = strlen(toTranscode); // wLent;
918
char *ptr = wideCharBuf;
922
XMLMutexLock lockConverter(&fMutex);
923
rc = iconvFrom(toTranscode, &flen, &ptr, len);
926
if (rc == (size_t)-1) {
930
if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER)
931
mbsToXML (wideCharBuf, toFill, wLent);
933
toFill[wLent] = 0x00;
938
// ---------------------------------------------------------------------------
939
// IconvGNULCPTranscoder: Constructors and Destructor
940
// ---------------------------------------------------------------------------
943
IconvGNULCPTranscoder::IconvGNULCPTranscoder (iconv_t cd_from,
947
MemoryManager* manager)
948
: IconvGNUWrapper (cd_from, cd_to, uchsize, ubo, manager)
953
IconvGNULCPTranscoder::~IconvGNULCPTranscoder()
958
// ---------------------------------------------------------------------------
959
// IconvGNUTranscoder: Constructors and Destructor
960
// ---------------------------------------------------------------------------
961
IconvGNUTranscoder::IconvGNUTranscoder (const XMLCh* const encodingName
962
, const XMLSize_t blockSize
967
, MemoryManager* const manager
969
: XMLTranscoder(encodingName, blockSize, manager)
970
, IconvGNUWrapper (cd_from, cd_to, uchsize, ubo, manager)
974
// Destructor: closes each iconv conversion descriptor that is still open and
// resets it to the (iconv_t)-1 sentinel.
IconvGNUTranscoder::~IconvGNUTranscoder()
976
// (iconv_t)-1 marks a descriptor that is not open; only open ones are closed.
if (cdTo() != (iconv_t)-1) {
977
iconv_close (cdTo());
978
setCDTo ((iconv_t)-1);
980
if (cdFrom() != (iconv_t)-1) {
981
iconv_close (cdFrom());
982
setCDFrom ((iconv_t)-1);
986
// ---------------------------------------------------------------------------
987
// IconvGNUTranscoder: Implementation of the virtual transcoder API
988
// ---------------------------------------------------------------------------
989
XMLSize_t IconvGNUTranscoder::transcodeFrom
991
const XMLByte* const srcData
992
, const XMLSize_t srcCount
993
, XMLCh* const toFill
994
, const XMLSize_t maxChars
995
, XMLSize_t& bytesEaten
996
, unsigned char* const charSizes )
998
// Transcode TO XMLCh
999
const char* startSrc = (const char*) srcData;
1000
const char* endSrc = (const char*) srcData + srcCount;
1002
char tmpWBuff[gTempBuffArraySize];
1003
char *startTarget = 0;
1005
ArrayJanitor<char> janBuf(wBufPtr, getMemoryManager());
1006
size_t len = maxChars * uChSize();
1008
if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
1009
if (len > gTempBuffArraySize) {
1010
wBufPtr = (char*) getMemoryManager()->allocate(len * sizeof(char));//new char[len];
1011
janBuf.reset(wBufPtr, getMemoryManager());
1012
startTarget = wBufPtr;
1014
startTarget = tmpWBuff;
1016
startTarget = (char *) toFill;
1018
// Do character-by-character transcoding
1019
char *orgTarget = startTarget;
1020
size_t srcLen = srcCount;
1021
size_t prevSrcLen = srcLen;
1022
unsigned int toReturn = 0;
1025
XMLMutexLock lockConverter(&fMutex);
1027
for (size_t cnt = 0; cnt < maxChars && srcLen; cnt++) {
1028
size_t rc = iconvFrom(startSrc, &srcLen, &orgTarget, uChSize());
1029
if (rc == (size_t)-1) {
1030
if (errno != E2BIG || prevSrcLen == srcLen) {
1031
ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
1034
charSizes[cnt] = prevSrcLen - srcLen;
1035
prevSrcLen = srcLen;
1036
bytesEaten += charSizes[cnt];
1037
startSrc = endSrc - srcLen;
1040
if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER)
1041
mbsToXML (startTarget, toFill, toReturn);
1045
XMLSize_t IconvGNUTranscoder::transcodeTo
1047
const XMLCh* const srcData
1048
, const XMLSize_t srcCount
1049
, XMLByte* const toFill
1050
, const XMLSize_t maxBytes
1051
, XMLSize_t& charsEaten
1052
, const UnRepOpts options )
1054
// Transcode FROM XMLCh
1055
char tmpWBuff[gTempBuffArraySize];
1056
char *startSrc = tmpWBuff;
1058
ArrayJanitor<char> janBuf(wBufPtr, getMemoryManager());
1059
size_t len = srcCount * uChSize();
1061
if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
1062
if (len > gTempBuffArraySize) {
1063
wBufPtr = (char*) getMemoryManager()->allocate(len * sizeof(char));//new char[len];
1064
janBuf.reset(wBufPtr, getMemoryManager());
1067
startSrc = tmpWBuff;
1068
xmlToMbs (srcData, startSrc, srcCount);
1070
startSrc = (char *) srcData;
1072
char* startTarget = (char *) toFill;
1073
size_t srcLen = len;
1078
XMLMutexLock lockConverter(&fMutex);
1079
rc = iconvTo (startSrc, &srcLen, &startTarget, maxBytes);
1082
if (rc == (size_t)-1 && errno != E2BIG) {
1083
ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
1085
charsEaten = srcCount - srcLen / uChSize();
1086
return startTarget - (char *)toFill;
1089
// Reports whether the character value toCheck can be converted by the fCDTo
// descriptor: the value is packed into srcBuf as one or two "native unicode"
// units, pushed through iconvTo(), and the call succeeds only if iconv did
// not fail and consumed all input (len == 0).
bool IconvGNUTranscoder::canTranscodeTo
1091
const unsigned int toCheck
1095
// If the passed value is really a surrogate embedded together, then
1096
// we need to break it out into its two chars. Else just one.
1098
char srcBuf[MAX_UCHSIZE * 2];
1099
unsigned int srcCount = 1;
1100
// Any bit above the low 16 means the value does not fit in one 16-bit unit.
if (toCheck & 0xFFFF0000) {
1101
// NOTE(review): the usual UTF-16 high surrogate is
// ((cp - 0x10000) >> 10) + 0xD800; confirm which form toCheck arrives in.
XMLCh ch1 = (toCheck >> 10) + 0xD800;
1102
// Parenthesized on purpose: '+' binds tighter than '&', so the unparenthesized
// form masked toCheck with (0x3FF + 0xDC00) instead of building a low surrogate.
XMLCh ch2 = (toCheck & 0x3FF) + 0xDC00;
1103
xmlToMbs(&ch1, srcBuf, 1);
1104
xmlToMbs(&ch2, srcBuf + uChSize(), 1);
1107
xmlToMbs((const XMLCh*) &toCheck, srcBuf, 1);
1108
size_t len = srcCount * uChSize();
1110
char* pTmpBuf = tmpBuf;
1112
// An XMLMutexLock on fMutex is taken before the conversion descriptor is used.
XMLMutexLock lockConverter(&fMutex);
1113
size_t rc = iconvTo( srcBuf, &len, &pTmpBuf, 64);
1115
return (rc != (size_t)-1) && (len == 0);
1118
XERCES_CPP_NAMESPACE_END