2
Licensed Materials - Property of IBM
3
DB2 Storage Engine Enablement
4
Copyright IBM Corporation 2007,2008
7
Redistribution and use in source and binary forms, with or without modification,
8
are permitted provided that the following conditions are met:
9
(a) Redistributions of source code must retain this list of conditions, the
10
copyright notice in section {d} below, and the disclaimer following this
12
(b) Redistributions in binary form must reproduce this list of conditions, the
13
copyright notice in section (d) below, and the disclaimer following this
14
list of conditions, in the documentation and/or other materials provided
15
with the distribution.
16
(c) The name of IBM may not be used to endorse or promote products derived from
17
this software without specific prior written permission.
18
(d) The text of the required copyright notice is:
19
Licensed Materials - Property of IBM
20
DB2 Storage Engine Enablement
21
Copyright IBM Corporation 2007,2008
24
THIS SOFTWARE IS PROVIDED BY IBM CORPORATION "AS IS" AND ANY EXPRESS OR IMPLIED
25
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
27
SHALL IBM CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
29
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31
CONTRACT, STRICT LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
39
@brief A direct map optimization of iconv and related functions
40
This was shown to significantly reduce character conversion cost
41
for short strings when compared to calling iconv system code.
69
#define EXTERN extern "C"
76
/* ANSI integer data types */
77
#if defined(__OS400_TGTVRM__)
78
/* for DTAMDL(*P128), datamodel(P128): int/long/pointer=4/4/16 */
79
/* LLP64:4/4/8 is used for teraspace ?? */
80
typedef short int16_t;
81
typedef unsigned short uint16_t;
83
typedef unsigned int uint32_t;
84
typedef long long int64_t;
85
typedef unsigned long long uint64_t;
87
/* PASE uses IPL32: int/long/pointer=4/4/4 + long long */
88
#elif defined(__64BIT__)
89
/* AIX 64 bit uses LP64: int/long/pointer=4/8/8 */
92
#define CONVERTER_ICONV 1
93
#define CONVERTER_DMAP 2
117
typedef struct __dmap_rec *dmap_t;
121
uint32_t codingSchema;
122
unsigned char * dmapS2S; /* SBCS -> SBCS */
123
/* The following conversions need to be followed by a conversion from UCS-2/UTF-16 to UTF-8 */
124
UniChar * dmapD12U; /* DBCS(non-EUC) -> UCS-2/UTF-16 */
125
UniChar * dmapD22U; /* DBCS(non-EUC) -> UCS-2/UTF-16 */
126
UniChar * dmapE02U; /* EUC/SS0 -> UCS-2/UTF-16 */
127
UniChar * dmapE12U; /* EUC/SS1 -> UCS-2/UTF-16 */
128
UniChar * dmapE22U; /* EUC/0x8E + SS2 -> UCS-2/UTF-16 */
129
UniChar * dmapE32U; /* EUC/0x8F + SS3 -> UCS-2/UTF-16 */
130
uchar * dmapU2D; /* UCS-2 -> DBCS */
131
uchar * dmapU2S; /* UCS-2 -> EUC SS0 */
132
uchar * dmapU2M2; /* UCS-2 -> EUC SS1 */
133
uchar * dmapU2M3; /* UCS-2 -> EUC SS2/SS3 */
134
/* All of these pointers/tables are not used at the same time.
135
* You may be able to save some space if you consolidate them.
137
uchar * dmapS28; /* SBCS -> UTF-8 */
138
uchar * dmapD28; /* DBCS -> UTF-8 */
141
typedef struct __myconv_rec *myconv_t;
144
uint32_t converterType;
145
uint32_t index; /* for close */
150
int32_t allocatedSize;
153
UniChar subD; /* DBCS substitution char */
154
char subS; /* SBCS substitution char */
155
UniChar srcSubD; /* DBCS substitution char of src codepage */
156
char srcSubS; /* SBCS substitution char of src codepage */
157
char from [41+1]; /* codepage name is up to 41 bytes */
158
char to [41+1]; /* codepage name is up to 41 bytes */
160
char reserved[10]; /* align 128 */
162
char reserved[14]; /* align 128 */
167
EXTERN int32_t myconvDebug;
171
EXTERN int myconvGetES(CCSID);
172
EXTERN int myconvIsEBCDIC(const char *);
173
EXTERN int myconvIsASCII(const char *);
174
EXTERN int myconvIsUnicode(const char *); /* UTF-8, UTF-16, or UCS-2 */
175
EXTERN int myconvIsUnicode2(const char *); /* 2 byte Unicode */
176
EXTERN int myconvIsUCS2(const char *);
177
EXTERN int myconvIsUTF16(const char *);
178
EXTERN int myconvIsUTF8(const char *);
179
EXTERN int myconvIsEUC(const char *);
180
EXTERN int myconvIsISO(const char *);
181
EXTERN int myconvIsSBCS(const char *);
182
EXTERN int myconvIsDBCS(const char *);
183
EXTERN char myconvGetSubS(const char *);
184
EXTERN UniChar myconvGetSubD(const char *);
187
EXTERN myconv_t myconv_open(const char*, const char*, int32_t);
188
EXTERN int myconv_close(myconv_t);
190
INTERN size_t myconv_iconv(myconv_t cd ,
194
size_t* outBytesLeft,
197
return iconv(cd->cnv_iconv, inBuf, inBytesLeft, outBuf, outBytesLeft);
200
INTERN size_t myconv_dmap(myconv_t cd,
204
size_t* outBytesLeft,
207
if (cd->cnv_dmap->codingSchema == DMAP_S2S) {
208
register unsigned char * dmapS2S=cd->cnv_dmap->dmapS2S;
209
register int inLen=*inBytesLeft;
210
register char * pOut=*outBuf;
211
register char * pIn=*inBuf;
212
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
213
register char subS=cd->subS;
214
register size_t numS=0;
216
if (pLastOutBuf < pOut)
223
errno=EILSEQ; /* 116 */
224
*outBytesLeft-=(*inBytesLeft-inLen);
232
if ((*pOut=dmapS2S[*pIn]) == subS) {
233
if (*pIn != cd->srcSubS)
242
*outBytesLeft-=(*inBytesLeft-inLen);
249
} else if (cd->cnv_dmap->codingSchema == DMAP_E2U) {
250
/* use uchar * instead of UniChar to avoid memcpy */
251
register uchar * dmapE02U=(uchar *) (cd->cnv_dmap->dmapE02U);
252
register uchar * dmapE12U=(uchar *) (cd->cnv_dmap->dmapE12U);
253
register uchar * dmapE22U=(uchar *) (cd->cnv_dmap->dmapE22U);
254
register uchar * dmapE32U=(uchar *) (cd->cnv_dmap->dmapE32U);
255
register int inLen=*inBytesLeft;
256
register char * pOut=*outBuf;
257
register char * pIn=*inBuf;
259
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
260
register size_t numS=0;
262
if (pLastOutBuf < pOut)
272
if (*pIn == 0x8E) { /* SS2 */
274
if (cd->fromCcsid == 33722 || /* IBM-eucJP */
275
cd->fromCcsid == 964) /* IBM-eucTW */
276
errno=EINVAL; /* 22 */
278
errno=EILSEQ; /* 116 */
279
*outBytesLeft-=(pOut-*outBuf);
287
if (cd->fromCcsid == 964) /* IBM-eucTW */
288
errno=EINVAL; /* 22 */
290
errno=EILSEQ; /* 116 */
291
*outBytesLeft-=(pOut-*outBuf);
297
offset=(*pIn - 0xA0);
299
if (dmapE22U[offset] == 0x00 &&
300
dmapE22U[offset+1] == 0x00) { /* 2 bytes */
302
if (cd->fromCcsid == 964) /* IBM-eucTW */
303
errno=EINVAL; /* 22 */
305
errno=EILSEQ; /* 116 */
306
*outBytesLeft-=(pOut-*outBuf);
312
offset=(*pIn - 0xA0) * 0x60 + 0x60;
315
if (cd->fromCcsid == 964) /* IBM-eucTW */
316
errno=EINVAL; /* 22 */
318
errno=EILSEQ; /* 116 */
319
*outBytesLeft-=(pOut-*outBuf);
325
offset+=(*pIn - 0xA0);
327
if (dmapE22U[offset] == 0x00 &&
328
dmapE22U[offset+1] == 0x00) {
329
if (cd->fromCcsid == 964) /* IBM-eucTW */
330
errno=EINVAL; /* 22 */
332
errno=EILSEQ; /* 116 */
333
*outBytesLeft-=(pOut-*outBuf);
339
*pOut=dmapE22U[offset];
341
*pOut=dmapE22U[offset+1];
343
if (dmapE22U[offset] == 0xFF &&
344
dmapE22U[offset+1] == 0xFD) {
345
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
350
} else { /* 1 bytes */
351
*pOut=dmapE22U[offset];
353
*pOut=dmapE22U[offset+1];
358
} else if (*pIn == 0x8F) { /* SS3 */
360
if (cd->fromCcsid == 33722) /* IBM-eucJP */
361
errno=EINVAL; /* 22 */
363
errno=EILSEQ; /* 116 */
364
*outBytesLeft-=(pOut-*outBuf);
372
if (cd->fromCcsid == 970 || /* IBM-eucKR */
373
cd->fromCcsid == 964 || /* IBM-eucTW */
374
cd->fromCcsid == 1383 || /* IBM-eucCN */
375
(cd->fromCcsid == 33722 && 3 <= inLen)) /* IBM-eucJP */
376
errno=EILSEQ; /* 116 */
378
errno=EINVAL; /* 22 */
379
*outBytesLeft-=(pOut-*outBuf);
385
offset=(*pIn - 0xA0);
387
if (dmapE32U[offset] == 0x00 &&
388
dmapE32U[offset+1] == 0x00) { /* 0x8F + 2 bytes */
390
if (cd->fromCcsid == 33722)
391
errno=EINVAL; /* 22 */
393
errno=EILSEQ; /* 116 */
394
*outBytesLeft-=(pOut-*outBuf);
400
offset=(*pIn - 0xA0) * 0x60 + 0x60;
403
errno=EILSEQ; /* 116 */
404
*outBytesLeft-=(pOut-*outBuf);
410
offset+=(*pIn - 0xA0);
412
if (dmapE32U[offset] == 0x00 &&
413
dmapE32U[offset+1] == 0x00) {
414
errno=EILSEQ; /* 116 */
415
*outBytesLeft-=(pOut-*outBuf);
421
*pOut=dmapE32U[offset];
423
*pOut=dmapE32U[offset+1];
425
if (dmapE32U[offset] == 0xFF &&
426
dmapE32U[offset+1] == 0xFD) {
427
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
432
} else { /* 0x8F + 1 bytes */
433
*pOut=dmapE32U[offset];
435
*pOut=dmapE32U[offset+1];
444
if (dmapE02U[offset] == 0x00 &&
445
dmapE02U[offset+1] == 0x00) { /* SS1 */
447
if ((cd->fromCcsid == 33722 && (*pIn == 0xA0 || (0xA9 <= *pIn && *pIn <= 0xAF) || *pIn == 0xFF)) ||
448
(cd->fromCcsid == 970 && (*pIn == 0xA0 || *pIn == 0xAD || *pIn == 0xAE || *pIn == 0xAF || *pIn == 0xFF)) ||
449
(cd->fromCcsid == 964 && (*pIn == 0xA0 || (0xAA <= *pIn && *pIn <= 0xC1) || *pIn == 0xC3 || *pIn == 0xFE || *pIn == 0xFF)) ||
450
(cd->fromCcsid == 1383 && (*pIn == 0xA0 || *pIn == 0xFF)))
451
errno=EILSEQ; /* 116 */
453
errno=EINVAL; /* 22 */
454
*outBytesLeft-=(pOut-*outBuf);
461
errno=EILSEQ; /* 116 */
462
*outBytesLeft-=(pOut-*outBuf);
468
offset=(*pIn - 0xA0) * 0x60;
471
errno=EILSEQ; /* 116 */
472
*outBytesLeft-=(pOut-*outBuf);
478
offset+=(*pIn - 0xA0);
480
if (dmapE12U[offset] == 0x00 &&
481
dmapE12U[offset+1] == 0x00) { /* undefined mapping */
482
errno=EILSEQ; /* 116 */
483
*outBytesLeft-=(pOut-*outBuf);
489
*pOut=dmapE12U[offset];
491
*pOut=dmapE12U[offset+1];
493
if (dmapE12U[offset] == 0xFF &&
494
dmapE12U[offset+1] == 0xFD) {
495
if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
501
*pOut=dmapE02U[offset];
503
*pOut=dmapE02U[offset+1];
505
if (dmapE02U[offset] == 0x00 &&
506
dmapE02U[offset+1] == 0x1A) {
507
if (*pIn != cd->srcSubS)
516
*outBytesLeft-=(pOut-*outBuf);
524
} else if (cd->cnv_dmap->codingSchema == DMAP_E28) {
525
/* use uchar * instead of UniChar to avoid memcpy */
526
register uchar * dmapE02U=(uchar *) (cd->cnv_dmap->dmapE02U);
527
register uchar * dmapE12U=(uchar *) (cd->cnv_dmap->dmapE12U);
528
register uchar * dmapE22U=(uchar *) (cd->cnv_dmap->dmapE22U);
529
register uchar * dmapE32U=(uchar *) (cd->cnv_dmap->dmapE32U);
530
register int inLen=*inBytesLeft;
531
register char * pOut=*outBuf;
532
register char * pIn=*inBuf;
534
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
535
register size_t numS=0;
536
register UniChar in; /* copy part of U28 */
537
register UniChar ucs2;
539
if (pLastOutBuf < pOut)
547
if (*pIn == 0x8E) { /* SS2 */
549
if (cd->fromCcsid == 33722 || /* IBM-eucJP */
550
cd->fromCcsid == 964) /* IBM-eucTW */
551
errno=EINVAL; /* 22 */
553
errno=EILSEQ; /* 116 */
554
*outBytesLeft-=(pOut-*outBuf);
562
if (cd->fromCcsid == 964) /* IBM-eucTW */
563
errno=EINVAL; /* 22 */
565
errno=EILSEQ; /* 116 */
566
*outBytesLeft-=(pOut-*outBuf);
572
offset=(*pIn - 0xA0);
574
if (dmapE22U[offset] == 0x00 &&
575
dmapE22U[offset+1] == 0x00) { /* 2 bytes */
577
if (cd->fromCcsid == 964) /* IBM-eucTW */
578
errno=EINVAL; /* 22 */
580
errno=EILSEQ; /* 116 */
581
*outBytesLeft-=(pOut-*outBuf);
587
offset=(*pIn - 0xA0) * 0x60 + 0x60;
590
if (cd->fromCcsid == 964) /* IBM-eucTW */
591
errno=EINVAL; /* 22 */
593
errno=EILSEQ; /* 116 */
594
*outBytesLeft-=(pOut-*outBuf);
600
offset+=(*pIn - 0xA0);
602
if (dmapE22U[offset] == 0x00 &&
603
dmapE22U[offset+1] == 0x00) {
604
if (cd->fromCcsid == 964) /* IBM-eucTW */
605
errno=EINVAL; /* 22 */
607
errno=EILSEQ; /* 116 */
608
*outBytesLeft-=(pOut-*outBuf);
616
in+=dmapE22U[offset+1];
617
if (dmapE22U[offset] == 0xFF &&
618
dmapE22U[offset+1] == 0xFD) {
619
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
624
} else { /* 1 bytes */
627
in+=dmapE22U[offset+1];
631
} else if (*pIn == 0x8F) { /* SS3 */
633
if (cd->fromCcsid == 33722) /* IBM-eucJP */
634
errno=EINVAL; /* 22 */
636
errno=EILSEQ; /* 116 */
637
*outBytesLeft-=(pOut-*outBuf);
645
if (cd->fromCcsid == 970 || /* IBM-eucKR */
646
cd->fromCcsid == 964 || /* IBM-eucTW */
647
cd->fromCcsid == 1383 || /* IBM-eucCN */
648
(cd->fromCcsid == 33722 && 3 <= inLen)) /* IBM-eucJP */
649
errno=EILSEQ; /* 116 */
651
errno=EINVAL; /* 22 */
652
*outBytesLeft-=(pOut-*outBuf);
658
offset=(*pIn - 0xA0);
660
if (dmapE32U[offset] == 0x00 &&
661
dmapE32U[offset+1] == 0x00) { /* 0x8F + 2 bytes */
663
if (cd->fromCcsid == 33722)
664
errno=EINVAL; /* 22 */
666
errno=EILSEQ; /* 116 */
667
*outBytesLeft-=(pOut-*outBuf);
673
offset=(*pIn - 0xA0) * 0x60 + 0x60;
676
errno=EILSEQ; /* 116 */
677
*outBytesLeft-=(pOut-*outBuf);
683
offset+=(*pIn - 0xA0);
685
if (dmapE32U[offset] == 0x00 &&
686
dmapE32U[offset+1] == 0x00) {
687
errno=EILSEQ; /* 116 */
688
*outBytesLeft-=(pOut-*outBuf);
696
in+=dmapE32U[offset+1];
697
if (dmapE32U[offset] == 0xFF &&
698
dmapE32U[offset+1] == 0xFD) {
699
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
704
} else { /* 0x8F + 1 bytes */
707
in+=dmapE32U[offset+1];
715
if (dmapE02U[offset] == 0x00 &&
716
dmapE02U[offset+1] == 0x00) { /* SS1 */
718
if ((cd->fromCcsid == 33722 && (*pIn == 0xA0 || (0xA9 <= *pIn && *pIn <= 0xAF) || *pIn == 0xFF)) ||
719
(cd->fromCcsid == 970 && (*pIn == 0xA0 || *pIn == 0xAD || *pIn == 0xAE || *pIn == 0xAF || *pIn == 0xFF)) ||
720
(cd->fromCcsid == 964 && (*pIn == 0xA0 || (0xAA <= *pIn && *pIn <= 0xC1) || *pIn == 0xC3 || *pIn == 0xFE || *pIn == 0xFF)) ||
721
(cd->fromCcsid == 1383 && (*pIn == 0xA0 || *pIn == 0xFF)))
722
errno=EILSEQ; /* 116 */
724
errno=EINVAL; /* 22 */
725
*outBytesLeft-=(pOut-*outBuf);
732
errno=EILSEQ; /* 116 */
733
*outBytesLeft-=(pOut-*outBuf);
739
offset=(*pIn - 0xA0) * 0x60;
742
errno=EILSEQ; /* 116 */
743
*outBytesLeft-=(pOut-*outBuf);
749
offset+=(*pIn - 0xA0);
751
if (dmapE12U[offset] == 0x00 &&
752
dmapE12U[offset+1] == 0x00) { /* undefined mapping */
753
errno=EILSEQ; /* 116 */
754
*outBytesLeft-=(pOut-*outBuf);
762
in+=dmapE12U[offset+1];
763
if (dmapE12U[offset] == 0xFF &&
764
dmapE12U[offset+1] == 0xFD) {
765
if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
773
in+=dmapE02U[offset+1];
774
if (dmapE02U[offset] == 0x00 &&
775
dmapE02U[offset+1] == 0x1A) {
776
if (*pIn != cd->srcSubS)
784
if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
787
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
790
in&=0x001F; /* 0b0000000000011111 */
791
in|=0x00C0; /* 0b0000000011000000 */
794
byte=ucs2; /* dmapD12U[offset+1]; */
795
byte&=0x3F; /* 0b00111111; */
796
byte|=0x80; /* 0b10000000; */
799
} else if ((in & 0xFC00) == 0xD800) {
809
byte=(ucs2>>8); /* dmapD12U[offset]; */
811
byte|=0xE0; /* 0b11100000; */
815
byte=(ucs2>>8); /* dmapD12U[offset]; */
817
work=ucs2; /* dmapD12U[offset+1]; */
820
byte&=0x3F; /* 0b00111111; */
821
byte|=0x80; /* 0b10000000; */
825
byte=ucs2; /* dmapD12U[offset+1]; */
826
byte&=0x3F; /* 0b00111111; */
827
byte|=0x80; /* 0b10000000; */
834
*outBytesLeft-=(pOut-*outBuf);
841
} else if (cd->cnv_dmap->codingSchema == DMAP_U2E) {
842
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
843
register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
844
register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
845
register int inLen=*inBytesLeft;
846
register char * pOut=*outBuf;
847
register char * pIn=*inBuf;
848
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
849
register char subS=cd->subS;
850
register char * pSubD=(char *) &(cd->subD);
851
register size_t numS=0;
852
register size_t rc=0;
854
register uint32_t in;
856
errno=EINVAL; /* 22 */
857
*outBytesLeft-=(pOut-*outBuf);
863
if (pLastOutBuf < pOut)
871
} else if (in < 0x100 && dmapU2S[in] != 0x0000) {
872
if ((*pOut=dmapU2S[in]) == subS) {
873
if (in != cd->srcSubS)
879
if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
881
if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
891
*pOut=dmapU2M3[1+in];
893
*pOut=dmapU2M3[2+in];
899
if (dmapU2M2[1+in] == 0x00) {
902
if (in != cd->srcSubS)
906
*pOut=dmapU2M2[1+in];
908
if (memcmp(pOut-2, pSubD, 2) == 0) {
910
if (in != cd->srcSubD) {
921
*outBytesLeft-=(pOut-*outBuf);
926
return rc; /* compatibility to iconv() */
928
} else if (cd->cnv_dmap->codingSchema == DMAP_T2E) {
929
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
930
register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
931
register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
932
register int inLen=*inBytesLeft;
933
register char * pOut=*outBuf;
934
register char * pIn=*inBuf;
935
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
936
register char subS=cd->subS;
937
register char * pSubD=(char *) &(cd->subD);
938
register size_t numS=0;
939
register size_t rc=0;
941
register uint32_t in;
943
errno=EINVAL; /* 22 */
944
*outBytesLeft-=(pOut-*outBuf);
945
*inBytesLeft=inLen-1;
952
if (pLastOutBuf < pOut)
960
} else if (0xD800 <= in && in <= 0xDBFF) { /* first byte of surrogate */
961
errno=EINVAL; /* 22 */
962
*inBytesLeft=inLen-2;
963
*outBytesLeft-=(pOut-*outBuf);
970
} else if (0xDC00 <= in && in <= 0xDFFF) { /* second byte of surrogate */
971
errno=EINVAL; /* 22 */
972
*inBytesLeft=inLen-1;
973
*outBytesLeft-=(pOut-*outBuf);
980
} else if (in < 0x100 && dmapU2S[in] != 0x0000) {
981
if ((*pOut=dmapU2S[in]) == subS) {
982
if (in != cd->srcSubS)
988
if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
990
if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
1000
*pOut=dmapU2M3[1+in];
1002
*pOut=dmapU2M3[2+in];
1008
if (dmapU2M2[1+in] == 0x00) {
1009
if (*pOut == subS) {
1011
if (in != cd->srcSubS)
1015
*pOut=dmapU2M2[1+in];
1017
if (memcmp(pOut-2, pSubD, 2) == 0) {
1019
if (in != cd->srcSubD) {
1030
*outBytesLeft-=(pOut-*outBuf);
1037
} else if (cd->cnv_dmap->codingSchema == DMAP_82E) {
1038
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
1039
register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
1040
register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
1041
register int inLen=*inBytesLeft;
1042
register char * pOut=*outBuf;
1043
register char * pIn=*inBuf;
1044
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
1045
register char subS=cd->subS;
1046
register char * pSubD=(char *) &(cd->subD);
1047
register size_t numS=0;
1048
register size_t rc=0;
1050
register uint32_t in;
1052
if (pLastOutBuf < pOut)
1054
/* convert from UTF-8 to UCS-2 */
1060
register uchar byte1=*pIn;
1061
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
1062
/* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
1066
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
1068
errno=EINVAL; /* 22 */
1069
*outBytesLeft-=(pOut-*outBuf);
1076
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
1077
errno=EILSEQ; /* 116 */
1078
*outBytesLeft-=(pOut-*outBuf);
1085
/* 2 bytes sequence:
1086
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
1087
register uchar byte2;
1090
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
1091
register uchar work=byte1;
1093
byte2&=0x3F; /* 0b00111111; */
1096
byte1&=0x1F; /* 0b00011111; */
1103
} else { /* invalid sequence */
1104
errno=EILSEQ; /* 116 */
1105
*outBytesLeft-=(pOut-*outBuf);
1112
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
1113
/* 3 bytes sequence:
1114
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
1115
register uchar byte2;
1116
register uchar byte3;
1118
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
1119
errno=EILSEQ; /* 116 */
1121
errno=EINVAL; /* 22 */
1122
*outBytesLeft-=(pOut-*outBuf);
1133
if ((byte2 & 0xC0) != 0x80 ||
1134
(byte3 & 0xC0) != 0x80 ||
1135
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
1136
errno=EILSEQ; /* 116 */
1137
*outBytesLeft-=(pOut-*outBuf);
1145
register uchar work=byte2;
1147
byte3&=0x3F; /* 0b00111111; */
1150
byte2&=0x3F; /* 0b00111111; */
1160
} else if ((0xF0 <= byte1 && byte1 <= 0xF4)) { /* (bytes1 & 11111000) == 0x1110000 */
1162
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
1163
where uuuuu = wwww + 1 */
1164
register uchar byte2;
1165
register uchar byte3;
1166
register uchar byte4;
1168
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
1169
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
1170
(cd->toCcsid == 13488) )
1171
errno=EILSEQ; /* 116 */
1173
errno=EINVAL; /* 22 */
1174
*outBytesLeft-=(pOut-*outBuf);
1187
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
1188
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
1189
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
1190
register uchar work=byte2;
1191
if (byte1 == 0xF0 && byte2 < 0x90) {
1192
errno=EILSEQ; /* 116 */
1193
*outBytesLeft-=(pOut-*outBuf);
1199
/* iconv() returns 0 for 0xF4908080 and convert to 0x00
1200
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
1202
*outBytesLeft-=(pOut-*outBuf);
1211
work&=0x30; /* 0b00110000; */
1213
byte1&=0x07; /* 0b00000111; */
1215
byte1+=work; /* uuuuu */
1220
work+=0xD8; /* 0b11011011; */
1226
byte2&=0x3C; /* 0b00111100; */
1229
work&=0x03; /* 0b00000011; */
1236
work&=0x03; /* 0b00000011; */
1237
work|=0xDC; /* 0b110111xx; */
1242
byte4&=0x3F; /* 0b00111111; */
1247
#ifdef match_with_GBK
1248
if ((0xD800 == in && in2 < 0xDC80) ||
1249
(0xD840 == in && in2 < 0xDC80) ||
1250
(0xD880 == in && in2 < 0xDC80) ||
1251
(0xD8C0 == in && in2 < 0xDC80) ||
1252
(0xD900 == in && in2 < 0xDC80) ||
1253
(0xD940 == in && in2 < 0xDC80) ||
1254
(0xD980 == in && in2 < 0xDC80) ||
1255
(0xD9C0 == in && in2 < 0xDC80) ||
1256
(0xDA00 == in && in2 < 0xDC80) ||
1257
(0xDA40 == in && in2 < 0xDC80) ||
1258
(0xDA80 == in && in2 < 0xDC80) ||
1259
(0xDAC0 == in && in2 < 0xDC80) ||
1260
(0xDB00 == in && in2 < 0xDC80) ||
1261
(0xDB40 == in && in2 < 0xDC80) ||
1262
(0xDB80 == in && in2 < 0xDC80) ||
1263
(0xDBC0 == in && in2 < 0xDC80)) {
1265
if ((0xD800 <= in && in <= 0xDBFF) &&
1266
(0xDC00 <= in2 && in2 <= 0xDFFF)) {
1273
} else { /* invalid sequence */
1274
errno=EILSEQ; /* 116 */
1275
*outBytesLeft-=(pOut-*outBuf);
1282
} else if (0xF5 <= byte1 && byte1 <= 0xFF) { /* minic iconv() behavior */
1284
(inLen >= 4 && byte1 == 0xF8 && pIn[1] < 0x90) ||
1285
pIn[1] < 0x80 || 0xBF < pIn[1] ||
1286
pIn[2] < 0x80 || 0xBF < pIn[2] ||
1287
pIn[3] < 0x80 || 0xBF < pIn[3] ) {
1289
errno=EINVAL; /* 22 */
1290
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
1291
errno=EILSEQ; /* 116 */
1292
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
1293
errno=EILSEQ; /* 116 */
1294
else if (inLen >= 4 && (byte1 == 0xF8 || (pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
1295
errno=EILSEQ; /* 116 */
1297
errno=EINVAL; /* 22 */
1299
*outBytesLeft-=(pOut-*outBuf);
1305
} else if ((pIn[1] == 0x80 || pIn[1] == 0x90 || pIn[1] == 0xA0 || pIn[1] == 0xB0) &&
1307
*pOut=subS; /* Though returns replacement character, which iconv() does not return. */
1314
*pOut=pSubD[0]; /* Though returns replacement character, which iconv() does not return. */
1322
/* iconv() returns 0 with strange 1 byte converted values */
1325
} else { /* invalid sequence */
1326
errno=EILSEQ; /* 116 */
1327
*outBytesLeft-=(pOut-*outBuf);
1335
/* end of UTF-8 to UCS-2 */
1339
} else if (in < 0x100 && dmapU2S[in] != 0x0000) {
1340
if ((*pOut=dmapU2S[in]) == subS) {
1341
if (in != cd->srcSubS)
1347
if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
1349
if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
1359
*pOut=dmapU2M3[1+in];
1361
*pOut=dmapU2M3[2+in];
1367
if (dmapU2M2[1+in] == 0x00) {
1368
if (*pOut == subS) {
1370
if (in != cd->srcSubS)
1374
*pOut=dmapU2M2[1+in];
1376
if (memcmp(pOut-2, pSubD, 2) == 0) {
1378
if (in != cd->srcSubD) {
1387
*outBytesLeft-=(pOut-*outBuf);
1394
} else if (cd->cnv_dmap->codingSchema == DMAP_S2U) {
1395
/* use uchar * instead of UniChar to avoid memcpy */
1396
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
1397
register int inLen=*inBytesLeft;
1398
register char * pOut=*outBuf;
1399
register char * pIn=*inBuf;
1400
register int offset;
1401
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
1402
register size_t numS=0;
1404
if (pLastOutBuf < pOut)
1416
*pOut=dmapD12U[offset];
1418
*pOut=dmapD12U[offset+1];
1420
if (dmapD12U[offset] == 0x00) {
1421
if (dmapD12U[offset+1] == 0x1A) {
1422
if (*pIn != cd->srcSubS)
1424
} else if (dmapD12U[offset+1] == 0x00) {
1426
*outBytesLeft-=(pOut-*outBuf);
1438
*outBytesLeft-=(pOut-*outBuf);
1445
} else if (cd->cnv_dmap->codingSchema == DMAP_S28) {
1446
/* use uchar * instead of UniChar to avoid memcpy */
1447
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
1448
register int inLen=*inBytesLeft;
1449
register char * pOut=*outBuf;
1450
register char * pIn=*inBuf;
1451
register int offset;
1452
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
1453
register size_t numS=0;
1454
register UniChar in; /* copy part of U28 */
1456
if (pLastOutBuf < pOut)
1466
in=dmapD12U[offset];
1468
in+=dmapD12U[offset+1];
1469
if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
1471
errno=EILSEQ; /* 116 */
1472
*outBytesLeft-=(pOut-*outBuf);
1481
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
1482
register uchar byte;
1484
in&=0x001F; /* 0b0000000000011111 */
1485
in|=0x00C0; /* 0b0000000011000000 */
1488
byte=dmapD12U[offset+1];
1489
byte&=0x3F; /* 0b00111111; */
1490
byte|=0x80; /* 0b10000000; */
1493
} else if ((in & 0xFC00) == 0xD800) { /* There should not be no surrogate character in SBCS. */
1501
register uchar byte;
1502
register uchar work;
1503
byte=dmapD12U[offset];
1505
byte|=0xE0; /* 0b11100000; */
1509
byte=dmapD12U[offset];
1511
work=dmapD12U[offset+1];
1514
byte&=0x3F; /* 0b00111111; */
1515
byte|=0x80; /* 0b10000000; */
1519
byte=dmapD12U[offset+1];
1520
byte&=0x3F; /* 0b00111111; */
1521
byte|=0x80; /* 0b10000000; */
1526
if (dmapD12U[offset] == 0x00) {
1527
if (dmapD12U[offset+1] == 0x1A) {
1528
if (*pIn != cd->srcSubS)
1536
*outBytesLeft-=(pOut-*outBuf);
1543
} else if (cd->cnv_dmap->codingSchema == DMAP_U2S) {
1544
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
1545
register int inLen=*inBytesLeft;
1546
register char * pOut=*outBuf;
1547
register char * pIn=*inBuf;
1548
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
1549
register char subS=cd->subS;
1550
register size_t numS=0;
1552
register uint32_t in;
1554
errno=EINVAL; /* 22 */
1557
*outBytesLeft-=(pOut-*outBuf);
1562
if (pLastOutBuf < pOut)
1570
if ((*pOut=dmapU2S[in]) == 0x00) {
1573
errno=EINVAL; /* 22 */
1574
} else if (*pOut == subS) {
1575
if (in != cd->srcSubS)
1583
*outBytesLeft-=(pOut-*outBuf);
1590
} else if (cd->cnv_dmap->codingSchema == DMAP_T2S) {
1591
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
1592
register int inLen=*inBytesLeft;
1593
register char * pOut=*outBuf;
1594
register char * pIn=*inBuf;
1595
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
1596
register char subS=cd->subS;
1597
register size_t numS=0;
1599
register uint32_t in;
1601
errno=EINVAL; /* 22 */
1603
*inBytesLeft=inLen-1;
1604
*outBytesLeft-=(pOut-*outBuf);
1611
if (pLastOutBuf < pOut)
1619
} else if (0xD800 <= in && in <= 0xDFFF) { /* 0xD800-0xDFFF, surrogate first and second values */
1620
if (0xDC00 <= in ) {
1621
errno=EINVAL; /* 22 */
1622
*inBytesLeft=inLen-1;
1623
*outBytesLeft-=(pOut-*outBuf);
1628
} else if (inLen < 4) {
1629
errno=EINVAL; /* 22 */
1630
*inBytesLeft=inLen-2;
1631
*outBytesLeft-=(pOut-*outBuf);
1637
register uint32_t in2;
1641
if (0xDC00 <= in2 && in2 <= 0xDFFF) { /* second surrogate character =0xDC00 - 0xDFFF*/
1646
errno=EINVAL; /* 22 */
1647
*inBytesLeft=inLen-1;
1648
*outBytesLeft-=(pOut-*outBuf);
1655
if ((*pOut=dmapU2S[in]) == 0x00) {
1658
errno=EINVAL; /* 22 */
1659
} else if (*pOut == subS) {
1660
if (in != cd->srcSubS)
1668
*outBytesLeft-=(pOut-*outBuf);
1675
} else if (cd->cnv_dmap->codingSchema == DMAP_82S) {
1676
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
1677
register int inLen=*inBytesLeft;
1678
register char * pOut=*outBuf;
1679
register char * pIn=*inBuf;
1680
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
1681
register char subS=cd->subS;
1682
register size_t numS=0;
1684
register uint32_t in;
1685
uint32_t in2; /* The second surrogate value */
1686
if (pLastOutBuf < pOut)
1688
/* convert from UTF-8 to UCS-2 */
1694
register uchar byte1=*pIn;
1695
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
1696
/* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
1700
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
1702
errno=EINVAL; /* 22 */
1703
*outBytesLeft-=(pOut-*outBuf);
1710
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
1711
errno=EILSEQ; /* 116 */
1712
*outBytesLeft-=(pOut-*outBuf);
1719
/* 2 bytes sequence:
1720
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
1721
register uchar byte2;
1724
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
1725
register uchar work=byte1;
1727
byte2&=0x3F; /* 0b00111111; */
1730
byte1&=0x1F; /* 0b00011111; */
1737
} else { /* invalid sequence */
1738
errno=EILSEQ; /* 116 */
1739
*outBytesLeft-=(pOut-*outBuf);
1746
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
1747
/* 3 bytes sequence:
1748
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
1749
register uchar byte2;
1750
register uchar byte3;
1752
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
1753
errno=EILSEQ; /* 116 */
1755
errno=EINVAL; /* 22 */
1756
*outBytesLeft-=(pOut-*outBuf);
1767
if ((byte2 & 0xC0) != 0x80 ||
1768
(byte3 & 0xC0) != 0x80 ||
1769
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
1770
errno=EILSEQ; /* 116 */
1771
*outBytesLeft-=(pOut-*outBuf);
1779
register uchar work=byte2;
1781
byte3&=0x3F; /* 0b00111111; */
1784
byte2&=0x3F; /* 0b00111111; */
1794
} else if ((0xF0 <= byte1 && byte1 <= 0xF4) || /* (bytes1 & 11111000) == 0x1110000 */
1795
((byte1&=0xF7) && 0xF0 <= byte1 && byte1 <= 0xF4)) { /* minic iconv() behavior */
1797
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
1798
where uuuuu = wwww + 1 */
1799
register uchar byte2;
1800
register uchar byte3;
1801
register uchar byte4;
1803
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
1804
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
1805
(cd->toCcsid == 13488) )
1806
errno=EILSEQ; /* 116 */
1808
errno=EINVAL; /* 22 */
1809
*outBytesLeft-=(pOut-*outBuf);
1822
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
1823
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
1824
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
1825
register uchar work=byte2;
1826
if (byte1 == 0xF0 && byte2 < 0x90) {
1827
errno=EILSEQ; /* 116 */
1828
*outBytesLeft-=(pOut-*outBuf);
1834
/* iconv() returns 0 for 0xF4908080 and convert to 0x00
1835
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
1837
*outBytesLeft-=(pOut-*outBuf);
1846
work&=0x30; /* 0b00110000; */
1848
byte1&=0x07; /* 0b00000111; */
1850
byte1+=work; /* uuuuu */
1855
work+=0xD8; /* 0b11011011; */
1861
byte2&=0x3C; /* 0b00111100; */
1864
work&=0x03; /* 0b00000011; */
1871
work&=0x03; /* 0b00000011; */
1872
work|=0xDC; /* 0b110111xx; */
1877
byte4&=0x3F; /* 0b00111111; */
1882
} else { /* invalid sequence */
1883
errno=EILSEQ; /* 116 */
1884
*outBytesLeft-=(pOut-*outBuf);
1891
} else if ((byte1 & 0xF0) == 0xF0) { /* minic iconv() behavior */
1893
pIn[1] < 0x80 || 0xBF < pIn[1] ||
1894
pIn[2] < 0x80 || 0xBF < pIn[2] ||
1895
pIn[3] < 0x80 || 0xBF < pIn[3] ) {
1897
errno=EINVAL; /* 22 */
1898
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
1899
errno=EILSEQ; /* 116 */
1900
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
1901
errno=EILSEQ; /* 116 */
1902
else if (inLen >= 4 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
1903
errno=EILSEQ; /* 116 */
1905
errno=EINVAL; /* 22 */
1907
*outBytesLeft-=(pOut-*outBuf);
1914
*pOut=subS; /* Though returns replacement character, which iconv() does not return. */
1919
/* UTF-8_IBM-850 0xF0908080 : converted value does not match, iconv=0x00, dmap=0x7F
1920
UTF-8_IBM-850 0xF0908081 : converted value does not match, iconv=0x01, dmap=0x7F
1921
UTF-8_IBM-850 0xF0908082 : converted value does not match, iconv=0x02, dmap=0x7F
1922
UTF-8_IBM-850 0xF0908083 : converted value does not match, iconv=0x03, dmap=0x7F
1924
UTF-8_IBM-850 0xF09081BE : converted value does not match, iconv=0x7E, dmap=0x7F
1925
UTF-8_IBM-850 0xF09081BF : converted value does not match, iconv=0x1C, dmap=0x7F
1926
UTF-8_IBM-850 0xF09082A0 : converted value does not match, iconv=0xFF, dmap=0x7F
1927
UTF-8_IBM-850 0xF09082A1 : converted value does not match, iconv=0xAD, dmap=0x7F
1931
/* iconv() returns 0 with strange 1 byte converted values */
1934
} else { /* invalid sequence */
1935
errno=EILSEQ; /* 116 */
1936
*outBytesLeft-=(pOut-*outBuf);
1944
/* end of UTF-8 to UCS-2 */
1948
if ((*pOut=dmapU2S[in]) == 0x00) {
1951
errno=EINVAL; /* 22 */
1952
} else if (*pOut == subS) {
1953
if (in != cd->srcSubS) {
1960
*outBytesLeft-=(pOut-*outBuf);
1967
} else if (cd->cnv_dmap->codingSchema == DMAP_D2U) {
1968
/* use uchar * instead of UniChar to avoid memcpy */
1969
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
1970
register uchar * dmapD22U=(uchar *) (cd->cnv_dmap->dmapD22U);
1971
register int inLen=*inBytesLeft;
1972
register char * pOut=*outBuf;
1973
register char * pIn=*inBuf;
1974
register int offset;
1975
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
1976
register size_t numS=0;
1978
if (pLastOutBuf < pOut)
1990
if (dmapD12U[offset] == 0x00 &&
1991
dmapD12U[offset+1] == 0x00) { /* DBCS */
1993
if (*pIn == 0x80 || *pIn == 0xFF ||
1994
(cd->fromCcsid == 943 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0xA0 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xEF || *pIn == 0xFD || *pIn == 0xFE)) ||
1995
(cd->fromCcsid == 932 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0x87 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xED || *pIn == 0xEE || *pIn == 0xEF)) ||
1996
(cd->fromCcsid == 1381 && ((0x85 <= *pIn && *pIn <= 0x8B) || (0xAA <= *pIn && *pIn <= 0xAF) || (0xF8 <= *pIn && *pIn <= 0xFE))))
1997
errno=EILSEQ; /* 116 */
1999
errno=EINVAL; /* 22 */
2000
*outBytesLeft-=(pOut-*outBuf);
2010
if (dmapD22U[offset] == 0x00 &&
2011
dmapD22U[offset+1] == 0x00) {
2012
errno=EILSEQ; /* 116 */
2013
*outBytesLeft-=(pOut-*outBuf);
2019
*pOut=dmapD22U[offset];
2021
*pOut=dmapD22U[offset+1];
2023
if (dmapD22U[offset] == 0xFF &&
2024
dmapD22U[offset+1] == 0xFD) {
2025
if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
2031
*pOut=dmapD12U[offset];
2033
*pOut=dmapD12U[offset+1];
2035
if (dmapD12U[offset] == 0x00 &&
2036
dmapD12U[offset+1] == 0x1A) {
2037
if (*pIn != cd->srcSubS)
2045
*outBytesLeft-=(pOut-*outBuf);
2052
} else if (cd->cnv_dmap->codingSchema == DMAP_D28) {
2053
/* use uchar * instead of UniChar to avoid memcpy */
2054
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
2055
register uchar * dmapD22U=(uchar *) (cd->cnv_dmap->dmapD22U);
2056
register int inLen=*inBytesLeft;
2057
register char * pOut=*outBuf;
2058
register char * pIn=*inBuf;
2059
register int offset;
2060
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
2061
register size_t numS=0;
2062
register UniChar in; /* copy part of U28 */
2063
register UniChar ucs2;
2065
if (pLastOutBuf < pOut)
2075
if (dmapD12U[offset] == 0x00 &&
2076
dmapD12U[offset+1] == 0x00) { /* DBCS */
2078
if (*pIn == 0x80 || *pIn == 0xFF ||
2079
(cd->fromCcsid == 943 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0xA0 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xEF || *pIn == 0xFD || *pIn == 0xFE)) ||
2080
(cd->fromCcsid == 932 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0x87 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xED || *pIn == 0xEE || *pIn == 0xEF)) ||
2081
(cd->fromCcsid == 1381 && ((0x85 <= *pIn && *pIn <= 0x8B) || (0xAA <= *pIn && *pIn <= 0xAF) || (0xF8 <= *pIn && *pIn <= 0xFE))))
2082
errno=EILSEQ; /* 116 */
2084
errno=EINVAL; /* 22 */
2085
*outBytesLeft-=(pOut-*outBuf);
2095
if (dmapD22U[offset] == 0x00 &&
2096
dmapD22U[offset+1] == 0x00) {
2097
errno=EILSEQ; /* 116 */
2098
*outBytesLeft-=(pOut-*outBuf);
2104
in=dmapD22U[offset];
2106
in+=dmapD22U[offset+1];
2108
if (dmapD22U[offset] == 0xFF &&
2109
dmapD22U[offset+1] == 0xFD) {
2110
if (in != cd->srcSubD)
2116
in=dmapD12U[offset];
2118
in+=dmapD12U[offset+1];
2120
if (dmapD12U[offset] == 0x00 &&
2121
dmapD12U[offset+1] == 0x1A) {
2122
if (in != cd->srcSubS)
2128
if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
2131
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
2132
register uchar byte;
2134
in&=0x001F; /* 0b0000000000011111 */
2135
in|=0x00C0; /* 0b0000000011000000 */
2138
byte=ucs2; /* dmapD12U[offset+1]; */
2139
byte&=0x3F; /* 0b00111111; */
2140
byte|=0x80; /* 0b10000000; */
2143
} else if ((in & 0xFC00) == 0xD800) { /* There should not be no surrogate character in SBCS. */
2151
register uchar byte;
2152
register uchar work;
2153
byte=(ucs2>>8); /* dmapD12U[offset]; */
2155
byte|=0xE0; /* 0b11100000; */
2159
byte=(ucs2>>8); /* dmapD12U[offset]; */
2161
work=ucs2; /* dmapD12U[offset+1]; */
2164
byte&=0x3F; /* 0b00111111; */
2165
byte|=0x80; /* 0b10000000; */
2169
byte=ucs2; /* dmapD12U[offset+1]; */
2170
byte&=0x3F; /* 0b00111111; */
2171
byte|=0x80; /* 0b10000000; */
2178
*outBytesLeft-=(pOut-*outBuf);
2185
} else if (cd->cnv_dmap->codingSchema == DMAP_U2D) {
2186
register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
2187
register int inLen=*inBytesLeft;
2188
register char * pOut=*outBuf;
2189
register char * pIn=*inBuf;
2190
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
2191
register char subS=cd->subS;
2192
register char * pSubD=(char *) &(cd->subD);
2193
register size_t numS=0;
2195
register uint32_t in;
2197
errno=EINVAL; /* 22 */
2200
*outBytesLeft-=(pOut-*outBuf);
2205
if (pLastOutBuf < pOut)
2217
if (dmapU2D[in+1] == 0x00) { /* SBCS */
2218
if (*pOut == subS) {
2219
if (in != cd->srcSubS)
2223
*pOut=dmapU2D[in+1];
2225
if (dmapU2D[in] == pSubD[0] &&
2226
dmapU2D[in+1] == pSubD[1]) {
2228
if (in != cd->srcSubD)
2236
*outBytesLeft-=(pOut-*outBuf);
2241
return numS; /* to minic iconv() behavior */
2243
} else if (cd->cnv_dmap->codingSchema == DMAP_T2D) {
2244
register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
2245
register int inLen=*inBytesLeft;
2246
register char * pOut=*outBuf;
2247
register char * pIn=*inBuf;
2248
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
2249
register char subS=cd->subS;
2250
register char * pSubD=(char *) &(cd->subD);
2251
register size_t numS=0;
2253
register uint32_t in;
2255
errno=EINVAL; /* 22 */
2256
*inBytesLeft=inLen-1;
2257
*outBytesLeft-=(pOut-*outBuf);
2264
if (pLastOutBuf < pOut)
2272
} else if (0xD800 <= in && in <= 0xDBFF) { /* first byte of surrogate */
2273
errno=EINVAL; /* 22 */
2274
*inBytesLeft=inLen-2;
2275
*outBytesLeft-=(pOut-*outBuf);
2282
} else if (0xDC00 <= in && in <= 0xDFFF) { /* second byte of surrogate */
2283
errno=EINVAL; /* 22 */
2284
*inBytesLeft=inLen-1;
2285
*outBytesLeft-=(pOut-*outBuf);
2296
if (dmapU2D[in+1] == 0x00) { /* SBCS */
2297
if (*pOut == subS) {
2298
if (in != cd->srcSubS)
2302
*pOut=dmapU2D[in+1];
2304
if (dmapU2D[in] == pSubD[0] &&
2305
dmapU2D[in+1] == pSubD[1]) {
2307
if (in != cd->srcSubD)
2315
*outBytesLeft-=(pOut-*outBuf);
2320
return 0; /* to minic iconv() behavior */
2322
} else if (cd->cnv_dmap->codingSchema == DMAP_82D) {
2323
register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
2324
register int inLen=*inBytesLeft;
2325
register char * pOut=*outBuf;
2326
register char * pIn=*inBuf;
2327
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
2328
register char subS=cd->subS;
2329
register char * pSubD=(char *) &(cd->subD);
2330
register size_t numS=0;
2332
register uint32_t in;
2334
if (pLastOutBuf < pOut)
2336
/* convert from UTF-8 to UCS-2 */
2342
register uchar byte1=*pIn;
2343
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
2344
/* 1-byte sequence: 0xxxxxxx => 00000000 0xxxxxxx */
2348
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
2350
errno=EINVAL; /* 22 */
2351
*outBytesLeft-=(pOut-*outBuf);
2358
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
2359
errno=EILSEQ; /* 116 */
2360
*outBytesLeft-=(pOut-*outBuf);
2367
/* 2 bytes sequence:
2368
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
2369
register uchar byte2;
2372
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
2373
register uchar work=byte1;
2375
byte2&=0x3F; /* 0b00111111; */
2378
byte1&=0x1F; /* 0b00011111; */
2385
} else { /* invalid sequence */
2386
errno=EILSEQ; /* 116 */
2387
*outBytesLeft-=(pOut-*outBuf);
2394
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
2395
/* 3 bytes sequence:
2396
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
2397
register uchar byte2;
2398
register uchar byte3;
2400
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
2401
errno=EILSEQ; /* 116 */
2403
errno=EINVAL; /* 22 */
2404
*outBytesLeft-=(pOut-*outBuf);
2415
if ((byte2 & 0xC0) != 0x80 ||
2416
(byte3 & 0xC0) != 0x80 ||
2417
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
2418
errno=EILSEQ; /* 116 */
2419
*outBytesLeft-=(pOut-*outBuf);
2427
register uchar work=byte2;
2429
byte3&=0x3F; /* 0b00111111; */
2432
byte2&=0x3F; /* 0b00111111; */
2442
} else if ((0xF0 <= byte1 && byte1 <= 0xF4)) { /* (bytes1 & 11111000) == 0x1110000 */
2444
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
2445
where uuuuu = wwww + 1 */
2446
register uchar byte2;
2447
register uchar byte3;
2448
register uchar byte4;
2450
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
2451
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
2452
(cd->toCcsid == 13488) )
2453
errno=EILSEQ; /* 116 */
2455
errno=EINVAL; /* 22 */
2456
*outBytesLeft-=(pOut-*outBuf);
2469
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
2470
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
2471
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
2472
register uchar work=byte2;
2473
if (byte1 == 0xF0 && byte2 < 0x90) {
2474
errno=EILSEQ; /* 116 */
2475
*outBytesLeft-=(pOut-*outBuf);
2481
/* iconv() returns 0 for 0xF4908080 and convert to 0x00
2482
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
2484
*outBytesLeft-=(pOut-*outBuf);
2493
work&=0x30; /* 0b00110000; */
2495
byte1&=0x07; /* 0b00000111; */
2497
byte1+=work; /* uuuuu */
2502
work+=0xD8; /* 0b11011011; */
2508
byte2&=0x3C; /* 0b00111100; */
2511
work&=0x03; /* 0b00000011; */
2518
work&=0x03; /* 0b00000011; */
2519
work|=0xDC; /* 0b110111xx; */
2524
byte4&=0x3F; /* 0b00111111; */
2529
#ifdef match_with_GBK
2530
if ((0xD800 == in && in2 < 0xDC80) ||
2531
(0xD840 == in && in2 < 0xDC80) ||
2532
(0xD880 == in && in2 < 0xDC80) ||
2533
(0xD8C0 == in && in2 < 0xDC80) ||
2534
(0xD900 == in && in2 < 0xDC80) ||
2535
(0xD940 == in && in2 < 0xDC80) ||
2536
(0xD980 == in && in2 < 0xDC80) ||
2537
(0xD9C0 == in && in2 < 0xDC80) ||
2538
(0xDA00 == in && in2 < 0xDC80) ||
2539
(0xDA40 == in && in2 < 0xDC80) ||
2540
(0xDA80 == in && in2 < 0xDC80) ||
2541
(0xDAC0 == in && in2 < 0xDC80) ||
2542
(0xDB00 == in && in2 < 0xDC80) ||
2543
(0xDB40 == in && in2 < 0xDC80) ||
2544
(0xDB80 == in && in2 < 0xDC80) ||
2545
(0xDBC0 == in && in2 < 0xDC80)) {
2547
if ((0xD800 <= in && in <= 0xDBFF) &&
2548
(0xDC00 <= in2 && in2 <= 0xDFFF)) {
2555
} else { /* invalid sequence */
2556
errno=EILSEQ; /* 116 */
2557
*outBytesLeft-=(pOut-*outBuf);
2564
} else if (0xF5 <= byte1 && byte1 <= 0xFF) { /* minic iconv() behavior */
2566
(inLen >= 4 && byte1 == 0xF8 && pIn[1] < 0x90) ||
2567
pIn[1] < 0x80 || 0xBF < pIn[1] ||
2568
pIn[2] < 0x80 || 0xBF < pIn[2] ||
2569
pIn[3] < 0x80 || 0xBF < pIn[3] ) {
2571
errno=EINVAL; /* 22 */
2572
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
2573
errno=EILSEQ; /* 116 */
2574
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
2575
errno=EILSEQ; /* 116 */
2576
else if (inLen >= 4 && (byte1 == 0xF8 || (pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
2577
errno=EILSEQ; /* 116 */
2579
errno=EINVAL; /* 22 */
2581
*outBytesLeft-=(pOut-*outBuf);
2587
} else if ((pIn[1] == 0x80 || pIn[1] == 0x90 || pIn[1] == 0xA0 || pIn[1] == 0xB0) &&
2589
*pOut=subS; /* Though returns replacement character, which iconv() does not return. */
2596
*pOut=pSubD[0]; /* Though returns replacement character, which iconv() does not return. */
2604
/* iconv() returns 0 with strange 1 byte converted values */
2607
} else { /* invalid sequence */
2608
errno=EILSEQ; /* 116 */
2609
*outBytesLeft-=(pOut-*outBuf);
2617
/* end of UTF-8 to UCS-2 */
2625
if (dmapU2D[in+1] == 0x00) { /* SBCS */
2626
if (dmapU2D[in] == subS) {
2628
if (in != cd->srcSubS)
2632
*pOut=dmapU2D[in+1];
2634
if (dmapU2D[in] == pSubD[0] &&
2635
dmapU2D[in+1] == pSubD[1]) {
2637
if (in != cd->srcSubD)
2643
*outBytesLeft-=(pOut-*outBuf);
2650
} else if (cd->cnv_dmap->codingSchema == DMAP_82U) {
2651
/* See http://unicode.org/versions/corrigendum1.html */
2652
/* converting from UTF-8 to UTF-16 also covers every conversion from UTF-8 to UCS-2 */
2653
register int inLen=*inBytesLeft;
2654
register char * pOut=*outBuf;
2655
register char * pIn=*inBuf;
2656
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
2657
register size_t numS=0;
2659
if (pLastOutBuf < pOut)
2669
register uchar byte1=*pIn;
2670
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
2671
/* 1-byte sequence: 0xxxxxxx => 00000000 0xxxxxxx */
2678
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
2680
errno=EINVAL; /* 22 */
2681
*outBytesLeft-=(pOut-*outBuf);
2688
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
2689
errno=EILSEQ; /* 116 */
2690
*outBytesLeft-=(pOut-*outBuf);
2697
/* 2 bytes sequence:
2698
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
2699
register uchar byte2;
2702
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
2703
register uchar work=byte1;
2705
byte2&=0x3F; /* 0b00111111; */
2708
byte1&=0x1F; /* 0b00011111; */
2716
} else { /* invalid sequence */
2717
errno=EILSEQ; /* 116 */
2718
*outBytesLeft-=(pOut-*outBuf);
2725
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
2726
/* 3 bytes sequence:
2727
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
2728
register uchar byte2;
2729
register uchar byte3;
2731
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
2732
errno=EILSEQ; /* 116 */
2734
errno=EINVAL; /* 22 */
2735
*outBytesLeft-=(pOut-*outBuf);
2746
if ((byte2 & 0xC0) != 0x80 ||
2747
(byte3 & 0xC0) != 0x80 ||
2748
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
2749
errno=EILSEQ; /* 116 */
2750
*outBytesLeft-=(pOut-*outBuf);
2758
register uchar work=byte2;
2760
byte3&=0x3F; /* 0b00111111; */
2763
byte2&=0x3F; /* 0b00111111; */
2767
*pOut=byte1 | byte2;;
2774
} else if ((0xF0 <= byte1 && byte1 <= 0xF4) || /* (bytes1 & 11111000) == 0x1110000 */
2775
((byte1&=0xF7) && 0xF0 <= byte1 && byte1 <= 0xF4)) { /* minic iconv() behavior */
2777
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
2778
where uuuuu = wwww + 1 */
2779
register uchar byte2;
2780
register uchar byte3;
2781
register uchar byte4;
2782
if (inLen < 4 || cd->toCcsid == 13488) {
2783
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
2784
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
2785
(cd->toCcsid == 13488) )
2786
errno=EILSEQ; /* 116 */
2788
errno=EINVAL; /* 22 */
2789
*outBytesLeft-=(pOut-*outBuf);
2802
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
2803
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
2804
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
2805
register uchar work=byte2;
2806
if (byte1 == 0xF0 && byte2 < 0x90) {
2807
errno=EILSEQ; /* 116 */
2808
*outBytesLeft-=(pOut-*outBuf);
2814
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
2815
errno=EINVAL; /* 22 */
2816
*outBytesLeft-=(pOut-*outBuf);
2824
work&=0x30; /* 0b00110000; */
2826
byte1&=0x07; /* 0b00000111; */
2828
byte1+=work; /* uuuuu */
2833
work+=0xD8; /* 0b11011011; */
2839
byte2&=0x3C; /* 0b00111100; */
2842
work&=0x03; /* 0b00000011; */
2850
work&=0x03; /* 0b00000011; */
2851
work|=0xDC; /* 0b110111xx; */
2856
byte4&=0x3F; /* 0b00111111; */
2862
} else { /* invalid sequence */
2863
errno=EILSEQ; /* 116 */
2864
*outBytesLeft-=(pOut-*outBuf);
2871
} else if ((byte1 & 0xF0) == 0xF0) {
2872
if (cd->toCcsid == 13488) {
2873
errno=EILSEQ; /* 116 */
2876
errno=EINVAL; /* 22 */
2877
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
2878
errno=EILSEQ; /* 116 */
2879
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
2880
errno=EILSEQ; /* 116 */
2881
else if (inLen >= 4 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
2882
errno=EILSEQ; /* 116 */
2884
errno=EINVAL; /* 22 */
2886
*outBytesLeft-=(pOut-*outBuf);
2893
} else { /* invalid sequence */
2894
errno=EILSEQ; /* 116 */
2895
*outBytesLeft-=(pOut-*outBuf);
2904
*outBytesLeft-=(pOut-*outBuf);
2910
} else if (cd->cnv_dmap->codingSchema == DMAP_U28) {
2911
/* See http://unicode.org/versions/corrigendum1.html */
2912
register int inLen=*inBytesLeft;
2913
register char * pOut=*outBuf;
2914
register char * pIn=*inBuf;
2915
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
2916
// register size_t numS=0;
2918
register uint32_t in;
2920
errno=EINVAL; /* 22 */
2922
*outBytesLeft-=(pOut-*outBuf);
2927
if (pLastOutBuf < pOut)
2935
} else if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
2938
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
2939
register uchar byte;
2941
in&=0x001F; /* 0b0000000000011111 */
2942
in|=0x00C0; /* 0b0000000011000000 */
2946
byte&=0x3F; /* 0b00111111; */
2947
byte|=0x80; /* 0b10000000; */
2951
register uchar byte;
2952
register uchar work;
2955
byte|=0xE0; /* 0b11100000; */
2964
byte&=0x3F; /* 0b00111111; */
2965
byte|=0x80; /* 0b10000000; */
2970
byte&=0x3F; /* 0b00111111; */
2971
byte|=0x80; /* 0b10000000; */
2978
*outBytesLeft-=(pOut-*outBuf);
2985
} else if (cd->cnv_dmap->codingSchema == DMAP_T28) { /* UTF-16_UTF-8 */
2986
/* See http://unicode.org/versions/corrigendum1.html */
2987
register int inLen=*inBytesLeft;
2988
register char * pOut=*outBuf;
2989
register char * pIn=*inBuf;
2990
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
2991
// register size_t numS=0;
2993
register uint32_t in;
2995
errno=EINVAL; /* 22 */
2997
*outBytesLeft-=(pOut-*outBuf);
3002
if (pLastOutBuf < pOut)
3010
} else if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
3013
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
3014
register uchar byte;
3016
in&=0x001F; /* 0b0000000000011111 */
3017
in|=0x00C0; /* 0b0000000011000000 */
3021
byte&=0x3F; /* 0b00111111; */
3022
byte|=0x80; /* 0b10000000; */
3025
} else if ((in & 0xFC00) == 0xD800) { /* in & 0b1111110000000000 == 0b1101100000000000, first surrogate character */
3026
if (0xDC00 <= in ) {
3027
errno=EINVAL; /* 22 */
3028
*inBytesLeft=inLen-1;
3029
*outBytesLeft-=(pOut-*outBuf);
3034
} else if (inLen < 4) {
3035
errno=EINVAL; /* 22 */
3036
*inBytesLeft=inLen-2;
3037
*outBytesLeft-=(pOut-*outBuf);
3042
} else if ((pIn[2] & 0xFC) != 0xDC) { /* pIn[2] & 0b11111100 == 0b11011100, second surrogate character */
3043
errno=EINVAL; /* 22 */
3044
*inBytesLeft=inLen-2;
3045
*outBytesLeft-=(pOut-*outBuf);
3051
register uchar byte;
3052
register uchar work;
3054
in&=0x000F; /* 0b0000000000001111 */
3057
work=byte; /* save uuuuu */
3059
byte|=0xF0; /* 0b11110000; */
3064
byte&=0x03; /* 0b00000011; */
3066
byte|=0x80; /* 0b10000000; */
3068
work&=0x3C; /* 0b00111100; */
3075
byte&=0x03; /* 0b00000011; */
3077
byte|=0x80; /* 0b10000000; */
3079
work&=0x03; /* 0b00000011; */
3089
byte&=0x3F; /* 0b00111111; */
3090
byte|=0x80; /* 0b10000000; */
3096
} else if ((in & 0xFC00) == 0xDC00) { /* in & 0b11111100 == 0b11011100, second surrogate character */
3097
errno=EINVAL; /* 22 */
3098
*inBytesLeft=inLen-1;
3099
*outBytesLeft-=(pOut-*outBuf);
3105
register uchar byte;
3106
register uchar work;
3109
byte|=0xE0; /* 0b11100000; */
3118
byte&=0x3F; /* 0b00111111; */
3119
byte|=0x80; /* 0b10000000; */
3124
byte&=0x3F; /* 0b00111111; */
3125
byte|=0x80; /* 0b10000000; */
3132
*outBytesLeft-=(pOut-*outBuf);
3139
} else if (cd->cnv_dmap->codingSchema == DMAP_U2U) { /* UTF-16_UCS-2 */
3140
register int inLen=*inBytesLeft;
3141
register int outLen=*outBytesLeft;
3142
if (inLen <= outLen) {
3143
memcpy(*outBuf, *inBuf, inLen);
3144
(*outBytesLeft)-=inLen;
3150
memcpy(*outBuf, *inBuf, outLen);
3154
*inBytesLeft-=outLen;
3155
return (*inBytesLeft);
3165
/* Inline front end for a conversion request.  It looks at
   cd->converterType and hands the call, with all arguments unchanged, to
   myconv_iconv() for CONVERTER_ICONV or to myconv_dmap() for
   CONVERTER_DMAP, returning whatever the selected converter returns.
   NOTE(review): part of the parameter list and everything after the
   CONVERTER_DMAP branch are not visible in this excerpt -- confirm what is
   returned when converterType matches neither constant. */
inline size_t myconv(myconv_t cd ,
3167
size_t* inBytesLeft,
3169
size_t* outBytesLeft,
3172
/* iconv-backed converter */
if (cd->converterType == CONVERTER_ICONV) {
3173
return myconv_iconv(cd,inBuf,inBytesLeft,outBuf,outBytesLeft,numSub);
3174
/* direct-map (dmap) converter */
} else if (cd->converterType == CONVERTER_DMAP) {
3175
return myconv_dmap(cd,inBuf,inBytesLeft,outBuf,outBytesLeft,numSub);
3180
/* Inline helper that selects a result based on a converter type code.
   It distinguishes CONVERTER_ICONV from CONVERTER_DMAP; any other value
   falls past both tests.
   NOTE(review): the statements executed in each branch are not visible in
   this excerpt -- presumably each returns a constant name string; confirm
   against the full file. */
inline char * converterName(int32_t type)
3182
if (type == CONVERTER_ICONV)
3184
else if (type == CONVERTER_DMAP)
3190
/* Macro form of the conversion dispatcher.
   Expands to a call of myconv_iconv() when the descriptor's converterType
   is CONVERTER_ICONV, to a call of myconv_dmap() when it is CONVERTER_DMAP,
   and to -1 for any other converter type.  All six arguments are forwarded
   unchanged.  The descriptor argument is evaluated more than once, so it
   must not have side effects. */
#define myconv(cd, inBuf, inBytesLeft, outBuf, outBytesLeft, numSub) \
  (((cd)->converterType == CONVERTER_ICONV) \
     ? myconv_iconv((cd), (inBuf), (inBytesLeft), (outBuf), (outBytesLeft), (numSub)) \
     : (((cd)->converterType == CONVERTER_DMAP) \
          ? myconv_dmap((cd), (inBuf), (inBytesLeft), (outBuf), (outBytesLeft), (numSub)) \
          : -1))
3194
/* Macro form of the converter-name lookup.
   Yields "iconv" for CONVERTER_ICONV, "dmap" for CONVERTER_DMAP and
   "?????" for any other type code.  The argument may be evaluated twice,
   so it must not have side effects. */
#define converterName(type) \
  (((type) == CONVERTER_ICONV) \
     ? "iconv" \
     : ((type) == CONVERTER_DMAP) \
         ? "dmap" \
         : "?????")
3199
void cleanupMyconv();