2
Licensed Materials - Property of IBM
3
DB2 Storage Engine Enablement
4
Copyright IBM Corporation 2007,2008
7
Redistribution and use in source and binary forms, with or without modification,
8
are permitted provided that the following conditions are met:
9
(a) Redistributions of source code must retain this list of conditions, the
10
copyright notice in section {d} below, and the disclaimer following this
12
(b) Redistributions in binary form must reproduce this list of conditions, the
13
copyright notice in section (d) below, and the disclaimer following this
14
list of conditions, in the documentation and/or other materials provided
15
with the distribution.
16
(c) The name of IBM may not be used to endorse or promote products derived from
17
this software without specific prior written permission.
18
(d) The text of the required copyright notice is:
19
Licensed Materials - Property of IBM
20
DB2 Storage Engine Enablement
21
Copyright IBM Corporation 2007,2008
24
THIS SOFTWARE IS PROVIDED BY IBM CORPORATION "AS IS" AND ANY EXPRESS OR IMPLIED
25
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
27
SHALL IBM CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
29
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31
CONTRACT, STRICT LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
38
#include "db2i_charsetSupport.h"
39
#include "as400_types.h"
40
#include "as400_protos.h"
41
#include "db2i_ileBridge.h"
43
#include "db2i_errors.h"
47
The following arrays define a mapping between IANA-style text descriptors and
48
IBM i CCSID text descriptors. The mapping is a 1-to-1 correlation between
49
corresponding array slots.
51
#define MAX_IANASTRING 23
52
static const char ianaStringType[MAX_IANASTRING][10] =
78
static const char ccsidType[MAX_IANASTRING][6] =
105
static _ILEpointer *QlgCvtTextDescToDesc_sym;
107
/* We keep a cache of the mapping for text descriptions obtained via
108
QlgCvtTextDescToDesc. The following structures implement this cache. */
109
static HASH textDescMapHash;
110
static MEM_ROOT textDescMapMemroot;
111
static pthread_mutex_t textDescMapHashMutex;
118
char inDesc[Qlg_MaxDescSize];
120
char outDesc[Qlg_MaxDescSize];
123
/* We keep a cache of the mapping for open iconv descriptors. The following
124
structures implement this cache. */
125
static HASH iconvMapHash;
126
static MEM_ROOT iconvMapMemroot;
127
static pthread_mutex_t iconvMapHashMutex;
132
uint32 direction; // These are uint32s to avoid garbage data in the key from compiler padding
134
const CHARSET_INFO* myCharset;
141
Initialize the static structures used by this module.
143
This must only be called once per plugin instantiation.
145
@return 0 if successful. Failure otherwise
147
int32 initCharsetSupport()
149
DBUG_ENTER("initCharsetSupport");
151
int actmark = _ILELOAD("QSYS/QLGUSR", ILELOAD_LIBOBJ);
154
DBUG_PRINT("initCharsetSupport", ("conversion srvpgm activation failed"));
158
QlgCvtTextDescToDesc_sym = (ILEpointer*)malloc_aligned(sizeof(ILEpointer));
159
if (_ILESYM(QlgCvtTextDescToDesc_sym, actmark, "QlgCvtTextDescToDesc") == -1)
161
DBUG_PRINT("initCharsetSupport",
162
("resolve of QlgCvtTextDescToDesc failed"));
166
VOID(pthread_mutex_init(&textDescMapHashMutex,MY_MUTEX_INIT_FAST));
167
hash_init(&textDescMapHash, &my_charset_bin, 10, offsetof(TextDescMap, hashKey), sizeof(TextDescMap::hashKey), 0, 0, HASH_UNIQUE);
169
VOID(pthread_mutex_init(&iconvMapHashMutex,MY_MUTEX_INIT_FAST));
170
hash_init(&iconvMapHash, &my_charset_bin, 10, offsetof(IconvMap, hashKey), sizeof(IconvMap::hashKey), 0, 0, HASH_UNIQUE);
172
init_alloc_root(&textDescMapMemroot, 2048, 0);
173
init_alloc_root(&iconvMapMemroot, 256, 0);
181
Cleanup the static structures used by this module.
183
This must only be called once per plugin instantiation and only if
184
initCharsetSupport() was successful.
186
void doneCharsetSupport()
190
free_root(&textDescMapMemroot, 0);
191
free_root(&iconvMapMemroot, 0);
193
pthread_mutex_destroy(&textDescMapHashMutex);
194
hash_free(&textDescMapHash);
195
pthread_mutex_destroy(&iconvMapHashMutex);
196
hash_free(&iconvMapHash);
197
free_aligned(QlgCvtTextDescToDesc_sym);
202
Convert a text description from one type to another.
204
This function is just a wrapper for the IBM i QlgCvtTextDescToDesc function plus
205
some overrides for conversions that the API does not handle correctly and
206
support for caching the computed conversion.
208
@param inType The type of descriptor pointed to by "in".
209
@param outType The type of descriptor requested for "out".
210
@param in The descriptor to be convereted.
211
@param[out] out The equivalent descriptor
212
@param hashKey The hash key to be used for caching the conversion result.
214
@return 0 if successful. Failure otherwise
216
static int32 getNewTextDesc(const int32 inType,
220
const TextDescMap::HashKey* hashKey)
222
DBUG_ENTER("db2i_charsetSupport::getNewTextDesc");
223
const arg_type_t signature[] = { ARG_INT32, ARG_INT32, ARG_MEMPTR, ARG_INT32, ARG_MEMPTR, ARG_INT32, ARG_INT32, ARG_END };
226
ILEarglist_base base;
236
if ((inType == Qlg_TypeIANA) && (outType == Qlg_TypeAix41))
238
// Override non-standard charsets
239
if (unlikely(strcmp("IBM1381", in) == 0))
241
strcpy(out, "IBM-1381");
245
else if ((inType == Qlg_TypeAS400CCSID) && (outType == Qlg_TypeAix41))
247
// Override non-standard charsets
248
if (strcmp("1148", in) == 0)
250
strcpy(out, "IBM-1148");
253
else if (unlikely(strcmp("1153", in) == 0))
255
strcpy(out, "IBM-1153");
260
char argBuf[sizeof(ArgList)+15];
261
arguments = (ArgList*)roundToQuadWordBdy(argBuf);
263
arguments->CRDIInType = inType;
264
arguments->CRDIOutType = outType;
265
arguments->CRDIDesc.s.addr = (address64_t) in;
266
arguments->CRDIDescSize = Qlg_MaxDescSize;
267
arguments->CRDODesc.s.addr = (address64_t) out;
268
arguments->CRDODescSize = Qlg_MaxDescSize;
269
arguments->CTDCCSID = 819;
270
_ILECALL(QlgCvtTextDescToDesc_sym,
274
if (unlikely(arguments->base.result.s_int32.r_int32 < 0))
276
if (arguments->base.result.s_int32.r_int32 == Qlg_InDescriptorNotFound)
278
DBUG_RETURN(DB2I_ERR_UNSUPP_CHARSET);
282
getErrTxt(DB2I_ERR_ILECALL,"QlgCvtTextDescToDesc",arguments->base.result.s_int32.r_int32);
283
DBUG_RETURN(DB2I_ERR_ILECALL);
287
// Store the conversion information into a cache entry
288
TextDescMap* mapping = (TextDescMap*)alloc_root(&textDescMapMemroot, sizeof(TextDescMap));
289
if (unlikely(!mapping))
290
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
291
// hashKey is a pointer parameter, so sizeof(hashKey) is only the pointer size;
// size the copy by the destination struct so the whole cache key is stored.
memcpy(&(mapping->hashKey), hashKey, sizeof(mapping->hashKey));
292
strcpy(mapping->outDesc, out);
293
pthread_mutex_lock(&textDescMapHashMutex);
294
my_hash_insert(&textDescMapHash, (const uchar*)mapping);
295
pthread_mutex_unlock(&textDescMapHashMutex);
302
Convert a text description from one type to another.
304
This function takes a text description in one representation and converts
305
it into another representation. Although the OS provides some facilities for
306
doing this, the support is not complete, nor does MySQL always use standard
307
identifiers. Therefore, there are a lot of hardcoded overrides required.
308
There is probably some room for optimization here, but this should not be
309
called frequently under most circumstances.
311
@param inType The type of descriptor pointed to by "in".
312
@param outType The type of descriptor requested for "out".
313
@param in The descriptor to be converted.
314
@param[out] out The equivalent descriptor
316
@return 0 if successful. Failure otherwise
318
static int32 convertTextDesc(const int32 inType, const int32 outType, const char* inDesc, char* outDesc)
320
DBUG_ENTER("db2i_charsetSupport::convertTextDesc");
321
const char* inDescOverride;
323
if (inType == Qlg_TypeIANA)
325
// Override non-standard charsets
326
if (strcmp("big5", inDesc) == 0)
327
inDescOverride = "Big5";
328
else if (strcmp("cp932", inDesc) == 0)
329
inDescOverride = "IBM943";
330
else if (strcmp("euckr", inDesc) == 0)
331
inDescOverride = "EUC-KR";
332
else if (strcmp("gb2312", inDesc) == 0)
333
inDescOverride = "IBM1381";
334
else if (strcmp("gbk", inDesc) == 0)
335
inDescOverride = "IBM1386";
336
else if (strcmp("sjis", inDesc) == 0)
337
inDescOverride = "Shift_JIS";
338
else if (strcmp("ujis", inDesc) == 0)
339
inDescOverride = "EUC-JP";
341
inDescOverride = inDesc;
343
// Hardcode non-standard charsets
344
if (outType == Qlg_TypeAix41)
346
if (strcmp("Big5", inDescOverride) == 0)
348
strcpy(outDesc,"big5");
351
else if (strcmp("IBM1386", inDescOverride) == 0)
353
strcpy(outDesc,"GBK");
356
else if (strcmp("Shift_JIS", inDescOverride) == 0 ||
357
strcmp("IBM943", inDescOverride) == 0)
359
strcpy(outDesc,"IBM-943");
362
else if (strcmp("tis620", inDescOverride) == 0)
364
strcpy(outDesc,"TIS-620");
367
else if (strcmp("ucs2", inDescOverride) == 0)
369
strcpy(outDesc,"UCS-2");
372
else if (strcmp("cp1250", inDescOverride) == 0)
374
strcpy(outDesc,"IBM-1250");
377
else if (strcmp("cp1251", inDescOverride) == 0)
379
strcpy(outDesc,"IBM-1251");
382
else if (strcmp("cp1256", inDescOverride) == 0)
384
strcpy(outDesc,"IBM-1256");
387
else if (strcmp("macce", inDescOverride) == 0)
389
strcpy(outDesc,"IBM-1282");
393
else if (outType == Qlg_TypeAS400CCSID)
395
// See if we can fast path the convert
396
for (int loopCnt = 0; loopCnt < MAX_IANASTRING; ++loopCnt)
398
if (strcmp((char*)ianaStringType[loopCnt],inDescOverride) == 0)
400
strcpy(outDesc,ccsidType[loopCnt]);
407
inDescOverride = inDesc;
409
// We call getNewTextDesc for all other conversions and cache the result.
410
TextDescMap *mapping;
411
TextDescMap::HashKey hashKey;
412
hashKey.inType= inType;
413
hashKey.outType= outType;
414
uint32 len = strlen(inDescOverride);
415
memcpy(hashKey.inDesc, inDescOverride, len);
416
memset(hashKey.inDesc+len, 0, sizeof(hashKey.inDesc) - len);
418
if (!(mapping=(TextDescMap *) hash_search(&textDescMapHash,
419
(const uchar*)&hashKey,
422
DBUG_RETURN(getNewTextDesc(inType, outType, inDescOverride, outDesc, &hashKey));
426
strcpy(outDesc, mapping->outDesc);
433
Convert an IANA character set name into a DB2 for i CCSID value.
435
@param parmIANADesc An IANA character set name
436
@param[out] db2Ccsid The equivalent CCSID value
438
@return 0 if successful. Failure otherwise
440
int32 convertIANAToDb2Ccsid(const char* parmIANADesc, uint16* db2Ccsid)
444
char aixCcsidString[Qlg_MaxDescSize];
445
int aixEncodingScheme;
446
int db2EncodingScheme;
447
rc = convertTextDesc(Qlg_TypeIANA, Qlg_TypeAS400CCSID, parmIANADesc, aixCcsidString);
450
if (rc == DB2I_ERR_UNSUPP_CHARSET)
451
getErrTxt(DB2I_ERR_UNSUPP_CHARSET, parmIANADesc);
455
aixCcsid = atoi(aixCcsidString);
456
rc = getEncodingScheme(aixCcsid, aixEncodingScheme);
459
switch(aixEncodingScheme) { // Select on encoding scheme
460
case 0x1100: // EBCDIC SBCS
461
case 0x2100: // ASCII SBCS
462
case 0x4100: // AIX SBCS
463
case 0x4105: // MS Windows
464
case 0x5100: // ISO 7 bit ASCII
465
db2EncodingScheme = 0x1100;
467
case 0x1200: // EBCDIC DBCS
468
case 0x2200: // ASCII DBCS
469
db2EncodingScheme = 0x1200;
471
case 0x1301: // EBCDIC Mixed
472
case 0x2300: // ASCII Mixed
473
case 0x4403: // EUC (ISO 2022)
474
db2EncodingScheme = 0x1301;
477
db2EncodingScheme = 0x7200;
479
case 0x7807: // UTF-8
480
db2EncodingScheme = 0x7807;
482
case 0x7500: // UTF-32
483
db2EncodingScheme = 0x7500;
487
getErrTxt(DB2I_ERR_UNKNOWN_ENCODING,aixEncodingScheme);
488
return DB2I_ERR_UNKNOWN_ENCODING;
492
if (aixEncodingScheme == db2EncodingScheme)
494
*db2Ccsid = aixCcsid;
498
rc = getAssociatedCCSID(aixCcsid, db2EncodingScheme, db2Ccsid); // EDCDIC SBCS
508
Obtain the encoding scheme of a CCSID.
510
@param inCcsid An IBM i CCSID
511
@param[out] outEncodingScheme The associated encoding scheme
513
@return 0 if successful. Failure otherwise
515
int32 getEncodingScheme(const uint16 inCcsid, int32& outEncodingScheme)
517
DBUG_ENTER("db2i_charsetSupport::getEncodingScheme");
519
static bool ptrInited = FALSE;
520
static char ptrSpace[sizeof(ILEpointer) + 15];
521
static ILEpointer* ptrToPtr = (ILEpointer*)roundToQuadWordBdy(ptrSpace);
526
rc = _RSLOBJ2(ptrToPtr, RSLOBJ_TS_PGM, "QTQGESP", "QSYS");
530
getErrTxt(DB2I_ERR_RESOLVE_OBJ,"QTQGESP","QSYS","*PGM",errno);
531
DBUG_RETURN(DB2I_ERR_RESOLVE_OBJ);
536
DBUG_ASSERT(inCcsid != 0);
538
int GESPCCSID = inCcsid;
545
ILEArgv[0] = &GESPCCSID;
546
ILEArgv[1] = &GESPLen;
547
ILEArgv[2] = &GESPNbrVal;
548
ILEArgv[3] = &GESPES;
549
ILEArgv[4] = &GESPCSCPL;
550
ILEArgv[5] = &GESPFB;
553
rc = _PGMCALL(ptrToPtr, (void**)&ILEArgv, 0);
557
getErrTxt(DB2I_ERR_PGMCALL,"QTQGESP","QSYS",rc);
558
DBUG_RETURN(DB2I_ERR_PGMCALL);
560
if (GESPFB[0] != 0 ||
564
getErrTxt(DB2I_ERR_QTQGESP,GESPFB[0],GESPFB[1],GESPFB[2]);
565
DBUG_RETURN(DB2I_ERR_QTQGESP);
567
outEncodingScheme = GESPES;
574
Get the best fit equivalent CCSID. (Wrapper for QTQGRDC API)
576
@param inCcsid An IBM i CCSID
577
@param inEncodingScheme The encoding scheme
578
@param[out] outCcsid The equivalent CCSID
580
@return 0 if successful. Failure otherwise
582
int32 getAssociatedCCSID(const uint16 inCcsid, const int inEncodingScheme, uint16* outCcsid)
584
DBUG_ENTER("db2i_charsetSupport::getAssociatedCCSID");
585
static bool ptrInited = FALSE;
586
static char ptrSpace[sizeof(ILEpointer) + 15];
587
static ILEpointer* ptrToPtr = (ILEpointer*)roundToQuadWordBdy(ptrSpace);
590
// Override non-standard charsets
591
if ((inCcsid == 923) && (inEncodingScheme == 0x1100))
596
else if ((inCcsid == 1250) && (inEncodingScheme == 0x1100))
604
rc = _RSLOBJ2(ptrToPtr, RSLOBJ_TS_PGM, "QTQGRDC", "QSYS");
608
getErrTxt(DB2I_ERR_RESOLVE_OBJ,"QTQGRDC","QSYS","*PGM",errno);
609
DBUG_RETURN(DB2I_ERR_RESOLVE_OBJ);
614
int GRDCCCSID = inCcsid;
615
int GRDCES = inEncodingScheme;
620
ILEArgv[0] = &GRDCCCSID;
621
ILEArgv[1] = &GRDCES;
622
ILEArgv[2] = &GRDCSel;
623
ILEArgv[3] = &GRDCAssCCSID;
624
ILEArgv[4] = &GRDCFB;
627
rc = _PGMCALL(ptrToPtr, (void**)&ILEArgv, 0);
631
getErrTxt(DB2I_ERR_PGMCALL,"QTQGRDC","QSYS",rc);
632
DBUG_RETURN(DB2I_ERR_PGMCALL);
634
if (GRDCFB[0] != 0 ||
638
getErrTxt(DB2I_ERR_QTQGRDC,GRDCFB[0],GRDCFB[1],GRDCFB[2]);
639
DBUG_RETURN(DB2I_ERR_QTQGRDC);
642
*outCcsid = GRDCAssCCSID;
648
Open an iconv conversion between a MySQL charset and the respective IBM i CCSID
650
@param direction The direction of the conversion
651
@param mysqlCSName Name of the MySQL character set
652
@param db2CCSID The IBM i CCSID
653
@param hashKey The key to use for inserting the opened conversion into the cache
654
@param[out] newConversion The iconv descriptor
656
@return 0 if successful. Failure otherwise
658
static int32 openNewConversion(enum_conversionDirection direction,
659
const char* mysqlCSName,
661
IconvMap::HashKey* hashKey,
662
iconv_t& newConversion)
664
DBUG_ENTER("db2i_charsetSupport::openNewConversion");
666
char mysqlAix41Desc[Qlg_MaxDescSize];
667
char db2Aix41Desc[Qlg_MaxDescSize];
668
char db2CcsidString[6] = "";
672
First we have to convert the MySQL IANA-like name and the DB2 CCSID into
673
their equivalent iconv descriptions.
675
rc = convertTextDesc(Qlg_TypeIANA, Qlg_TypeAix41, mysqlCSName, mysqlAix41Desc);
678
if (rc == DB2I_ERR_UNSUPP_CHARSET)
679
getErrTxt(DB2I_ERR_UNSUPP_CHARSET, mysqlCSName);
683
CHARSET_INFO *cs= &my_charset_bin;
684
(uint)(cs->cset->long10_to_str)(cs,db2CcsidString,sizeof(db2CcsidString), 10, db2CCSID);
685
rc = convertTextDesc(Qlg_TypeAS400CCSID, Qlg_TypeAix41, db2CcsidString, db2Aix41Desc);
688
if (rc == DB2I_ERR_UNSUPP_CHARSET)
689
getErrTxt(DB2I_ERR_UNSUPP_CHARSET, mysqlCSName);
694
/* Call iconv to open the conversion. */
695
if (direction == toDB2)
697
newConversion = iconv_open(db2Aix41Desc, mysqlAix41Desc);
701
newConversion = iconv_open(mysqlAix41Desc, db2Aix41Desc);
704
if (unlikely(newConversion == (iconv_t) -1))
706
getErrTxt(DB2I_ERR_UNSUPP_CHARSET, mysqlCSName);
707
DBUG_RETURN(DB2I_ERR_UNSUPP_CHARSET);
710
/* Insert the new conversion into the cache. */
711
IconvMap* mapping = (IconvMap*)alloc_root(&iconvMapMemroot, sizeof(IconvMap));
714
my_error(ER_OUTOFMEMORY, MYF(0), sizeof(IconvMap));
715
DBUG_RETURN( HA_ERR_OUT_OF_MEM);
717
memcpy(&(mapping->hashKey), hashKey, sizeof(mapping->hashKey));
718
mapping->iconvDesc = newConversion;
719
pthread_mutex_lock(&iconvMapHashMutex);
720
my_hash_insert(&iconvMapHash, (const uchar*)mapping);
721
pthread_mutex_unlock(&iconvMapHashMutex);
728
Get an iconv conversion between a MySQL charset and the respective IBM i CCSID, reusing a cached descriptor when one exists and opening a new one otherwise
730
@param direction The direction of the conversion
731
@param cs The MySQL character set
732
@param db2CCSID The IBM i CCSID
733
@param[out] conversion The iconv descriptor
735
@return 0 if successful. Failure otherwise
737
int32 getConversion(enum_conversionDirection direction, const CHARSET_INFO* cs, uint16 db2CCSID, iconv_t& conversion)
739
DBUG_ENTER("db2i_charsetSupport::getConversion");
743
/* Build the hash key */
744
IconvMap::HashKey hashKey;
745
hashKey.direction= direction;
746
hashKey.myCharset= cs;
747
hashKey.db2CCSID= db2CCSID;
749
/* Look for the conversion in the cache and add it if it is not there. */
751
if (!(mapping= (IconvMap *) hash_search(&iconvMapHash,
752
(const uchar*)&hashKey,
755
DBUG_PRINT("getConversion", ("Hash miss for direction=%d, cs=%s, ccsid=%d", direction, cs->name, db2CCSID));
756
rc= openNewConversion(direction, cs->csname, db2CCSID, &hashKey, conversion);
762
conversion= mapping->iconvDesc;
769
Fast-path conversion from ASCII to EBCDIC for use in converting
770
identifiers to be sent to the QMY APIs.
772
@param input ASCII data
773
@param[out] output EBCDIC data
774
@param ilen Size of input buffer and output buffer
776
int convToEbcdic(const char* input, char* output, size_t ilen)
778
static bool inited = FALSE;
786
ic = iconv_open( "IBM-037", "ISO8859-1" );
789
size_t substitutedChars;
791
if (iconv( ic, (char**)&input, &ilen, &output, &olen, &substitutedChars ) == -1)
799
Fast-path conversion from EBCDIC to ASCII for use in converting
800
data received from the QMY APIs.
802
@param input EBCDIC data
803
@param[out] output ASCII data
804
@param ilen Size of input buffer and output buffer
806
int convFromEbcdic(const char* input, char* output, size_t ilen)
808
static bool inited = FALSE;
816
ic = iconv_open("ISO8859-1", "IBM-037");
820
size_t substitutedChars;
822
if (iconv( ic, (char**)&input, &ilen, &output, &olen, &substitutedChars) == -1)