1
/*-------------------------------------------------------------------------
3
* EUC_JP, SJIS and MULE_INTERNAL
5
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
6
* Portions Copyright (c) 1994, Regents of the University of California
9
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.9 2004-12-31 22:01:53 pgsql Exp $
11
*-------------------------------------------------------------------------
16
#include "mb/pg_wchar.h"
19
* SJIS alternative code.
20
* This code is used if a mapping EUC -> SJIS is not defined.
22
/* Two-byte substitute written to SJIS output (high byte first) when a character cannot be converted to SJIS. */
#define PGSJISALTCODE 0x81ac
23
/* Two-byte substitute written by sjis2mic (high byte first) for SJIS codes in the NEC-selection IBM kanji ranges it does not otherwise handle. */
#define PGEUCALTCODE 0xa2ae
26
* conversion table between SJIS UDC (IBM kanji) and EUC_JP
30
/* Factor applied to the source length when sizing the intermediate buffer used by the two-step EUC_JP <-> SJIS conversions. */
#define ENCODING_GROWTH_RATE 4
32
PG_FUNCTION_INFO_V1(euc_jp_to_sjis);
33
PG_FUNCTION_INFO_V1(sjis_to_euc_jp);
34
PG_FUNCTION_INFO_V1(euc_jp_to_mic);
35
PG_FUNCTION_INFO_V1(mic_to_euc_jp);
36
PG_FUNCTION_INFO_V1(sjis_to_mic);
37
PG_FUNCTION_INFO_V1(mic_to_sjis);
39
extern Datum euc_jp_to_sjis(PG_FUNCTION_ARGS);
40
extern Datum sjis_to_euc_jp(PG_FUNCTION_ARGS);
41
extern Datum euc_jp_to_mic(PG_FUNCTION_ARGS);
42
extern Datum mic_to_euc_jp(PG_FUNCTION_ARGS);
43
extern Datum sjis_to_mic(PG_FUNCTION_ARGS);
44
extern Datum mic_to_sjis(PG_FUNCTION_ARGS);
48
* INTEGER, -- source encoding id
49
* INTEGER, -- destination encoding id
50
* CSTRING, -- source string (null terminated C string)
51
* CSTRING, -- destination string (null terminated C string)
52
* INTEGER -- source string length
57
static void sjis2mic(unsigned char *sjis, unsigned char *p, int len);
58
static void mic2sjis(unsigned char *mic, unsigned char *p, int len);
59
static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len);
60
static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len);
63
euc_jp_to_sjis(PG_FUNCTION_ARGS)
65
unsigned char *src = PG_GETARG_CSTRING(2);
66
unsigned char *dest = PG_GETARG_CSTRING(3);
67
int len = PG_GETARG_INT32(4);
70
Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
71
Assert(PG_GETARG_INT32(1) == PG_SJIS);
74
buf = palloc(len * ENCODING_GROWTH_RATE);
75
euc_jp2mic(src, buf, len);
76
mic2sjis(buf, dest, strlen(buf));
83
sjis_to_euc_jp(PG_FUNCTION_ARGS)
85
unsigned char *src = PG_GETARG_CSTRING(2);
86
unsigned char *dest = PG_GETARG_CSTRING(3);
87
int len = PG_GETARG_INT32(4);
90
Assert(PG_GETARG_INT32(0) == PG_SJIS);
91
Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
94
buf = palloc(len * ENCODING_GROWTH_RATE);
95
sjis2mic(src, buf, len);
96
mic2euc_jp(buf, dest, strlen(buf));
103
euc_jp_to_mic(PG_FUNCTION_ARGS)
105
unsigned char *src = PG_GETARG_CSTRING(2);
106
unsigned char *dest = PG_GETARG_CSTRING(3);
107
int len = PG_GETARG_INT32(4);
109
Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
110
Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
113
euc_jp2mic(src, dest, len);
119
mic_to_euc_jp(PG_FUNCTION_ARGS)
121
unsigned char *src = PG_GETARG_CSTRING(2);
122
unsigned char *dest = PG_GETARG_CSTRING(3);
123
int len = PG_GETARG_INT32(4);
125
Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
126
Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
129
mic2sjis(src, dest, len);
135
sjis_to_mic(PG_FUNCTION_ARGS)
137
unsigned char *src = PG_GETARG_CSTRING(2);
138
unsigned char *dest = PG_GETARG_CSTRING(3);
139
int len = PG_GETARG_INT32(4);
141
Assert(PG_GETARG_INT32(0) == PG_SJIS);
142
Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
145
sjis2mic(src, dest, len);
151
mic_to_sjis(PG_FUNCTION_ARGS)
153
unsigned char *src = PG_GETARG_CSTRING(2);
154
unsigned char *dest = PG_GETARG_CSTRING(3);
155
int len = PG_GETARG_INT32(4);
157
Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
158
Assert(PG_GETARG_INT32(1) == PG_SJIS);
161
mic2sjis(src, dest, len);
170
/*
 * sjis2mic
 *		Reads the SJIS input at `sjis` one leading byte at a time and writes
 *		the converted bytes through `p`; `len` is the caller-supplied source
 *		length.  Used by sjis_to_mic and as the first step of sjis_to_euc_jp.
 *
 * NOTE(review): the loop guard below is `len >= 0`, so the body can still be
 * entered once `len` has reached 0 -- confirm whether `len > 0` was intended.
 */
sjis2mic(unsigned char *sjis, unsigned char *p, int len)
174
/* Eiji Tokuya patched begin */
179
/* Eiji Tokuya patched end */
180
/* stop at the terminating zero byte or once len goes negative */
while (len >= 0 && (c1 = *sjis++))
182
if (c1 >= 0xa1 && c1 <= 0xdf)
184
/* JIS X0201 (1 byte kana) */
192
* JIS X0208, X0212, user defined extended characters
196
/* Eiji Tokuya patched begin */
if (k >= 0xed40 && k < 0xf040)
199
/* NEC selection IBM kanji */
202
k2 = ibmkanji[i].nec;
207
/* continue with the SJIS code stored in the ibmkanji entry and re-derive c1 from its high byte */
k = ibmkanji[i].sjis;
208
c1 = (k >> 8) & 0xff;
215
/* Eiji Tokuya patched end */
220
*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
221
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
223
/* Eiji Tokuya patched begin */
224
else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
226
/* NEC selection IBM kanji - Other undecided justice */
227
/* Eiji Tokuya patched end */
229
/* emit the substitute code PGEUCALTCODE: high byte, then low byte */
*p++ = PGEUCALTCODE >> 8;
230
*p++ = PGEUCALTCODE & 0xff;
232
else if (k >= 0xf040 && k < 0xf540)
235
* UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
236
* 0x7e7e EUC 0xf5a1 - 0xfefe
241
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
242
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
244
else if (k >= 0xf540 && k < 0xfa40)
247
* UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
248
* 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
253
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
254
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
256
else if (k >= 0xfa40)
259
* mapping IBM kanji to X0208 and X0212
265
k2 = ibmkanji[i].sjis;
274
/* both output bytes get their high bit set */
*p++ = 0x80 | ((k & 0xff00) >> 8);
275
*p++ = 0x80 | (k & 0xff);
280
*p++ = 0x80 | (k >> 8);
281
*p++ = 0x80 | (k & 0xff);
288
{ /* should be ASCII */
300
/*
 * mic2sjis
 *		Reads the MULE_INTERNAL input at `mic`, dispatching on each
 *		character's leading byte, and writes SJIS bytes through `p`; `len` is
 *		the caller-supplied source length.  Used by mic_to_sjis and as the
 *		second step of euc_jp_to_sjis.
 *
 * NOTE(review): the loop guard below is `len >= 0`, so the body can still be
 * entered once `len` has reached 0 -- confirm whether `len > 0` was intended.
 */
mic2sjis(unsigned char *mic, unsigned char *p, int len)
306
/* stop at the terminating zero byte or once len goes negative */
while (len >= 0 && (c1 = *mic))
308
/* charge the pg_mic_mblen() result for the current character against len, then step past its leading byte */
len -= pg_mic_mblen(mic++);
310
if (c1 == LC_JISX0201K)
312
else if (c1 == LC_JISX0208)
316
/* combine the two bytes into one 16-bit code */
k = (c1 << 8) | (c2 & 0xff);
321
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
324
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
325
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
327
else if (c1 == LC_JISX0212)
339
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
340
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
347
/* only the low 16 bits of the table's EUC value are compared */
k2 = ibmkanji[i].euc & 0xffff;
350
/* emit the substitute code PGSJISALTCODE: high byte, then low byte */
*p++ = PGSJISALTCODE >> 8;
351
*p++ = PGSJISALTCODE & 0xff;
356
k = ibmkanji[i].sjis;
366
/* cannot convert to SJIS! */
367
*p++ = PGSJISALTCODE >> 8;
368
*p++ = PGSJISALTCODE & 0xff;
371
{ /* should be ASCII */
382
/*
 * euc_jp2mic
 *		Reads the EUC_JP input at `euc` one leading byte at a time and writes
 *		the converted bytes through `p`; `len` is the caller-supplied source
 *		length.  Used by euc_jp_to_mic and as the first step of
 *		euc_jp_to_sjis.
 *
 * NOTE(review): the loop guard below is `len >= 0`, so the body can still be
 * entered once `len` has reached 0 -- confirm whether `len > 0` was intended.
 */
euc_jp2mic(unsigned char *euc, unsigned char *p, int len)
386
/* stop at the terminating zero byte or once len goes negative */
while (len >= 0 && (c1 = *euc++))
395
{ /* JIS X0212 kanji? */
409
{ /* should be ASCII */
421
mic2euc_jp(unsigned char *mic, unsigned char *p, int len)
425
while (len >= 0 && (c1 = *mic))
427
len -= pg_mic_mblen(mic++);
429
if (c1 == LC_JISX0201K)
434
else if (c1 == LC_JISX0212)
440
else if (c1 == LC_JISX0208)
446
{ /* cannot convert to EUC_JP! */
448
pg_print_bogus_char(&mic, &p);
451
{ /* should be ASCII */