~vcs-imports/mammoth-replicator/trunk

« back to all changes in this revision

Viewing changes to src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c

  • Committer: alvherre
  • Date: 2005-12-16 21:24:52 UTC
  • Revision ID: svn-v4:db760fc0-0f08-0410-9d63-cc6633f64896:trunk:1
Initial import of the REL8_0_3 sources from the Pgsql CVS repository.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-------------------------------------------------------------------------
 
2
 *
 
3
 *        EUC_JP, SJIS and MULE_INTERNAL
 
4
 *
 
5
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 
6
 * Portions Copyright (c) 1994, Regents of the University of California
 
7
 *
 
8
 * IDENTIFICATION
 
9
 *        $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.9 2004-12-31 22:01:53 pgsql Exp $
 
10
 *
 
11
 *-------------------------------------------------------------------------
 
12
 */
 
13
 
 
14
#include "postgres.h"
 
15
#include "fmgr.h"
 
16
#include "mb/pg_wchar.h"
 
17
 
 
18
/*
 * Alternative (substitute) codes.
 * PGSJISALTCODE is emitted when a character has no mapping to SJIS;
 * PGEUCALTCODE is emitted when an SJIS character has no mapping to EUC_JP.
 */
 
22
#define PGSJISALTCODE 0x81ac
 
23
#define PGEUCALTCODE 0xa2ae
 
24
 
 
25
/*
 
26
 * conversion table between SJIS UDC (IBM kanji) and EUC_JP
 
27
 */
 
28
#include "sjis.map"
 
29
 
 
30
#define ENCODING_GROWTH_RATE 4
 
31
 
 
32
PG_FUNCTION_INFO_V1(euc_jp_to_sjis);
 
33
PG_FUNCTION_INFO_V1(sjis_to_euc_jp);
 
34
PG_FUNCTION_INFO_V1(euc_jp_to_mic);
 
35
PG_FUNCTION_INFO_V1(mic_to_euc_jp);
 
36
PG_FUNCTION_INFO_V1(sjis_to_mic);
 
37
PG_FUNCTION_INFO_V1(mic_to_sjis);
 
38
 
 
39
extern Datum euc_jp_to_sjis(PG_FUNCTION_ARGS);
 
40
extern Datum sjis_to_euc_jp(PG_FUNCTION_ARGS);
 
41
extern Datum euc_jp_to_mic(PG_FUNCTION_ARGS);
 
42
extern Datum mic_to_euc_jp(PG_FUNCTION_ARGS);
 
43
extern Datum sjis_to_mic(PG_FUNCTION_ARGS);
 
44
extern Datum mic_to_sjis(PG_FUNCTION_ARGS);
 
45
 
 
46
/* ----------
 
47
 * conv_proc(
 
48
 *              INTEGER,        -- source encoding id
 
49
 *              INTEGER,        -- destination encoding id
 
50
 *              CSTRING,        -- source string (null terminated C string)
 
51
 *              CSTRING,        -- destination string (null terminated C string)
 
52
 *              INTEGER         -- source string length
 
53
 * ) returns VOID;
 
54
 * ----------
 
55
 */
 
56
 
 
57
static void sjis2mic(unsigned char *sjis, unsigned char *p, int len);
 
58
static void mic2sjis(unsigned char *mic, unsigned char *p, int len);
 
59
static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len);
 
60
static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len);
 
61
 
 
62
Datum
 
63
euc_jp_to_sjis(PG_FUNCTION_ARGS)
 
64
{
 
65
        unsigned char *src = PG_GETARG_CSTRING(2);
 
66
        unsigned char *dest = PG_GETARG_CSTRING(3);
 
67
        int                     len = PG_GETARG_INT32(4);
 
68
        unsigned char *buf;
 
69
 
 
70
        Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
 
71
        Assert(PG_GETARG_INT32(1) == PG_SJIS);
 
72
        Assert(len >= 0);
 
73
 
 
74
        buf = palloc(len * ENCODING_GROWTH_RATE);
 
75
        euc_jp2mic(src, buf, len);
 
76
        mic2sjis(buf, dest, strlen(buf));
 
77
        pfree(buf);
 
78
 
 
79
        PG_RETURN_VOID();
 
80
}
 
81
 
 
82
Datum
 
83
sjis_to_euc_jp(PG_FUNCTION_ARGS)
 
84
{
 
85
        unsigned char *src = PG_GETARG_CSTRING(2);
 
86
        unsigned char *dest = PG_GETARG_CSTRING(3);
 
87
        int                     len = PG_GETARG_INT32(4);
 
88
        unsigned char *buf;
 
89
 
 
90
        Assert(PG_GETARG_INT32(0) == PG_SJIS);
 
91
        Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
 
92
        Assert(len >= 0);
 
93
 
 
94
        buf = palloc(len * ENCODING_GROWTH_RATE);
 
95
        sjis2mic(src, buf, len);
 
96
        mic2euc_jp(buf, dest, strlen(buf));
 
97
        pfree(buf);
 
98
 
 
99
        PG_RETURN_VOID();
 
100
}
 
101
 
 
102
Datum
 
103
euc_jp_to_mic(PG_FUNCTION_ARGS)
 
104
{
 
105
        unsigned char *src = PG_GETARG_CSTRING(2);
 
106
        unsigned char *dest = PG_GETARG_CSTRING(3);
 
107
        int                     len = PG_GETARG_INT32(4);
 
108
 
 
109
        Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
 
110
        Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
 
111
        Assert(len >= 0);
 
112
 
 
113
        euc_jp2mic(src, dest, len);
 
114
 
 
115
        PG_RETURN_VOID();
 
116
}
 
117
 
 
118
Datum
 
119
mic_to_euc_jp(PG_FUNCTION_ARGS)
 
120
{
 
121
        unsigned char *src = PG_GETARG_CSTRING(2);
 
122
        unsigned char *dest = PG_GETARG_CSTRING(3);
 
123
        int                     len = PG_GETARG_INT32(4);
 
124
 
 
125
        Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
 
126
        Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
 
127
        Assert(len >= 0);
 
128
 
 
129
        mic2sjis(src, dest, len);
 
130
 
 
131
        PG_RETURN_VOID();
 
132
}
 
133
 
 
134
Datum
 
135
sjis_to_mic(PG_FUNCTION_ARGS)
 
136
{
 
137
        unsigned char *src = PG_GETARG_CSTRING(2);
 
138
        unsigned char *dest = PG_GETARG_CSTRING(3);
 
139
        int                     len = PG_GETARG_INT32(4);
 
140
 
 
141
        Assert(PG_GETARG_INT32(0) == PG_SJIS);
 
142
        Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
 
143
        Assert(len >= 0);
 
144
 
 
145
        sjis2mic(src, dest, len);
 
146
 
 
147
        PG_RETURN_VOID();
 
148
}
 
149
 
 
150
Datum
 
151
mic_to_sjis(PG_FUNCTION_ARGS)
 
152
{
 
153
        unsigned char *src = PG_GETARG_CSTRING(2);
 
154
        unsigned char *dest = PG_GETARG_CSTRING(3);
 
155
        int                     len = PG_GETARG_INT32(4);
 
156
 
 
157
        Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
 
158
        Assert(PG_GETARG_INT32(1) == PG_SJIS);
 
159
        Assert(len >= 0);
 
160
 
 
161
        mic2sjis(src, dest, len);
 
162
 
 
163
        PG_RETURN_VOID();
 
164
}
 
165
 
 
166
/*
 
167
 * SJIS ---> MIC
 
168
 */
 
169
static void
 
170
sjis2mic(unsigned char *sjis, unsigned char *p, int len)
 
171
{
 
172
        int                     c1,
 
173
                                c2,
 
174
/* Eiji Tokuya patched begin */
 
175
                                i,
 
176
                                k,
 
177
                                k2;
 
178
 
 
179
/* Eiji Tokuya patched end */
 
180
        while (len >= 0 && (c1 = *sjis++))
 
181
        {
 
182
                if (c1 >= 0xa1 && c1 <= 0xdf)
 
183
                {
 
184
                        /* JIS X0201 (1 byte kana) */
 
185
                        len--;
 
186
                        *p++ = LC_JISX0201K;
 
187
                        *p++ = c1;
 
188
                }
 
189
                else if (c1 > 0x7f)
 
190
                {
 
191
                        /*
 
192
                         * JIS X0208, X0212, user defined extended characters
 
193
                         */
 
194
                        c2 = *sjis++;
 
195
                        k = (c1 << 8) + c2;
 
196
/* Eiji Tokuya patched begin */
 
197
                        if (k >= 0xed40 && k < 0xf040)
 
198
                        {
 
199
                                /* NEC selection IBM kanji */
 
200
                                for (i = 0;; i++)
 
201
                                {
 
202
                                        k2 = ibmkanji[i].nec;
 
203
                                        if (k2 == 0xffff)
 
204
                                                break;
 
205
                                        if (k2 == k)
 
206
                                        {
 
207
                                                k = ibmkanji[i].sjis;
 
208
                                                c1 = (k >> 8) & 0xff;
 
209
                                                c2 = k & 0xff;
 
210
                                        }
 
211
                                }
 
212
                        }
 
213
 
 
214
                        if (k < 0xeb3f)
 
215
/* Eiji Tokuya patched end */
 
216
                        {
 
217
                                /* JIS X0208 */
 
218
                                len -= 2;
 
219
                                *p++ = LC_JISX0208;
 
220
                                *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
 
221
                                *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
 
222
                        }
 
223
/* Eiji Tokuya patched begin */
 
224
                        else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
 
225
                        {
 
226
                                /* NEC selection IBM kanji - Other undecided justice */
 
227
/* Eiji Tokuya patched end */
 
228
                                *p++ = LC_JISX0208;
 
229
                                *p++ = PGEUCALTCODE >> 8;
 
230
                                *p++ = PGEUCALTCODE & 0xff;
 
231
                        }
 
232
                        else if (k >= 0xf040 && k < 0xf540)
 
233
                        {
 
234
                                /*
 
235
                                 * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
 
236
                                 * 0x7e7e EUC 0xf5a1 - 0xfefe
 
237
                                 */
 
238
                                len -= 2;
 
239
                                *p++ = LC_JISX0208;
 
240
                                c1 -= 0x6f;
 
241
                                *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
 
242
                                *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
 
243
                        }
 
244
                        else if (k >= 0xf540 && k < 0xfa40)
 
245
                        {
 
246
                                /*
 
247
                                 * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
 
248
                                 * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
 
249
                                 */
 
250
                                len -= 2;
 
251
                                *p++ = LC_JISX0212;
 
252
                                c1 -= 0x74;
 
253
                                *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
 
254
                                *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
 
255
                        }
 
256
                        else if (k >= 0xfa40)
 
257
                        {
 
258
                                /*
 
259
                                 * mapping IBM kanji to X0208 and X0212
 
260
                                 *
 
261
                                 */
 
262
                                len -= 2;
 
263
                                for (i = 0;; i++)
 
264
                                {
 
265
                                        k2 = ibmkanji[i].sjis;
 
266
                                        if (k2 == 0xffff)
 
267
                                                break;
 
268
                                        if (k2 == k)
 
269
                                        {
 
270
                                                k = ibmkanji[i].euc;
 
271
                                                if (k >= 0x8f0000)
 
272
                                                {
 
273
                                                        *p++ = LC_JISX0212;
 
274
                                                        *p++ = 0x80 | ((k & 0xff00) >> 8);
 
275
                                                        *p++ = 0x80 | (k & 0xff);
 
276
                                                }
 
277
                                                else
 
278
                                                {
 
279
                                                        *p++ = LC_JISX0208;
 
280
                                                        *p++ = 0x80 | (k >> 8);
 
281
                                                        *p++ = 0x80 | (k & 0xff);
 
282
                                                }
 
283
                                        }
 
284
                                }
 
285
                        }
 
286
                }
 
287
                else
 
288
                {                                               /* should be ASCII */
 
289
                        len--;
 
290
                        *p++ = c1;
 
291
                }
 
292
        }
 
293
        *p = '\0';
 
294
}
 
295
 
 
296
/*
 
297
 * MIC ---> SJIS
 
298
 */
 
299
static void
 
300
mic2sjis(unsigned char *mic, unsigned char *p, int len)
 
301
{
 
302
        int                     c1,
 
303
                                c2,
 
304
                                k;
 
305
 
 
306
        while (len >= 0 && (c1 = *mic))
 
307
        {
 
308
                len -= pg_mic_mblen(mic++);
 
309
 
 
310
                if (c1 == LC_JISX0201K)
 
311
                        *p++ = *mic++;
 
312
                else if (c1 == LC_JISX0208)
 
313
                {
 
314
                        c1 = *mic++;
 
315
                        c2 = *mic++;
 
316
                        k = (c1 << 8) | (c2 & 0xff);
 
317
                        if (k >= 0xf5a1)
 
318
                        {
 
319
                                /* UDC1 */
 
320
                                c1 -= 0x54;
 
321
                                *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
 
322
                        }
 
323
                        else
 
324
                                *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
 
325
                        *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
 
326
                }
 
327
                else if (c1 == LC_JISX0212)
 
328
                {
 
329
                        int                     i,
 
330
                                                k2;
 
331
 
 
332
                        c1 = *mic++;
 
333
                        c2 = *mic++;
 
334
                        k = c1 << 8 | c2;
 
335
                        if (k >= 0xf5a1)
 
336
                        {
 
337
                                /* UDC2 */
 
338
                                c1 -= 0x54;
 
339
                                *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
 
340
                                *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
 
341
                        }
 
342
                        else
 
343
                        {
 
344
                                /* IBM kanji */
 
345
                                for (i = 0;; i++)
 
346
                                {
 
347
                                        k2 = ibmkanji[i].euc & 0xffff;
 
348
                                        if (k2 == 0xffff)
 
349
                                        {
 
350
                                                *p++ = PGSJISALTCODE >> 8;
 
351
                                                *p++ = PGSJISALTCODE & 0xff;
 
352
                                                break;
 
353
                                        }
 
354
                                        if (k2 == k)
 
355
                                        {
 
356
                                                k = ibmkanji[i].sjis;
 
357
                                                *p++ = k >> 8;
 
358
                                                *p++ = k & 0xff;
 
359
                                                break;
 
360
                                        }
 
361
                                }
 
362
                        }
 
363
                }
 
364
                else if (c1 > 0x7f)
 
365
                {
 
366
                        /* cannot convert to SJIS! */
 
367
                        *p++ = PGSJISALTCODE >> 8;
 
368
                        *p++ = PGSJISALTCODE & 0xff;
 
369
                }
 
370
                else
 
371
                {                                               /* should be ASCII */
 
372
                        *p++ = c1;
 
373
                }
 
374
        }
 
375
        *p = '\0';
 
376
}
 
377
 
 
378
/*
 
379
 * EUC_JP ---> MIC
 
380
 */
 
381
static void
 
382
euc_jp2mic(unsigned char *euc, unsigned char *p, int len)
 
383
{
 
384
        int                     c1;
 
385
 
 
386
        while (len >= 0 && (c1 = *euc++))
 
387
        {
 
388
                if (c1 == SS2)
 
389
                {                                               /* 1 byte kana? */
 
390
                        len -= 2;
 
391
                        *p++ = LC_JISX0201K;
 
392
                        *p++ = *euc++;
 
393
                }
 
394
                else if (c1 == SS3)
 
395
                {                                               /* JIS X0212 kanji? */
 
396
                        len -= 3;
 
397
                        *p++ = LC_JISX0212;
 
398
                        *p++ = *euc++;
 
399
                        *p++ = *euc++;
 
400
                }
 
401
                else if (c1 & 0x80)
 
402
                {                                               /* kanji? */
 
403
                        len -= 2;
 
404
                        *p++ = LC_JISX0208;
 
405
                        *p++ = c1;
 
406
                        *p++ = *euc++;
 
407
                }
 
408
                else
 
409
                {                                               /* should be ASCII */
 
410
                        len--;
 
411
                        *p++ = c1;
 
412
                }
 
413
        }
 
414
        *p = '\0';
 
415
}
 
416
 
 
417
/*
 
418
 * MIC ---> EUC_JP
 
419
 */
 
420
static void
 
421
mic2euc_jp(unsigned char *mic, unsigned char *p, int len)
 
422
{
 
423
        int                     c1;
 
424
 
 
425
        while (len >= 0 && (c1 = *mic))
 
426
        {
 
427
                len -= pg_mic_mblen(mic++);
 
428
 
 
429
                if (c1 == LC_JISX0201K)
 
430
                {
 
431
                        *p++ = SS2;
 
432
                        *p++ = *mic++;
 
433
                }
 
434
                else if (c1 == LC_JISX0212)
 
435
                {
 
436
                        *p++ = SS3;
 
437
                        *p++ = *mic++;
 
438
                        *p++ = *mic++;
 
439
                }
 
440
                else if (c1 == LC_JISX0208)
 
441
                {
 
442
                        *p++ = *mic++;
 
443
                        *p++ = *mic++;
 
444
                }
 
445
                else if (c1 > 0x7f)
 
446
                {                                               /* cannot convert to EUC_JP! */
 
447
                        mic--;
 
448
                        pg_print_bogus_char(&mic, &p);
 
449
                }
 
450
                else
 
451
                {                                               /* should be ASCII */
 
452
                        *p++ = c1;
 
453
                }
 
454
        }
 
455
        *p = '\0';
 
456
}