~ubuntu-branches/ubuntu/lucid/postgresql-8.4/lucid-security

« back to all changes in this revision

Viewing changes to src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2009-03-20 12:00:13 UTC
  • Revision ID: james.westby@ubuntu.com-20090320120013-hogj7egc5mjncc5g
Tags: upstream-8.4~0cvs20090328
Import upstream version 8.4~0cvs20090328

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * conversion between BIG5 and Mule Internal Code(CNS 11643-1992
 
3
 * plane 1 and plane 2).
 
4
 * This program is partially copied from lv(Multilingual file viewer)
 
5
 * and slightly modified. lv is written and copyrighted by NARITA Tomio
 
6
 * (nrt@web.ad.jp).
 
7
 *
 
8
 * 1999/1/15 Tatsuo Ishii
 
9
 *
 
10
 * $PostgreSQL$
 
11
 */
 
12
 
 
13
/* can be used in either frontend or backend */
 
14
#include "postgres_fe.h"
 
15
 
 
16
#include "mb/pg_wchar.h"
 
17
 
 
18
/*
 * One entry of a range-mapping table.
 *
 * "code" is the first code point of a run in the source character set and
 * "peer" is the code point it corresponds to in the destination character
 * set.  A peer of 0 marks a run for which BinarySearchRange reports "no
 * mapping".
 */
typedef struct
{
	unsigned short code;		/* start of the run in the source charset */
	unsigned short peer;		/* matching code in the target charset, or 0 */
} codes_t;
 
23
 
 
24
/* map Big5 Level 1 to CNS 11643-1992 Plane 1 */
 
25
static codes_t big5Level1ToCnsPlane1[25] = {    /* range */
 
26
        {0xA140, 0x2121},
 
27
        {0xA1F6, 0x2258},
 
28
        {0xA1F7, 0x2257},
 
29
        {0xA1F8, 0x2259},
 
30
        {0xA2AF, 0x2421},
 
31
        {0xA3C0, 0x4221},
 
32
        {0xa3e1, 0x0000},
 
33
        {0xA440, 0x4421},
 
34
        {0xACFE, 0x5753},
 
35
        {0xacff, 0x0000},
 
36
        {0xAD40, 0x5323},
 
37
        {0xAFD0, 0x5754},
 
38
        {0xBBC8, 0x6B51},
 
39
        {0xBE52, 0x6B50},
 
40
        {0xBE53, 0x6F5C},
 
41
        {0xC1AB, 0x7536},
 
42
        {0xC2CB, 0x7535},
 
43
        {0xC2CC, 0x7737},
 
44
        {0xC361, 0x782E},
 
45
        {0xC3B9, 0x7865},
 
46
        {0xC3BA, 0x7864},
 
47
        {0xC3BB, 0x7866},
 
48
        {0xC456, 0x782D},
 
49
        {0xC457, 0x7962},
 
50
        {0xc67f, 0x0000}
 
51
};
 
52
 
 
53
/* map CNS 11643-1992 Plane 1 to Big5 Level 1 */
 
54
static codes_t cnsPlane1ToBig5Level1[26] = {    /* range */
 
55
        {0x2121, 0xA140},
 
56
        {0x2257, 0xA1F7},
 
57
        {0x2258, 0xA1F6},
 
58
        {0x2259, 0xA1F8},
 
59
        {0x234f, 0x0000},
 
60
        {0x2421, 0xA2AF},
 
61
        {0x2571, 0x0000},
 
62
        {0x4221, 0xA3C0},
 
63
        {0x4242, 0x0000},
 
64
        {0x4421, 0xA440},
 
65
        {0x5323, 0xAD40},
 
66
        {0x5753, 0xACFE},
 
67
        {0x5754, 0xAFD0},
 
68
        {0x6B50, 0xBE52},
 
69
        {0x6B51, 0xBBC8},
 
70
        {0x6F5C, 0xBE53},
 
71
        {0x7535, 0xC2CB},
 
72
        {0x7536, 0xC1AB},
 
73
        {0x7737, 0xC2CC},
 
74
        {0x782D, 0xC456},
 
75
        {0x782E, 0xC361},
 
76
        {0x7864, 0xC3BA},
 
77
        {0x7865, 0xC3B9},
 
78
        {0x7866, 0xC3BB},
 
79
        {0x7962, 0xC457},
 
80
        {0x7d4c, 0x0000}
 
81
};
 
82
 
 
83
/* map Big5 Level 2 to CNS 11643-1992 Plane 2 */
 
84
static codes_t big5Level2ToCnsPlane2[48] = {    /* range */
 
85
        {0xC940, 0x2121},
 
86
        {0xc94a, 0x0000},
 
87
        {0xC94B, 0x212B},
 
88
        {0xC96C, 0x214D},
 
89
        {0xC9BE, 0x214C},
 
90
        {0xC9BF, 0x217D},
 
91
        {0xC9ED, 0x224E},
 
92
        {0xCAF7, 0x224D},
 
93
        {0xCAF8, 0x2439},
 
94
        {0xD77A, 0x3F6A},
 
95
        {0xD77B, 0x387E},
 
96
        {0xDBA7, 0x3F6B},
 
97
        {0xDDFC, 0x4176},
 
98
        {0xDDFD, 0x4424},
 
99
        {0xE8A3, 0x554C},
 
100
        {0xE976, 0x5723},
 
101
        {0xEB5B, 0x5A29},
 
102
        {0xEBF1, 0x554B},
 
103
        {0xEBF2, 0x5B3F},
 
104
        {0xECDE, 0x5722},
 
105
        {0xECDF, 0x5C6A},
 
106
        {0xEDAA, 0x5D75},
 
107
        {0xEEEB, 0x642F},
 
108
        {0xEEEC, 0x6039},
 
109
        {0xF056, 0x5D74},
 
110
        {0xF057, 0x6243},
 
111
        {0xF0CB, 0x5A28},
 
112
        {0xF0CC, 0x6337},
 
113
        {0xF163, 0x6430},
 
114
        {0xF16B, 0x6761},
 
115
        {0xF16C, 0x6438},
 
116
        {0xF268, 0x6934},
 
117
        {0xF269, 0x6573},
 
118
        {0xF2C3, 0x664E},
 
119
        {0xF375, 0x6762},
 
120
        {0xF466, 0x6935},
 
121
        {0xF4B5, 0x664D},
 
122
        {0xF4B6, 0x6962},
 
123
        {0xF4FD, 0x6A4C},
 
124
        {0xF663, 0x6A4B},
 
125
        {0xF664, 0x6C52},
 
126
        {0xF977, 0x7167},
 
127
        {0xF9C4, 0x7166},
 
128
        {0xF9C5, 0x7234},
 
129
        {0xF9C6, 0x7240},
 
130
        {0xF9C7, 0x7235},
 
131
        {0xF9D2, 0x7241},
 
132
        {0xf9d6, 0x0000}
 
133
};
 
134
 
 
135
/* map CNS 11643-1992 Plane 2 to Big5 Level 2 */
 
136
static codes_t cnsPlane2ToBig5Level2[49] = {    /* range */
 
137
        {0x2121, 0xC940},
 
138
        {0x212B, 0xC94B},
 
139
        {0x214C, 0xC9BE},
 
140
        {0x214D, 0xC96C},
 
141
        {0x217D, 0xC9BF},
 
142
        {0x224D, 0xCAF7},
 
143
        {0x224E, 0xC9ED},
 
144
        {0x2439, 0xCAF8},
 
145
        {0x387E, 0xD77B},
 
146
        {0x3F6A, 0xD77A},
 
147
        {0x3F6B, 0xDBA7},
 
148
        {0x4424, 0x0000},
 
149
        {0x4176, 0xDDFC},
 
150
        {0x4177, 0x0000},
 
151
        {0x4424, 0xDDFD},
 
152
        {0x554B, 0xEBF1},
 
153
        {0x554C, 0xE8A3},
 
154
        {0x5722, 0xECDE},
 
155
        {0x5723, 0xE976},
 
156
        {0x5A28, 0xF0CB},
 
157
        {0x5A29, 0xEB5B},
 
158
        {0x5B3F, 0xEBF2},
 
159
        {0x5C6A, 0xECDF},
 
160
        {0x5D74, 0xF056},
 
161
        {0x5D75, 0xEDAA},
 
162
        {0x6039, 0xEEEC},
 
163
        {0x6243, 0xF057},
 
164
        {0x6337, 0xF0CC},
 
165
        {0x642F, 0xEEEB},
 
166
        {0x6430, 0xF163},
 
167
        {0x6438, 0xF16C},
 
168
        {0x6573, 0xF269},
 
169
        {0x664D, 0xF4B5},
 
170
        {0x664E, 0xF2C3},
 
171
        {0x6761, 0xF16B},
 
172
        {0x6762, 0xF375},
 
173
        {0x6934, 0xF268},
 
174
        {0x6935, 0xF466},
 
175
        {0x6962, 0xF4B6},
 
176
        {0x6A4B, 0xF663},
 
177
        {0x6A4C, 0xF4FD},
 
178
        {0x6C52, 0xF664},
 
179
        {0x7166, 0xF9C4},
 
180
        {0x7167, 0xF977},
 
181
        {0x7234, 0xF9C5},
 
182
        {0x7235, 0xF9C7},
 
183
        {0x7240, 0xF9C6},
 
184
        {0x7241, 0xF9D2},
 
185
        {0x7245, 0x0000}
 
186
};
 
187
 
 
188
/*
 * Individual Big5 Level 1 code points that correspond to CNS 11643-1992
 * Plane 4.  Column 0 is the Big5 code, column 1 the CNS code; the table is
 * scanned linearly in both conversion directions.
 */
static unsigned short b1c4[][2] = {
	/* Big5    CNS plane 4 */
	{ 0xC879, 0x2123 },
	{ 0xC87B, 0x2124 },
	{ 0xC87D, 0x212A },
	{ 0xC8A2, 0x2152 }
};
 
195
 
 
196
/*
 * Individual Big5 Level 2 code points that correspond to CNS 11643-1992
 * Plane 3.  Column 0 is the Big5 code, column 1 the CNS code; the table is
 * scanned linearly in both conversion directions.
 */
static unsigned short b2c3[][2] = {
	/* Big5    CNS plane 3 */
	{ 0xF9D6, 0x4337 },
	{ 0xF9D7, 0x4F50 },
	{ 0xF9D8, 0x444E },
	{ 0xF9D9, 0x504A },
	{ 0xF9DA, 0x2C5D },
	{ 0xF9DB, 0x3D7E },
	{ 0xF9DC, 0x4B5C }
};
 
206
 
 
207
static unsigned short BinarySearchRange
 
208
                        (codes_t *array, int high, unsigned short code)
 
209
{
 
210
        int                     low,
 
211
                                mid,
 
212
                                distance,
 
213
                                tmp;
 
214
 
 
215
        low = 0;
 
216
        mid = high >> 1;
 
217
 
 
218
        for (; low <= high; mid = (low + high) >> 1)
 
219
        {
 
220
                if ((array[mid].code <= code) && (array[mid + 1].code > code))
 
221
                {
 
222
                        if (0 == array[mid].peer)
 
223
                                return 0;
 
224
                        if (code >= 0xa140U)
 
225
                        {
 
226
                                /* big5 to cns */
 
227
                                tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
 
228
                                high = code & 0x00ff;
 
229
                                low = array[mid].code & 0x00ff;
 
230
 
 
231
                                /*
 
232
                                 * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e,
 
233
                                 * 0xa1-0xfe (radicals: 0x00-0x3e, 0x3f-0x9c) big5 radix is
 
234
                                 * 0x9d.                                         [region_low, region_high] We
 
235
                                 * should remember big5 has two different regions (above).
 
236
                                 * There is a bias for the distance between these regions.
 
237
                                 * 0xa1 - 0x7e + bias = 1 (Distance between 0xa1 and 0x7e is
 
238
                                 * 1.) bias = - 0x22.
 
239
                                 */
 
240
                                distance = tmp * 0x9d + high - low +
 
241
                                        (high >= 0xa1 ? (low >= 0xa1 ? 0 : -0x22)
 
242
                                         : (low >= 0xa1 ? +0x22 : 0));
 
243
 
 
244
                                /*
 
245
                                 * NOTE: we have to convert the distance into a code point.
 
246
                                 * The code point's low_byte is 0x21 plus mod_0x5e. In the
 
247
                                 * first, we extract the mod_0x5e of the starting code point,
 
248
                                 * subtracting 0x21, and add distance to it. Then we calculate
 
249
                                 * again mod_0x5e of them, and restore the final codepoint,
 
250
                                 * adding 0x21.
 
251
                                 */
 
252
                                tmp = (array[mid].peer & 0x00ff) + distance - 0x21;
 
253
                                tmp = (array[mid].peer & 0xff00) + ((tmp / 0x5e) << 8)
 
254
                                        + 0x21 + tmp % 0x5e;
 
255
                                return tmp;
 
256
                        }
 
257
                        else
 
258
                        {
 
259
                                /* cns to big5 */
 
260
                                tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
 
261
 
 
262
                                /*
 
263
                                 * NOTE: ISO charsets ranges between 0x21-0xfe (94charset).
 
264
                                 * Its radix is 0x5e. But there is no distance bias like big5.
 
265
                                 */
 
266
                                distance = tmp * 0x5e
 
267
                                        + ((int) (code & 0x00ff) - (int) (array[mid].code & 0x00ff));
 
268
 
 
269
                                /*
 
270
                                 * NOTE: Similar to big5 to cns conversion, we extract
 
271
                                 * mod_0x9d and restore mod_0x9d into a code point.
 
272
                                 */
 
273
                                low = array[mid].peer & 0x00ff;
 
274
                                tmp = low + distance - (low >= 0xa1 ? 0x62 : 0x40);
 
275
                                low = tmp % 0x9d;
 
276
                                tmp = (array[mid].peer & 0xff00) + ((tmp / 0x9d) << 8)
 
277
                                        + (low > 0x3e ? 0x62 : 0x40) + low;
 
278
                                return tmp;
 
279
                        }
 
280
                }
 
281
                else if (array[mid].code > code)
 
282
                        high = mid - 1;
 
283
                else
 
284
                        low = mid + 1;
 
285
        }
 
286
 
 
287
        return 0;
 
288
}
 
289
 
 
290
 
 
291
unsigned short
 
292
BIG5toCNS(unsigned short big5, unsigned char *lc)
 
293
{
 
294
        unsigned short cns = 0;
 
295
        int                     i;
 
296
 
 
297
        if (big5 < 0xc940U)
 
298
        {
 
299
                /* level 1 */
 
300
 
 
301
                for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
 
302
                {
 
303
                        if (b1c4[i][0] == big5)
 
304
                        {
 
305
                                *lc = LC_CNS11643_4;
 
306
                                return (b1c4[i][1] | 0x8080U);
 
307
                        }
 
308
                }
 
309
 
 
310
                if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5)))
 
311
                        *lc = LC_CNS11643_1;
 
312
        }
 
313
        else if (big5 == 0xc94aU)
 
314
        {
 
315
                /* level 2 */
 
316
                *lc = LC_CNS11643_1;
 
317
                cns = 0x4442;
 
318
        }
 
319
        else
 
320
        {
 
321
                /* level 2 */
 
322
                for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
 
323
                {
 
324
                        if (b2c3[i][0] == big5)
 
325
                        {
 
326
                                *lc = LC_CNS11643_3;
 
327
                                return (b2c3[i][1] | 0x8080U);
 
328
                        }
 
329
                }
 
330
 
 
331
                if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5)))
 
332
                        *lc = LC_CNS11643_2;
 
333
        }
 
334
 
 
335
        if (0 == cns)
 
336
        {                                                       /* no mapping Big5 to CNS 11643-1992 */
 
337
                *lc = 0;
 
338
                return (unsigned short) '?';
 
339
        }
 
340
 
 
341
        return cns | 0x8080;
 
342
}
 
343
 
 
344
unsigned short
 
345
CNStoBIG5(unsigned short cns, unsigned char lc)
 
346
{
 
347
        int                     i;
 
348
        unsigned int big5 = 0;
 
349
 
 
350
        cns &= 0x7f7f;
 
351
 
 
352
        switch (lc)
 
353
        {
 
354
                case LC_CNS11643_1:
 
355
                        big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns);
 
356
                        break;
 
357
                case LC_CNS11643_2:
 
358
                        big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns);
 
359
                        break;
 
360
                case LC_CNS11643_3:
 
361
                        for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
 
362
                        {
 
363
                                if (b2c3[i][1] == cns)
 
364
                                        return (b2c3[i][0]);
 
365
                        }
 
366
                        break;
 
367
                case LC_CNS11643_4:
 
368
                        for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
 
369
                        {
 
370
                                if (b1c4[i][1] == cns)
 
371
                                        return (b1c4[i][0]);
 
372
                        }
 
373
                default:
 
374
                        break;
 
375
        }
 
376
        return big5;
 
377
}