~ubuntu-branches/ubuntu/trusty/python3.4/trusty-proposed

« back to all changes in this revision

Viewing changes to Modules/cjkcodecs/_codecs_hk.c

  • Committer: Package Import Robot
  • Author(s): Matthias Klose
  • Date: 2013-11-25 09:44:27 UTC
  • Revision ID: package-import@ubuntu.com-20131125094427-lzxj8ap5w01lmo7f
Tags: upstream-3.4~b1
Import upstream version 3.4~b1

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * _codecs_hk.c: Codecs collection for encodings from Hong Kong
 
3
 *
 
4
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 
5
 */
 
6
 
 
7
#define USING_IMPORTED_MAPS
 
8
 
 
9
#include "cjkcodecs.h"
 
10
#include "mappings_hk.h"
 
11
 
 
12
/*
 
13
 * BIG5HKSCS codec
 
14
 */
 
15
 
 
16
/* Encode-side Big5 map pointer; stays NULL until CODEC_INIT(big5hkscs)
 * fills it in through IMPORT_MAP(tw, big5, ...). */
static const encode_map *big5_encmap = NULL;
 
17
/* Decode-side Big5 map pointer; stays NULL until CODEC_INIT(big5hkscs)
 * fills it in through IMPORT_MAP(tw, big5, ...). */
static const decode_map *big5_decmap = NULL;
 
18
 
 
19
CODEC_INIT(big5hkscs)
 
20
{
 
21
    static int initialized = 0;
 
22
 
 
23
    if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
 
24
        return -1;
 
25
    initialized = 1;
 
26
    return 0;
 
27
}
 
28
 
 
29
/*
 
30
 * HKSCS 2004 maps four Unicode character pairs to a single big5hkscs code each:
 
31
 *  U+00CA U+0304 -> 8862  (U+00CA alone is mapped to 8866)
 
32
 *  U+00CA U+030C -> 8864
 
33
 *  U+00EA U+0304 -> 88a3  (U+00EA alone is mapped to 88a7)
 
34
 *  U+00EA U+030C -> 88a5
 
35
 * These are handled not by mapping tables but by hand-written code.
 
36
 */
 
37
/* Codes for the four combining pairs, in the index order the encoder
 * computes with ((c >> 4) | (c2 >> 3)) & 3:
 *   bit 1 set -> base character is U+00EA (clear: U+00CA)
 *   bit 0 set -> following mark is U+030C (clear: U+0304) */
static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
 
38
 
 
39
ENCODER(big5hkscs)
 
40
{
 
41
    while (*inpos < inlen) {
 
42
        Py_UCS4 c = INCHAR1;
 
43
        DBCHAR code;
 
44
        Py_ssize_t insize;
 
45
 
 
46
        if (c < 0x80) {
 
47
            REQUIRE_OUTBUF(1);
 
48
            **outbuf = (unsigned char)c;
 
49
            NEXT(1, 1);
 
50
            continue;
 
51
        }
 
52
 
 
53
        insize = 1;
 
54
        REQUIRE_OUTBUF(2);
 
55
 
 
56
        if (c < 0x10000) {
 
57
            if (TRYMAP_ENC(big5hkscs_bmp, code, c)) {
 
58
                if (code == MULTIC) {
 
59
                    Py_UCS4 c2;
 
60
                    if (inlen - *inpos >= 2)
 
61
                        c2 = INCHAR2;
 
62
                    else
 
63
                        c2 = 0;
 
64
 
 
65
                    if (inlen - *inpos >= 2 &&
 
66
                        ((c & 0xffdf) == 0x00ca) &&
 
67
                        ((c2 & 0xfff7) == 0x0304)) {
 
68
                        code = big5hkscs_pairenc_table[
 
69
                            ((c >> 4) |
 
70
                             (c2 >> 3)) & 3];
 
71
                        insize = 2;
 
72
                    }
 
73
                    else if (inlen - *inpos < 2 &&
 
74
                             !(flags & MBENC_FLUSH))
 
75
                        return MBERR_TOOFEW;
 
76
                    else {
 
77
                        if (c == 0xca)
 
78
                            code = 0x8866;
 
79
                        else /* c == 0xea */
 
80
                            code = 0x88a7;
 
81
                    }
 
82
                }
 
83
            }
 
84
            else if (TRYMAP_ENC(big5, code, c))
 
85
                ;
 
86
            else
 
87
                return 1;
 
88
        }
 
89
        else if (c < 0x20000)
 
90
            return insize;
 
91
        else if (c < 0x30000) {
 
92
            if (TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff))
 
93
                ;
 
94
            else
 
95
                return insize;
 
96
        }
 
97
        else
 
98
            return insize;
 
99
 
 
100
        OUTBYTE1(code >> 8);
 
101
        OUTBYTE2(code & 0xFF);
 
102
        NEXT(insize, 2);
 
103
    }
 
104
 
 
105
    return 0;
 
106
}
 
107
 
 
108
/* Flatten a (lead byte, trail byte) pair into a linear index: lead bytes are
 * counted from 0x87, trail bytes from 0x40, with (0xfe - 0x40 + 1) trail
 * slots per lead byte. */
#define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
 
109
 
 
110
DECODER(big5hkscs)
 
111
{
 
112
    while (inleft > 0) {
 
113
        unsigned char c = INBYTE1;
 
114
        Py_UCS4 decoded;
 
115
 
 
116
        if (c < 0x80) {
 
117
            OUTCHAR(c);
 
118
            NEXT_IN(1);
 
119
            continue;
 
120
        }
 
121
 
 
122
        REQUIRE_INBUF(2);
 
123
 
 
124
        if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
 
125
            if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
 
126
                OUTCHAR(decoded);
 
127
                NEXT_IN(2);
 
128
                continue;
 
129
            }
 
130
        }
 
131
 
 
132
        if (TRYMAP_DEC(big5hkscs, decoded, c, INBYTE2))
 
133
        {
 
134
            int s = BH2S(c, INBYTE2);
 
135
            const unsigned char *hintbase;
 
136
 
 
137
            assert(0x87 <= c && c <= 0xfe);
 
138
            assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe);
 
139
 
 
140
            if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
 
141
                    hintbase = big5hkscs_phint_0;
 
142
                    s -= BH2S(0x87, 0x40);
 
143
            }
 
144
            else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
 
145
                    hintbase = big5hkscs_phint_12130;
 
146
                    s -= BH2S(0xc6, 0xa1);
 
147
            }
 
148
            else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
 
149
                    hintbase = big5hkscs_phint_21924;
 
150
                    s -= BH2S(0xf9, 0xd6);
 
151
            }
 
152
            else
 
153
                    return MBERR_INTERNAL;
 
154
 
 
155
            if (hintbase[s >> 3] & (1 << (s & 7))) {
 
156
                    OUTCHAR(decoded | 0x20000);
 
157
                    NEXT_IN(2);
 
158
            }
 
159
            else {
 
160
                    OUTCHAR(decoded);
 
161
                    NEXT_IN(2);
 
162
            }
 
163
            continue;
 
164
        }
 
165
 
 
166
        switch ((c << 8) | INBYTE2) {
 
167
        case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
 
168
        case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
 
169
        case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
 
170
        case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
 
171
        default: return 1;
 
172
        }
 
173
 
 
174
        NEXT_IN(2); /* all decoded codepoints are pairs, above. */
 
175
    }
 
176
 
 
177
    return 0;
 
178
}
 
179
 
 
180
 
 
181
BEGIN_MAPPINGS_LIST
 
182
  MAPPING_DECONLY(big5hkscs)
 
183
  MAPPING_ENCONLY(big5hkscs_bmp)
 
184
  MAPPING_ENCONLY(big5hkscs_nonbmp)
 
185
END_MAPPINGS_LIST
 
186
 
 
187
BEGIN_CODECS_LIST
 
188
  CODEC_STATELESS_WINIT(big5hkscs)
 
189
END_CODECS_LIST
 
190
 
 
191
/* Module definition macro invoked with the "hk" suffix.
 * NOTE(review): presumably expands to the extension-module init code in
 * cjkcodecs.h -- confirm against that header. */
I_AM_A_MODULE_FOR(hk)