~ubuntu-branches/ubuntu/quantal/php5/quantal

« back to all changes in this revision

Viewing changes to ext/mbstring/libmbfl/filters/mbfilter_utf7.c

  • Committer: Bazaar Package Importer
  • Author(s): Sean Finney
  • Date: 2009-07-01 09:12:10 UTC
  • mto: (0.9.1) (1.1.17 upstream)
  • mto: This revision was merged to the branch mainline in revision 58.
  • Revision ID: james.westby@ubuntu.com-20090701091210-go0h6506p62on17r
Tags: upstream-5.3.0
Import upstream version 5.3.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * "streamable kanji code filter and converter"
 
3
 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
 
4
 *
 
5
 * LICENSE NOTICES
 
6
 *
 
7
 * This file is part of "streamable kanji code filter and converter",
 
8
 * which is distributed under the terms of GNU Lesser General Public 
 
9
 * License (version 2) as published by the Free Software Foundation.
 
10
 *
 
11
 * This software is distributed in the hope that it will be useful,
 
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
14
 * GNU Lesser General Public License for more details.
 
15
 *
 
16
 * You should have received a copy of the GNU Lesser General Public
 
17
 * License along with "streamable kanji code filter and converter";
 
18
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
 
19
 * Suite 330, Boston, MA  02111-1307  USA
 
20
 *
 
21
 * The author of this file:
 
22
 *
 
23
 */
 
24
/*
 
25
 * The source code included in this file was separated from mbfilter.c
 
26
 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
 
27
 * 
 
28
 */
 
29
 
 
30
#ifdef HAVE_CONFIG_H
 
31
#include "config.h"
 
32
#endif
 
33
 
 
34
#include "mbfilter.h"
 
35
#include "mbfilter_utf7.h"
 
36
 
 
37
/* Forward declaration: the identify vtable in this file refers to this
 * function before its definition appears. */
static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter);
 
38
 
 
39
/*
 * Base64 alphabet used by the UTF-7 encoder: entry i is the ASCII code that
 * represents the 6-bit value i (0..63).  The values are written as numeric
 * ASCII codes rather than character literals.
 *
 * The table carries a 65th entry (0x00).  Every lookup in this file masks
 * its index to at most 6 bits, so that last entry is never selected by the
 * encoder.
 */
static const unsigned char mbfl_base64_table[] = {
	/* 'A' .. 'M' (values 0..12) */
	0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d,
	/* 'N' .. 'Z' (values 13..25) */
	0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,
	/* 'a' .. 'm' (values 26..38) */
	0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
	/* 'n' .. 'z' (values 39..51) */
	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a,
	/* '0' .. '9' (values 52..61), '+' (62), '/' (63), then a trailing '\0' */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2b, 0x2f, 0x00
};
 
51
 
 
52
/* Alternative spellings of the encoding name; the list ends with NULL. */
static const char *mbfl_encoding_utf7_aliases[] = {
	"utf7",
	NULL
};
 
53
 
 
54
const mbfl_encoding mbfl_encoding_utf7 = {
 
55
        mbfl_no_encoding_utf7,
 
56
        "UTF-7",
 
57
        "UTF-7",
 
58
        (const char *(*)[])&mbfl_encoding_utf7_aliases,
 
59
        NULL,
 
60
        MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
 
61
};
 
62
 
 
63
const struct mbfl_identify_vtbl vtbl_identify_utf7 = {
 
64
        mbfl_no_encoding_utf7,
 
65
        mbfl_filt_ident_common_ctor,
 
66
        mbfl_filt_ident_common_dtor,
 
67
        mbfl_filt_ident_utf7
 
68
};
 
69
 
 
70
const struct mbfl_convert_vtbl vtbl_utf7_wchar = {
 
71
        mbfl_no_encoding_utf7,
 
72
        mbfl_no_encoding_wchar,
 
73
        mbfl_filt_conv_common_ctor,
 
74
        mbfl_filt_conv_common_dtor,
 
75
        mbfl_filt_conv_utf7_wchar,
 
76
        mbfl_filt_conv_common_flush
 
77
};
 
78
 
 
79
const struct mbfl_convert_vtbl vtbl_wchar_utf7 = {
 
80
        mbfl_no_encoding_wchar,
 
81
        mbfl_no_encoding_utf7,
 
82
        mbfl_filt_conv_common_ctor,
 
83
        mbfl_filt_conv_common_dtor,
 
84
        mbfl_filt_conv_wchar_utf7,
 
85
        mbfl_filt_conv_wchar_utf7_flush
 
86
};
 
87
 
 
88
 
 
89
/*
 * Evaluate `call` exactly once; when its result is negative, make the
 * enclosing function return -1 immediately.  Wrapped in do/while(0) so it
 * behaves as a single statement.
 */
#define CK(call) \
	do { \
		if ((call) < 0) { \
			return (-1); \
		} \
	} while (0)
 
90
 
 
91
/*
 
92
 * UTF-7 => wchar
 
93
 */
 
94
/*
 * Return the 6-bit value (0..63) of a Modified Base64 character, or -1 when
 * `c` is not part of the Base64 alphabet.
 */
static int mbfl_utf7_base64_value(int c)
{
	if (c >= 0x41 && c <= 0x5a) {           /* A - Z => 0..25 */
		return c - 65;
	}
	if (c >= 0x61 && c <= 0x7a) {           /* a - z => 26..51 */
		return c - 71;
	}
	if (c >= 0x30 && c <= 0x39) {           /* 0 - 9 => 52..61 */
		return c + 4;
	}
	if (c == 0x2b) {                        /* '+' */
		return 62;
	}
	if (c == 0x2f) {                        /* '/' */
		return 63;
	}
	return -1;
}

/*
 * Handle one complete 16-bit unit decoded from the Base64 stream.
 *
 * `unit`  is the 16-bit value just assembled.
 * `carry` holds the bits of the current sextet that already belong to the
 *         next unit, pre-shifted to their position in filter->cache.
 *
 * A high surrogate is not emitted; it is parked in bits 16 and up of
 * filter->cache (already offset so that a later shift by 6 yields the upper
 * part of the code point).  A low surrogate is combined with whatever is
 * parked there; results outside the supplementary range are emitted tagged
 * with MBFL_WCSGROUP_THROUGH.  Any other unit is emitted unchanged.
 *
 * filter->cache is updated before the output call, so it is already changed
 * if the output function fails.  Returns the output function's result
 * (negative on failure), or 0 when nothing was emitted.
 */
static int mbfl_utf7_decoded_unit(int unit, int carry, mbfl_convert_filter *filter)
{
	if (unit >= 0xd800 && unit < 0xdc00) {
		/* high surrogate: remember it, wait for the low half */
		filter->cache = (((unit & 0x3ff) << 16) + 0x400000) | carry;
		return 0;
	}

	if (unit >= 0xdc00 && unit < 0xe000) {
		/* low surrogate: merge with the parked high half */
		int cp = (unit & 0x3ff) | ((filter->cache & 0xfff0000) >> 6);

		filter->cache = carry;
		if (cp < MBFL_WCSPLANE_SUPMIN || cp >= MBFL_WCSPLANE_SUPMAX) {
			/* illegal character */
			cp &= MBFL_WCSGROUP_MASK;
			cp |= MBFL_WCSGROUP_THROUGH;
		}
		return (*filter->output_function)(cp, filter->data);
	}

	filter->cache = carry;
	return (*filter->output_function)(unit, filter->data);
}

/*
 * UTF-7 => wchar
 *
 * Consumes one input byte `c` and writes decoded characters through
 * filter->output_function.
 *
 * filter->status:
 *   0      directly encoded (plain ASCII) text
 *   1      a '+' shift character was just seen
 *   2..9   inside a Base64 run; eight sextets form three 16-bit units,
 *          completed in states 4, 7 and 9
 * filter->cache accumulates the bits of the unit under construction in its
 * low 16 bits and a pending high surrogate above them.
 *
 * Returns `c`, or -1 as soon as the output function reports a failure.
 */
int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
{
	int s, n;

	n = -1;
	if (filter->status != 0) {              /* Modified Base64 */
		n = mbfl_utf7_base64_value(c);
		if (n < 0) {
			/* any non-Base64 character ends the run */
			if (c == 0x2d) {
				if (filter->status == 1) {      /* "+-" -> "+" */
					CK((*filter->output_function)(0x2b, filter->data));
				}
				/* otherwise the '-' is absorbed */
			} else if (c >= 0 && c < 0x80) {        /* ASCII exclude '-' */
				CK((*filter->output_function)(c, filter->data));
			} else {                                /* illegal character */
				s = c & MBFL_WCSGROUP_MASK;
				s |= MBFL_WCSGROUP_THROUGH;
				CK((*filter->output_function)(s, filter->data));
			}
			filter->cache = 0;
			filter->status = 0;
			return c;
		}
	}

	switch (filter->status) {
	/* directly encoded characters */
	case 0:
		if (c == 0x2b) {                        /* '+'  shift character */
			filter->status = 1;
		} else if (c >= 0 && c < 0x80) {        /* ASCII */
			CK((*filter->output_function)(c, filter->data));
		} else {                                /* illegal character */
			s = c & MBFL_WCSGROUP_MASK;
			s |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(s, filter->data));
		}
		break;

	/* decode Modified Base64: first unit = 6 + 6 + 4 bits */
	case 1:
	case 2:
		filter->cache |= n << 10;
		filter->status = 3;
		break;
	case 3:
		filter->cache |= n << 4;
		filter->status = 4;
		break;
	case 4:
		s = ((n >> 2) & 0xf) | (filter->cache & 0xffff);
		filter->status = 5;
		/* low 2 bits of this sextet start the second unit */
		CK(mbfl_utf7_decoded_unit(s, (n & 0x3) << 14, filter));
		break;

	/* second unit = 2 (carried) + 6 + 6 + 2 bits */
	case 5:
		filter->cache |= n << 8;
		filter->status = 6;
		break;
	case 6:
		filter->cache |= n << 2;
		filter->status = 7;
		break;
	case 7:
		s = ((n >> 4) & 0x3) | (filter->cache & 0xffff);
		filter->status = 8;
		/* low 4 bits of this sextet start the third unit */
		CK(mbfl_utf7_decoded_unit(s, (n & 0xf) << 12, filter));
		break;

	/* third unit = 4 (carried) + 6 + 6 bits */
	case 8:
		filter->cache |= n << 6;
		filter->status = 9;
		break;
	case 9:
		s = n | (filter->cache & 0xffff);
		filter->status = 2;
		/* sextet fully consumed: nothing carries over */
		CK(mbfl_utf7_decoded_unit(s, 0, filter));
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
 
243
 
 
244
/*
 
245
 * wchar => UTF-7
 
246
 */
 
247
/*
 * Classify an ASCII character (0 <= c < 0x80) for the UTF-7 encoder.
 *
 * Returns
 *   1  written directly; when it follows a Base64 run an explicit '-' is
 *      emitted first (letters, digits, '/', '-' and '\0')
 *   2  written directly; ends a Base64 run without an explicit '-'
 *      (space, tab, CR, LF and ' ( ) , . : ?)
 *   0  not written directly: goes through Modified Base64 (this includes
 *      '+' itself)
 */
static int mbfl_utf7_direct_class(int c)
{
	if ((c >= 0x41 && c <= 0x5a) ||         /* A - Z */
	    (c >= 0x61 && c <= 0x7a) ||         /* a - z */
	    (c >= 0x30 && c <= 0x39)) {         /* 0 - 9 */
		return 1;
	}

	switch (c) {
	case 0x00:      /* '\0' */
	case 0x2f:      /* '/' */
	case 0x2d:      /* '-' */
		return 1;

	case 0x20:      /* SPACE */
	case 0x09:      /* HTAB */
	case 0x0d:      /* CR */
	case 0x0a:      /* LF */
	case 0x27:      /* "'" */
	case 0x28:      /* '(' */
	case 0x29:      /* ')' */
	case 0x2c:      /* ',' */
	case 0x2e:      /* '.' */
	case 0x3a:      /* ':' */
	case 0x3f:      /* '?' */
		return 2;

	default:
		return 0;
	}
}

/*
 * Finish a Base64 run in front of the directly written character `c`:
 * emit '-' when `cls` is 1, emit `c`, then return to state 0.  The state is
 * only reset after both writes succeeded.  Returns 0, or -1 on output
 * failure.
 */
static int mbfl_utf7_close_run(int cls, int c, mbfl_convert_filter *filter)
{
	if (cls == 1) {
		CK((*filter->output_function)(0x2d, filter->data));     /* '-' */
	}
	CK((*filter->output_function)(c, filter->data));
	filter->status = 0;
	return 0;
}

/*
 * wchar => UTF-7
 *
 * Encodes one character `c`.  Supplementary-plane characters are split into
 * a surrogate pair and fed back through filter->filter_function one half at
 * a time.  Values that are neither in the BMP nor in the supplementary
 * range are passed to mbfl_filt_conv_illegal_output() unless the illegal
 * mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * filter->status:
 *   0  outside Base64
 *   1  inside Base64, cache holds one pending 16-bit unit
 *   2  inside Base64, cache holds 4 leftover bits + one 16-bit unit
 *   3  inside Base64, cache holds 2 leftover bits + one 16-bit unit
 *
 * Returns `c`, or -1 as soon as an output call reports a failure.
 */
int mbfl_filt_conv_wchar_utf7(int c, mbfl_convert_filter *filter)
{
	int s, n;

	n = 0;
	if (c >= 0 && c < 0x80) {       /* ASCII */
		n = mbfl_utf7_direct_class(c);
	} else if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
		;       /* BMP: always Base64, n stays 0 */
	} else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
		/* high surrogate first, then low surrogate */
		s = ((c >> 10) - 0x40) | 0xd800;
		CK((*filter->filter_function)(s, filter));
		s = (c & 0x3ff) | 0xdc00;
		CK((*filter->filter_function)(s, filter));
		return c;
	} else {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	switch (filter->status) {
	case 0:
		if (n != 0) {   /* directly encode characters */
			CK((*filter->output_function)(c, filter->data));
		} else {        /* Modified Base64 */
			CK((*filter->output_function)(0x2b, filter->data));     /* '+' */
			filter->status++;
			filter->cache = c;
		}
		break;

	/* encode Modified Base64 */
	case 1:
		s = filter->cache;
		CK((*filter->output_function)(mbfl_base64_table[(s >> 10) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(s >> 4) & 0x3f], filter->data));
		if (n != 0) {
			/* pad the last 4 bits with zeros and leave Base64 */
			CK((*filter->output_function)(mbfl_base64_table[(s << 2) & 0x3c], filter->data));
			CK(mbfl_utf7_close_run(n, c, filter));
		} else {
			filter->status++;
			filter->cache = ((s & 0xf) << 16) | c;
		}
		break;

	case 2:
		s = filter->cache;
		CK((*filter->output_function)(mbfl_base64_table[(s >> 14) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(s >> 8) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(s >> 2) & 0x3f], filter->data));
		if (n != 0) {
			/* pad the last 2 bits with zeros and leave Base64 */
			CK((*filter->output_function)(mbfl_base64_table[(s << 4) & 0x30], filter->data));
			CK(mbfl_utf7_close_run(n, c, filter));
		} else {
			filter->status++;
			filter->cache = ((s & 0x3) << 16) | c;
		}
		break;

	case 3:
		s = filter->cache;
		CK((*filter->output_function)(mbfl_base64_table[(s >> 12) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(s >> 6) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[s & 0x3f], filter->data));
		if (n != 0) {
			/* no leftover bits at this point */
			CK(mbfl_utf7_close_run(n, c, filter));
		} else {
			filter->status = 1;
			filter->cache = c;
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
 
375
 
 
376
/*
 * Flush the wchar => UTF-7 encoder at end of input.
 *
 * If a Base64 run is still open (status 1..3) the bits held in the cache
 * are written out, zero-padded to a whole sextet, followed by the closing
 * '-'.  Status and cache are reset before anything is written, so the
 * filter is back in its initial state even when an output call fails.
 *
 * Returns 0, or -1 as soon as the output function reports a failure.
 */
int mbfl_filt_conv_wchar_utf7_flush(mbfl_convert_filter *filter)
{
	int status, cache;

	status = filter->status;
	cache = filter->cache;
	filter->status = 0;
	filter->cache = 0;

	/* flush fragments */
	switch (status) {
	case 1:         /* 16 pending bits */
		CK((*filter->output_function)(mbfl_base64_table[(cache >> 10) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(cache >> 4) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(cache << 2) & 0x3c], filter->data));
		break;

	case 2:         /* 20 pending bits */
		CK((*filter->output_function)(mbfl_base64_table[(cache >> 14) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(cache >> 8) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(cache >> 2) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(cache << 4) & 0x30], filter->data));
		break;

	case 3:         /* 18 pending bits */
		CK((*filter->output_function)(mbfl_base64_table[(cache >> 12) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(cache >> 6) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[cache & 0x3f], filter->data));
		break;

	default:        /* not inside a Base64 run: nothing to write */
		return 0;
	}

	/* every open run is terminated explicitly */
	CK((*filter->output_function)(0x2d, filter->data));             /* '-' */
	return 0;
}
 
410
 
 
411
/*
 * Identify filter for UTF-7: examines one input byte `c` and sets
 * filter->flag to 1 when the input cannot be UTF-7.
 *
 * filter->status:
 *   0  directly encoded text
 *   1  a '+' shift character was just seen
 *   2  inside a Base64 run (at least one Base64 character consumed)
 *
 * Always returns `c`.
 */
static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter)
{
	int n;

	switch (filter->status) {
	/* directly encoded characters */
	case 0:
		if (c == 0x2b) {        /* '+'  shift character */
			filter->status++;
		} else if (c == 0x5c || c == 0x7e || c < 0 || c > 0x7f) {
			/* '\\', '~' and anything outside ASCII are rejected */
			filter->flag = 1;       /* bad */
		}
		break;

	/* Modified Base64 */
	case 1:
	case 2:
		n = (c >= 0x41 && c <= 0x5a)            /* A - Z */
		    || (c >= 0x61 && c <= 0x7a)         /* a - z */
		    || (c >= 0x30 && c <= 0x39)         /* 0 - 9 */
		    || c == 0x2b                        /* '+' */
		    || c == 0x2f;                       /* '/' */

		if (n) {
			filter->status = 2;
			break;
		}

		/*
		 * The run ends here.  Directly after '+' only '-' may follow;
		 * later in the run any ASCII character may end it.
		 */
		if ((filter->status == 1 && c != 0x2d) || c < 0 || c > 0x7f) {
			filter->flag = 1;       /* bad */
		}
		filter->status = 0;
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
 
459
 
 
460