2
* "streamable kanji code filter and converter"
3
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
7
* This file is part of "streamable kanji code filter and converter",
8
* which is distributed under the terms of GNU Lesser General Public
9
* License (version 2) as published by the Free Software Foundation.
11
* This software is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
16
* You should have received a copy of the GNU Lesser General Public
17
* License along with "streamable kanji code filter and converter";
18
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19
* Suite 330, Boston, MA 02111-1307 USA
21
* The author of this file:
25
* the source code included in this file was separated from mbfilter_sjis_open.c
26
* by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
35
#include "mbfilter_sjis_mac.h"
37
#include "unicode_table_cp932_ext.h"
38
#include "unicode_table_jis.h"
40
#include "sjis_mac2uni.h"
42
extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
43
extern const unsigned char mblen_table_sjis[];
45
static int mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter);
47
/* Alias names for this encoding; the array is terminated by a NULL entry. */
static const char *mbfl_encoding_sjis_mac_aliases[] = {"MacJapanese", "x-Mac-Japanese", NULL};
49
/*
 * Encoding descriptor for SJIS-mac.
 * NOTE(review): only part of the initializer is visible in this listing;
 * the comments below describe just the entries that can be seen.
 */
const mbfl_encoding mbfl_encoding_sjis_mac = {
50
mbfl_no_encoding_sjis_mac,
53
/* pointer to the NULL-terminated alias array defined above this descriptor */
(const char *(*)[])&mbfl_encoding_sjis_mac_aliases,
55
/* encoding-type bits: MBCS combined with GL_UNSAFE */
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
58
/*
 * Identify-filter table for SJIS-mac; it is wired to the common identify
 * constructor and destructor. Remaining entries are not visible here.
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis_mac = {
59
mbfl_no_encoding_sjis_mac,
60
mbfl_filt_ident_common_ctor,
61
mbfl_filt_ident_common_dtor,
65
/*
 * Convert-filter table pairing mbfl_no_encoding_sjis_mac with
 * mbfl_no_encoding_wchar. It uses the common constructor/destructor,
 * mbfl_filt_conv_sjis_mac_wchar as the per-character filter function and
 * the common flush routine.
 */
const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar = {
66
mbfl_no_encoding_sjis_mac,
67
mbfl_no_encoding_wchar,
68
mbfl_filt_conv_common_ctor,
69
mbfl_filt_conv_common_dtor,
70
mbfl_filt_conv_sjis_mac_wchar,
71
mbfl_filt_conv_common_flush
74
/*
 * Convert-filter table pairing mbfl_no_encoding_wchar with
 * mbfl_no_encoding_sjis_mac. Unlike vtbl_sjis_mac_wchar it installs the
 * encoding-specific mbfl_filt_conv_sjis_mac_flush instead of the common
 * flush, because the wchar => SJIS-mac filter can still hold a cached code
 * point when the input ends.
 */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac = {
75
mbfl_no_encoding_wchar,
76
mbfl_no_encoding_sjis_mac,
77
mbfl_filt_conv_common_ctor,
78
mbfl_filt_conv_common_dtor,
79
mbfl_filt_conv_wchar_sjis_mac,
80
mbfl_filt_conv_sjis_mac_flush
83
/*
 * Evaluate `statement`; if its result is negative, return -1 from the
 * enclosing function. Wrapped in do { } while (0) so it acts as one statement.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
85
#define SJIS_ENCODE(c1,c2,s1,s2) \
106
#define SJIS_DECODE(c1,c2,s1,s2) \
132
/*
 * SJIS-mac => wchar: process one input byte `c`.
 *
 * The handling is selected by filter->status. Every result is passed to
 * filter->output_function together with filter->data; CK() returns -1 from
 * this function as soon as one of those calls reports a negative value.
 *
 * NOTE(review): this listing is fragmentary (statements, case labels and
 * braces are missing between the visible lines), so the comments below
 * describe only what the visible statements do.
 */
mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter)
135
int c1, s, s1, s2, w;
137
switch (filter->status) {
139
if (c >= 0 && c < 0x80 && c != 0x5c) { /* latin */
140
CK((*filter->output_function)(c, filter->data));
141
} else if (c > 0xa0 && c < 0xe0) { /* kana */
142
/* single-byte kana: the emitted code point is the byte value plus 0xfec0 */
CK((*filter->output_function)(0xfec0 + c, filter->data));
143
} else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
146
} else if (c == 0x5c) {
147
/* byte 0x5c is emitted as U+00A5 YEN SIGN */
CK((*filter->output_function)(0x00a5, filter->data));
148
} else if (c == 0x80) {
149
/* byte 0x80 is emitted as U+005C REVERSE SOLIDUS */
CK((*filter->output_function)(0x005c, filter->data));
150
} else if (c == 0xa0) {
151
/* byte 0xa0 is emitted as U+00A0 NO-BREAK SPACE */
CK((*filter->output_function)(0x00a0, filter->data));
152
} else if (c == 0xfd) {
153
/* byte 0xfd is emitted as U+00A9 COPYRIGHT SIGN */
CK((*filter->output_function)(0x00a9, filter->data));
154
} else if (c == 0xfe) {
155
/* byte 0xfe is emitted as U+2122 TRADE MARK SIGN */
CK((*filter->output_function)(0x2122, filter->data));
156
} else if (c == 0xff) {
157
/* byte 0xff is emitted as two code points: U+2026 HORIZONTAL ELLIPSIS
 * followed by the private-use code point U+F87F */
CK((*filter->output_function)(0x2026, filter->data));
158
CK((*filter->output_function)(0xf87f, filter->data));
160
/* keep only the bits selected by MBFL_WCSGROUP_MASK, tag the value with
 * MBFL_WCSGROUP_THROUGH and emit it */
w = c & MBFL_WCSGROUP_MASK;
161
w |= MBFL_WCSGROUP_THROUGH;
162
CK((*filter->output_function)(w, filter->data));
166
case 1: /* kanji second char */
169
if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
171
SJIS_DECODE(c1, c, s1, s2);
172
/* linear table index: 94 cells per row, both coordinates offset by 0x21 */
s = (s1 - 0x21)*94 + s2 - 0x21;
175
w = 0x2014; /* U+2014 EM DASH */
176
} else if (s == 0x1f) {
177
w = 0xff3c; /* U+FF3C FULLWIDTH REVERSE SOLIDUS */
178
} else if (s == 0x20) {
179
w = 0x301c; /* U+301C WAVE DASH */
180
} else if (s == 0x21) {
181
w = 0x2016; /* U+2016 DOUBLE VERTICAL LINE */
182
} else if (s == 0x3c) {
183
w = 0x2212; /* U+2212 MINUS SIGN */
184
} else if (s == 0x50) {
185
w = 0x00a2; /* U+00A2 CENT SIGN */
186
} else if (s == 0x51) {
187
w = 0x00a3; /* U+00A3 POUND SIGN */
188
} else if (s == 0x89) {
189
w = 0x00ac; /* U+00AC NOT SIGN */
193
/* apple gaiji area 0x8540 - 0x886d */
195
/* code_tbl rows are used as { first index, last index, first code point }:
 * an index inside [row[0], row[1]] maps to row[2] plus its offset */
for (i=0; i<7; i++) {
196
if (s >= code_tbl[i][0] && s <= code_tbl[i][1]) {
197
w = s - code_tbl[i][0] + code_tbl[i][2];
205
/* code_tbl_m rows start with the table index, followed by a sequence of
 * code points; entries 1 .. n-2 are emitted here and the last one
 * (n-1) is left in w. n is assigned on lines not shown in this listing. */
for (i=0; i<code_tbl_m_len; i++) {
206
if (s == code_tbl_m[i][0]) {
207
if (code_tbl_m[i][1] == 0xf860) {
209
} else if (code_tbl_m[i][1] == 0xf861) {
214
for (j=1; j<n-1; j++) {
215
CK((*filter->output_function)(code_tbl_m[i][j], filter->data));
217
w = code_tbl_m[i][n-1];
224
/* code_ofst_tbl rows give an index range; code_map[i] holds the code
 * points for that range, indexed by the offset from the range start */
for (i=0; i<8; i++) {
225
if (s >= code_ofst_tbl[i][0] && s <= code_ofst_tbl[i][1]) {
226
w = code_map[i][s - code_ofst_tbl[i][0]];
228
if (s >= 0x043e && s <= 0x0441) {
230
} else if (s == 0x03b1 || s == 0x03b7) {
232
} else if (s == 0x04b8 || s == 0x04b9 || s == 0x04c4) {
234
} else if (s == 0x1ed9 || s == 0x1eda || s == 0x1ee8 || s == 0x1ef3 ||
235
(s >= 0x1ef5 && s <= 0x1efb) || s == 0x1f05 || s == 0x1f06 ||
236
s == 0x1f18 || (s >= 0x1ff2 && s <= 0x20a5)) {
240
CK((*filter->output_function)(w, filter->data));
248
/* consult the JIS X 0208 table only when nothing has set w so far and
 * the index is inside the table */
if (w == 0 && s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */
249
w = jisx0208_ucs_table[s];
254
/* keep the bits selected by MBFL_WCSPLANE_MASK and tag the value with
 * the WINCP932 plane marker */
w &= MBFL_WCSPLANE_MASK;
255
w |= MBFL_WCSPLANE_WINCP932;
257
CK((*filter->output_function)(w, filter->data));
258
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
259
/* a control character in second-byte position is emitted unchanged */
CK((*filter->output_function)(c, filter->data));
262
/* tag the value with MBFL_WCSGROUP_THROUGH and emit it */
w &= MBFL_WCSGROUP_MASK;
263
w |= MBFL_WCSGROUP_THROUGH;
264
CK((*filter->output_function)(w, filter->data));
280
/*
 * wchar => SJIS-mac: process one input code point `c`.
 *
 * The handling is selected by filter->status. filter->cache carries a
 * previously received code point in its low 16 bits and a mode tag in
 * bits 16-19 (0x10000, 0x20000 or 0x40000 in the visible code). Output
 * bytes go to filter->output_function; input that cannot be converted is
 * reported through mbfl_filt_conv_illegal_output() when
 * filter->illegal_mode is not MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * CK() returns -1 from this function when any of those calls reports a
 * negative value.
 *
 * NOTE(review): this listing is fragmentary (statements, case labels and
 * braces are missing between the visible lines), so the comments below
 * describe only what the visible statements do.
 */
mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter)
283
int c1, c2, s1, s2, mode;
288
// a1: U+0000 -> U+046F
289
// a2: U+2000 -> U+30FF
290
// i: U+4E00 -> U+9FFF
291
// r: U+FF00 -> U+FFFF
293
switch (filter->status) {
305
/* s_form_tbl and s_form_sjis_tbl are read with the same index; the
 * offsets 0, 34, 34+3 and 34+3+3 select a different segment of the
 * tables depending on the code point `c` that follows c1 */
if (c1 == s_form_tbl[i+34+3+3]) {
306
s1 = s_form_sjis_tbl[i+34+3+3];
313
} else if (c == 0x20dd) {
315
if (c1 == s_form_tbl[i+34+3]) {
316
s1 = s_form_sjis_tbl[i+34+3];
323
} else if (c == 0xf87f) {
325
if (c1 == s_form_tbl[i+34]) {
326
s1 = s_form_sjis_tbl[i+34];
333
} else if (c == 0xf87e) {
335
if (c1 == s_form_tbl[i]) {
336
s1 = s_form_sjis_tbl[i];
349
/* look c1 up over the whole of s_form_tbl and take the value from the
 * fallback table at the same index */
for (i=0;i<s_form_tbl_len;i++) {
350
if (c1 == s_form_tbl[i]) {
351
s1 = s_form_sjis_fallback_tbl[i];
359
CK((*filter->output_function)(s1, filter->data));
361
/* emit s1 as two bytes, high byte first */
CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data));
362
CK((*filter->output_function)(s1 & 0xff, filter->data));
365
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
366
CK(mbfl_filt_conv_illegal_output(c, filter));
370
if (s2 <= 0 || s1 == -1) {
376
/* range-indexed lookups: each ucs_*_jis_table covers [min, max) and is
 * indexed by the offset of c from the table's min */
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
377
s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
380
} else if (c == 0xa9) {
383
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
384
s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
387
} else if (c == 0x2014) {
389
} else if (c == 0x2116) {
392
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
393
s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
394
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
395
s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
399
for (i=0;i<s_form_tbl_len;i++) {
400
if (c == s_form_tbl[i]) {
407
if (c == 0xf860 || c == 0xf861 || c == 0xf862) {
415
/* split c into its plane tag (bits outside MBFL_WCSPLANE_MASK) and the
 * code carried in the masked bits */
c1 = c & ~MBFL_WCSPLANE_MASK;
416
if (c1 == MBFL_WCSPLANE_WINCP932) {
417
s1 = c & MBFL_WCSPLANE_MASK;
419
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
420
s1 = c & MBFL_WCSPLANE_MASK;
421
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
422
s1 = c & MBFL_WCSPLANE_MASK;
424
} else if (c == 0xa0) {
426
} else if (c == 0xa5) { /* YEN SIGN */
427
s1 = 0x216f; /* FULLWIDTH YEN SIGN */
428
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
434
/* wchar2sjis_mac_r_tbl rows are used as { first, last, base }: a code
 * point inside [row[0], row[1]] maps to row[2] plus its offset */
for (i=0; i<wchar2sjis_mac_r_tbl_len; i++) {
435
if (c >= wchar2sjis_mac_r_tbl[i][0] && c <= wchar2sjis_mac_r_tbl[i][1]) {
436
s1 = c - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2];
442
/* wchar2sjis_mac_r_map rows give a range; wchar2sjis_mac_code_map[i]
 * holds the values for that range, indexed by the offset from its start */
for (i=0; i<wchar2sjis_mac_r_map_len; i++) {
443
if (c >= wchar2sjis_mac_r_map[i][0] && c <= wchar2sjis_mac_r_map[i][1]) {
444
s1 = wchar2sjis_mac_code_map[i][c-wchar2sjis_mac_r_map[i][0]];
451
/* exact-match pairs { code point, value }; only the low 16 bits of the
 * value are kept */
for (i=0; i<wchar2sjis_mac_wchar_tbl_len ; i++) {
452
if ( c == wchar2sjis_mac_wchar_tbl[i][0]) {
453
s1 = wchar2sjis_mac_wchar_tbl[i][1] & 0xffff;
461
/* NOTE(review): c1 is assigned on a line not shown; presumably it is the
 * row derived from the linear index s1, making c2 the cell in that row */
c2 = s1-94*(c1-0x21)+0x21;
467
if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */
473
} else if (s1 <= 0) {
479
if (s1 < 0x100) { /* latin or kana */
480
CK((*filter->output_function)(s1, filter->data));
482
c1 = (s1 >> 8) & 0xff;
484
SJIS_ENCODE(c1, c2, s1, s2);
485
CK((*filter->output_function)(s1, filter->data));
486
CK((*filter->output_function)(s2, filter->data));
489
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
490
CK(mbfl_filt_conv_illegal_output(c, filter));
501
/* c matches the element at index 2 of one of the first five code_tbl_m
 * rows: remember it in the cache with mode tag 0x10000 */
for (i=0; i<5; i++) {
502
if (c == code_tbl_m[i][2]) {
503
filter->cache = c | 0x10000;
508
} else if (c1 == 0xf861) {
509
/* rows 5..7 of code_tbl_m are checked after 0xf861; mode tag 0x20000 */
for (i=0; i<3; i++) {
510
if (c == code_tbl_m[i+5][2]) {
511
filter->cache = c | 0x20000;
516
} else if (c1 == 0xf862) {
517
/* rows 8..11 of code_tbl_m are checked after 0xf862; mode tag 0x40000 */
for (i=0; i<4; i++) {
518
if (c == code_tbl_m[i+5+3][2]) {
519
filter->cache = c | 0x40000;
526
/* with status equal to 0 at this point, both c1 and c are reported as
 * illegal output */
if (filter->status == 0 && filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
527
CK(mbfl_filt_conv_illegal_output(c1, filter));
528
CK(mbfl_filt_conv_illegal_output(c, filter));
535
/* unpack the cache: previous code point in the low 16 bits, mode tag in
 * bits 16-19 */
c1 = filter->cache & 0xffff;
536
mode = (filter->cache & 0xf0000) >> 16;
542
/* the sequence is complete when (c1, c) equals elements 2 and 3 of one
 * of the first five rows; element 0 of that row is the result */
for (i=0; i<5; i++) {
543
if (c1 == code_tbl_m[i][2] && c == code_tbl_m[i][3]) {
544
s1 = code_tbl_m[i][0];
551
c2 = s1-94*(c1-0x21)+0x21;
552
SJIS_ENCODE(c1, c2, s1, s2);
553
CK((*filter->output_function)(s1, filter->data));
554
CK((*filter->output_function)(s2, filter->data));
557
/* no match: report 0xf860 and the two code points c1 and c as illegal */
if (s1 <= 0 && filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
558
CK(mbfl_filt_conv_illegal_output(0xf860, filter));
559
CK(mbfl_filt_conv_illegal_output(c1, filter));
560
CK(mbfl_filt_conv_illegal_output(c, filter));
563
} else if (mode == 0x2) {
564
/* elements 2 and 3 match: cache the current code point and keep the
 * 0x20000 mode tag */
for (i=0; i<3; i++) {
565
if (c1 == code_tbl_m[i+5][2] && c == code_tbl_m[i+5][3]) {
566
filter->cache = c | 0x20000;
571
} else if (mode == 0x4) {
572
for (i=0; i<4; i++) {
573
if (c1 == code_tbl_m[i+8][2] && c == code_tbl_m[i+8][3]) {
574
filter->cache = c | 0x40000;
584
c1 = filter->cache & 0xffff;
585
mode = (filter->cache & 0xf0000) >> 16;
591
/* rows 5..7: the sequence is complete when (c1, c) equals elements 3
 * and 4; element 0 of that row is the result */
for (i=0; i<3; i++) {
592
if (c1 == code_tbl_m[i+5][3] && c == code_tbl_m[i+5][4]) {
593
s1 = code_tbl_m[i+5][0];
600
c2 = s1-94*(c1-0x21)+0x21;
601
SJIS_ENCODE(c1, c2, s1, s2);
602
CK((*filter->output_function)(s1, filter->data));
603
CK((*filter->output_function)(s2, filter->data));
606
/* no match: report 0xf861, then element 2 of the row whose element 3
 * equals c1, then c1 and c */
if (s1 <= 0 && filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
607
CK(mbfl_filt_conv_illegal_output(0xf861, filter));
608
for (i=0; i<3; i++) {
609
if (c1 == code_tbl_m[i+5][3]) {
610
CK(mbfl_filt_conv_illegal_output(code_tbl_m[i+5][2], filter));
614
CK(mbfl_filt_conv_illegal_output(c1, filter));
615
CK(mbfl_filt_conv_illegal_output(c, filter));
617
} else if (mode == 0x4) {
618
for (i=0; i<4; i++) {
619
if (c1 == code_tbl_m[i+8][3] && c == code_tbl_m[i+8][4]) {
620
filter->cache = c | 0x40000;
630
c1 = filter->cache & 0xffff;
631
mode = (filter->cache & 0xf0000) >> 16;
637
/* rows 8..11: the sequence is complete when (c1, c) equals elements 4
 * and 5; element 0 of that row is the result */
for (i=0; i<4; i++) {
638
if (c1 == code_tbl_m[i+8][4] && c == code_tbl_m[i+8][5]) {
639
s1 = code_tbl_m[i+8][0];
646
c2 = s1-94*(c1-0x21)+0x21;
647
SJIS_ENCODE(c1, c2, s1, s2);
648
CK((*filter->output_function)(s1, filter->data));
649
CK((*filter->output_function)(s2, filter->data));
652
/* no match: report 0xf862, then elements 2 and 3 of the row whose
 * element 4 equals c1, then c1 and c */
if (s1 <= 0 && filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
653
CK(mbfl_filt_conv_illegal_output(0xf862, filter));
654
for (i=0; i<4; i++) {
655
if (c1 == code_tbl_m[i+8][4]) {
656
CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][2], filter));
657
CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][3], filter));
661
CK(mbfl_filt_conv_illegal_output(c1, filter));
662
CK(mbfl_filt_conv_illegal_output(c, filter));
675
/*
 * Flush routine for the wchar => SJIS-mac filter.
 *
 * When the filter is in status 1 with a positive cache value, the pending
 * code point is looked up in s_form_tbl and the matching entry of
 * s_form_sjis_fallback_tbl is emitted as two bytes. Afterwards the call is
 * forwarded to filter->flush_function when one is set.
 *
 * NOTE(review): the assignment of c1 is on a line not shown in this
 * listing; presumably it is taken from filter->cache - confirm.
 */
mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter)
678
if (filter->status == 1 && filter->cache > 0) {
680
for (i=0;i<s_form_tbl_len;i++) {
681
if (c1 == s_form_tbl[i]) {
682
s1 = s_form_sjis_fallback_tbl[i];
687
/* emit s1 as two bytes, high byte first */
CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data));
688
CK((*filter->output_function)(s1 & 0xff, filter->data));
694
/* chain to the next stage's flush, returning its result */
if (filter->flush_function != NULL) {
695
return (*filter->flush_function)(filter->data);