1
/*===========================================================================
2
* Filename : scmport-mbchar.c
3
* About : A ScmCharPort implementation for multibyte character stream
5
* Copyright (C) 2005-2006 YAMAMOTO Kengo <yamaken AT bp.iij4u.or.jp>
6
* Copyright (c) 2007 SigScheme Project <uim AT freedesktop.org>
10
* Redistribution and use in source and binary forms, with or without
11
* modification, are permitted provided that the following conditions
14
* 1. Redistributions of source code must retain the above copyright
15
* notice, this list of conditions and the following disclaimer.
16
* 2. Redistributions in binary form must reproduce the above copyright
17
* notice, this list of conditions and the following disclaimer in the
18
* documentation and/or other materials provided with the distribution.
19
* 3. Neither the name of authors nor the names of its contributors
20
* may be used to endorse or promote products derived from this software
21
* without specific prior written permission.
23
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
24
* IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
25
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
27
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
30
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
33
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
===========================================================================*/
/*
 * - This file is intended to be portable. Don't depend on SigScheme.
 * - To isolate and hide implementation-dependent things, don't merge this file
 *   into other files.
 */
50
#include "scmport-config.h"
52
#include "scmport-mbchar.h"
54
/*=======================================
55
File Local Macro Definitions
56
=======================================*/
/*
 * Feature flag, disabled (0).
 * NOTE(review): the place where this flag is tested is not visible in this
 * part of the file -- confirm before changing the value.
 */
#define HANDLE_MBC_START 0

/*
 * SCM_MBCPORT_CLEAR_STATE(port): reset the scanner state stored in the port.
 * When stateful encodings are compiled in, the 'state' member is set to NULL;
 * otherwise the macro expands to SCM_EMPTY_EXPR and touches nothing.
 * The two definitions are alternatives, so they must be separated by #else
 * and the conditional closed with #endif.
 */
#if SCM_USE_STATEFUL_ENCODING
#define SCM_MBCPORT_CLEAR_STATE(port) ((port)->state = NULL)
#else
#define SCM_MBCPORT_CLEAR_STATE(port) SCM_EMPTY_EXPR
#endif
65
/*=======================================
66
File Local Type Definitions
67
=======================================*/
68
struct ScmMultiByteCharPort_ { /* inherits ScmBaseCharPort */
69
const ScmCharPortVTbl *vptr;
71
ScmBytePort *bport; /* protected */
72
size_t linenum; /* protected */
75
ScmMultibyteState state;
76
scm_byte_t rbuf[SCM_MB_CHAR_BUF_SIZE];
79
/*=======================================
80
File Local Function Declarations
81
=======================================*/
82
static ScmCharPort *mbcport_dyn_cast(ScmCharPort *cport,
83
const ScmCharPortVTbl *dst_vptr);
84
static ScmCharCodec *mbcport_codec(ScmMultiByteCharPort *port);
85
static char *mbcport_inspect(ScmMultiByteCharPort *port);
86
static scm_ichar_t mbcport_get_char(ScmMultiByteCharPort *port);
87
static scm_ichar_t mbcport_peek_char(ScmMultiByteCharPort *port);
88
static scm_bool mbcport_char_readyp(ScmMultiByteCharPort *port);
89
static void mbcport_put_char(ScmMultiByteCharPort *port, scm_ichar_t ch);
91
static ScmMultibyteCharInfo mbcport_fill_rbuf(ScmMultiByteCharPort *port,
94
/*=======================================
  Variable Definitions
=======================================*/
97
/*
 * File-local variable group 'static_scmport_mbchar' and the exported vtable
 * pointer.
 *
 * NOTE(review): the bare numbers on their own lines below are extraction
 * residue, not code. Gaps in that numbering indicate that source lines were
 * dropped from this group, and the number following the backslash
 * continuation is currently spliced into the macro definition. Restore this
 * section from the upstream file rather than editing it by hand.
 */
SCM_GLOBAL_VARS_BEGIN(static_scmport_mbchar);
99
/* Storage for this port class's vtable; filled in by scm_mbcport_init(). */
static ScmCharPortVTbl l_ScmMultiByteCharPort_vtbl;
101
SCM_GLOBAL_VARS_END(static_scmport_mbchar);
102
#define l_ScmMultiByteCharPort_vtbl \
103
SCM_GLOBAL_VAR(static_scmport_mbchar, l_ScmMultiByteCharPort_vtbl)
104
SCM_DEFINE_STATIC_VARS(static_scmport_mbchar);
106
/* Public handle to the vtable above; assigned in scm_mbcport_init() and
 * compared against in mbcport_dyn_cast(). */
SCM_EXPORT const ScmCharPortVTbl *ScmMultiByteCharPort_vptr;
108
/*=======================================
  Function Definitions
=======================================*/
112
scm_mbcport_init(void)
114
ScmCharPortVTbl *vptr;
116
SCM_GLOBAL_VARS_INIT(static_scmport_mbchar);
118
l_ScmMultiByteCharPort_vtbl = *ScmBaseCharPort_vptr;
120
vptr = &l_ScmMultiByteCharPort_vtbl;
121
vptr->dyn_cast = (ScmCharPortMethod_dyn_cast)&mbcport_dyn_cast;
122
vptr->codec = (ScmCharPortMethod_codec)&mbcport_codec;
123
vptr->inspect = (ScmCharPortMethod_inspect)&mbcport_inspect;
124
vptr->get_char = (ScmCharPortMethod_get_char)&mbcport_get_char;
125
vptr->peek_char = (ScmCharPortMethod_peek_char)&mbcport_peek_char;
126
vptr->char_readyp = (ScmCharPortMethod_char_readyp)&mbcport_char_readyp;
127
vptr->put_char = (ScmCharPortMethod_put_char)&mbcport_put_char;
128
ScmMultiByteCharPort_vptr = vptr;
132
ScmMultiByteCharPort_construct(ScmMultiByteCharPort *port,
133
const ScmCharPortVTbl *vptr,
134
ScmBytePort *bport, ScmCharCodec *codec)
136
ScmBaseCharPort_construct((ScmBaseCharPort *)port, vptr, bport);
139
port->rbuf[0] = '\0';
140
SCM_MBCPORT_CLEAR_STATE(port);
143
SCM_EXPORT ScmCharPort *
144
ScmMultiByteCharPort_new(ScmBytePort *bport, ScmCharCodec *codec)
146
ScmMultiByteCharPort *cport;
148
cport = SCM_PORT_MALLOC(sizeof(ScmMultiByteCharPort));
149
ScmMultiByteCharPort_construct(cport, ScmMultiByteCharPort_vptr,
152
return (ScmCharPort *)cport;
156
ScmMultiByteCharPort_set_codec(ScmCharPort *cport, ScmCharCodec *codec)
158
ScmMultiByteCharPort *mbcport;
160
mbcport = SCM_BYTEPORT_DYNAMIC_CAST(ScmMultiByteCharPort, cport);
161
mbcport->codec = codec;
162
SCM_MBCPORT_CLEAR_STATE(mbcport);
163
/* only one byte can be preserved for new codec. otherwise cleared */
164
if (1 < strlen((char *)mbcport->rbuf))
165
mbcport->rbuf[0] = '\0';
169
mbcport_dyn_cast(ScmCharPort *cport, const ScmCharPortVTbl *dst_vptr)
171
return (dst_vptr == ScmBaseCharPort_vptr
172
|| dst_vptr == ScmMultiByteCharPort_vptr) ? cport : NULL;
175
static ScmCharCodec *
176
mbcport_codec(ScmMultiByteCharPort *port)
182
mbcport_inspect(ScmMultiByteCharPort *port)
184
return ScmBaseCharPort_inspect((ScmBaseCharPort *)port, "mb");
188
/*
 * get_char method: consumes one character from the port.
 *
 * The character is decoded by mbcport_peek_char(); afterwards the read buffer
 * is emptied so that the next read starts a new character.
 *
 * NOTE(review): this function is incomplete in the text under review. The
 * bare numbers are extraction residue, and the return-type line, braces, the
 * declaration of 'ch', the #endif lines, the statement controlled by the
 * final 'if' and the return statement are all missing. Restore from the
 * upstream file.
 */
mbcport_get_char(ScmMultiByteCharPort *port)
191
/* Stateful encodings only: scan ahead to learn the state that will be in
 * effect after this character. */
#if SCM_USE_STATEFUL_ENCODING
192
ScmMultibyteCharInfo mbc;
193
ScmMultibyteState next_state;
195
mbc = mbcport_fill_rbuf(port, scm_true);
196
next_state = SCM_MBCINFO_GET_STATE(mbc);
199
ch = mbcport_peek_char(port);
200
/* Drop the buffered bytes: the character has now been consumed. */
port->rbuf[0] = '\0';
201
#if SCM_USE_STATEFUL_ENCODING
202
SCM_MBCPORT_SET_STATE(port, next_state)
205
/* NOTE(review): the action taken on a newline is not visible here;
 * presumably it updates port->linenum -- confirm. */
if (ch == SCM_NEWLINE_STR[0])
213
/*
 * peek_char method: decodes the next character without consuming it.
 *
 * Fills the read buffer in blocking mode, then converts the buffered bytes
 * to a character code with the port's codec.
 *
 * NOTE(review): this function is incomplete in the text under review. The
 * bare numbers are extraction residue, and the return-type line, braces, the
 * declarations of 'ch' and 'size', the remainder of the conversion call and
 * everything after it are missing. How a zero 'size' is handled cannot be
 * determined from here. Restore from the upstream file.
 */
mbcport_peek_char(ScmMultiByteCharPort *port)
215
ScmMultibyteCharInfo mbc;
219
/* scm_true: request a blocking fill. */
mbc = mbcport_fill_rbuf(port, scm_true);
220
size = SCM_MBCINFO_GET_SIZE(mbc);
222
ch = SCM_CHARCODEC_STR2INT(port->codec, (char *)port->rbuf, size,
231
mbcport_char_readyp(ScmMultiByteCharPort *port)
233
ScmMultibyteCharInfo mbc;
235
mbc = mbcport_fill_rbuf(port, scm_false);
236
return !SCM_MBCINFO_INCOMPLETEP(mbc);
240
/*
 * put_char method: encodes 'ch' with the port's codec into a local buffer
 * and writes the resulting bytes to the underlying byte port.
 *
 * NOTE(review): this function is incomplete in the text under review. The
 * bare numbers are extraction residue, and the return-type line, braces, the
 * declarations of 'end' and 'size', the condition guarding the error call
 * and the computation of 'size' are missing. As shown, the error call would
 * run unconditionally. Restore from the upstream file.
 */
mbcport_put_char(ScmMultiByteCharPort *port, scm_ichar_t ch)
244
char wbuf[SCM_MB_CHAR_BUF_SIZE];
246
/* FIXME: set updated state to port->state */
247
end = SCM_CHARCODEC_INT2STR(port->codec, wbuf, ch, port->state);
249
SCM_CHARPORT_ERROR(port, "ScmMultibyteCharPort: invalid character");
251
SCM_BYTEPORT_WRITE(port->bport, size, wbuf);
254
static ScmMultibyteCharInfo
255
mbcport_fill_rbuf(ScmMultiByteCharPort *port, scm_bool blockp)
259
ScmMultibyteString mbs;
260
ScmMultibyteCharInfo mbc;
262
end = (scm_byte_t *)strchr((char *)port->rbuf, '\0');
263
SCM_MBS_SET_STATE(mbs, port->state);
265
SCM_MBS_SET_STR(mbs, (char *)port->rbuf);
266
SCM_MBS_SET_SIZE(mbs, end - port->rbuf);
268
mbc = SCM_CHARCODEC_SCAN_CHAR(port->codec, mbs);
270
if (SCM_MBCINFO_ERRORP(mbc))
271
SCM_CHARPORT_ERROR(port, "ScmMultibyteCharPort: broken character");
272
if (!SCM_MBCINFO_INCOMPLETEP(mbc) && SCM_MBCINFO_GET_SIZE(mbc))
274
if (SCM_MBS_GET_SIZE(mbs) == SCM_MB_MAX_LEN)
275
SCM_CHARPORT_ERROR(port, "ScmMultibyteCharPort: broken scanner");
277
byte = SCM_BYTEPORT_GET_BYTE(port->bport);
278
SCM_MBCINFO_SET_STATE(mbc, SCM_MBS_GET_STATE(mbs));
279
if (byte == SCM_ICHAR_EOF) {
280
SCM_MBCINFO_INIT(mbc);
281
port->rbuf[0] = '\0';
283
mbc->start = (char *)port->rbuf;
289
} while (blockp || SCM_BYTEPORT_BYTE_READYP(port->bport));