1
/*===========================================================================
3
* About : S-Expression reader
5
* Copyright (C) 2000-2001 by Shiro Kawai (shiro@acm.org)
6
* Copyright (C) 2005 by Kazuki Ohta (mover@hct.zaq.ne.jp)
10
* Redistribution and use in source and binary forms, with or without
11
* modification, are permitted provided that the following conditions
14
* 1. Redistributions of source code must retain the above copyright
15
* notice, this list of conditions and the following disclaimer.
16
* 2. Redistributions in binary form must reproduce the above copyright
17
* notice, this list of conditions and the following disclaimer in the
18
* documentation and/or other materials provided with the distribution.
19
* 3. Neither the name of authors nor the names of its contributors
20
* may be used to endorse or promote products derived from this software
21
* without specific prior written permission.
23
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
24
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
27
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29
* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34
===========================================================================*/
36
* FIXME: Large fixed-size buffer on stack without limit checking
38
* Fix some functions contained in this file since:
41
* - some embedded platform cannot allocate such large stack (approx. 20KB)
42
* - inefficient from the viewpoint of memory locality (cache, page, power
45
* Search "FIXME" on this file to locate the codes. I wonder other Scheme
46
* implementations may have sophisticated code. Please consider porting them to
47
* save development cost, since this part is not the primary value of
48
* SigScheme. -- YamaKen 2005-09-05
52
* FIXME: Parse properly as defined in "7.1.1 Lexical structure" of R5RS, and
53
* use the popular words for parser as used in R5RS, such as 'token'.
56
/*=======================================
58
=======================================*/
63
/*=======================================
65
=======================================*/
66
#include "sigscheme.h"
67
#include "sigschemeinternal.h"
69
/*=======================================
70
File Local Struct Declarations
71
=======================================*/
73
/*=======================================
74
File Local Macro Declarations
75
=======================================*/
76
/* Compatible with isspace(3). Use this to prevent incorrect space handlings */
77
#define CASE_ISSPACE \
78
case ' ': case '\t': case '\n': case '\r': case '\v': case '\f'
80
/*=======================================
82
=======================================*/
84
/*=======================================
85
File Local Function Declarations
86
=======================================*/
87
static int skip_comment_and_space(ScmObj port);
88
static char* read_word(ScmObj port);
89
static char* read_char_sequence(ScmObj port);
91
static ScmObj read_sexpression(ScmObj port);
92
static ScmObj read_list(ScmObj port, int closeParen);
93
static ScmObj read_char(ScmObj port);
94
static ScmObj read_string(ScmObj port);
95
static ScmObj read_symbol(ScmObj port);
96
static ScmObj parse_number(ScmObj port);
97
static ScmObj read_number_or_symbol(ScmObj port);
98
static ScmObj read_quote(ScmObj port, ScmObj quoter);
100
/*=======================================
101
Function Implementations
102
=======================================*/
103
/*===========================================================================
105
===========================================================================*/
106
ScmObj SigScm_Read(ScmObj port)
108
ScmObj sexp = SCM_FALSE;
109
DECLARE_INTERNAL_FUNCTION("SigScm_Read");
113
sexp = read_sexpression(port);
115
if ((SigScm_DebugCategories() & SCM_DBG_READ) && !EOFP(sexp)) {
116
SigScm_WriteToPort(scm_current_error_port, sexp);
117
SigScm_ErrorNewline();
124
ScmObj SigScm_Read_Char(ScmObj port)
126
DECLARE_INTERNAL_FUNCTION("SigScm_Read_Char");
130
return read_char(port);
134
static int skip_comment_and_space(ScmObj port)
138
SCM_PORT_GETC(port, c);
141
} else if(c == ';') {
143
SCM_PORT_GETC(port, c);
144
if (c == '\n' || c == '\r') {
147
if (c == EOF) return c;
150
} else if(isspace(c)) {
158
static ScmObj read_sexpression(ScmObj port)
163
CDBG((SCM_DBG_PARSER, "read_sexpression"));
166
c = skip_comment_and_space(port);
168
CDBG((SCM_DBG_PARSER, "read_sexpression c = %c", c));
172
return read_list(port, ')');
174
return read_string(port);
175
case '0': case '1': case '2': case '3': case '4':
176
case '5': case '6': case '7': case '8': case '9':
178
SCM_PORT_UNGETC(port, c);
179
return read_number_or_symbol(port);
181
return read_quote(port, SCM_QUOTE);
183
return read_quote(port, SCM_QUASIQUOTE);
185
SCM_PORT_GETC(port, c1);
187
SigScm_Error("EOF in unquote");
188
} else if (c1 == '@') {
189
return read_quote(port, SCM_UNQUOTE_SPLICING);
191
SCM_PORT_UNGETC(port, c1);
192
return read_quote(port, SCM_UNQUOTE);
196
SCM_PORT_GETC(port, c1);
203
return ScmOp_list2vector(read_list(port, ')'));
205
return read_char(port);
206
case 'b': case 'o': case 'd': case 'x':
207
SCM_PORT_UNGETC(port, c1);
208
return parse_number(port);
210
SigScm_Error("end in #");
212
SigScm_Error("Unsupported # : %c", c1);
217
SigScm_Error("invalid close parenthesis");
222
SCM_PORT_UNGETC(port, c);
223
return read_symbol(port);
228
static ScmObj read_list(ScmObj port, int closeParen)
230
ScmObj list_head = SCM_NULL;
231
ScmObj list_tail = SCM_NULL;
232
ScmObj item = SCM_NULL;
233
ScmObj cdr = SCM_NULL;
234
int line = SCM_PORT_LINE(port);
239
CDBG((SCM_DBG_PARSER, "read_list"));
242
c = skip_comment_and_space(port);
244
CDBG((SCM_DBG_PARSER, "read_list c = [%c]", c));
247
if (SCM_PORT_PORTTYPE(port) == PORT_FILE)
248
SigScm_Error("EOF inside list. (starting from line %d)", line + 1);
250
SigScm_Error("EOF inside list.");
251
} else if (c == closeParen) {
253
} else if (c == '.') {
255
SCM_PORT_GETC(port, c2);
256
CDBG((SCM_DBG_PARSER, "read_list process_dot c2 = [%c]", c2));
257
if (isspace(c2) || c2 == '(' || c2 == '"' || c2 == ';') {
258
cdr = read_sexpression(port);
259
if (NULLP(list_tail))
260
SigScm_Error(".(dot) at the start of the list.");
262
c = skip_comment_and_space(port);
264
SigScm_Error("bad dot syntax");
266
SET_CDR(list_tail, cdr);
271
* This dirty hack here picks up the current token as a
272
* symbol beginning with the dot (that's how Guile and
275
SCM_PORT_UNGETC(port, c2);
276
token = read_word(port);
277
token = (char*)realloc(token, strlen(token) + 1 + 1);
278
memmove(token + 1, token, strlen(token)+1);
280
item = Scm_Intern(token);
283
SCM_PORT_UNGETC(port, c);
284
item = read_sexpression(port);
287
/* Append item to the list_tail. */
288
if (NULLP(list_tail)) {
289
/* create new list */
290
list_head = CONS(item, SCM_NULL);
291
list_tail = list_head;
293
/* update list_tail */
294
SET_CDR(list_tail, CONS(item, SCM_NULL));
295
list_tail = CDR(list_tail);
300
static ScmObj read_char(ScmObj port)
302
char *ch = read_char_sequence(port);
304
CDBG((SCM_DBG_PARSER, "read_char : ch = %s", ch));
306
/* FIXME: Simplify with Scm_special_char_table */
307
/* check special sequence "space" and "newline" */
308
if (strcmp(ch, "space") == 0) {
312
/* to avoid portability problem, we should not support #\Space and so on */
313
} else if (strcmp(ch, "Space") == 0) {
317
} else if (strcmp(ch, "newline") == 0) {
318
/* TODO: Support platform-dependent newline character sequence */
323
/* FIXME: memory leak */
324
return Scm_NewChar(ch);
327
static ScmObj read_string(ScmObj port)
329
char stringbuf[1024]; /* FIXME! */
333
CDBG((SCM_DBG_PARSER, "read_string"));
336
SCM_PORT_GETC(port, c);
338
CDBG((SCM_DBG_PARSER, "read_string c = %c", c));
342
stringbuf[stringlen] = '\0';
343
SigScm_Error("EOF in the string : str = %s", stringbuf);
347
stringbuf[stringlen] = '\0';
348
return Scm_NewStringCopying(stringbuf);
351
/* FIXME: Simplify with Scm_special_char_table */
353
* (R5RS) 6.3.5 String
354
* A double quote can be written inside a string only by
355
* escaping it with a backslash (\).
357
SCM_PORT_GETC(port, c);
359
case '\"': stringbuf[stringlen] = c; break;
360
case 'n': stringbuf[stringlen] = '\n'; break;
361
case 'r': stringbuf[stringlen] = '\r'; break;
362
case 'f': stringbuf[stringlen] = '\f'; break;
363
case 't': stringbuf[stringlen] = '\t'; break;
364
case '\\': stringbuf[stringlen] = '\\'; break;
366
stringbuf[stringlen] = '\\';
367
stringbuf[++stringlen] = c;
374
stringbuf[stringlen] = c;
381
static ScmObj read_symbol(ScmObj port)
383
char *sym_name = read_word(port);
384
ScmObj sym = Scm_Intern(sym_name);
387
CDBG((SCM_DBG_PARSER, "read_symbol"));
393
* FIXME: Parse properly as defined in "7.1.1 Lexical structure" of R5RS. For
394
* example, 1+ is not a valid identifier and should be rejected to prevent
395
* introducing unintended R5RS-incompatibility.
397
static ScmObj read_number_or_symbol(ScmObj port)
402
char *first_nondigit = NULL;
403
ScmObj ret = SCM_NULL;
405
CDBG((SCM_DBG_PARSER, "read_number_or_symbol"));
407
/* read char sequence */
408
str = read_word(port);
409
str_len = strlen(str);
411
/* see if it's a decimal integer */
412
number = (int)strtol(str, &first_nondigit, 10);
415
ret = (*first_nondigit) ? Scm_Intern(str) : Scm_NewInt(number);
424
static char *read_word(ScmObj port)
426
char stringbuf[1024]; /* FIXME! */
432
SCM_PORT_GETC(port, c);
434
CDBG((SCM_DBG_PARSER, "c = %c", c));
437
case EOF: /* don't became an error for handling c-eval, like Scm_eval_c_string("some-symbol"); */
438
case '(': case ')': case '\"': case '\'': case ';':
440
SCM_PORT_UNGETC(port, c);
441
stringbuf[stringlen] = '\0';
442
dst = strdup(stringbuf);
446
stringbuf[stringlen++] = (char)c;
452
static char *read_char_sequence(ScmObj port)
454
char stringbuf[1024]; /* FIXME! */
460
SCM_PORT_GETC(port, c);
462
CDBG((SCM_DBG_PARSER, "c = %c", c));
466
stringbuf[stringlen] = '\0';
467
SigScm_Error("EOF in the char sequence : char = %s", stringbuf);
470
case '(': case ')': case '\"': case '\'': case ';':
472
/* pass through first char */
473
if (stringlen == 0) {
474
stringbuf[stringlen++] = (char)c;
478
SCM_PORT_UNGETC(port, c);
479
stringbuf[stringlen] = '\0';
480
dst = strdup(stringbuf);
484
stringbuf[stringlen++] = (char)c;
490
static ScmObj read_quote(ScmObj port, ScmObj quoter)
492
return SCM_LIST_2(quoter, read_sexpression(port));
495
/* str should be what appeared right after '#' (eg. #b123) */
496
static ScmObj parse_number(ScmObj port)
500
char *first_nondigit = NULL;
501
char *numstr = read_word(port);
504
case 'b': radix = 2; break;
505
case 'o': radix = 8; break;
506
case 'd': radix = 10; break;
507
case 'x': radix = 16; break;
509
SigScm_Error("ill-formatted number: #%s", numstr);
513
number = (int)strtol(numstr+1, &first_nondigit, radix);
515
SigScm_Error("ill-formatted number: #%s", numstr);
520
return Scm_NewInt(number);