1
/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006 Free Software
4
* This library is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU Lesser General Public
6
* License as published by the Free Software Foundation; either
7
* version 2.1 of the License, or (at your option) any later version.
9
* This library is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
* Lesser General Public License for more details.
14
* You should have received a copy of the GNU Lesser General Public
15
* License along with this library; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
#include "libguile/_scm.h"
24
#include "libguile/chars.h"
25
#include "libguile/eval.h"
26
#include "libguile/unif.h"
27
#include "libguile/keywords.h"
28
#include "libguile/alist.h"
29
#include "libguile/srcprop.h"
30
#include "libguile/hashtab.h"
31
#include "libguile/hash.h"
32
#include "libguile/ports.h"
33
#include "libguile/root.h"
34
#include "libguile/strings.h"
35
#include "libguile/strports.h"
36
#include "libguile/vectors.h"
37
#include "libguile/validate.h"
38
#include "libguile/srfi-4.h"
40
#include "libguile/read.h"
44
SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
45
SCM_SYMBOL (scm_keyword_prefix, "prefix");
47
scm_t_option scm_read_opts[] = {
48
{ SCM_OPTION_BOOLEAN, "copy", 0,
49
"Copy source code expressions." },
50
{ SCM_OPTION_BOOLEAN, "positions", 0,
51
"Record positions of source code expressions." },
52
{ SCM_OPTION_BOOLEAN, "case-insensitive", 0,
53
"Convert symbols to lower case."},
54
{ SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
55
"Style of keyword recognition: #f or 'prefix."}
58
{ SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
59
"Support Elisp vector syntax, namely `[...]'."},
60
{ SCM_OPTION_BOOLEAN, "elisp-strings", 0,
61
"Support `\\(' and `\\)' in strings."}
66
Give meaningful error messages for errors
70
FILE:LINE:COL: MESSAGE
73
This is not standard GNU format, but the test-suite likes the real
74
message to be in front.
80
scm_i_input_error (char const *function,
81
SCM port, const char *message, SCM arg)
83
SCM fn = (scm_is_string (SCM_FILENAME(port))
85
: scm_from_locale_string ("#<unknown port>"));
87
SCM string_port = scm_open_output_string ();
89
scm_simple_format (string_port,
90
scm_from_locale_string ("~A:~S:~S: ~A"),
92
scm_from_int (SCM_LINUM (port) + 1),
93
scm_from_int (SCM_COL (port) + 1),
94
scm_from_locale_string (message)));
96
string = scm_get_output_string (string_port);
97
scm_close_output_port (string_port);
98
scm_error_scm (scm_from_locale_symbol ("read-error"),
99
function? scm_from_locale_string (function) : SCM_BOOL_F,
106
SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
108
"Option interface for the read options. Instead of using\n"
109
"this procedure directly, use the procedures @code{read-enable},\n"
110
"@code{read-disable}, @code{read-set!} and @code{read-options}.")
111
#define FUNC_NAME s_scm_read_options
113
SCM ans = scm_options (setting,
117
if (SCM_COPY_SOURCE_P)
118
SCM_RECORD_POSITIONS_P = 1;
123
/* An association list mapping extra hash characters to procedures. */
124
static SCM *scm_read_hash_procedures;
126
SCM_DEFINE (scm_read, "read", 0, 1, 0,
128
"Read an s-expression from the input port @var{port}, or from\n"
129
"the current input port if @var{port} is not specified.\n"
130
"Any whitespace before the next token is discarded.")
131
#define FUNC_NAME s_scm_read
136
if (SCM_UNBNDP (port))
137
port = scm_current_input_port ();
138
SCM_VALIDATE_OPINPORT (1, port);
140
c = scm_flush_ws (port, (char *) NULL);
143
scm_ungetc (c, port);
145
tok_buf = scm_c_make_string (30, SCM_UNDEFINED);
146
return scm_lreadr (&tok_buf, port, &copy);
153
scm_grow_tok_buf (SCM *tok_buf)
155
size_t oldlen = scm_i_string_length (*tok_buf);
156
const char *olddata = scm_i_string_chars (*tok_buf);
158
SCM newstr = scm_i_make_string (2 * oldlen, &newdata);
161
for (i = 0; i != oldlen; ++i)
162
newdata[i] = olddata[i];
168
/* Consume an SCSH-style block comment. Assume that we've already
169
read the initial `#!', and eat characters until we get an
170
exclamation-point/sharp-sign sequence.
174
skip_scsh_block_comment (SCM port)
180
int c = scm_getc (port);
183
scm_i_input_error ("skip_block_comment", port,
184
"unterminated `#! ... !#' comment", SCM_EOL);
188
else if (c == '#' && bang_seen)
196
scm_flush_ws (SCM port, const char *eoferr)
200
switch (c = scm_getc (port))
206
scm_i_input_error (eoferr,
214
switch (c = scm_getc (port))
220
case SCM_LINE_INCREMENTORS:
225
switch (c = scm_getc (port))
228
eoferr = "read_sharp";
231
skip_scsh_block_comment (port);
234
scm_ungetc (c, port);
238
case SCM_LINE_INCREMENTORS:
239
case SCM_SINGLE_SPACES:
250
scm_casei_streq (char *s1, char *s2)
253
if (scm_c_downcase((int)*s1) != scm_c_downcase((int)*s2))
260
return !(*s1 || *s2);
264
scm_i_casei_streq (const char *s1, const char *s2, size_t len2)
266
while (*s1 && len2 > 0)
267
if (scm_c_downcase((int)*s1) != scm_c_downcase((int)*s2))
275
return !(*s1 || len2 > 0);
278
/* recsexpr is used when recording expressions
279
* constructed by read:sharp.
282
recsexpr (SCM obj, long line, int column, SCM filename)
284
if (!scm_is_pair(obj)) {
288
/* If this sexpr is visible in the read:sharp source, we want to
289
keep that information, so only record non-constant cons cells
290
which haven't previously been read by the reader. */
291
if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
293
if (SCM_COPY_SOURCE_P)
295
copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
297
while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
299
SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
304
copy = SCM_CDR (copy);
306
SCM_SETCDR (copy, tmp);
310
recsexpr (SCM_CAR (obj), line, column, filename);
311
while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
312
recsexpr (SCM_CAR (tmp), line, column, filename);
313
copy = SCM_UNDEFINED;
315
scm_whash_insert (scm_source_whash,
317
scm_make_srcprops (line,
328
static SCM scm_get_hash_procedure(int c);
329
static SCM scm_i_lreadparen (SCM *, SCM, char *, SCM *, char);
331
static char s_list[]="list";
333
static char s_vector[]="vector";
337
scm_lreadr (SCM *tok_buf, SCM port, SCM *copy)
338
#define FUNC_NAME "scm_lreadr"
345
c = scm_flush_ws (port, s_scm_read);
352
return SCM_RECORD_POSITIONS_P
353
? scm_lreadrecparen (tok_buf, port, s_list, copy)
354
: scm_i_lreadparen (tok_buf, port, s_list, copy, ')');
356
scm_i_input_error (FUNC_NAME, port,"unexpected \")\"", SCM_EOL);
361
if (SCM_ELISP_VECTORS_P)
363
p = scm_i_lreadparen (tok_buf, port, s_vector, copy, ']');
364
return scm_is_null (p) ? scm_nullvect : scm_vector (p);
372
p = scm_sym_quasiquote;
377
p = scm_sym_uq_splicing;
380
scm_ungetc (c, port);
385
scm_lreadr (tok_buf, port, copy),
387
if (SCM_RECORD_POSITIONS_P)
388
scm_whash_insert (scm_source_whash,
390
scm_make_srcprops (SCM_LINUM (port),
394
? (*copy = scm_cons2 (SCM_CAR (p),
395
SCM_CAR (SCM_CDR (p)),
404
/* Check for user-defined hash procedure first, to allow
405
overriding of builtin hash read syntaxes. */
406
SCM sharp = scm_get_hash_procedure (c);
407
if (scm_is_true (sharp))
409
int line = SCM_LINUM (port);
410
int column = SCM_COL (port) - 2;
413
got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port);
414
if (scm_is_eq (got, SCM_UNSPECIFIED))
416
if (SCM_RECORD_POSITIONS_P)
417
return *copy = recsexpr (got, line, column,
418
SCM_FILENAME (port));
426
/* Vectors and arrays, both uniform and not, are handled by this
427
one function. It also disambiguates between '#f' and
430
case '0': case '1': case '2': case '3': case '4':
431
case '5': case '6': case '7': case '8': case '9':
432
case 'u': case 's': case 'f':
435
#if SCM_ENABLE_DEPRECATED
436
/* See below for 'i' and 'e'. */
443
return scm_i_read_array (port, c);
450
/* See above for lower case 'f'. */
456
#if SCM_ENABLE_DEPRECATED
458
/* When next char is '(', it really is an old-style
460
int next_c = scm_getc (port);
462
scm_ungetc (next_c, port);
464
return scm_i_read_array (port, c);
478
scm_ungetc (c, port);
483
/* should never happen, #!...!# block comments are skipped
484
over in scm_flush_ws. */
488
j = scm_read_token (c, tok_buf, port, 0);
489
p = scm_istr2bve (scm_c_substring_shared (*tok_buf, 1, j));
496
j = scm_read_token (c, tok_buf, port, 1);
497
return scm_string_to_symbol (scm_c_substring_copy (*tok_buf, 0, j));
501
j = scm_read_token (c, tok_buf, port, 0);
503
return SCM_MAKE_CHAR (c);
504
if (c >= '0' && c < '8')
506
/* Dirk:FIXME:: This type of character syntax is not R5RS
507
* compliant. Further, it should be verified that the constant
508
* consists only of octal digits. Finally, it should be
509
* checked whether the resulting fixnum is in the range of
511
p = scm_c_locale_stringn_to_number (scm_i_string_chars (*tok_buf),
514
return SCM_MAKE_CHAR (SCM_I_INUM (p));
516
for (c = 0; c < scm_n_charnames; c++)
518
&& (scm_i_casei_streq (scm_charnames[c],
519
scm_i_string_chars (*tok_buf), j)))
520
return SCM_MAKE_CHAR (scm_charnums[c]);
521
scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
522
scm_list_1 (scm_c_substring (*tok_buf, 0, j)));
524
/* #:SYMBOL is a syntax for keywords supported in all contexts. */
526
return scm_symbol_to_keyword (scm_read (port));
531
SCM sharp = scm_get_hash_procedure (c);
533
if (scm_is_true (sharp))
535
int line = SCM_LINUM (port);
536
int column = SCM_COL (port) - 2;
539
got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port);
540
if (scm_is_eq (got, SCM_UNSPECIFIED))
542
if (SCM_RECORD_POSITIONS_P)
543
return *copy = recsexpr (got, line, column,
544
SCM_FILENAME (port));
550
scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
551
scm_list_1 (SCM_MAKE_CHAR (c)));
556
while ('"' != (c = scm_getc (port)))
559
str_eof: scm_i_input_error (FUNC_NAME, port,
560
"end of file in string constant",
563
while (j + 2 >= scm_i_string_length (*tok_buf))
564
scm_grow_tok_buf (tok_buf);
567
switch (c = scm_getc (port))
577
if (SCM_ESCAPED_PARENS_P)
608
if (a == EOF) goto str_eof;
610
if (b == EOF) goto str_eof;
611
if ('0' <= a && a <= '9') a -= '0';
612
else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
613
else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
614
else goto bad_escaped;
615
if ('0' <= b && b <= '9') b -= '0';
616
else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
617
else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
618
else goto bad_escaped;
624
scm_i_input_error(FUNC_NAME, port,
625
"illegal character in escape sequence: ~S",
626
scm_list_1 (SCM_MAKE_CHAR (c)));
628
scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c));
634
/* Change this to scm_c_substring_read_only when
635
SCM_STRING_CHARS has been removed.
637
return scm_c_substring_copy (*tok_buf, 0, j);
639
case '0': case '1': case '2': case '3': case '4':
640
case '5': case '6': case '7': case '8': case '9':
645
j = scm_read_token (c, tok_buf, port, 0);
646
if (j == 1 && (c == '+' || c == '-'))
647
/* Shortcut: Detected symbol '+ or '- */
650
p = scm_c_locale_stringn_to_number (scm_i_string_chars (*tok_buf), j, 10);
655
if ((j == 2) && (scm_getc (port) == '('))
657
scm_ungetc ('(', port);
658
c = scm_i_string_chars (*tok_buf)[1];
661
scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
666
if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
667
return scm_symbol_to_keyword (scm_read (port));
674
j = scm_read_token (c, tok_buf, port, 0);
678
return scm_string_to_symbol (scm_c_substring (*tok_buf, 0, j));
685
_Pragma ("noopt"); /* # pragma _CRI noopt */
689
scm_read_token (int ic, SCM *tok_buf, SCM port, int weird)
694
c = (SCM_CASE_INSENSITIVE_P ? scm_c_downcase(ic) : ic);
701
while (j + 2 >= scm_i_string_length (*tok_buf))
702
scm_grow_tok_buf (tok_buf);
703
scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c));
709
while (j + 2 >= scm_i_string_length (*tok_buf))
710
scm_grow_tok_buf (tok_buf);
722
case SCM_WHITE_SPACES:
723
case SCM_LINE_INCREMENTORS:
726
|| ((!SCM_ELISP_VECTORS_P) && ((c == '[') || (c == ']')))
731
scm_ungetc (c, port);
757
scm_ungetc (c, port);
765
c = (SCM_CASE_INSENSITIVE_P ? scm_c_downcase(c) : c);
766
scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c));
775
_Pragma ("opt"); /* # pragma _CRI opt */
779
scm_i_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy, char term_char)
780
#define FUNC_NAME "scm_i_lreadparen"
787
c = scm_flush_ws (port, name);
790
scm_ungetc (c, port);
791
if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
793
ans = scm_lreadr (tok_buf, port, copy);
795
if (term_char != (c = scm_flush_ws (port, name)))
796
scm_i_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL);
799
ans = tl = scm_cons (tmp, SCM_EOL);
800
while (term_char != (c = scm_flush_ws (port, name)))
802
scm_ungetc (c, port);
803
if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
805
SCM_SETCDR (tl, scm_lreadr (tok_buf, port, copy));
808
SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
817
scm_lreadrecparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
818
#define FUNC_NAME "scm_lreadrecparen"
822
register SCM tl, tl2 = SCM_EOL;
823
SCM ans, ans2 = SCM_EOL;
824
/* Need to capture line and column numbers here. */
825
int line = SCM_LINUM (port);
826
int column = SCM_COL (port) - 1;
828
c = scm_flush_ws (port, name);
831
scm_ungetc (c, port);
832
if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
834
ans = scm_lreadr (tok_buf, port, copy);
835
if (')' != (c = scm_flush_ws (port, name)))
836
scm_i_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL);
839
/* Build the head of the list structure. */
840
ans = tl = scm_cons (tmp, SCM_EOL);
841
if (SCM_COPY_SOURCE_P)
842
ans2 = tl2 = scm_cons (scm_is_pair (tmp)
846
while (')' != (c = scm_flush_ws (port, name)))
850
scm_ungetc (c, port);
851
if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
853
SCM_SETCDR (tl, tmp = scm_lreadr (tok_buf, port, copy));
854
if (SCM_COPY_SOURCE_P)
855
SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp)
859
if (')' != (c = scm_flush_ws (port, name)))
860
scm_i_input_error (FUNC_NAME, port,
861
"missing close paren", SCM_EOL);
865
new_tail = scm_cons (tmp, SCM_EOL);
866
SCM_SETCDR (tl, new_tail);
869
if (SCM_COPY_SOURCE_P)
871
SCM new_tail2 = scm_cons (scm_is_pair (tmp) ? *copy : tmp, SCM_EOL);
872
SCM_SETCDR (tl2, new_tail2);
877
scm_whash_insert (scm_source_whash,
879
scm_make_srcprops (line,
893
/* Manipulate the read-hash-procedures alist. This could be written in
894
Scheme, but maybe it will also be used by C code during initialisation. */
895
SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
897
"Install the procedure @var{proc} for reading expressions\n"
898
"starting with the character sequence @code{#} and @var{chr}.\n"
899
"@var{proc} will be called with two arguments: the character\n"
900
"@var{chr} and the port to read further data from. The object\n"
901
"returned will be the return value of @code{read}.")
902
#define FUNC_NAME s_scm_read_hash_extend
907
SCM_VALIDATE_CHAR (1, chr);
908
SCM_ASSERT (scm_is_false (proc)
909
|| scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
910
proc, SCM_ARG2, FUNC_NAME);
912
/* Check if chr is already in the alist. */
913
this = *scm_read_hash_procedures;
917
if (scm_is_null (this))
919
/* not found, so add it to the beginning. */
920
if (scm_is_true (proc))
922
*scm_read_hash_procedures =
923
scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
927
if (scm_is_eq (chr, SCM_CAAR (this)))
929
/* already in the alist. */
930
if (scm_is_false (proc))
933
if (scm_is_false (prev))
935
*scm_read_hash_procedures =
936
SCM_CDR (*scm_read_hash_procedures);
939
scm_set_cdr_x (prev, SCM_CDR (this));
944
scm_set_cdr_x (SCM_CAR (this), proc);
949
this = SCM_CDR (this);
952
return SCM_UNSPECIFIED;
956
/* Recover the read-hash procedure corresponding to char c. */
958
scm_get_hash_procedure (int c)
960
SCM rest = *scm_read_hash_procedures;
964
if (scm_is_null (rest))
967
if (SCM_CHAR (SCM_CAAR (rest)) == c)
968
return SCM_CDAR (rest);
970
rest = SCM_CDR (rest);
977
scm_read_hash_procedures =
978
SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
980
scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS);
981
#include "libguile/read.x"