1
/* ------------------------------------------------------------------------- */
2
/* "lexer" : Lexical analyser */
4
/* Part of Inform 6.30 */
5
/* copyright (c) Graham Nelson 1993 - 2004 */
7
/* ------------------------------------------------------------------------- */
11
int total_source_line_count, /* Number of source lines so far */
13
no_hash_printed_yet, /* Have not yet printed the first # */
14
hash_printed_since_newline, /* A hash has been printed since the
15
most recent new-line was printed
16
(generally as a result of an error
17
message or the start of pass) */
18
dont_enter_into_symbol_table, /* Return names as text (with
19
token type DQ_TT, i.e., as if
20
they had double-quotes around)
21
and not as entries in the symbol
23
return_sp_as_variable; /* When TRUE, the word "sp" denotes
24
the stack pointer variable
25
(used in assembly language only) */
26
int next_token_begins_syntax_line; /* When TRUE, start a new syntax
27
line (for error reporting, etc.)
28
on the source code line where
29
the next token appears */
31
int32 last_mapped_line; /* Last syntax line reported to debugging file */
33
/* ------------------------------------------------------------------------- */
34
/* The lexer's output is a sequence of triples, each called a "token", */
35
/* representing one lexical unit (or "lexeme") each. Instead of providing */
36
/* "lookahead" (that is, always having available the next token after the */
37
/* current one, so that syntax analysers higher up in Inform can have */
38
/* advance knowledge of what is coming), the lexer instead has a system */
39
/* where tokens can be read in and then "put back again". */
40
/* The meaning of the number (and to some extent the text) supplied with */
41
/* a token depends on its type: see "header.h" for the list of types. */
42
/* For example, the lexeme "$1e3" is understood by Inform as a hexadecimal */
43
/* number, and translated to the token: */
44
/* type NUMBER_TT, value 483, text "$1e3" */
45
/* ------------------------------------------------------------------------- */
46
/* These four variables are set to the current token on a call to */
47
/* get_next_token() (but are not changed by a call to put_token_back()). */
48
/* ------------------------------------------------------------------------- */
50
int token_type; int32 token_value; char *token_text; dbgl token_line_ref;
52
/* ------------------------------------------------------------------------- */
53
/* In order to be able to put tokens back efficiently, the lexer stores */
54
/* tokens in a "circle": the variable circle_position ranges between */
55
/* 0 and CIRCLE_SIZE-1. We only need a circle size as large as the */
56
/* maximum number of tokens ever put back at once, plus 1 (in effect, the */
57
/* maximum token lookahead ever needed in syntax analysis, plus 1). */
59
/* Unlike some compilers, Inform does not have a context-free lexer: in */
60
/* fact it has 12288 different possible states. However, the context only */
61
/* affects the interpretation of "identifiers": lexemes beginning with a */
62
/* letter and containing up to 32 chars of alphanumeric and underscore */
63
/* chars. (For example, "default" may refer to the directive or statement */
64
/* of that name, and which token values are returned depends on the */
65
/* current lexical context.) */
67
/* Along with each token, we also store the lexical context it was */
68
/* translated under; because if it is called for again, there may need */
69
/* to be a fresh interpretation of it if the context has changed. */
70
/* ------------------------------------------------------------------------- */
74
/* (The worst case for token lookahead is distinguishing between an
75
old-style "objectloop (a in b)" and a new "objectloop (a in b ...)".) */
77
static int circle_position;
78
static token_data circle[CIRCLE_SIZE];
80
static int token_contexts[CIRCLE_SIZE];
82
/* ------------------------------------------------------------------------- */
83
/* A complication, however, is that the text of some lexemes needs to be */
84
/* held in Inform's memory for much longer periods: for example, a */
85
/* dictionary word lexeme (like "'south'") must have its text preserved */
86
/* until the code generation time for the expression it occurs in, when */
87
/* the dictionary reference is actually made. Code generation in general */
88
/* occurs as early as possible in Inform: pending some better method of */
89
/* garbage collection, we simply use a buffer so large that unless */
90
/* expressions spread across 10K of source code are found, there can be */
92
/* ------------------------------------------------------------------------- */
94
static char *lexeme_memory;
95
static char *lex_p; /* Current write position */
97
/* ------------------------------------------------------------------------- */
98
/* The lexer itself needs up to 3 characters of lookahead (it uses an */
99
/* LR(3) grammar to translate characters into tokens). */
100
/* ------------------------------------------------------------------------- */
102
static int current, lookahead, /* The latest character read, and */
103
lookahead2, lookahead3; /* the three characters following it */
105
static int pipeline_made; /* Whether or not the pipeline of
106
characters has been constructed
109
static int (* get_next_char)(void); /* Routine for reading the stream of
110
characters: the lexer does not
111
need any "ungetc" routine for
112
putting them back again. End of
113
stream is signalled by returning
116
static char *source_to_analyse; /* The current lexical source:
117
NULL for "load from source files",
118
otherwise this points to a string
119
containing Inform code */
121
static int tokens_put_back; /* Count of the number of backward
122
moves made from the last-read
125
extern void describe_token(token_data t)
{   /*  Many of the token types are not set in this file, but later on in
        Inform's higher stages (for example, in the expression evaluator);
        but this routine describes them all.                                */

    /*  NOTE(review): switch scaffolding, break statements and the brace
        delimiters were reconstructed; every printf format string below is
        preserved exactly from the original.                                */

    printf("{");

    switch(t.type)
    {
        /*  The following token types occur in lexer output:                */

        case SYMBOL_TT:          printf("symbol ");
                                 describe_symbol(t.value);
                                 break;
        case NUMBER_TT:          printf("literal number %d", t.value);
                                 break;
        case DQ_TT:              printf("string \"%s\"", t.text);
                                 break;
        case SQ_TT:              printf("string '%s'", t.text);
                                 break;
        case SEP_TT:             printf("separator '%s'", t.text);
                                 break;
        case EOF_TT:             printf("end of file");
                                 break;

        case STATEMENT_TT:       printf("statement name '%s'", t.text);
                                 break;
        case SEGMENT_MARKER_TT:  printf("object segment marker '%s'", t.text);
                                 break;
        case DIRECTIVE_TT:       printf("directive name '%s'", t.text);
                                 break;
        case CND_TT:             printf("textual conditional '%s'", t.text);
                                 break;
        case OPCODE_NAME_TT:     printf("opcode name '%s'", t.text);
                                 break;
        case SYSFUN_TT:          printf("built-in function name '%s'", t.text);
                                 break;
        case LOCAL_VARIABLE_TT:  printf("local variable name '%s'", t.text);
                                 break;
        case MISC_KEYWORD_TT:    printf("statement keyword '%s'", t.text);
                                 break;
        case DIR_KEYWORD_TT:     printf("directive keyword '%s'", t.text);
                                 break;
        case TRACE_KEYWORD_TT:   printf("'trace' keyword '%s'", t.text);
                                 break;
        case SYSTEM_CONSTANT_TT: printf("system constant name '%s'", t.text);
                                 break;

        /*  The remaining are etoken types, not set by the lexer            */

        case OP_TT:              printf("operator '%s'",
                                     operators[t.value].description);
                                 break;
        case ENDEXP_TT:          printf("end of expression");
                                 break;
        case SUBOPEN_TT:         printf("open bracket");
                                 break;
        case SUBCLOSE_TT:        printf("close bracket");
                                 break;
        case LARGE_NUMBER_TT:    printf("large number: '%s'=%d",t.text,t.value);
                                 break;
        case SMALL_NUMBER_TT:    printf("small number: '%s'=%d",t.text,t.value);
                                 break;
        case VARIABLE_TT:        printf("variable '%s'=%d", t.text, t.value);
                                 break;
        case DICTWORD_TT:        printf("dictionary word '%s'", t.text);
                                 break;
        case ACTION_TT:          printf("action name '%s'", t.text);
                                 break;

        default:
            printf("** unknown token type %d, text='%s', value=%d **",
                t.type, t.text, t.value);
            break;
    }
    printf("}");
}
203
/* ------------------------------------------------------------------------- */
204
/* All but one of the 280 Inform keywords (118 of them opcode names used */
205
/* only by the assembler). (The one left over is "sp", a keyword used in */
206
/* assembly language only.) */
208
/* A "keyword group" is a set of keywords to be searched for. If a match */
209
/* is made on an identifier, the token type becomes that given in the KG */
210
/* and the token value is its index in the KG. */
212
/* The keyword ordering must correspond with the appropriate #define's in */
213
/* "header.h" but is otherwise not significant. */
214
/* ------------------------------------------------------------------------- */
216
#define MAX_KEYWORDS 350
218
/* The values will be filled in at compile time, when we know
219
which opcode set to use. */
220
keyword_group opcode_names =
222
OPCODE_NAME_TT, FALSE, TRUE
225
static char *opcode_list_z[] = {
226
"je", "jl", "jg", "dec_chk", "inc_chk", "jin", "test", "or", "and",
227
"test_attr", "set_attr", "clear_attr", "store", "insert_obj", "loadw",
228
"loadb", "get_prop", "get_prop_addr", "get_next_prop", "add", "sub",
229
"mul", "div", "mod", "call", "storew", "storeb", "put_prop", "sread",
230
"print_char", "print_num", "random", "push", "pull", "split_window",
231
"set_window", "output_stream", "input_stream", "sound_effect", "jz",
232
"get_sibling", "get_child", "get_parent", "get_prop_len", "inc", "dec",
233
"print_addr", "remove_obj", "print_obj", "ret", "jump", "print_paddr",
234
"load", "not", "rtrue", "rfalse", "print", "print_ret", "nop", "save",
235
"restore", "restart", "ret_popped", "pop", "quit", "new_line",
236
"show_status", "verify", "call_2s", "call_vs", "aread", "call_vs2",
237
"erase_window", "erase_line", "set_cursor", "get_cursor",
238
"set_text_style", "buffer_mode", "read_char", "scan_table", "call_1s",
239
"call_2n", "set_colour", "throw", "call_vn", "call_vn2", "tokenise",
240
"encode_text", "copy_table", "print_table", "check_arg_count", "call_1n",
241
"catch", "piracy", "log_shift", "art_shift", "set_font", "save_undo",
242
"restore_undo", "draw_picture", "picture_data", "erase_picture",
243
"set_margins", "move_window", "window_size", "window_style",
244
"get_wind_prop", "scroll_window", "pop_stack", "read_mouse",
245
"mouse_window", "push_stack", "put_wind_prop", "print_form",
246
"make_menu", "picture_table", "print_unicode", "check_unicode",
250
static char *opcode_list_g[] = {
251
"nop", "add", "sub", "mul", "div", "mod", "neg", "bitand", "bitor",
252
"bitxor", "bitnot", "shiftl", "sshiftr", "ushiftr", "jump", "jz",
253
"jnz", "jeq", "jne", "jlt", "jge", "jgt", "jle",
254
"jltu", "jgeu", "jgtu", "jleu",
256
"catch", "throw", "tailcall",
257
"copy", "copys", "copyb", "sexs", "sexb", "aload",
258
"aloads", "aloadb", "aloadbit", "astore", "astores", "astoreb",
259
"astorebit", "stkcount", "stkpeek", "stkswap", "stkroll", "stkcopy",
260
"streamchar", "streamnum", "streamstr",
261
"gestalt", "debugtrap", "getmemsize", "setmemsize", "jumpabs",
262
"random", "setrandom", "quit", "verify",
263
"restart", "save", "restore", "saveundo", "restoreundo", "protect",
264
"glk", "getstringtbl", "setstringtbl", "getiosys", "setiosys",
265
"linearsearch", "binarysearch", "linkedsearch",
266
"callf", "callfi", "callfii", "callfiii",
270
keyword_group directives =
271
{ { "abbreviate", "array", "attribute", "class", "constant",
272
"default", "dictionary", "end", "endif", "extend", "fake_action",
273
"global", "ifdef", "ifndef", "ifnot", "ifv3", "ifv5", "iftrue",
274
"iffalse", "import", "include", "link", "lowstring", "message",
275
"nearby", "object", "property", "release", "replace",
276
"serial", "switches", "statusline", "stub", "system_file", "trace",
277
"verb", "version", "zcharacter",
279
DIRECTIVE_TT, FALSE, FALSE
282
keyword_group trace_keywords =
283
{ { "dictionary", "symbols", "objects", "verbs",
284
"assembly", "expressions", "lines", "tokens", "linker",
286
TRACE_KEYWORD_TT, FALSE, TRUE
289
keyword_group segment_markers =
290
{ { "class", "has", "private", "with", "" },
291
SEGMENT_MARKER_TT, FALSE, TRUE
294
keyword_group directive_keywords =
295
{ { "alias", "long", "additive",
297
"noun", "held", "multi", "multiheld", "multiexcept",
298
"multiinside", "creature", "special", "number", "scope", "topic",
299
"reverse", "meta", "only", "replace", "first", "last",
300
"string", "table", "buffer", "data", "initial", "initstr",
301
"with", "private", "has", "class",
302
"error", "fatalerror", "warning",
305
DIR_KEYWORD_TT, FALSE, TRUE
308
keyword_group misc_keywords =
309
{ { "char", "name", "the", "a", "an", "The", "number",
310
"roman", "reverse", "bold", "underline", "fixed", "on", "off",
311
"to", "address", "string", "object", "near", "from", "property", "A", "" },
312
MISC_KEYWORD_TT, FALSE, TRUE
315
keyword_group statements =
316
{ { "box", "break", "continue", "default", "do", "else", "font", "for",
317
"give", "if", "inversion", "jump", "move", "new_line", "objectloop",
318
"print", "print_ret", "quit", "read", "remove", "restore", "return",
319
"rfalse", "rtrue", "save", "spaces", "string", "style", "switch",
320
"until", "while", "" },
321
STATEMENT_TT, FALSE, TRUE
324
keyword_group conditions =
325
{ { "has", "hasnt", "in", "notin", "ofclass", "or", "provides", "" },
329
keyword_group system_functions =
330
{ { "child", "children", "elder", "eldest", "indirect", "parent", "random",
331
"sibling", "younger", "youngest", "metaclass", "glk", "" },
332
SYSFUN_TT, FALSE, TRUE
335
keyword_group system_constants =
336
{ { "adjectives_table", "actions_table", "classes_table",
337
"identifiers_table", "preactions_table", "version_number",
338
"largest_object", "strings_offset", "code_offset",
339
"dict_par1", "dict_par2", "dict_par3", "actual_largest_object",
340
"static_memory_offset", "array_names_offset", "readable_memory_offset",
341
"cpv__start", "cpv__end", "ipv__start", "ipv__end",
342
"array__start", "array__end",
343
"lowest_attribute_number", "highest_attribute_number",
344
"attribute_names_array",
345
"lowest_property_number", "highest_property_number",
346
"property_names_array",
347
"lowest_action_number", "highest_action_number",
348
"action_names_array",
349
"lowest_fake_action_number", "highest_fake_action_number",
350
"fake_action_names_array",
351
"lowest_routine_number", "highest_routine_number", "routines_array",
352
"routine_names_array", "routine_flags_array",
353
"lowest_global_number", "highest_global_number", "globals_array",
354
"global_names_array", "global_flags_array",
355
"lowest_array_number", "highest_array_number", "arrays_array",
356
"array_names_array", "array_flags_array",
357
"lowest_constant_number", "highest_constant_number", "constants_array",
358
"constant_names_array",
359
"lowest_class_number", "highest_class_number", "class_objects_array",
360
"lowest_object_number", "highest_object_number",
362
"grammar_table", "dictionary_table", "dynam_string_table",
364
SYSTEM_CONSTANT_TT, FALSE, TRUE
367
keyword_group *keyword_groups[11]
368
= { NULL, &opcode_names, &directives, &trace_keywords, &segment_markers,
369
&directive_keywords, &misc_keywords, &statements, &conditions,
370
&system_functions, &system_constants};
372
keyword_group local_variables =
373
{ { "" }, /* Filled in when routine declared */
374
LOCAL_VARIABLE_TT, FALSE, FALSE
377
static int lexical_context(void)
379
/* The lexical context is a number representing all of the context
380
information in the lexical analyser: the same input text will
381
always translate to the same output tokens whenever the context
384
In fact, for efficiency reasons this number omits the bit of
385
information held in the variable "dont_enter_into_symbol_table".
386
Inform never needs to backtrack through tokens parsed in that
387
way (thankfully, as it would be expensive indeed to check
391
if (opcode_names.enabled) c |= 1;
392
if (directives.enabled) c |= 2;
393
if (trace_keywords.enabled) c |= 4;
394
if (segment_markers.enabled) c |= 8;
395
if (directive_keywords.enabled) c |= 16;
396
if (misc_keywords.enabled) c |= 32;
397
if (statements.enabled) c |= 64;
398
if (conditions.enabled) c |= 128;
399
if (system_functions.enabled) c |= 256;
400
if (system_constants.enabled) c |= 512;
401
if (local_variables.enabled) c |= 1024;
403
if (return_sp_as_variable) c |= 2048;
407
/*  Print an abbreviated, human-readable form of a lexical context number
    (used when tracing tokens); one tag per bit set in c.                   */

static void print_context(int c)
{   if ((c & 1) != 0)    printf("OPC ");
    if ((c & 2) != 0)    printf("DIR ");
    if ((c & 4) != 0)    printf("TK ");
    if ((c & 8) != 0)    printf("SEG ");
    if ((c & 16) != 0)   printf("DK ");
    if ((c & 32) != 0)   printf("MK ");
    if ((c & 64) != 0)   printf("STA ");
    if ((c & 128) != 0)  printf("CND ");
    if ((c & 256) != 0)  printf("SFUN ");
    if ((c & 512) != 0)  printf("SCON ");
    if ((c & 1024) != 0) printf("LV ");
    if ((c & 2048) != 0) printf("sp ");
}
423
static int *keywords_hash_table;
424
static int *keywords_hash_ends_table;
425
static int *keywords_data_table;
427
static int *local_variable_hash_table;
428
static int *local_variable_hash_codes;
429
char **local_variable_texts;
430
static char *local_variable_text_table;
432
static char one_letter_locals[128];
434
static void make_keywords_tables(void)
439
oplist = opcode_list_z;
441
oplist = opcode_list_g;
443
for (j=0; *(oplist[j]); j++) {
444
opcode_names.keywords[j] = oplist[j];
446
opcode_names.keywords[j] = "";
448
for (i=0; i<HASH_TAB_SIZE; i++)
449
{ keywords_hash_table[i] = -1;
450
keywords_hash_ends_table[i] = -1;
453
for (i=1; i<=10; i++)
454
{ keyword_group *kg = keyword_groups[i];
455
for (j=0; *(kg->keywords[j]) != 0; j++)
456
{ h = hash_code_from_string(kg->keywords[j]);
457
if (keywords_hash_table[h] == -1)
458
keywords_hash_table[h] = tp;
460
*(keywords_data_table + 3*(keywords_hash_ends_table[h]) + 2) = tp;
461
keywords_hash_ends_table[h] = tp;
462
*(keywords_data_table + 3*tp) = i;
463
*(keywords_data_table + 3*tp+1) = j;
464
*(keywords_data_table + 3*tp+2) = -1;
470
extern void construct_local_variable_tables(void)
471
{ int i, h; char *p = local_variable_text_table;
472
for (i=0; i<HASH_TAB_SIZE; i++) local_variable_hash_table[i] = -1;
473
for (i=0; i<128; i++) one_letter_locals[i] = MAX_LOCAL_VARIABLES;
475
for (i=0; i<no_locals; i++)
476
{ char *q = local_variables.keywords[i];
478
{ one_letter_locals[q[0]] = i;
479
if (isupper(q[0])) one_letter_locals[tolower(q[0])] = i;
480
if (islower(q[0])) one_letter_locals[toupper(q[0])] = i;
482
h = hash_code_from_string(q);
483
if (local_variable_hash_table[h] == -1)
484
local_variable_hash_table[h] = i;
485
local_variable_hash_codes[i] = h;
486
local_variable_texts[i] = p;
490
for (;i<MAX_LOCAL_VARIABLES-1;i++)
491
local_variable_texts[i] = "<no such local variable>";
494
static void interpret_identifier(int pos, int dirs_only_flag)
495
{ int index, hashcode; char *p = circle[pos].text;
497
/* An identifier is either a keyword or a "symbol", a name which the
498
lexical analyser leaves to higher levels of Inform to understand. */
500
hashcode = hash_code_from_string(p);
502
if (dirs_only_flag) goto KeywordSearch;
504
/* If this is assembly language, perhaps it is "sp"? */
506
if (return_sp_as_variable && (p[0]=='s') && (p[1]=='p') && (p[2]==0))
507
{ circle[pos].value = 0; circle[pos].type = LOCAL_VARIABLE_TT;
511
/* Test for local variables first, quite quickly. */
513
if (local_variables.enabled)
515
{ index = one_letter_locals[p[0]];
516
if (index<MAX_LOCAL_VARIABLES)
517
{ circle[pos].type = LOCAL_VARIABLE_TT;
518
circle[pos].value = index+1;
522
index = local_variable_hash_table[hashcode];
524
{ for (;index<no_locals;index++)
525
{ if (hashcode == local_variable_hash_codes[index])
526
{ if (strcmpcis(p, local_variable_texts[index])==0)
527
{ circle[pos].type = LOCAL_VARIABLE_TT;
528
circle[pos].value = index+1;
536
/* Now the bulk of the keywords. Note that the lexer doesn't recognise
537
the name of a system function which has been Replaced. */
540
index = keywords_hash_table[hashcode];
542
{ int *i = keywords_data_table + 3*index;
543
keyword_group *kg = keyword_groups[*i];
544
if (((!dirs_only_flag) && (kg->enabled))
545
|| (dirs_only_flag && (kg == &directives)))
546
{ char *q = kg->keywords[*(i+1)];
547
if (((kg->case_sensitive) && (strcmp(p, q)==0))
548
|| ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0)))
549
{ if ((kg != &system_functions)
550
|| (system_function_usage[*(i+1)]!=2))
551
{ circle[pos].type = kg->change_token_type;
552
circle[pos].value = *(i+1);
560
if (dirs_only_flag) return;
562
/* Search for the name; create it if necessary. */
564
circle[pos].value = symbol_index(p, hashcode);
565
circle[pos].type = SYMBOL_TT;
569
/* ------------------------------------------------------------------------- */
570
/* The tokeniser grid aids a rapid decision about the consequences of a */
571
/* character reached in the buffer. In effect it is an efficiently stored */
572
/* transition table using an algorithm similar to that of S. C. Johnson's */
573
/* "yacc" lexical analyser (see Aho, Sethi and Ullman, section 3.9). */
574
/* My thanks to Dilip Sequeira for suggesting this. */
576
/* tokeniser_grid[c] is (16*n + m) if c is the first character of */
577
/* separator numbers n, n+1, ..., n+m-1 */
578
/* or certain special values (QUOTE_CODE, etc) */
581
/* Since 1000/16 = 62, the code numbers below will need increasing if the */
582
/* number of separators supported exceeds 61. */
583
/* ------------------------------------------------------------------------- */
585
static int tokeniser_grid[256];
587
#define QUOTE_CODE 1000
588
#define DQUOTE_CODE 1001
589
#define NULL_CODE 1002
590
#define SPACE_CODE 1003
591
#define NEGATIVE_CODE 1004
592
#define DIGIT_CODE 1005
593
#define RADIX_CODE 1006
594
#define KEYWORD_CODE 1007
595
#define EOF_CODE 1008
596
#define WHITESPACE_CODE 1009
597
#define COMMENT_CODE 1010
598
#define IDENTIFIER_CODE 1011
600
/* This list cannot safely be changed without also changing the header
601
separator #defines. The ordering is significant in that (i) all entries
602
beginning with the same character must be adjacent and (ii) that if
603
X is a an initial substring of Y then X must come before Y.
605
E.g. --> must occur before -- to prevent "-->0" being tokenised
606
wrongly as "--", ">", "0" rather than "-->", "0". */
608
static const char separators[NUMBER_SEPARATORS][4] =
609
{ "->", "-->", "--", "-", "++", "+", "*", "/", "%",
610
"||", "|", "&&", "&", "~~",
611
"~=", "~", "==", "=", ">=", ">",
612
"<=", "<", "(", ")", ",",
613
".&", ".#", "..&", "..#", "..", ".",
614
"::", ":", "@", ";", "[", "]", "{", "}",
616
"#a$", "#n$", "#r$", "#w$", "##", "#"
619
static void make_tokeniser_grid(void)
621
/* Construct the grid to the specification above. */
625
for (i=0; i<256; i++) tokeniser_grid[i]=0;
627
for (i=0; i<NUMBER_SEPARATORS; i++)
628
{ j=separators[i][0];
629
if (tokeniser_grid[j]==0)
630
tokeniser_grid[j]=i*16+1; else tokeniser_grid[j]++;
632
tokeniser_grid['\''] = QUOTE_CODE;
633
tokeniser_grid['\"'] = DQUOTE_CODE;
634
tokeniser_grid[0] = EOF_CODE;
635
tokeniser_grid[' '] = WHITESPACE_CODE;
636
tokeniser_grid['\n'] = WHITESPACE_CODE;
637
tokeniser_grid['$'] = RADIX_CODE;
638
tokeniser_grid['!'] = COMMENT_CODE;
640
tokeniser_grid['0'] = DIGIT_CODE;
641
tokeniser_grid['1'] = DIGIT_CODE;
642
tokeniser_grid['2'] = DIGIT_CODE;
643
tokeniser_grid['3'] = DIGIT_CODE;
644
tokeniser_grid['4'] = DIGIT_CODE;
645
tokeniser_grid['5'] = DIGIT_CODE;
646
tokeniser_grid['6'] = DIGIT_CODE;
647
tokeniser_grid['7'] = DIGIT_CODE;
648
tokeniser_grid['8'] = DIGIT_CODE;
649
tokeniser_grid['9'] = DIGIT_CODE;
651
tokeniser_grid['a'] = IDENTIFIER_CODE;
652
tokeniser_grid['b'] = IDENTIFIER_CODE;
653
tokeniser_grid['c'] = IDENTIFIER_CODE;
654
tokeniser_grid['d'] = IDENTIFIER_CODE;
655
tokeniser_grid['e'] = IDENTIFIER_CODE;
656
tokeniser_grid['f'] = IDENTIFIER_CODE;
657
tokeniser_grid['g'] = IDENTIFIER_CODE;
658
tokeniser_grid['h'] = IDENTIFIER_CODE;
659
tokeniser_grid['i'] = IDENTIFIER_CODE;
660
tokeniser_grid['j'] = IDENTIFIER_CODE;
661
tokeniser_grid['k'] = IDENTIFIER_CODE;
662
tokeniser_grid['l'] = IDENTIFIER_CODE;
663
tokeniser_grid['m'] = IDENTIFIER_CODE;
664
tokeniser_grid['n'] = IDENTIFIER_CODE;
665
tokeniser_grid['o'] = IDENTIFIER_CODE;
666
tokeniser_grid['p'] = IDENTIFIER_CODE;
667
tokeniser_grid['q'] = IDENTIFIER_CODE;
668
tokeniser_grid['r'] = IDENTIFIER_CODE;
669
tokeniser_grid['s'] = IDENTIFIER_CODE;
670
tokeniser_grid['t'] = IDENTIFIER_CODE;
671
tokeniser_grid['u'] = IDENTIFIER_CODE;
672
tokeniser_grid['v'] = IDENTIFIER_CODE;
673
tokeniser_grid['w'] = IDENTIFIER_CODE;
674
tokeniser_grid['x'] = IDENTIFIER_CODE;
675
tokeniser_grid['y'] = IDENTIFIER_CODE;
676
tokeniser_grid['z'] = IDENTIFIER_CODE;
678
tokeniser_grid['A'] = IDENTIFIER_CODE;
679
tokeniser_grid['B'] = IDENTIFIER_CODE;
680
tokeniser_grid['C'] = IDENTIFIER_CODE;
681
tokeniser_grid['D'] = IDENTIFIER_CODE;
682
tokeniser_grid['E'] = IDENTIFIER_CODE;
683
tokeniser_grid['F'] = IDENTIFIER_CODE;
684
tokeniser_grid['G'] = IDENTIFIER_CODE;
685
tokeniser_grid['H'] = IDENTIFIER_CODE;
686
tokeniser_grid['I'] = IDENTIFIER_CODE;
687
tokeniser_grid['J'] = IDENTIFIER_CODE;
688
tokeniser_grid['K'] = IDENTIFIER_CODE;
689
tokeniser_grid['L'] = IDENTIFIER_CODE;
690
tokeniser_grid['M'] = IDENTIFIER_CODE;
691
tokeniser_grid['N'] = IDENTIFIER_CODE;
692
tokeniser_grid['O'] = IDENTIFIER_CODE;
693
tokeniser_grid['P'] = IDENTIFIER_CODE;
694
tokeniser_grid['Q'] = IDENTIFIER_CODE;
695
tokeniser_grid['R'] = IDENTIFIER_CODE;
696
tokeniser_grid['S'] = IDENTIFIER_CODE;
697
tokeniser_grid['T'] = IDENTIFIER_CODE;
698
tokeniser_grid['U'] = IDENTIFIER_CODE;
699
tokeniser_grid['V'] = IDENTIFIER_CODE;
700
tokeniser_grid['W'] = IDENTIFIER_CODE;
701
tokeniser_grid['X'] = IDENTIFIER_CODE;
702
tokeniser_grid['Y'] = IDENTIFIER_CODE;
703
tokeniser_grid['Z'] = IDENTIFIER_CODE;
705
tokeniser_grid['_'] = IDENTIFIER_CODE;
708
/* ------------------------------------------------------------------------- */
709
/* Definition of a lexical block: a source file or a string containing */
710
/* text for lexical analysis; an independent source from the point of */
711
/* view of issuing error reports. */
712
/* ------------------------------------------------------------------------- */
714
typedef struct LexicalBlock_s
715
{ char *filename; /* Full translated name */
716
int main_flag; /* TRUE if the main file
717
(the first one opened) */
718
int sys_flag; /* TRUE if a System_File */
719
int source_line; /* Line number count */
720
int line_start; /* Char number within file
721
where the current line
723
int chars_read; /* Char number of read pos */
724
int file_no; /* Or 255 if not from a
729
static LexicalBlock NoFileOpen =
730
{ "<before compilation>", FALSE, FALSE, 0, 0, 0, 255 };
732
static LexicalBlock MakingOutput =
733
{ "<constructing output>", FALSE, FALSE, 0, 0, 0, 255 };
735
static LexicalBlock StringLB =
736
{ "<veneer routine>", FALSE, TRUE, 0, 0, 0, 255 };
738
static LexicalBlock *CurrentLB; /* The current lexical
739
block of input text */
741
extern void declare_systemfile(void)
742
{ CurrentLB->sys_flag = TRUE;
745
extern int is_systemfile(void)
746
{ return ((CurrentLB->sys_flag)?1:0);
749
extern dbgl get_current_dbgl(void)
751
X.b1 = CurrentLB->file_no;
752
X.b2 = (CurrentLB->source_line)/256;
753
X.b3 = (CurrentLB->source_line)%256;
754
n = CurrentLB->chars_read - CurrentLB->line_start;
760
static dbgl ErrorReport_dbgl;
762
extern void report_errors_at_current_line(void)
763
{ ErrorReport.line_number = CurrentLB->source_line;
764
ErrorReport.file_number = CurrentLB->file_no;
765
if (ErrorReport.file_number == 255)
766
ErrorReport.file_number = -1;
767
ErrorReport.source = CurrentLB->filename;
768
ErrorReport.main_flag = CurrentLB->main_flag;
769
if (debugfile_switch)
770
ErrorReport_dbgl = get_current_dbgl();
773
extern dbgl get_error_report_dbgl(void)
774
{ return ErrorReport_dbgl;
777
extern int32 get_current_line_start(void)
778
{ return CurrentLB->line_start;
781
/* ------------------------------------------------------------------------- */
782
/* Hash printing and line counting */
783
/* ------------------------------------------------------------------------- */
785
static void print_hash(void)
787
/* Hash-printing is the practice of printing a # character every 100
788
lines of source code (the -x switch), reassuring the user that
789
progress is being made */
791
if (no_hash_printed_yet)
792
{ printf("::"); no_hash_printed_yet = FALSE;
794
printf("#"); hash_printed_since_newline = TRUE;
797
/* On some systems, text output is buffered to a line at a time, and
798
this would frustrate the point of hash-printing, so: */
804
static void reached_new_line(void)
806
/* Called to signal that a new line has been reached in the source code */
808
forerrors_pointer = 0;
810
CurrentLB->source_line++;
811
CurrentLB->line_start = CurrentLB->chars_read;
813
total_source_line_count++;
815
if (total_source_line_count%100==0)
816
{ if (hash_switch) print_hash();
818
SpinCursor(32); /* I.e., allow other tasks to run */
823
if (total_source_line_count%((**g_pm_hndl).linespercheck) == 0)
824
{ ProcessEvents (&g_proc);
828
if (temporary_files_switch)
831
my_free(&all_text,"transcription text");
832
abort_transcript_file();
833
longjmp (g_fallback, 1);
839
static void new_syntax_line(void)
840
{ if (source_to_analyse != NULL) forerrors_pointer = 0;
841
report_errors_at_current_line();
844
/* ------------------------------------------------------------------------- */
845
/* Characters are read via a "pipeline" of variables, allowing us to look */
846
/* up to three characters ahead of the current position. */
848
/* There are two possible sources: from the source files being loaded in, */
849
/* and from a string inside Inform (which is where the code for veneer */
850
/* routines comes from). Each source has its own get-next-character */
852
/* ------------------------------------------------------------------------- */
853
/* Source 1: from files */
855
/* Note that file_load_chars(p, size) loads "size" bytes into buffer "p" */
856
/* from the current input file. If the file runs out, then if it was */
857
/* the last source file 4 EOF characters are placed in the buffer: if it */
858
/* was only an Include file ending, then a '\n' character is placed there */
859
/* (essentially to force termination of any comment line) followed by */
860
/* three harmless spaces. */
862
/* The routine returns the number of characters it has written, and note */
863
/* that this conveniently ensures that all characters in the buffer come */
864
/* from the same file. */
865
/* ------------------------------------------------------------------------- */
867
#define SOURCE_BUFFER_SIZE 4096 /* Typical disc block size */
869
typedef struct Sourcefile_s
870
{ char *buffer; /* Input buffer */
871
int read_pos; /* Read position in buffer */
872
int size; /* Number of meaningful
873
characters in buffer */
874
int la, la2, la3; /* Three characters of
875
lookahead pipeline */
876
int file_no; /* Internal file number
881
static Sourcefile *FileStack;
882
static int File_sp; /* Stack pointer */
884
static Sourcefile *CF; /* Top entry on stack */
886
static int last_no_files;
888
static void begin_buffering_file(int i, int file_no)
889
{ int j, cnt; uchar *p;
891
if (i >= MAX_INCLUSION_DEPTH)
892
memoryerror("MAX_INCLUSION_DEPTH",MAX_INCLUSION_DEPTH);
894
p = (uchar *) FileStack[i].buffer;
897
{ FileStack[i-1].la = lookahead;
898
FileStack[i-1].la2 = lookahead2;
899
FileStack[i-1].la3 = lookahead3;
902
FileStack[i].file_no = file_no;
903
FileStack[i].size = file_load_chars(file_no,
904
(char *) p, SOURCE_BUFFER_SIZE);
905
lookahead = source_to_iso_grid[p[0]];
906
lookahead2 = source_to_iso_grid[p[1]];
907
lookahead3 = source_to_iso_grid[p[2]];
908
FileStack[i].read_pos = 3;
910
if (file_no==1) FileStack[i].LB.main_flag = TRUE;
911
else FileStack[i].LB.main_flag = FALSE;
912
FileStack[i].LB.sys_flag = FALSE;
913
FileStack[i].LB.source_line = 1;
914
FileStack[i].LB.line_start = 0;
915
FileStack[i].LB.chars_read = 3;
916
FileStack[i].LB.filename = InputFiles[file_no-1].filename;
917
FileStack[i].LB.file_no = file_no;
919
CurrentLB = &(FileStack[i].LB);
920
CF = &(FileStack[i]);
922
/* Check for recursive inclusion */
925
{ if (!strcmp(FileStack[i].LB.filename, FileStack[j].LB.filename))
929
warning_named("File included more than once",
930
FileStack[j].LB.filename);
933
static void create_char_pipeline(void)
936
begin_buffering_file(File_sp++, 1);
937
pipeline_made = TRUE; last_no_files = input_file;
940
static int get_next_char_from_pipeline(void)
943
while (last_no_files < input_file)
945
/* An "Include" file must have opened since the last character
948
begin_buffering_file(File_sp++, ++last_no_files);
950
last_no_files = input_file;
953
{ lookahead = 0; lookahead2 = 0; lookahead3 = 0; return 0;
956
if (CF->read_pos == CF->size)
958
file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
962
if (CF->read_pos == -(CF->size))
965
{ lookahead = 0; lookahead2 = 0; lookahead3 = 0; return 0;
967
CF = &(FileStack[File_sp-1]);
968
CurrentLB = &(FileStack[File_sp-1].LB);
969
lookahead = CF->la; lookahead2 = CF->la2; lookahead3 = CF->la3;
970
if (CF->read_pos == CF->size)
972
file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
977
p = (uchar *) (CF->buffer);
980
lookahead = lookahead2;
981
lookahead2 = lookahead3;
982
lookahead3 = source_to_iso_grid[p[CF->read_pos++]];
984
CurrentLB->chars_read++;
985
if (forerrors_pointer < 511)
986
forerrors_buff[forerrors_pointer++] = current;
987
if (current == '\n') reached_new_line();
991
/* ------------------------------------------------------------------------- */
/*   Source 2: from a string                                                 */
/* ------------------------------------------------------------------------- */
static int source_to_analyse_pointer; /* Current read position */
997
static int get_next_char_from_string(void)
998
{ uchar *p = (uchar *) source_to_analyse + source_to_analyse_pointer++;
999
current = source_to_iso_grid[p[0]];
1001
if (current == 0) lookahead = 0;
1002
else lookahead = source_to_iso_grid[p[1]];
1003
if (lookahead == 0) lookahead2 = 0;
1004
else lookahead2 = source_to_iso_grid[p[2]];
1005
if (lookahead2 == 0) lookahead3 = 0;
1006
else lookahead3 = source_to_iso_grid[p[3]];
1008
CurrentLB->chars_read++;
1009
if (forerrors_pointer < 511)
1010
forerrors_buff[forerrors_pointer++] = current;
1011
if (current == '\n') reached_new_line();
1015
/* ========================================================================= */
/*   The interface between the lexer and Inform's higher levels:            */
/*                                                                           */
/*       put_token_back()            (effectively) move the read position    */
/*                                       back by one token                   */
/*                                                                           */
/*       get_next_token()            copy the token at the current read      */
/*                                       position into the triple            */
/*                                   (token_type, token_value, token_text)   */
/*                                       and move the read position forward  */
/*                                                                           */
/*       restart_lexer(source, name) if source is NULL, initialise the lexer */
/*                                       to read from source files;          */
/*                                       otherwise, to read from this string.*/
/* ------------------------------------------------------------------------- */
extern void put_token_back(void)
1033
{ tokens_put_back++;
1035
if (tokens_trace_level > 0)
1036
{ if (tokens_trace_level == 1) printf("<- ");
1037
else printf("<-\n");
1040
/* The following error, of course, should never happen! */
1042
if (tokens_put_back == CIRCLE_SIZE)
1043
{ compiler_error("The lexical analyser has collapsed because of a wrong \
1044
assumption inside Inform");
1050
extern void get_next_token(void)
1051
{ int d, i, j, k, quoted_size, e, radix, context; int32 n; char *r;
1053
context = lexical_context();
1055
if (tokens_put_back > 0)
1056
{ i = circle_position - tokens_put_back + 1;
1057
if (i<0) i += CIRCLE_SIZE;
1059
if (context != token_contexts[i])
1060
{ j = circle[i].type;
1061
if ((j==0) || ((j>=100) && (j<200)))
1062
interpret_identifier(i, FALSE);
1067
if (circle_position == CIRCLE_SIZE-1) circle_position = 0;
1068
else circle_position++;
1070
if (lex_p > lexeme_memory + 4*MAX_QTEXT_SIZE)
1071
lex_p = lexeme_memory;
1073
circle[circle_position].text = lex_p;
1074
circle[circle_position].value = 0;
1078
d = (*get_next_char)();
1079
e = tokeniser_grid[d];
1081
if (next_token_begins_syntax_line)
1082
{ if ((e != WHITESPACE_CODE) && (e != COMMENT_CODE))
1083
{ new_syntax_line();
1084
next_token_begins_syntax_line = FALSE;
1088
circle[circle_position].line_ref = get_current_dbgl();
1091
{ case 0: char_error("Illegal character found in source:", d);
1092
goto StartTokenAgain;
1094
case WHITESPACE_CODE:
1095
while (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1097
goto StartTokenAgain;
1100
while ((lookahead != '\n') && (lookahead != 0))
1102
goto StartTokenAgain;
1105
circle[circle_position].type = EOF_TT;
1106
strcpy(lex_p, "<end of file>");
1107
lex_p += strlen(lex_p) + 1;
1115
{ n = n*radix + character_digit_value[d];
1117
} while ((character_digit_value[lookahead] < radix)
1118
&& (d = (*get_next_char)(), TRUE));
1121
circle[circle_position].type = NUMBER_TT;
1122
circle[circle_position].value = n;
1126
radix = 16; d = (*get_next_char)();
1127
if (d == '$') { d = (*get_next_char)(); radix = 2; }
1128
if (character_digit_value[d] >= radix)
1130
error("Binary number expected after '$$'");
1132
error("Hexadecimal number expected after '$'");
1136
case QUOTE_CODE: /* Single-quotes: scan a literal string */
1139
{ e = d; d = (*get_next_char)(); *lex_p++ = d;
1140
if (quoted_size++==64)
1142
"Too much text for one pair of quotations '...' to hold");
1145
if ((d == '\'') && (e != '@'))
1146
{ if (quoted_size == 1)
1147
{ d = (*get_next_char)(); *lex_p++ = d;
1149
error("No text between quotation marks ''");
1154
if (d==EOF) ebf_error("'\''", "end of file");
1156
circle[circle_position].type = SQ_TT;
1159
case DQUOTE_CODE: /* Double-quotes: scan a literal string */
1162
{ d = (*get_next_char)(); *lex_p++ = d;
1163
if (quoted_size++==MAX_QTEXT_SIZE)
1165
"Too much text for one pair of quotations \"...\" to hold");
1170
while (*(lex_p-1) == ' ') lex_p--;
1171
if (*(lex_p-1) != '^') *lex_p++ = ' ';
1172
while ((lookahead != EOF) &&
1173
(tokeniser_grid[lookahead] == WHITESPACE_CODE))
1177
{ int newline_passed = FALSE;
1179
while ((lookahead != EOF) &&
1180
(tokeniser_grid[lookahead] == WHITESPACE_CODE))
1181
if ((d = (*get_next_char)()) == '\n')
1182
newline_passed = TRUE;
1183
if (!newline_passed)
1185
chb[0] = '\"'; chb[1] = lookahead;
1186
chb[2] = '\"'; chb[3] = 0;
1187
ebf_error("empty rest of line after '\\' in string",
1191
} while ((d != EOF) && (d!='\"'));
1192
if (d==EOF) ebf_error("'\"'", "end of file");
1194
circle[circle_position].type = DQ_TT;
1197
case IDENTIFIER_CODE: /* Letter or underscore: an identifier */
1200
while ((n<=MAX_IDENTIFIER_LENGTH)
1201
&& ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1202
|| (tokeniser_grid[lookahead] == DIGIT_CODE)))
1203
n++, *lex_p++ = (*get_next_char)();
1207
if (n > MAX_IDENTIFIER_LENGTH)
1208
{ char bad_length[100];
1210
"Name exceeds the maximum length of %d characters:",
1211
MAX_IDENTIFIER_LENGTH);
1212
error_named(bad_length, circle[circle_position].text);
1215
if (dont_enter_into_symbol_table)
1216
{ circle[circle_position].type = DQ_TT;
1217
circle[circle_position].value = 0;
1218
if (dont_enter_into_symbol_table == -2)
1219
interpret_identifier(circle_position, TRUE);
1223
interpret_identifier(circle_position, FALSE);
1228
/* The character is initial to at least one of the separators */
1230
for (j=e>>4, k=j+(e&0x0f); j<k; j++)
1231
{ r = (char *) separators[j];
1233
{ *lex_p++=d; *lex_p++=0;
1234
goto SeparatorMatched;
1238
{ if (*(r+1) == lookahead)
1240
*lex_p++=(*get_next_char)();
1242
goto SeparatorMatched;
1246
{ if ((*(r+1) == lookahead) && (*(r+2) == lookahead2))
1248
*lex_p++=(*get_next_char)();
1249
*lex_p++=(*get_next_char)();
1251
goto SeparatorMatched;
1256
/* The following contingency never in fact arises with the
1257
current set of separators, but might in future */
1259
*lex_p++ = d; *lex_p++ = lookahead; *lex_p++ = lookahead2;
1261
error_named("Unrecognised combination in source:", lex_p);
1262
goto StartTokenAgain;
1266
circle[circle_position].type = SEP_TT;
1267
circle[circle_position].value = j;
1269
{ case SEMICOLON_SEP: break;
1270
case HASHNDOLLAR_SEP:
1271
case HASHWDOLLAR_SEP:
1272
if (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1273
{ error_named("Character expected after",
1274
circle[circle_position].text);
1278
*lex_p++ = (*get_next_char)();
1279
while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1280
|| (tokeniser_grid[lookahead] == DIGIT_CODE))
1281
*lex_p++ = (*get_next_char)();
1284
case HASHADOLLAR_SEP:
1285
case HASHRDOLLAR_SEP:
1287
if (tokeniser_grid[lookahead] != IDENTIFIER_CODE)
1288
{ error_named("Alphabetic character expected after",
1289
circle[circle_position].text);
1293
while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1294
|| (tokeniser_grid[lookahead] == DIGIT_CODE))
1295
*lex_p++ = (*get_next_char)();
1302
i = circle_position;
1305
token_value = circle[i].value;
1306
token_type = circle[i].type;
1307
token_text = circle[i].text;
1308
token_line_ref = circle[i].line_ref;
1309
token_contexts[i] = context;
1311
if (tokens_trace_level > 0)
1312
{ if (tokens_trace_level == 1)
1313
printf("'%s' ", circle[i].text);
1315
{ printf("-> "); describe_token(circle[i]);
1317
if (tokens_trace_level > 2) print_context(token_contexts[i]);
1323
static char veneer_error_title[64];
1325
extern void restart_lexer(char *lexical_source, char *name)
1327
circle_position = 0;
1328
for (i=0; i<CIRCLE_SIZE; i++)
1329
{ circle[i].type = 0;
1330
circle[i].value = 0;
1331
circle[i].text = "(if this is ever visible, there is a bug)";
1332
token_contexts[i] = 0;
1335
lex_p = lexeme_memory;
1336
tokens_put_back = 0;
1337
forerrors_pointer = 0;
1338
dont_enter_into_symbol_table = FALSE;
1339
return_sp_as_variable = FALSE;
1340
next_token_begins_syntax_line = TRUE;
1342
source_to_analyse = lexical_source;
1344
if (source_to_analyse == NULL)
1345
{ get_next_char = get_next_char_from_pipeline;
1346
if (!pipeline_made) create_char_pipeline();
1347
forerrors_buff[0] = 0; forerrors_pointer = 0;
1350
{ get_next_char = get_next_char_from_string;
1351
source_to_analyse_pointer = 0;
1352
CurrentLB = &StringLB;
1353
sprintf(veneer_error_title, "<veneer routine '%s'>", name);
1354
StringLB.filename = veneer_error_title;
1356
CurrentLB->source_line = 1;
1357
CurrentLB->line_start = 0;
1358
CurrentLB->chars_read = 0;
1362
/* ========================================================================= */
/*   Data structure management routines                                      */
/* ------------------------------------------------------------------------- */
1366
extern void init_lexer_vars(void)
1370
extern void lexer_begin_prepass(void)
1371
{ total_source_line_count = 0;
1372
CurrentLB = &NoFileOpen;
1373
report_errors_at_current_line();
1376
extern void lexer_begin_pass(void)
1377
{ no_hash_printed_yet = TRUE;
1378
hash_printed_since_newline = FALSE;
1380
pipeline_made = FALSE;
1382
restart_lexer(NULL, NULL);
1385
extern void lexer_endpass(void)
1386
{ CurrentLB = &MakingOutput;
1387
report_errors_at_current_line();
1390
extern void lexer_allocate_arrays(void)
1393
FileStack = my_malloc(MAX_INCLUSION_DEPTH*sizeof(Sourcefile),
1394
"filestack buffer");
1396
for (i=0; i<MAX_INCLUSION_DEPTH; i++)
1397
FileStack[i].buffer = my_malloc(SOURCE_BUFFER_SIZE+4, "source file buffer");
1399
lexeme_memory = my_malloc(5*MAX_QTEXT_SIZE, "lexeme memory");
1401
keywords_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1402
"keyword hash table");
1403
keywords_hash_ends_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1404
"keyword hash end table");
1405
keywords_data_table = my_calloc(sizeof(int), 3*MAX_KEYWORDS,
1406
"keyword hashing linked list");
1407
local_variable_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1408
"local variable hash table");
1409
local_variable_text_table = my_malloc(
1410
(MAX_LOCAL_VARIABLES-1)*(MAX_IDENTIFIER_LENGTH+1),
1411
"text of local variable names");
1413
local_variable_hash_codes = my_calloc(sizeof(int), MAX_LOCAL_VARIABLES,
1414
"local variable hash codes");
1415
local_variable_texts = my_calloc(sizeof(char *), MAX_LOCAL_VARIABLES,
1416
"local variable text pointers");
1418
make_tokeniser_grid();
1419
make_keywords_tables();
1422
extern void lexer_free_arrays(void)
1425
for (i=0; i<MAX_INCLUSION_DEPTH; i++)
1426
{ p = FileStack[i].buffer;
1427
my_free(&p, "source file buffer");
1429
my_free(&FileStack, "filestack buffer");
1430
my_free(&lexeme_memory, "lexeme memory");
1432
my_free(&keywords_hash_table, "keyword hash table");
1433
my_free(&keywords_hash_ends_table, "keyword hash end table");
1434
my_free(&keywords_data_table, "keyword hashing linked list");
1435
my_free(&local_variable_hash_table, "local variable hash table");
1436
my_free(&local_variable_text_table, "text of local variable names");
1438
my_free(&local_variable_hash_codes, "local variable hash codes");
1439
my_free(&local_variable_texts, "local variable text pointers");
1442
/* ========================================================================= */