1
/* $Header: d:/cvsroot/tads/tads3/tctok.h,v 1.5 1999/07/11 00:46:59 MJRoberts Exp $ */
4
* Copyright (c) 1999, 2002 Michael J. Roberts. All Rights Reserved.
6
* Please see the accompanying license file, LICENSE.TXT, for information
7
* on using and copying this software.
11
tctok.h - TADS3 compiler tokenizer and preprocessor
15
The tokenizer is layered with the preprocessor, so that the preprocessor
16
can deal with include files, macro expansion, and preprocessor directives.
18
04/12/99 MJRoberts - Creation
37
/* ------------------------------------------------------------------------ */
42
/* maximum length of a symbol name, in characters */
43
const size_t TOK_SYM_MAX_LEN = 80;
46
* Maximum buffer required to hold a symbol, in bytes. Each UTF-8
47
* character may take up three bytes, plus we need a null terminator
50
const size_t TOK_SYM_MAX_BUFFER = (3*TOK_SYM_MAX_LEN + 1);
52
/* maximum #if nesting level */
53
const size_t TOK_MAX_IF_NESTING = 100;
55
/* maximum number of parameters per macro */
56
const int TOK_MAX_MACRO_ARGS = 128;
59
* Special token flag characters - these are characters that can't
60
* occur in an input file (we guarantee this by converting any
61
* occurrences of this character to a space on reading input). We use
62
* these to flag certain special properties of tokens in the input
65
* We use ASCII characters in the control range (0x01 (^A) through 0x1A
66
* (^Z), excluding 0x09 (tab), 0x0A (LF), 0x0D (CR), and 0x0C (Page
67
* Feed)); a well-formed source file would never use any of these
68
* characters in input. Even if it does, we won't get confused, since
69
* we'll always translate these to a space if we find them in input; but
70
* choosing characters that *should* never occur in valid input will
71
* ensure that we never alter the meaning of valid source by this
76
* macro parameter flag - we use this in the internal storage of a
77
* #define expansion to flag where the formal parameters are mentioned,
78
* so that we can substitute the actuals when expanding the macro
80
const char TOK_MACRO_FORMAL_FLAG = 0x01;
83
* Token fully expanded flag. Whenever we detect that a particular
84
* token has been fully expanded in the course of a particular macro
85
* expansion, we'll insert this byte before the token; on subsequent
86
* re-scans, whenever we see this flag, we'll realize that the token
87
* needs no further consideration of expansion.
89
const char TOK_FULLY_EXPANDED_FLAG = 0x02;
92
* Macro substitution end marker. Each time we expand a macro, we'll
93
* insert immediately after the macro expansion a special pseudo-token,
94
* consisting of this flag followed by a pointer to the symbol table
95
* entry for the symbol expanded. As we expand macros, we'll check to
96
* see if any of these special flags appear in the buffer after the
97
* macro about to be expanded. If we find such a flag matching the
98
* symbol about to be expanded, we'll know the symbol has already been
99
* fully expanded on a previous scan and thus must not be expanded
102
const char TOK_MACRO_EXP_END = 0x03;
105
* End-of-line flag. This serves as a local end-of-file marker for
106
* preprocessor lines. Because preprocessor lines must be considered in
107
* isolation, we need some way when parsing one to tell the tokenizer
108
* not to try to read another line when it reaches the end of the
109
* current line. This flag serves this purpose: when the tokenizer
110
* encounters one of these flags, it will simply return end-of-file
111
* until the caller explicitly reads a new source line.
113
const char TOK_END_PP_LINE = 0x04;
116
* "#foreach" marker flag. This marks the presence of a #foreach token in
117
* a macro's expansion. We leave the text of the expansion area intact,
118
* but we replace the #foreach token with this marker character.
120
const char TOK_MACRO_FOREACH_FLAG = 0x05;
123
* "#argcount" marker flag. This marks the presence of a #argcount token
124
* in a macro's expansion.
126
const char TOK_MACRO_ARGCOUNT_FLAG = 0x06;
129
* "#ifempty" and "#ifnempty" marker flags
131
const char TOK_MACRO_IFEMPTY_FLAG = 0x07;
132
const char TOK_MACRO_IFNEMPTY_FLAG = 0x08;
135
/* ------------------------------------------------------------------------ */
141
TOKIF_NONE, /* not in a #if block at all */
142
TOKIF_IF_YES, /* processing a true #if branch */
143
TOKIF_IF_NO, /* processing a false #if branch */
144
TOKIF_IF_DONE, /* done with true #if/#elif; skip #elif's and #else */
145
TOKIF_ELSE_YES, /* processing a true #else branch */
146
TOKIF_ELSE_NO /* processing a false #else branch */
157
/* file descriptor and line number of starting #if */
158
class CTcTokFileDesc *desc;
162
/* ------------------------------------------------------------------------ */
169
TOKT_INVALID, /* invalid token */
170
TOKT_NULLTOK, /* null token - caller should read another token */
171
TOKT_EOF, /* end of file */
172
TOKT_MACRO_FORMAL, /* formal parameter replacement placeholder */
173
TOKT_MACRO_FOREACH, /* macro varargs #foreach placeholder */
174
TOKT_MACRO_ARGCOUNT, /* macro varargs #argcount placeholder */
175
TOKT_MACRO_IFEMPTY, /* #ifempty macro placeholder */
176
TOKT_MACRO_IFNEMPTY, /* #ifnempty macro placeholder */
177
TOKT_SYM, /* symbolic name */
178
TOKT_INT, /* integer */
179
TOKT_SSTR, /* single-quoted string */
180
TOKT_DSTR, /* double-quoted string */
181
TOKT_DSTR_START, /* start of a dstring with embedding - "...<< */
182
TOKT_DSTR_MID, /* middle of a dstring with embedding - >>...<< */
183
TOKT_DSTR_END, /* end of a dstring with embedding - >>..." */
184
TOKT_LPAR, /* left paren '(' */
185
TOKT_RPAR, /* right paren ')' */
186
TOKT_COMMA, /* comma ',' */
187
TOKT_DOT, /* period '.' */
188
TOKT_LBRACE, /* left brace '{' */
189
TOKT_RBRACE, /* right brace '}' */
190
TOKT_LBRACK, /* left square bracket '[' */
191
TOKT_RBRACK, /* right square bracket ']' */
192
TOKT_EQ, /* equals sign '=' */
193
TOKT_EQEQ, /* double-equals sign '==' */
194
TOKT_ASI, /* colon-equals assignment operator ':=' */
195
TOKT_PLUS, /* plus sign '+' */
196
TOKT_MINUS, /* minus sign '-' */
197
TOKT_TIMES, /* multiplication symbol '*' */
198
TOKT_DIV, /* division symbol '/' */
199
TOKT_MOD, /* modulo '%' */
200
TOKT_GT, /* greater-than sign '>' */
201
TOKT_LT, /* less-than sign '<' */
202
TOKT_GE, /* greater-or-equal sign '>=' */
203
TOKT_LE, /* less-or-equal sign '<=' */
204
TOKT_NE, /* not-equals sign '!=' or '<>' */
205
TOKT_ARROW, /* arrow symbol '->' */
206
TOKT_COLON, /* colon ':' */
207
TOKT_SEM, /* semicolon ';' */
208
TOKT_AND, /* bitwise AND '&' */
209
TOKT_ANDAND, /* logical AND '&&' */
210
TOKT_OR, /* bitwise OR '|' */
211
TOKT_OROR, /* logical OR '||' */
212
TOKT_XOR, /* bitwise XOR '^' */
213
TOKT_SHL, /* shift left '<<' */
214
TOKT_SHR, /* shift right '>>' */
215
TOKT_INC, /* increment '++' */
216
TOKT_DEC, /* decrement '--' */
217
TOKT_PLUSEQ, /* plus-equals '+=' */
218
TOKT_MINEQ, /* minus-equals '-=' */
219
TOKT_TIMESEQ, /* times-equals '*=' */
220
TOKT_DIVEQ, /* divide-equals '/=' */
221
TOKT_MODEQ, /* mod-equals '%=' */
222
TOKT_ANDEQ, /* and-equals '&=' */
223
TOKT_OREQ, /* or-equals '|=' */
224
TOKT_XOREQ, /* xor-equals '^=' */
225
TOKT_SHLEQ, /* shift-left-and-assign '<<=' */
226
TOKT_SHREQ, /* shift-right-and-assign '>>=' */
227
TOKT_NOT, /* logical not '!' */
228
TOKT_BNOT, /* bitwise not '~' */
229
TOKT_POUND, /* pound '#' */
230
TOKT_POUNDPOUND, /* double-pound '##' */
231
TOKT_POUNDAT, /* pound-at '#@' */
232
TOKT_ELLIPSIS, /* ellipsis '...' */
233
TOKT_QUESTION, /* question mark '?' */
234
TOKT_COLONCOLON, /* double-colon '::' */
235
TOKT_FLOAT, /* floating-point number */
236
TOKT_AT, /* at-sign */
289
/* type names - formerly reserved but later withdrawn */
298
/* ------------------------------------------------------------------------ */
300
* Source Block. As we read the source file, we need to keep quoted
301
* strings and symbol names around for later reference, in case they're
302
* needed after reading more tokens and flushing the line buffer. We'll
303
* copy needed text into our source blocks, which we keep in memory
304
* throughout the compilation, so that we can be certain we can
305
* reference these strings at any time.
308
/* size of a source block */
309
const size_t TCTOK_SRC_BLOCK_SIZE = 50000;
311
/* source block class */
317
/* no next block yet */
323
/* delete the next block in line */
328
/* get/set the next block */
329
/* retrieve the block linked after this one in the list */
CTcTokSrcBlock *get_next() const
{
    return nxt_;
}
330
/* record 'blk' as the block that follows this one in the list */
void set_next(CTcTokSrcBlock *blk)
{
    nxt_ = blk;
}
332
/* get a pointer to the block's buffer */
333
/* writable pointer to the first byte of this block's storage array */
char *get_buf()
{
    return &buf_[0];
}
336
/* the next block in the list */
337
CTcTokSrcBlock *nxt_;
339
/* bytes of the list entry */
340
char buf_[TCTOK_SRC_BLOCK_SIZE];
344
/* ------------------------------------------------------------------------ */
346
* String Buffer. We use these buffers for reading input lines and
360
virtual ~CTcTokString()
362
/* delete our buffer */
367
/* ensure that a given amount of space is available */
368
virtual void ensure_space(size_t siz)
370
/* make sure there's room for the requested size plus a null byte */
371
if (buf_size_ < siz + 1)
373
/* increase to the next 4k increment */
374
buf_size_ = (siz + 4095 + 1) & ~4095;
376
/* allocate or re-allocate the buffer */
378
buf_ = (char *)t3malloc(buf_size_);
380
buf_ = (char *)t3realloc(buf_, buf_size_);
382
/* throw an error if that failed */
384
err_throw(TCERR_NO_STRBUF_MEM);
388
/* expand the buffer */
391
/* expand to the next 4k increment */
392
ensure_space(buf_size_ + 4096);
395
/* get the text and the length of the text */
396
/* read-only pointer to the start of the text held in the buffer */
const char *get_text() const
{
    return buf_;
}
397
/* number of bytes of text currently stored in the buffer */
size_t get_text_len() const
{
    return buf_len_;
}
399
/* get the end of the text */
400
/* pointer to the position just past the last byte of stored text */
const char *get_text_end() const
{
    const char *start = buf_;
    return start + buf_len_;
}
402
/* append text to the buffer */
403
/* append a null-terminated string: measure it, then use the counted form */
virtual void append(const char *p)
{
    size_t len = strlen(p);
    append(p, len);
}
404
virtual void append(const char *p, size_t len)
406
/* make sure we have space available */
407
ensure_space(buf_len_ + len);
409
/* copy the text onto the end of our buffer */
410
memcpy(buf_ + buf_len_, p, len);
412
/* add it to the length of the text */
415
/* null-terminate it */
416
buf_[buf_len_] = '\0';
420
/* prepend a null-terminated string: measure it, then use the counted form */
virtual void prepend(const char *p)
{
    size_t len = strlen(p);
    prepend(p, len);
}
421
virtual void prepend(const char *p, size_t len)
423
/* make sure we have enough space */
424
ensure_space(buf_len_ + len);
427
* move the existing text (including the null terminator) up in the
428
* buffer to make room for the prepended text
430
memmove(buf_ + len, buf_, buf_len_ + 1);
432
/* copy the new text to the start of the buffer */
433
memcpy(buf_, p, len);
435
/* count the new size */
440
* Append a string to the buffer, enclosing the text in single or
441
* double quotes (as given by 'qu', which must be either '"' or '\'')
442
* and backslash-escaping any occurrences of the same quote character
443
* found within the string.
445
/* quoted append of a null-terminated string, via the counted overload */
void append_qu(char qu, const char *p)
{
    size_t len = strlen(p);
    append_qu(qu, p, len);
}
446
void append_qu(char qu, const char *p, size_t len)
450
/* append the open quote */
453
/* scan for quotes we'll need to escape */
458
/* skip to the next quote */
459
for (start = p, rem = len ; rem != 0 && *p != qu ; ++p, --rem) ;
461
/* insert the chunk up to the quote */
463
append(start, p - start);
465
/* if we did find a quote, append it with a backslash escape */
468
/* append the backslash and the quote */
472
/* skip the quote in the source */
477
/* we now only have 'rem' left to consider */
481
/* finally, append the closing quote */
485
/* insert text into the buffer at the given offset */
486
virtual void insert(int ofs, const char *p, size_t len)
488
/* check to see if there's anything after the insertion point */
489
if ((size_t)ofs >= buf_len_)
492
* there's nothing after the insertion point, so this is simply
493
* equivalent to 'append' - go do the append, and we're done
499
/* ensure there's space for the added text */
500
ensure_space(buf_len_ + len);
503
* Move the existing text after the insertion point just far enough
504
* to make room for the new text. Include the null terminator.
506
memmove(buf_ + ofs + len, buf_ + ofs, buf_len_ - ofs + 1);
508
/* copy the new text in at the given offset */
509
memcpy(buf_ + ofs, p, len);
511
/* include the new text in our length */
515
/* copy text into the buffer, replacing existing text */
516
virtual void copy(const char *p, size_t len)
518
/* ensure we have enough space */
522
memcpy(buf_, p, len);
527
/* null-terminate it */
528
buf_[buf_len_] = '\0';
531
/* clear any existing text */
532
virtual void clear_text()
534
/* zero the length */
537
/* put a null terminator at the start of the buffer if possible */
542
/* get the buffer, for copying text directly into it */
543
/* writable pointer to the underlying buffer, for direct copying */
virtual char *get_buf() const
{
    return buf_;
}
544
/* current size of the buffer */
size_t get_buf_size() const
{
    return buf_size_;
}
547
* Set the text length - use this after copying directly into the
548
* buffer to set the length, excluding the null terminator. We'll
549
* add a null terminator at the given length.
551
virtual void set_text_len(size_t len)
553
/* set the new length */
556
/* add a null terminator after the new length */
565
/* size of the buffer */
568
/* length of the text in the buffer (excluding trailing null) */
574
* String buffer subclass for a non-allocated string that merely
575
* references another buffer. This can be used anywhere a CTcTokString is
576
* required, but does not require any allocation.
578
* These objects can only be used in 'const' contexts: the underlying
579
* buffer cannot be changed or expanded, since we do not own the
582
class CTcTokStringRef: public CTcTokString
587
/* we have no referenced buffer yet */
595
/* we don't own the underlying buffer, so simply forget about it */
599
/* we can't make any changes to the underlying buffer */
600
void ensure_space(size_t)
{
    /* deliberately empty - this override performs no allocation */
}
601
void append(const char *)
{
    /* not permitted on a reference string - flag the misuse */
    assert(FALSE);
}
602
void append(const char *, size_t)
{
    /* not permitted on a reference string - flag the misuse */
    assert(FALSE);
}
603
void prepend(const char *)
{
    /* not permitted on a reference string - flag the misuse */
    assert(FALSE);
}
604
void prepend(const char *, size_t)
{
    /* not permitted on a reference string - flag the misuse */
    assert(FALSE);
}
605
void insert(int, const char *, size_t)
{
    /* not permitted on a reference string - flag the misuse */
    assert(FALSE);
}
606
void copy(const char *, size_t)
{
    /* not permitted on a reference string - flag the misuse */
    assert(FALSE);
}
607
void clear_text()
{
    /* not permitted on a reference string - flag the misuse */
    assert(FALSE);
}
608
char *get_buf() const
{
    /* no writable buffer is available here - flag the misuse */
    assert(FALSE);

    /* reached only if the assertion does not stop us: yield a null pointer */
    return 0;
}
609
void set_text_len(size_t)
{
    /* not permitted on a reference string - flag the misuse */
    assert(FALSE);
}
611
/* set my underlying buffer */
612
void set_buffer(const char *buf, size_t len)
620
/* ------------------------------------------------------------------------ */
627
/* get/set the token type */
628
/* report this token's type code */
tc_toktyp_t gettyp() const
{
    return typ_;
}
629
/* change this token's type code */
void settyp(tc_toktyp_t typ)
{
    typ_ = typ;
}
631
/* get/set the fully-expanded flag */
632
/* read the fully-expanded flag as an int */
int get_fully_expanded() const
{
    return fully_expanded_;
}
633
/*
 *   Set the fully-expanded flag.  The flag is stored in a one-bit
 *   unsigned field, so normalize the argument to 0 or 1 before storing
 *   it: a plain assignment keeps only the low-order bit, which would
 *   record an even non-zero value (2, for example) as "not expanded".
 */
void set_fully_expanded(int flag)
{
    fully_expanded_ = (flag != 0 ? 1 : 0);
}
635
/* get/set the text pointer */
636
/* pointer to the token's text */
const char *get_text() const
{
    return text_;
}
637
/* length of the token's text */
size_t get_text_len() const
{
    return text_len_;
}
638
void set_text(const char *txt, size_t len)
644
/* get/set the integer value */
645
/* fetch the stored integer value */
long get_int_val() const
{
    return int_val_;
}
646
/* store an integer value; this also marks the token as type TOKT_INT */
void set_int_val(long val)
{
    typ_ = TOKT_INT;
    int_val_ = val;
}
649
* compare the text to the given string - returns true if the text
650
* matches, false if not
652
int text_matches(const char *txt, size_t len) const
654
return (len == text_len_
655
&& memcmp(txt, text_, len) == 0);
663
* Pointer to the token's text. This is a pointer into the
664
* tokenizer's symbol table or into the token list itself, so this
665
* pointer is valid as long as the tokenizer and its token list are
671
/* integer value - valid when the token type is TOKT_INT */
675
* flag: the token has been fully expanded, and should not be
676
* expanded further on any subsequent rescan for macros
678
uint fully_expanded_ : 1;
682
/* ------------------------------------------------------------------------ */
684
* Macro Expansion Resource object. This object is a collection of
685
* resources that are needed for a macro expansion. To avoid frequent
686
* allocating and freeing of these resources, we keep a pool of these
687
* objects around so that we can re-use them as needed. We'll
688
* dynamically expand the pool as necessary, so this doesn't impose any
689
* pre-set limits; it simply avoids lots of memory allocation activity.
696
/* we're not in any lists yet */
701
/* buffer for expansion of the whole line */
702
CTcTokString line_exp_;
704
/* buffer for expansion of current macro on line */
705
CTcTokString macro_exp_;
707
/* buffer for expansion of an actual parameter value */
708
CTcTokString actual_exp_buf_;
710
/* next resource object in the "available" list */
711
CTcMacroRsc *next_avail_;
713
/* next resource object in the master list */
718
/* ------------------------------------------------------------------------ */
720
* Abstract token source interface. This is used to allow external code
721
* to inject their own substreams into the main token stream.
727
* Get the next token from the source. Returns null if there are no
730
virtual const CTcToken *get_next_token() = 0;
732
/* set the enclosing external token source and current token */
733
void set_enclosing_source(CTcTokenSource *src, const CTcToken *tok)
735
/* remember the enclosing source */
736
enclosing_src_ = src;
738
/* remember the current token */
739
enclosing_curtok_ = *tok;
742
/* get the enclosing external token source */
743
CTcTokenSource *get_enclosing_source() const
{
    /* hand back the enclosing external token source we remembered */
    return enclosing_src_;
}
746
/* get the token that was current when this source was inserted */
747
const CTcToken *get_enclosing_curtok() const
{
    /* the caller gets a pointer to our own saved copy of the token */
    const CTcToken *saved = &enclosing_curtok_;
    return saved;
}
751
/* the enclosing external token source */
752
CTcTokenSource *enclosing_src_;
755
* the current token in effect enclosing this source - this is the
756
* token that comes immediately after the source's tokens, because a
757
* source is inserted before the current token
759
CTcToken enclosing_curtok_;
763
/* ------------------------------------------------------------------------ */
765
* Tokenizer. This object reads a file and constructs a representation
766
* of the file as a token list in memory. The tokenizer interprets
767
* preprocessor directives and expands macros.
773
* Create the tokenizer and start reading from the given file. The
774
* default character set is generally specified by the user (on the
775
* compiler command line, for example), or obtained from the
778
CTcTokenizer(class CResLoader *res_loader, const char *default_charset);
780
/* destroy the tokenizer */
784
* Reset the tokenizer. Deletes the current source object and all
785
* saved token text. This can be used after compilation of a unit
786
* is completed and the intermediate parser state can be completely
792
* Set the source file. 'src_filename' is the fully-resolved local
793
* filename of the source file; 'orig_name' is the original name as
794
* given on the command line, in the makefile, or wherever it came
795
* from. We keep track of the original name so that we can pass
796
* information to the debugger indicating the name as it was originally
797
* given; this is more useful than the resolved filename, because we
798
* might want to run the debugger on another machine with a different
799
* local directory structure.
801
int set_source(const char *src_filename, const char *orig_name);
803
/* set the source to a memory buffer */
804
void set_source_buf(const char *buf);
807
* Add a #include directory to the include path. We search the
808
* include path in the order in which they were defined.
810
void add_inc_path(const char *path);
813
* Set preprocess-only mode. In this mode, we'll retain
814
* preprocessor directives that will be needed if the preprocessed
815
* result is itself compiled; for example, we'll retain #line,
816
* #pragma C, #error, and #pragma message directives.
818
void set_mode_pp_only(int flag)
{
    /* remember the requested preprocess-only mode setting */
    pp_only_mode_ = flag;
}
821
* Set list-includes mode. In this mode, we'll simply scan source
822
* files and write to the standard output a list of the names of all
823
* of the #include files.
825
void set_list_includes_mode(int flag)
{
    /* remember the requested list-includes mode setting */
    list_includes_mode_ = flag;
}
828
* Get/set the test-report mode. In this mode, we'll expand __FILE__
829
* macros with the root name only.
831
int get_test_report_mode() const
{
    /* report the current test-report mode setting */
    return test_report_mode_;
}
832
void set_test_report_mode(int flag)
{
    /* remember the requested test-report mode setting */
    test_report_mode_ = flag;
}
834
/* enable or disable preprocessing directives */
835
void enable_pp(int enable)
{
    /* record whether preprocessing directives are allowed */
    allow_pp_ = enable;
}
837
/* get the type of the current token */
838
tc_toktyp_t cur() const
{
    /* ask the current token for its type and pass that along */
    tc_toktyp_t typ = curtok_.gettyp();
    return typ;
}
840
/* get the next token, reading a new line of source if necessary */
844
* Un-get the current token and back up to the previous token. The
845
* maximum un-get depth is one token - after un-getting one token,
846
* another token must not be un-gotten until after reading another
849
* Tokens un-got with this routine are accessible only to next(),
850
* not to any of the lower-level token readers.
854
/* get the current token */
855
const class CTcToken *getcur() const
{
    /* expose our current-token object by address (no copy is made) */
    const class CTcToken *tok = &curtok_;
    return tok;
}
858
* Copy the current token. This makes a copy of the token's text in
859
* tokenizer source memory, to ensure that the reference to the text
860
* buffer the caller is keeping will remain valid forever.
862
const class CTcToken *copycur();
864
/* make a safely storable copy of a given token */
865
void copytok(class CTcToken *dst, const class CTcToken *src);
867
/* check to see if the current token matches the given text */
868
int cur_tok_matches(const char *txt, size_t len);
871
* Set an external token source. We'll read tokens from this source
872
* until it is exhausted, at which point we'll revert to the enclosing
875
* The new source is inserted before the current token, so the current
876
* token will become current once again when this source is exhausted.
877
* We'll automatically advance to the next token, which (unless we
878
* have an ungotten token stashed) will go to the first token in the
881
void set_external_source(CTcTokenSource *src)
884
* store the old source in the new source, so we can restore the
885
* old source when we have exhausted the new source
887
src->set_enclosing_source(ext_src_, &curtok_);
889
/* set the new external source */
892
/* skip to the next token */
896
/* clear all external sources, returning to the real token stream */
897
void clear_external_sources();
900
* assume that we should have found '>>' sequence after an embedded
901
* expression in a string - used by parsers to resynchronize after
902
* an apparent syntax error
904
void assume_missing_dstr_cont();
907
void add_define(const char *sym, size_t len, const char *expansion,
910
void add_define(const char *sym, const char *expansion, size_t expan_len)
911
{ add_define(sym, strlen(sym), expansion, expan_len); }
913
void add_define(const char *sym, const char *expansion)
914
{ add_define(sym, strlen(sym), expansion, strlen(expansion)); }
916
/* add a macro, given the symbol entry */
917
void add_define(class CTcHashEntryPp *entry);
919
/* undefine a previously defined macro */
920
void undefine(const char *sym, size_t len);
921
/* undefine a macro given its null-terminated name */
void undefine(const char *sym)
{
    size_t len = strlen(sym);
    undefine(sym, len);
}
923
/* find a #define symbol */
924
class CTcHashEntryPp *find_define(const char *sym, size_t len) const;
926
/* find an #undef symbol */
927
class CTcHashEntryPp *find_undef(const char *sym, size_t len) const;
929
/* enumerate all of the #define symbols through a callback */
930
void enum_defines(void (*func)(void *ctx, class CTcHashEntryPp *entry),
933
/* read the next line and handle preprocessor directives */
936
/* get the file descriptor and line number of the last line read */
937
class CTcTokFileDesc *get_last_desc() const
{
    /* file descriptor recorded for the most recent line position */
    return last_desc_;
}
938
long get_last_linenum() const
{
    /* line number recorded for the most recent line position */
    return last_linenum_;
}
939
void get_last_pos(class CTcTokFileDesc **desc, long *linenum) const
942
*linenum = last_linenum_;
946
* set the current file descriptor and line number -- this can be
947
* used to force the line position to a previously-saved value
948
* (during code generation, for example) for error-reporting and
949
* debug-record purposes
951
void set_line_info(class CTcTokFileDesc *desc, long linenum)
954
last_linenum_ = linenum;
958
* Parse a preprocessor constant expression. We always parse out of
959
* the macro expansion buffer (expbuf_), but the caller must set p_
960
* to point to the starting point on the expansion line prior to
961
* calling this routine.
963
* If 'read_first' is true, we'll read a token into curtok_ before
964
* parsing; otherwise, we'll assume the caller has already primed
965
* the pump by reading the first token.
967
* If 'last_on_line' is true, we'll flag an error if anything is
968
* left on the line after we finish parsing the expression.
970
* If 'add_line_ending' is true, we'll add an end-of-line marker to
971
* the expansion buffer, so that the tokenizer won't attempt to read
972
* past the end of the line. Since a preprocessor expression must
973
* be contained entirely on a single logical line, we must never try
974
* to read past the end of the current line when parsing a
975
* preprocessor expression.
977
int pp_parse_expr(class CTcConstVal *result,
978
int read_first, int last_on_line, int add_line_ending);
980
/* log an error, optionally with parameters */
981
static void log_error(int errnum, ...);
984
* log an error with the current token text as the parameter,
985
* suitable for a "%.*s" format list entry (hence we'll provide two
986
* parameters: an integer with the length of the token text, and a
987
* pointer to the token text string)
989
void log_error_curtok(int errnum);
991
/* log a warning, optionally with parameters */
992
static void log_warning(int errnum, ...);
994
/* log a warning with the current token as the parameter */
995
void log_warning_curtok(int errnum);
997
/* log a warning or error for the current token */
998
void log_error_or_warning_curtok(tc_severity_t sev, int errnum);
1000
/* log a warning or error for a given token */
1001
void log_error_or_warning_with_tok(tc_severity_t sev, int errnum,
1002
const CTcToken *tok);
1005
* log then throw a fatal error (this is different from an internal
1006
* error in that it indicates an unrecoverable error in the input;
1007
* an internal error indicates that something is wrong with the
1010
static void throw_fatal_error(int errnum, ...);
1013
* log then throw an internal error (internal errors are always
1014
* fatal: these indicate that something has gone wrong in the
1015
* compiler, and are equivalent to an assert failure)
1017
static void throw_internal_error(int errnum, ...);
1019
/* display a string/number value */
1020
void msg_str(const char *str, size_t len) const;
1021
void msg_long(long val) const;
1023
/* get the current line */
1024
const char *get_cur_line() const
{
    /* the current line is whatever text the line buffer holds */
    const char *txt = linebuf_.get_text();
    return txt;
}
1025
size_t get_cur_line_len() const
{
    /* length of the text held in the line buffer */
    size_t len = linebuf_.get_text_len();
    return len;
}
1027
/* get the #define hash table */
1028
class CVmHashTable *get_defines_table() const
{
    /* give the caller direct access to our #define table */
    return defines_;
}
1031
* look up a token as a keyword; returns true and fills in 'kw' with
1032
* the keyword token ID if the token is in fact a keyword, or
1033
* returns false if it's not a keyword
1035
int look_up_keyword(const CTcToken *tok, tc_toktyp_t *kw);
1038
* Get the next token on the line, filling in the token object.
1039
* Advances the pointer to the character immediately following the
1042
* If the token is a string, and the string contains backslash
1043
* sequences, we'll modify the source string by translating each
1044
* backslash sequence; for example, a "\n" sequence is changed into an
1047
* 'expanding' indicates whether or not we're in the initial macro
1048
* expansion pass. If this is true, we'll suppress error messages
1049
* during this pass, as we'll encounter the same tokens again when we
1050
* parse the expanded form of the line.
1052
static tc_toktyp_t next_on_line(utf8_ptr *p, CTcToken *tok,
1053
int *in_embedding, int expanding);
1056
* Get the text of an operator token. Returns a pointer to a
1057
* constant, static, null-terminated string, suitable for use in
1060
static const char *get_op_text(tc_toktyp_t op);
1063
* Store text in the source list. Text stored here is available
1064
* throughout compilation. This routine automatically reserves the
1065
* space needed, so do not call 'reserve' or 'commit' separately.
1067
const char *store_source(const char *txt, size_t len);
1069
/* reserve space for text in the source list */
1070
void reserve_source(size_t len);
1073
* Store a piece of text into pre-reserved space in the source list.
1074
* This can be used to build up a string from several pieces. You must
1075
* call 'reserve' first to allocate the space, and you must explicitly
1076
* add a null terminator at the end of the string. Do not call
1077
* 'commit'; this automatically commits the space as each substring is
1080
const char *store_source_partial(const char *txt, size_t len);
1083
* Get the index of the next source file descriptor that will be
1084
* created. The linker can use this information to fix up
1085
* references to file descriptors in an object file when loading
1086
* multiple object files.
1088
int get_next_filedesc_index() const
{
    /* the stored next-descriptor ID doubles as the next index */
    return next_filedesc_id_;
}
1090
/* get the number of source file descriptors in the master list */
1091
int get_filedesc_count() const
{
    /* the count is reported as the stored next-descriptor ID */
    return next_filedesc_id_;
}
1093
/* get the file descriptor at the given (0-based) index */
1094
class CTcTokFileDesc *get_filedesc(size_t idx) const
1096
/* return the array entry at the index, if the index is valid */
1097
return (idx < desc_list_cnt_ ? desc_list_[idx] : 0);
1100
/* get the head of the master source file descriptor list */
1101
class CTcTokFileDesc *get_first_filedesc() const
{
    /* the list head is the first descriptor */
    return desc_head_;
}
1104
* Create a new file descriptor and add it to the master list. This
1105
* creates the new descriptor unconditionally, even if a descriptor
1106
* for the same source file already exists.
1108
class CTcTokFileDesc *create_file_desc(const char *fname, size_t len)
1109
{ return get_file_desc(fname, len, TRUE, fname, len); }
1112
* Set the string capture file. Once this is set, we'll write the
1113
* contents of each string token that we encounter to this file,
1114
* with a newline after each token.
1116
void set_string_capture(osfildef *fp);
1118
/* write macros to a file, for debugger use */
1119
void write_macros_to_file_for_debug(class CVmFile *fp);
1122
* Load macros from a file. If any errors occur, we'll flag them
1123
* through the error handler object and return a non-zero value.
1124
* Returns zero on success.
1126
int load_macros_from_file(class CVmStream *fp,
1127
class CTcTokLoadMacErr *err_handler);
1129
/* receive notification that the compiler is done with all parsing */
1132
/* forget any input file position */
1133
set_line_info(0, 0);
1137
* Stuff text into the tokenizer source stream. The new text is
1138
* inserted at the current read pointer, so that the next token we
1139
* fetch will come from the start of the inserted text. If 'expand' is
1140
* true, we'll expand macros in the text; if not, we'll insert the text
1141
* exactly as is with no macro expansion.
1143
void stuff_text(const char *txt, size_t len, int expand);
1146
/* skip whitespace and token markers */
1147
static void skip_ws_and_markers(utf8_ptr *p);
1150
* get the next token on the line; if we go past the end of the
1151
* string buffer, we'll return EOF
1153
static tc_toktyp_t next_on_line(const CTcTokString *srcbuf, utf8_ptr *p,
1154
CTcToken *tok, int *in_embedding,
1158
* get the next token on the current line, updating the internal
1159
* character position pointer to point just past the token, and filling
1160
* in the internal current token object with the token data
1162
tc_toktyp_t next_on_line()
1163
{ return next_on_line(&p_, &curtok_, 0, FALSE); }
1165
/* get the next token on the line, with string translation */
1166
tc_toktyp_t next_on_line_xlat(int *in_embedding)
1167
{ return next_on_line_xlat(&p_, &curtok_, in_embedding); }
1170
* get the next token, translating strings and storing string and
1171
* symbol text in the source block list
1173
tc_toktyp_t next_on_line_xlat_keep();
1176
* get the next token on the line, translating strings to internal
1179
tc_toktyp_t next_on_line_xlat(utf8_ptr *p, CTcToken *tok,
1183
* translate a string to internal format by converting escape
1184
* sequences; overwrites the original buffer
1186
tc_toktyp_t xlat_string(utf8_ptr *p, CTcToken *tok,
1190
* translate a string into a given buffer; if 'force_embed_end' is
1191
* true, we'll act as though we're continuing the string after the
1192
* '>>' after an embedded expression, no matter what the actual
1195
tc_toktyp_t xlat_string_to(char *dst, utf8_ptr *p, CTcToken *tok,
1196
int *in_embedding, int force_embed_end);
1199
* Translate a string, saving the translated version in the source
1200
* block list. If 'force_end_embed' is true, we'll act as though we
1201
* were looking at '>>' (or, more precisely, we'll act as though
1202
* '>>' immediately preceded the current input), regardless of what
1203
* the actual input looks like.
1205
tc_toktyp_t xlat_string_to_src(int *in_embedding, int force_end_embed);
1207
/* initialize the source block list */
1208
void init_src_block_list();
1210
/* delete the current source file, along with all of the parent files that include it */
1211
void delete_source();
1214
* Read the next line; processes comments, but does not expand macros
1215
* or parse preprocessor directives. This always reads into linebuf_;
1216
* the return value is the offset within linebuf_ of the new text. A
1217
* return value of -1 indicates that we're at end of file.
1219
int read_line(int append);
1222
* Set the source read pointer to the start of a new line, given the
1223
* CTcTokString object containing the buffer, and the offset within
1226
void start_new_line(CTcTokString *str, int ofs)
1228
/* remember the buffer we're reading out of */
1231
/* set the read pointer to the start of the new line's text */
1232
p_.set((char *)str->get_text() + ofs);
1235
/* unsplice text from the current line and make it the next line */
1236
void unsplice_line(const char *new_line_start);
1239
* Commit space in the source list - this is used when text is directly
1240
* stored after reserving space. The size reserved may be greater than
1241
* the size committed, because it is sometimes more efficient to make a
1242
* guess that may overestimate the amount we actually end up needing.
1244
void commit_source(size_t len);
1246
/* parse a string */
1247
static tc_toktyp_t tokenize_string(utf8_ptr *p, CTcToken *tok,
1250
/* process comments */
1251
void process_comments(size_t start_ofs);
1253
/* splice lines for a string that runs across multiple lines */
1254
void splice_string();
1256
/* expand macros in the current line */
1257
int expand_macros_curline(int read_more, int allow_defined,
1258
int append_to_expbuf);
1261
* Expand the macros in the given text, filling in the given
1262
* CTcTokString with the results. The expansion will clear out any
1263
* existing text in the result buffer. Returns zero on success, or
1264
* non-zero on error.
1266
int expand_macros(class CTcTokString *dest, const char *str, size_t len)
1268
CTcTokStringRef srcbuf;
1270
/* set up a CTcTokString for the source */
1271
srcbuf.set_buffer(str, len);
1273
/* go expand macros */
1274
return expand_macros(&srcbuf, 0, dest, FALSE, FALSE, FALSE);
1277
/* expand all of the macros in the given text */
1278
int expand_macros(class CTcTokString *srcbuf, utf8_ptr *src,
1279
class CTcTokString *expbuf, int read_more,
1280
int allow_defined, int append);
1282
/* expand the macro at the current token on the current line */
1283
int expand_macro(class CTcMacroRsc *res, class CTcTokString *expbuf,
1284
const class CTcTokString *srcbuf, utf8_ptr *src,
1285
size_t macro_srcbuf_ofs, CTcHashEntryPp *entry,
1286
int read_more, int allow_defined, int *expanded);
1289
* Remove our special expansion flags from an expanded macro buffer.
1290
* This can be called after all expansion has been completed to clean
1291
* up the buffer for human consumption.
1293
void remove_expansion_flags(CTcTokString *buf);
1295
/* scan for a prior expansion of a macro within the current context */
1296
static int scan_for_prior_expansion(utf8_ptr src, const char *src_end,
1297
const class CTcHashEntryPp *entry);
1299
/* remove end-of-macro-expansion flags from a buffer */
1300
static void remove_end_markers(class CTcTokString *buf);
1302
/* change a buffer to use individual token full-expansion markers */
1303
void mark_full_exp_tokens(CTcTokString *dstbuf,
1304
const class CTcTokString *srcbuf,
1307
/* allocate a macro expansion resource */
1308
class CTcMacroRsc *alloc_macro_rsc();
1310
/* release a macro expansion resource */
1311
void release_macro_rsc(class CTcMacroRsc *rsc);
1314
* Parse the actual parameters to a macro. Fills in argofs[] and
1315
* arglen[] with the offsets (from srcbuf->get_buf()) and lengths,
1316
* respectively, of each actual parameter's text.
1318
int parse_macro_actuals(const class CTcTokString *srcbuf, utf8_ptr *src,
1319
const CTcHashEntryPp *macro_entry,
1320
size_t argofs[TOK_MAX_MACRO_ARGS],
1321
size_t arglen[TOK_MAX_MACRO_ARGS],
1322
int read_more, int *found_actuals);
1324
/* splice the next line for reading more macro actuals */
1325
tc_toktyp_t actual_splice_next_line(const CTcTokString *srcbuf,
1326
utf8_ptr *src, CTcToken *tok);
1328
/* substitute the actual parameters in a macro's expansion */
1329
int substitute_macro_actuals(class CTcMacroRsc *rsc,
1330
class CTcTokString *subexp,
1331
CTcHashEntryPp *macro_entry,
1332
const class CTcTokString *srcbuf,
1333
const size_t *argofs, const size_t *arglen,
1336
/* stringize a macro actual parameter into an expansion buffer */
1337
void stringize_macro_actual(class CTcTokString *expbuf,
1338
const char *actual_val, size_t actual_len,
1339
char quote_char, int add_open_quote,
1340
int add_close_quote);
1342
/* skip a delimited macro expansion area (#foreach, #ifempty, etc) */
1343
void skip_delimited_group(utf8_ptr *p, int parts_to_skip);
1345
/* expand a defined() preprocessor operator */
1346
int expand_defined(class CTcTokString *subexp,
1347
const class CTcTokString *srcbuf, utf8_ptr *src);
1349
/* add a file to the list of files to be included only once */
1350
void add_include_once(const char *fname);
1352
/* find a file in the list of files to be included only once */
1353
int find_include_once(const char *fname);
1355
/* process a #pragma directive */
1358
/* process a #charset directive */
1361
/* process a #include directive */
1364
/* process a #define directive */
1367
/* process a #if directive */
1370
/* process a #ifdef directive */
1373
/* process a #ifndef directive */
1376
/* process a #ifdef or #ifndef */
1377
void pp_ifdef_or_ifndef(int sense);
1379
/* process a #else directive */
1382
/* process a #elif directive */
1385
/* process a #endif directive */
1388
/* process a #error directive */
1391
/* process a #undef directive */
1394
/* process a #line directive */
1397
/* get a lone identifier for a preprocessor directive */
1398
int pp_get_lone_ident(char *buf, size_t bufl);
1400
/* process a #pragma C directive */
1401
// void pragma_c(); - not currently used
1403
/* process a #pragma once directive */
1406
/* process a #pragma all_once directive */
1407
void pragma_all_once();
1409
/* process a #pragma message directive */
1410
void pragma_message();
1412
/* process a #pragma newline_spacing(on/off) directive */
1413
void pragma_newline_spacing();
1415
/* process a #pragma sourceTextGroup directive */
1416
void pragma_source_text_group();
1419
* Determine if we're in a false #if branch. If we're inside a #if
1420
* block, and the state is either IF_NO, IF_DONE, or ELSE_NO, or
1421
* we're inside a #if nested within any negative branch, we're in a
1422
* not-taken branch of a #if block.
1424
int in_false_if() const
1427
&& (if_false_level_ != 0
1428
|| if_stack_[if_sp_ - 1].state == TOKIF_IF_NO
1429
|| if_stack_[if_sp_ - 1].state == TOKIF_IF_DONE
1430
|| if_stack_[if_sp_ - 1].state == TOKIF_ELSE_NO));
1433
/* push a new #if level with the given state */
1434
void push_if(tok_if_t state);
1436
/* get the current #if state */
1437
tok_if_t get_if_state() const
1442
return if_stack_[if_sp_ - 1].state;
1445
/* switch the current #if level to the given state */
1446
void change_if_state(tok_if_t state)
1449
if_stack_[if_sp_ - 1].state = state;
1452
/* pop the current #if level */
1456
* Find or create a descriptor for the given filename. 'fname' is
1457
* the full file system path specifying the file. 'orig_fname' is
1458
* the filename as originally specified by the user, if different;
1459
* in the case of #include files, this indicates the name that was
1460
* specified in the directive itself, whereas 'fname' is the actual
1461
* filename that resulted from searching the include path for the
1464
class CTcTokFileDesc *get_file_desc(const char *fname, size_t fname_len,
1466
const char *orig_fname,
1467
size_t orig_fname_len);
1469
/* clear the line buffer */
1470
void clear_linebuf();
1472
/* flag: ALL_ONCE mode - we include each file only once */
1475
/* flag: warn on ignoring a redundant #include file */
1476
/* unsigned, so that this 1-bit flag holds 0/1 rather than 0/-1 */
unsigned int warn_on_ignore_incl_ : 1;
1479
* Flag: in preprocess-only mode. In this mode, we'll leave certain
1480
* preprocessor directives intact in the source, since they'll be
1481
* needed in a subsequent compilation of the preprocessed source.
1482
* For example, we'll leave #line directives, #pragma C, #error, and
1483
* #pragma message directives in the preprocessed result.
1485
/* unsigned, so that this 1-bit flag holds 0/1 rather than 0/-1 */
unsigned int pp_only_mode_ : 1;
1488
* Flag: in test reporting mode. In this mode, we'll expand __FILE__
1489
* macros with the root name only.
1491
/* unsigned, so that this 1-bit flag holds 0/1 rather than 0/-1 */
unsigned int test_report_mode_ : 1;
1494
* Flag: in preprocess-for-includes mode. In this mode, we'll do
1495
* nothing except run the preprocessor and generate a list of the
1496
* header files that are included, along with header files they
1497
* include, and so on.
1499
/* unsigned, so that this 1-bit flag holds 0/1 rather than 0/-1 */
unsigned int list_includes_mode_ : 1;
1502
* Flag: treat newlines in strings as whitespace. When this is true,
1503
* whenever we find a newline character in a string, we'll convert the
1504
* newline and all leading whitespace on the next line to a single
1505
* space character. When this is false, we'll entirely strip out each
1506
* newline in a string and all whitespace that immediately follows;
1507
* this mode is desirable for some languages, such as Chinese, where
1508
* whitespace is not conventionally used as a token separator in
1511
/* unsigned, so that this 1-bit flag holds 0/1 rather than 0/-1 */
unsigned int string_newline_spacing_ : 1;
1514
* flag: we're parsing a preprocessor constant expression (for a
1515
* #if, for example; this doesn't apply to simple macro expansion)
1517
/* unsigned, so that this 1-bit flag holds 0/1 rather than 0/-1 */
unsigned int in_pp_expr_ : 1;
1519
/* resource loader */
1520
class CResLoader *res_loader_;
1523
* name of our default character set - this is generally specified
1524
* by the user (on the compiler command line, for example), or
1525
* obtained from the operating system
1527
char *default_charset_;
1529
/* input (to unicode) character mapper for the default character set */
1530
class CCharmapToUni *default_mapper_;
1532
/* head of list of previously-included files */
1533
struct tctok_incfile_t *prev_includes_;
1535
/* head and tail of include path list */
1536
struct tctok_incpath_t *incpath_head_;
1537
struct tctok_incpath_t *incpath_tail_;
1539
/* file descriptor and line number of last line read */
1540
class CTcTokFileDesc *last_desc_;
1543
/* file descriptor and line number of last line appended */
1544
class CTcTokFileDesc *appended_desc_;
1545
long appended_linenum_;
1547
/* current input stream */
1548
class CTcTokStream *str_;
1550
/* master list of file descriptors */
1551
class CTcTokFileDesc *desc_head_;
1552
class CTcTokFileDesc *desc_tail_;
1555
* array of file descriptors (we keep the list in both an array and
1556
* a linked list, since we need both sequential and indexed access;
1557
* this isn't a lot of trouble since we never need to remove an
1558
* entry from the list)
1560
class CTcTokFileDesc **desc_list_;
1562
/* number of entries in desc_list_ */
1563
size_t desc_list_cnt_;
1565
/* number of slots allocated in desc_list_ array */
1566
size_t desc_list_alo_;
1568
/* next file descriptor ID to be assigned */
1569
int next_filedesc_id_;
1571
/* pointer to current position in current line */
1575
* The CTcTokString object containing the current line. This is the
1576
* buffer object we're currently reading from, and will be either
1577
* linebuf_ or expbuf_. p_ always points into this buffer.
1579
CTcTokString *curbuf_;
1581
/* raw file input buffer */
1582
CTcTokString linebuf_;
1585
* unsplice buffer - we'll put any unspliced text into this buffer,
1586
* then read it back at the next read_line()
1588
CTcTokString unsplicebuf_;
1590
/* macro expansion buffer */
1591
CTcTokString expbuf_;
1594
* Flag: in a string. If this is '\0', we're not in a string;
1595
* otherwise, this is the quote character that ends the string.
1599
/* flag: in an embedded expression during line processing */
1600
uint comment_in_embedding_ : 1;
1602
/* flag: macro processing token stream is in an embedded expression */
1603
int macro_in_embedding_;
1605
/* flag: main token stream is in an embedded expression */
1606
int main_in_embedding_;
1609
* #if state stack. if_sp_ is the index of the next nesting slot;
1610
* if if_sp_ is zero, it means that we're not in a #if at all.
1612
* Separately, the if_false_level_ is the level of #if's contained
1613
* within a false #if branch. This is separate because, once we're
1614
* in a false #if branch, everything within it is false.
1617
tok_if_info_t if_stack_[TOK_MAX_IF_NESTING];
1618
int if_false_level_;
1620
/* source block list head */
1621
CTcTokSrcBlock *src_head_;
1623
/* current (and last) source block */
1624
CTcTokSrcBlock *src_cur_;
1626
/* pointer to next available byte in the current source block */
1629
/* number of bytes remaining in the current source block */
1635
/* previous token (for unget) */
1639
* next token, if a token has been un-gotten, and a flag indicating
1640
* that this is indeed the case.
1643
unsigned int nxttok_valid_ : 1;
1645
/* the external token source, if any */
1646
CTcTokenSource *ext_src_;
1648
/* symbol table for #define symbols */
1649
class CVmHashTable *defines_;
1652
* symbol table for symbols explicitly undefined; we keep track of
1653
* these so that we can exclude anything ever undefined from the debug
1654
* macro records, since only static global macros can be handled in the
1657
class CVmHashTable *undefs_;
1659
/* symbol table for TADS keywords */
1660
class CVmHashTable *kw_;
1662
/* head of macro resource pool list */
1663
class CTcMacroRsc *macro_res_head_;
1665
/* head of list of available macro resources */
1666
class CTcMacroRsc *macro_res_avail_;
1669
* string capture file - if this is non-null, we'll capture all of
1670
* the strings we read to this file, one string per line
1672
osfildef *string_fp_;
1674
/* character mapper for writing to the string capture file */
1675
class CCharmapToLocal *string_fp_map_;
1677
/* true -> allow preprocessor directives */
1678
unsigned int allow_pp_;
1681
/* ------------------------------------------------------------------------ */
1683
* Error handler interface. Callers of load_macros_from_file() in
1684
* CTcTokenizer must provide an implementation of this interface to handle
1685
* errors that occur while loading macros.
1687
class CTcTokLoadMacErr
1691
* Flag an error. The error codes are taken from the following list:
1693
* 1 - a macro name symbol in the file is too long (it exceeds the
1694
* maximum symbol length for the preprocessor)
1696
* 2 - a formal parameter name is too long
1698
virtual void log_error(int err) = 0;
1701
/* ------------------------------------------------------------------------ */
1703
* Tokenizer File Descriptor. Each unique source file has a separate
1704
* file descriptor, which keeps track of the file's name.
1706
class CTcTokFileDesc
1709
/* create a file descriptor */
1710
CTcTokFileDesc(const char *fname, size_t fname_len, int index,
1711
CTcTokFileDesc *orig_desc,
1712
const char *orig_fname, size_t orig_fname_len);
1714
/* delete the descriptor */
1717
/* get the filename */
1718
const char *get_fname() const { return fname_; }
1720
/* get the original filename string */
1721
const char *get_orig_fname() const { return orig_fname_; }
1724
* get the filename as a double-quoted string (backslashes and
1725
* double-quotes will be escaped with backslashes)
1727
const char *get_dquoted_fname() const { return dquoted_fname_; }
1730
* get the root filename (i.e., with no path prefix) as a
1731
* double-quoted string
1733
const char *get_dquoted_rootname() const { return dquoted_rootname_; }
1735
/* get the filename as a single-quoted string */
1736
const char *get_squoted_fname() const { return squoted_fname_; }
1738
/* get the root filename as a single-quoted string */
1739
const char *get_squoted_rootname() const { return squoted_rootname_; }
1741
/* get/set the next file descriptor in the descriptor chain */
1742
CTcTokFileDesc *get_next() const { return next_; }
1743
void set_next(CTcTokFileDesc *nxt) { next_ = nxt; }
1745
/* get my index in the master list */
1746
int get_index() const { return index_; }
1748
/* get the original descriptor for this file in the list */
1749
CTcTokFileDesc *get_orig() const { return orig_; }
1752
* get the list index of the original entry (returns my own list
1753
* index if I am the original entry)
1755
int get_orig_index() const
1756
{ return orig_ == 0 ? index_ : orig_->get_index(); }
1759
* Add a source line position to our list. We keep an index of the
1760
* byte-code address for each executable source line, so that
1761
* debuggers can find the compiled code corresponding to a source
1762
* location. The image builder gives us this information during the
1763
* linking process. The address is the absolute location in the
1764
* image file of the executable code for the given source line (the
1765
* first line in the file is numbered 1).
1767
void add_source_line(ulong linenum, ulong line_addr);
1770
* Enumerate the source lines, calling the callback for each one.
1771
* We will only enumerate source lines which actually have an
1772
* associated code location - source lines that generated no
1773
* executable code are skipped. We'll enumerate the lines in
1774
* ascending order of line number, and each line number will appear
1777
void enum_source_lines(void (*cbfunc)(void *ctx, ulong linenum,
1778
ulong byte_code_addr),
1782
/* index in the master list */
1785
/* filename string - this is the actual file system filename */
1789
* original filename string, if different from fname_ - this is the
1790
* filename as specified by the user, before it was adjusted with
1791
* include paths or other extra location information
1795
/* double-quoted version of the filename */
1796
char *dquoted_fname_;
1798
/* single-quoted version of the filename */
1799
char *squoted_fname_;
1801
/* single-quoted version of the root filename */
1802
char *squoted_rootname_;
1804
/* double-quoted version of the root filename */
1805
char *dquoted_rootname_;
1807
/* next descriptor in the master descriptor list */
1808
CTcTokFileDesc *next_;
1811
* The original file descriptor with the same filename. If we
1812
* create multiple descriptors for the same filename (because, for
1813
* example, the same header is included in several different object
1814
* files), we'll keep track of the original descriptor for the file
1815
* in all of the copies.
1817
CTcTokFileDesc *orig_;
1819
/* source line pages */
1820
struct CTcTokSrcPage **src_pages_;
1822
/* number of source line page slots allocated */
1823
size_t src_pages_alo_;
1827
/* ------------------------------------------------------------------------ */
1829
* Tokenizer Input Stream
1834
/* create a token stream */
1835
CTcTokStream(class CTcTokFileDesc *desc, class CTcSrcObject *src,
1836
CTcTokStream *parent, int charset_error,
1839
/* delete the stream */
1842
/* get/set the associated file descriptor */
1843
class CTcTokFileDesc *get_desc() const { return desc_; }
1844
void set_desc(class CTcTokFileDesc *desc) { desc_ = desc; }
1846
/* get the underlying source file */
1847
class CTcSrcObject *get_src() const { return src_; }
1849
/* get the line number of the next line to be read */
1850
long get_next_linenum() const { return next_linenum_; }
1852
/* set the next line number */
1853
void set_next_linenum(long l) { next_linenum_ = l; }
1855
/* get the enclosing stream */
1856
CTcTokStream *get_parent() const { return parent_; }
1858
/* count having read a line */
1859
void count_line() { ++next_linenum_; }
1861
/* was there a #charset error when opening the file? */
1862
int get_charset_error() const { return charset_error_; }
1864
/* get/set the in-comment status */
1865
int is_in_comment() const { return in_comment_; }
1866
void set_in_comment(int f) { in_comment_ = f; }
1868
/* get/set the pragma C mode */
1869
// int is_pragma_c() const { return pragma_c_; }
1870
// void set_pragma_c(int f) { pragma_c_ = f; }
1872
/* get/set the #if nesting level at the start of the file */
1873
int get_init_if_level() const { return init_if_level_; }
1874
void set_init_if_level(int level) { init_if_level_ = level; }
1876
/* get/set the newline spacing mode */
1877
int get_newline_spacing() const { return newline_spacing_; }
1878
void set_newline_spacing(int f) { newline_spacing_ = f; }
1881
/* file descriptor associated with this file */
1882
class CTcTokFileDesc *desc_;
1884
/* the underlying source reader */
1885
class CTcSrcObject *src_;
1888
* the enclosing stream - this is the stream that #include'd the
1891
CTcTokStream *parent_;
1893
/* line number of next line to be read */
1894
ulong next_linenum_;
1896
/* #if nesting level at the start of the file */
1899
/* flag: we were unable to load the map in the #charset directive */
1900
uint charset_error_ : 1;
1902
/* the stream is in a multi-line comment */
1903
uint in_comment_ : 1;
1905
/* newline_spacing mode when the stream was stacked */
1906
uint newline_spacing_ : 1;
1908
/* flag: we're in #pragma C+ mode */
1909
// uint pragma_c_ : 1; - #pragma C is not currently used
1912
/* ------------------------------------------------------------------------ */
1914
* Keyword Hash Table Entry
1916
class CTcHashEntryKw: public CVmHashEntryCS
1919
CTcHashEntryKw(const textchar_t *str, tc_toktyp_t tokid)
1920
: CVmHashEntryCS(str, strlen(str), FALSE)
1922
/* save the token ID for the keyword */
1926
/* get the token ID */
1927
tc_toktyp_t get_tok_id() const { return tokid_; }
1934
/* ------------------------------------------------------------------------ */
1936
* basic #define symbol table entry
1938
class CTcHashEntryPp: public CVmHashEntryCS
1941
CTcHashEntryPp(const textchar_t *str, size_t len, int copy)
1942
: CVmHashEntryCS(str, len, copy)
1944
/* by default, we have no arguments */
1946
has_varargs_ = FALSE;
1952
/* get the expansion text */
1953
virtual const char *get_expansion() const = 0;
1954
virtual size_t get_expan_len() const = 0;
1956
/* certain special macros (__LINE__, __FILE__) aren't undef'able */
1957
virtual int is_undefable() const { return TRUE; }
1960
* most macros are real symbols, created by #define's, but some are
1961
* special pseudo-macros, like __LINE__ and __FILE__, that the
1962
* preprocessor provides
1964
virtual int is_pseudo() const { return FALSE; }
1966
/* does the macro have an argument list? */
1967
int has_args() const { return has_args_; }
1969
/* get the number of arguments */
1970
int get_argc() const { return argc_; }
1972
/* do we have a variable number of arguments? */
1973
int has_varargs() const { return has_varargs_; }
1976
* get the minimum number of allowed arguments - if we have varargs,
1977
* this is one less than the number of formals listed, since the last
1978
* formal can correspond to any number of actuals, including zero
1980
int get_min_argc() const
{
    /* with varargs, the last formal is optional, so one fewer is required */
    return argc_ - (has_varargs_ ? 1 : 0);
}
1982
/* get the name of an argument by position (0 = first argument) */
1983
const char *get_arg_name(int idx) const { return argv_[idx]; }
1985
/* get the parameter hash table entry for the parameter */
1986
class CTcHashEntryPpArg *get_arg_entry(int idx) const
1987
{ return arg_entry_[idx]; }
1989
/* get the parameters hash table */
1990
const CVmHashTable *get_params_table() const { return params_table_; }
1996
/* list of parameter hash entries */
1997
class CTcHashEntryPpArg **arg_entry_;
1999
/* parameter hash table */
2000
CVmHashTable *params_table_;
2002
/* argument count */
2005
/* flag: the macro has a parameter list */
2009
* flag: the parameter list takes a variable number of arguments; if
2010
* this is set, then argc_ is one greater than the minimum number of
2011
* arguments required, and the last formal receives the varying part
2012
* of the actual parameter list, which can contain zero or more
2015
uint has_varargs_ : 1;
2019
* #define symbol hash table entry
2021
class CTcHashEntryPpDefine: public CTcHashEntryPp
2025
* Create the hash entry. argc is the number of arguments to the
2026
* macro, and argv is an array of pointers to null-terminated
2027
* strings with the argument names, in the order defined in the
2030
* If has_args is false, the macro does not take a parameter list at
2031
* all. Note that it is possible for has_args to be true and argc
2032
* to be zero, because a macro can be defined to take an argument
2033
* list with no arguments (i.e., empty parens). A macro with an
2034
* empty argument list is distinct from a macro with no argument
2035
* list: in the former case, the empty parens are required, and are
2036
* removed from the input stream and replaced with the macro's
2039
* We'll make a copy of the argument list vector, strings, and
2040
* expansion text, so the caller is free to forget all of that after
2041
* creating the entry instance.
2043
CTcHashEntryPpDefine(const textchar_t *str, size_t len, int copy,
2044
int has_args, int argc, int has_varargs,
2045
const char **argv, const size_t *argvlen,
2046
const char *expansion, size_t expan_len);
2048
~CTcHashEntryPpDefine();
2050
/* get the expansion text and its length */
2051
const char *get_expansion() const { return expan_; }
2052
size_t get_expan_len() const { return expan_len_; }
2062
* Hash table entry for __FILE__ and __LINE__
2064
class CTcHashEntryPpSpecial: public CTcHashEntryPp
2067
CTcHashEntryPpSpecial(CTcTokenizer *tok, const char *str)
2068
: CTcHashEntryPp(str, strlen(str), FALSE)
2070
/* remember my tokenizer */
2074
/* these special macros are not undef'able */
2075
virtual int is_undefable() const { return FALSE; }
2077
/* special macros are pseudo-macros provided by the preprocessor */
2078
virtual int is_pseudo() const { return TRUE; }
2085
class CTcHashEntryPpFILE: public CTcHashEntryPpSpecial
2088
CTcHashEntryPpFILE(CTcTokenizer *tok)
2089
: CTcHashEntryPpSpecial(tok, "__FILE__") { }
2091
/* our expansion is the current filename, in single quotes */
2092
const char *get_expansion() const { return get_base_text(); }
2093
size_t get_expan_len() const { return strlen(get_base_text()); }
2096
/* get our expansion base text */
2097
const char *get_base_text() const
2100
* if we're in test-report mode, use the root name only;
2101
* otherwise, use the full name with path
2103
if (tok_->get_test_report_mode())
2104
return tok_->get_last_desc()->get_squoted_rootname();
2106
return tok_->get_last_desc()->get_squoted_fname();
2110
class CTcHashEntryPpLINE: public CTcHashEntryPpSpecial
2113
CTcHashEntryPpLINE(CTcTokenizer *tok)
2114
: CTcHashEntryPpSpecial(tok, "__LINE__") { }
2116
/* our expansion is the line number as a decimal string */
2117
const char *get_expansion() const
2118
{ gen_expansion(tok_); return buf_; }
2119
size_t get_expan_len() const
2120
{ gen_expansion(tok_); return strlen(buf_); }
2123
/* generate the expansion text into our internal buffer */
2124
static void gen_expansion(CTcTokenizer *tok)
2125
{ sprintf(buf_, "%ld", tok->get_last_linenum()); }
2127
/* internal buffer */
2128
static char buf_[20];
2133
* Hash entry for preprocessor arguments
2135
class CTcHashEntryPpArg: public CVmHashEntryCS
2138
CTcHashEntryPpArg(const char *str, size_t len, int copy, int argnum)
2139
: CVmHashEntryCS(str, len, copy)
2141
/* remember the argument number */
2145
/* get my argument number */
2146
int get_argnum() const { return argnum_; }
2149
/* argument number */
2154
/* ------------------------------------------------------------------------ */
2156
* Previously-included file list entry. Each time we include a file,
2157
* we'll add an entry to a list of files; in the future, we'll consult
2158
* this list to ensure that we don't include the same file again.
2160
struct tctok_incfile_t
2162
/* next entry in the list of previously-included files */
2163
tctok_incfile_t *nxt;
2165
/* name of this file (we'll allocate memory to hold the name) */
2169
/* ------------------------------------------------------------------------ */
2171
* Include path list entry. This structure defines one include path; we
2172
* maintain a list of these structures.
2174
struct tctok_incpath_t
2176
/* next entry in the list */
2177
tctok_incpath_t *nxt;
2183
#endif /* TCTOK_H */