1
/* $Header: d:/cvsroot/tads/tads3/TCPNBASE.H,v 1.3 1999/07/11 00:46:53 MJRoberts Exp $ */
4
* Copyright (c) 1999, 2002 Michael J. Roberts. All Rights Reserved.
6
* Please see the accompanying license file, LICENSE.TXT, for information
7
* on using and copying this software.
11
tcpnbase.h - Parse Node - base class
13
Defines the target-independent base class for parse nodes
15
All expression parse nodes are derived from the target-specific
16
subclass of this class. The target-independent base class is
17
CTcPrsNodeBase; the target-specific class is CTcPrsNode.
19
05/10/99 MJRoberts - Creation
27
/* ------------------------------------------------------------------------ */
29
* Parse Tree Allocation Object. This is a base class that can be used
30
* for tree objects that are to be allocated from the parser node pool.
36
* Override operator new() - allocate all parse node objects out of
37
* the parse node pool.
39
/*
 *   Class-specific allocator: storage for objects of this class is taken
 *   from the parse node pool.  'siz' is the number of bytes requested.
 */
void *operator new(size_t siz);
43
/* ------------------------------------------------------------------------ */
45
* adjust_for_debug() information structure
47
struct tcpn_debug_info
49
/* true -> speculative evaluation mode */
53
* stack level - 0 is the active level, 1 is the first enclosing
59
/* ------------------------------------------------------------------------ */
61
* Parse Tree Expression Node - base class. As we parse an expression,
62
* we build a tree of these objects to describe the source code.
64
* This class is subclassed for each type of parsing node: each type of
65
* statement has a node type, some statements have helper node types for
66
* parts of statements, and each expression operator has a node type.
67
* These subclasses contain the information specific to the type of
68
* parsing construct represented.
70
* Each parsing subclass is then further subclassed for each target
71
* architecture. This final subclass contains the code generator for
72
* the node in the target architecture.
74
* The target-independent base version of each subclass is called
75
* CTPNXxxBase. The target-specific subclass derived from this base
76
* class is CTPNXxx. For example, the final subclass for constant
77
* nodes, which is derived from the target-independent base class
78
* CTPNConstBase, is CTPNConst. (Note that each target uses the same
79
* name for the final subclass, so we can only link one target
80
* architecture into a given build of the compiler. Each additional
81
* target requires a separate compiler executable with the appropriate
82
* CTPNConst classes linked in.)
84
class CTcPrsNodeBase: public CTcPrsAllocObj
88
* Generate code for the expression for the target architecture.
89
* This method is defined only by the final target-specific
92
* This method is used to generate code to evaluate the expression
95
* If 'discard' is true, it indicates that any value yielded by the
96
* expression will not be used, in which case the generated code
97
* need not leave the result of the expression on the stack. We can
98
* generate code more efficiently for certain types of expressions
99
* when we know that we're evaluating them only for side effects.
100
* For example, an assignment expression has a result value, but
101
* this value need not be pushed onto the stack if it will simply be
102
* discarded. Also, an operator like "+" that has no side effects
103
* of its own can merely evaluate its operands for their side
104
* effects, but need not compute its own result if that result would
105
* simply be discarded.
107
* If 'for_condition' is true, it indicates that the result of the
108
* expression will be used directly for a conditional of some kind
109
* (for a "?:" operator, an "if" statement, a "while" statement, or
110
* the like). In some cases, we can avoid extra conversions to some
111
* values when they're going to be used directly for a comparison;
112
* for example, the "&&" operator must return a true/nil value, but
113
* the code generator may be able to avoid the extra conversion when
114
* the value will be used for an "if" statement's conditional value.
116
/*
 *   Pure virtual code generator for this node.
 *   
 *   discard - true if the expression's value will not be used, so the
 *   generated code need not leave a result on the stack
 *   
 *   for_condition - true if the result will be used directly as the
 *   condition of a "?:", "if", "while", or similar construct
 */
virtual void gen_code(int discard, int for_condition) = 0;
119
* Get the constant value of the parse node, if available. Most
120
* parse nodes have no constant value, so by default this returns
121
* null. Only constant parse nodes can provide a constant value, so
122
* they should override this.
124
/*
 *   Default implementation: an ordinary parse node carries no constant
 *   value, so hand back null.  Constant nodes override this.
 */
virtual class CTcConstVal *get_const_val()
{
    return 0;
}
126
/* determine if the node has a constant value */
127
/*
 *   A node counts as constant exactly when get_const_val() supplies a
 *   non-null constant value object.
 */
int is_const()
{
    return (get_const_val() != 0);
}
129
/* determine if I have a given constant integer value */
130
int is_const_int(int val)
133
&& get_const_val()->get_type() == TC_CVT_INT
134
&& get_const_val()->get_val_int() == val);
138
* Set the constant value of the parse node from that of another
139
* node. The caller must already have checked that this node and
140
* the value being assigned are both valid constant values.
142
void set_const_val(class CTcPrsNode *src)
144
/* set my constant value from the source's constant value */
145
get_const_val()->set(((CTcPrsNodeBase *)src)->get_const_val());
149
* Check to see if this expression can possibly be a valid lvalue.
150
* Return true if so, false if not. This check is made before
151
* symbol resolution; when it is not certain whether or not a symbol
152
* expression can be an lvalue, assume it can be at this point. By
153
* default, we'll return false; operator nodes whose result can be
154
* used as an lvalue should override this to return true.
156
/*
 *   Pre-resolution lvalue test.  The base node is never an lvalue;
 *   operator nodes whose result can be assigned to override this.
 */
virtual int check_lvalue() const
{
    return FALSE;
}
159
* Check to see if this expression is a valid lvalue, after
160
* resolving symbols in the given scope. Returns true if so, false
163
virtual int check_lvalue_resolved(class CTcPrsSymtab *symtab) const
167
* Check to see if this expression can possibly be a valid address
168
* value, so that the address-of ("&") operator can be applied.
169
* Returns true if it is possible, false if not. The only type of
170
* expression whose address can be taken is a simple symbol. The
171
* address of a symbol can be taken only if the symbol is a function
172
* or property name, but we won't know this at parse time, so we'll
173
* indicate that any symbol is acceptable. By default, this returns
174
* false, since the address of most expressions cannot be taken.
176
/*
 *   Can the address-of ("&") operator be applied to this node?  Most
 *   expressions have no address, so the default answer is no.
 */
virtual int has_addr() const
{
    return FALSE;
}
179
* Check to see if this expression is an address expression of some
180
* kind (i.e., of class CTPNAddrBase, or of a class derived from
181
* CTPNAddrBase). Returns true if so, false if not.
183
/*
 *   Is this node an address expression (CTPNAddrBase or a class derived
 *   from it)?  Not by default.
 */
virtual int is_addr() const
{
    return FALSE;
}
186
* Determine if this node is of type double-quoted string (dstring).
187
* Returns true if so, false if not. By default, we return false.
189
/* Is this node a double-quoted string (dstring)?  Not by default. */
virtual int is_dstring() const
{
    return FALSE;
}
192
* Determine if this is a simple assignment operator node. Returns
193
* true if so, false if not. By default, we return false.
195
/* Is this node a simple assignment operator?  Not by default. */
virtual int is_simple_asi() const
{
    return FALSE;
}
198
* Determine if this node yields a value when evaluated. Returns
199
* true if so, false if not. When it cannot be determined at
200
* compile-time whether or not the node has a value (for example,
201
* for a call to a pointer to a function whose return type is not
202
* declared), this should indicate that a value is returned.
204
* Most nodes yield a value when executed, so we'll return true by
207
/*
 *   Does evaluating this node yield a value?  Most nodes do, so the
 *   default answer is yes.
 */
virtual int has_return_value() const
{
    return TRUE;
}
210
* Determine if this node yields a return value when called as a
211
* function. We assume by default that it does.
213
/*
 *   Does this node yield a value when called as a function?  Assume it
 *   does unless a subclass says otherwise.
 */
virtual int has_return_value_on_call() const
{
    return TRUE;
}
216
* Get the text of the symbol for this node, if any. If the node is
217
* not some kind of symbol node, this returns null.
219
/*
 *   Symbol text for this node.  Nodes that are not some kind of symbol
 *   node have no text, so the default is null.
 */
virtual const textchar_t *get_sym_text() const
{
    return 0;
}
220
/* Length of the symbol text; zero by default. */
virtual size_t get_sym_text_len() const
{
    return 0;
}
223
* Fold constant expressions, given a finished symbol table. We do
224
* most of our constant folding during the initial parsing, but some
225
* constant folding must wait until the symbol table is finished; in
226
* particular, we can't figure out what to do with symbols until we
227
* know what the symbols mean.
229
* For most nodes, this function should merely recurse into subnodes
230
* and fold constants. Nodes that are affected by symbol
231
* resolution, directly or indirectly, should override this.
233
* For example, a list can change from unknown to constant during
234
* this operation. If the list contains a symbol, the list will
235
* initially be set to unknown, since the symbol could turn out to
236
* be a property evaluation, which would be non-constant, or an
237
* object name, which would be constant.
239
* Returns the folded version of the node, or simply 'this' if no
240
* folding takes place.
242
/*
 *   Pure virtual: fold constant expressions using the finished symbol
 *   table 'symtab'.  Returns the folded version of the node, or 'this'
 *   if no folding takes place.
 */
virtual class CTcPrsNode *fold_constants(class CTcPrsSymtab *symtab) = 0;
245
* generate a constant value node for the address of this node;
246
* returns null if the symbol has no address
248
virtual class CTcPrsNode *fold_addr_const(class CTcPrsSymtab *)
250
/* by default, we have no address */
255
* Adjust the expression for use as a debugger expression. Code
256
* generation for debugger expressions is somewhat different than
257
* for normal expressions; this routine should allocate a new node,
258
* if necessary, for debugger use. Returns the current node if no
259
* changes are necessary, or a new node if changes are needed.
261
* If 'speculative' is true, the expression is being evaluated
262
* speculatively by the debugger. This means that the user hasn't
263
* explicitly asked for the expression to be evaluated, but rather
264
* the debugger is making a guess that the expression might be of
265
* interest to the user and is making an unsolicited attempt to
266
* offer it to the user. Because the debugger is only guessing that
267
* the expression is interesting, the expression must not be
268
* evaluated if it has any side effects at all.
270
/*
 *   Adjust this node for use in a debugger expression.  'info' is the
 *   adjust_for_debug() information structure, which includes the
 *   speculative-evaluation flag.  Returns the current node if no
 *   changes are needed, or a new node otherwise.
 */
virtual class CTcPrsNode *adjust_for_debug(const tcpn_debug_info *info);
273
/* ------------------------------------------------------------------------ */
275
* Symbol Table Entry. Each symbol has an entry in one of the symbol
278
* - The global symbol table contains object, property, and built-in
279
* functions from the default function set.
281
* - Local symbol tables contain local variables and parameters. Local
282
* tables have block-level scope.
284
* - Label symbol tables contain code labels (for "goto" statements).
285
* Label tables have function-level or method-level scope.
289
* Basic symbol table entry. The target
291
class CTcSymbolBase: public CVmHashEntryCS
294
CTcSymbolBase(const char *str, size_t len, int copy, tc_symtype_t typ)
295
: CVmHashEntryCS(str, len, copy)
300
/* allocate symbol entries from the parser memory pool */
301
/*
 *   Class-specific allocator: symbol entries come from the parser
 *   memory pool.  'siz' is the number of bytes requested.
 */
void *operator new(size_t siz);
303
/* get the symbol type */
304
/* Accessor for the symbol's type code (typ_). */
tc_symtype_t get_type() const
{
    return typ_;
}
306
/* get the symbol text and length */
307
/* Symbol text, as returned by getstr(). */
const char *get_sym() const
{
    return getstr();
}
308
/* Length of the symbol text, as returned by getlen(). */
size_t get_sym_len() const
{
    return getlen();
}
311
* Generate a constant value node for this symbol, if possible;
312
* returns null if the symbol does not evaluate to a compile-time
313
* constant value. An object name, for example, evaluates to a
314
* compile-time constant equal to the object reference; a property
315
* name, in contrast, is (when not qualified by another operator) an
316
* invocation of the property, hence must be executed at run time,
317
* hence is not a compile-time constant.
319
virtual class CTcPrsNode *fold_constant()
321
/* by default, a symbol's value is not a constant */
326
* generate a constant value node for the address of this symbol;
327
* returns null if the symbol has no address
329
virtual class CTcPrsNode *fold_addr_const()
331
/* by default, a symbol has no address */
335
/* determine if this symbol can be used as an lvalue */
336
/* Can this symbol be used as an lvalue?  Not by default. */
virtual int check_lvalue() const
{
    return FALSE;
}
338
/* determine if this symbol can have its address taken */
339
/* Can this symbol have its address taken?  Not by default. */
virtual int has_addr() const
{
    return FALSE;
}
341
/* determine if I have a return value when evaluated */
342
/* Does this symbol yield a return value?  Yes by default. */
virtual int has_return_value_on_call() const
{
    return TRUE;
}
345
* Write the symbol to a symbol export file. By default, we'll
346
* write the type and symbol name to the file. Some subclasses
347
* might wish to override this to write additional data, or to write
348
* something different or nothing at all (for example, built-in
349
* function symbols are not written to a symbol export file).
351
* When a subclass does override this, it must write the type as a
352
* UINT2 value as the first thing written to the file. The generic
353
* file reader switches on this type code to determine what to call
354
* to load the entry, then calls the subclass-specific loader to do
357
* Returns true if we wrote the symbol to the file, false if not.
358
* (False doesn't indicate an error - it indicates that we chose not
359
* to store the symbol because the symbol is not of a type that we
360
* want to put in the export file.)
362
/*
 *   Write this symbol to the symbol export file 'fp'.  Returns true if
 *   the symbol was written, false if we chose not to store it (which is
 *   not an error).
 */
virtual int write_to_sym_file(class CVmFile *fp);
364
/* write the symbol name (with a UINT2 length prefix) to a file */
365
/*
 *   Write the symbol name to 'fp', preceded by a UINT2 length prefix.
 *   NOTE(review): the meaning of the int result is not visible here -
 *   confirm against the implementation.
 */
int write_name_to_file(class CVmFile *fp);
368
* Write the symbol to an object file. By default, we'll write the
369
* type and symbol name to the file. Some subclasses might wish to
370
* override this to write additional data, or to write something
371
* different or nothing at all (for example, built-in function
372
* symbols are not written to an object file).
374
* When a subclass does override this, it must write the type as a
375
* UINT2 value as the first thing written to the file. The generic
376
* file reader switches on this type code to determine what to call
377
* to load the entry, then calls the subclass-specific loader to do
380
* Returns true if we wrote the symbol to the file, false if not.
381
* (False doesn't indicate an error - it indicates that we chose not
382
* to store the symbol because the symbol is not of a type that we
383
* want to put in the object file.)
385
/*
 *   Write this symbol to the object file 'fp'.  Returns true if the
 *   symbol was written, false if we chose not to store it (which is not
 *   an error).
 */
virtual int write_to_obj_file(class CVmFile *fp);
388
* Write the symbol's cross references to the object file. This can
389
* write references to other symbols by storing the other symbol's
390
* index in the object file. Most symbols don't have any cross
391
* references, so this does nothing by default.
393
* If this writes anything, the first thing written must be a UINT4
394
* giving the object file index of this symbol. On loading, we'll
395
* read this and look up the loaded symbol.
397
/*
 *   Cross-reference writer.  Most symbols have no cross references, so
 *   the base version writes nothing and returns false.
 */
virtual int write_refs_to_obj_file(class CVmFile *)
{
    return FALSE;
}
400
* perform basic writing to a file - this performs common work that
401
* can be used for object or symbol files
403
/*
 *   Shared basic writer: performs the common work usable for both
 *   object files and symbol files.  NOTE(review): the meaning of the
 *   int result is not visible here - confirm against the implementation.
 */
int write_to_file_gen(CVmFile *fp);
406
* Read a symbol from a symbol file, returning the new symbol
408
/*
 *   Static reader: read one symbol from the symbol file 'fp' and return
 *   the new symbol.
 */
static class CTcSymbol *read_from_sym_file(class CVmFile *fp);
411
* Load a symbol from an object file. Stores the symbol in the
412
* global symbol table, and fills in the appropriate translation
413
* mapping table when necessary. Returns zero on success; logs
414
* error messages and returns non-zero on failure.
416
static int load_from_obj_file(class CVmFile *fp,
417
const textchar_t *fname,
418
tctarg_obj_id_t *obj_xlat,
419
tctarg_prop_id_t *prop_xlat,
423
* Load references from the object file - reads the information that
424
* write_refs_to_obj_file() wrote, except that the caller will have
425
* read the first UINT4 giving the symbol's object file index before
426
* calling this routine.
428
virtual void load_refs_from_obj_file(class CVmFile *,
429
const textchar_t * /*obj_fname*/,
430
tctarg_obj_id_t * /*obj_xlat*/,
431
tctarg_prop_id_t * /*prop_xlat*/)
433
/* by default, do nothing */
437
* Log an object file loading conflict with this symbol. The given
438
* type is the new type found in the object file of the given name.
440
void log_objfile_conflict(const textchar_t *fname, tc_symtype_t new_type)
444
* Get a pointer to the head of the fixup list for this symbol.
445
* Symbols such as functions that keep a list of fixups for
446
* references to the symbol must override this to provide a fixup
447
* list head; by default, symbols keep no fixup list, so we'll just
450
/*
 *   Head of this symbol's fixup list.  Symbols keep no fixup list by
 *   default, so the base version returns null; symbol types that do
 *   keep one must override this.
 */
virtual struct CTcAbsFixup **get_fixup_list_anchor()
{
    return 0;
}
453
* Set my code stream anchor object. By default, symbols don't keep
454
* track of any stream anchors. Symbols that refer to code or data
455
* stream locations directly must keep an anchor, since they must
456
* keep track of their fixup list in order to fix up generated
457
* references to the symbol. This must be overridden by any
458
* subclasses that keep anchors.
460
/*
 *   Set the code stream anchor.  Symbols track no stream anchor by
 *   default; subclasses that keep anchors must override this.
 */
virtual void set_anchor(struct CTcStreamAnchor *)
{
    /* the base symbol has nowhere to record an anchor */
}
463
* Determine if this symbol is external and unresolved. By default,
464
* a symbol cannot be external at all, so this will return false.
465
* Subclasses for symbol types that can be external should override
466
* this to return true if the symbol is an unresolved external
469
virtual int is_unresolved_extern() const { return FALSE; }
472
* Mark the symbol as referenced. Some symbol types keep track of
473
* whether they've been referenced or not; those types can override
474
* this to keep track. This method is called each time the symbol
475
* is found in the symbol table via the find() or find_or_def()
476
* methods. By default, we do nothing.
478
/*
 *   Note a reference to this symbol.  Symbol types that track
 *   references override this; the base version does nothing.
 */
virtual void mark_referenced()
{
    /* no reference tracking in the base class */
}
481
* Apply internal fixups. If the symbol keeps its own internal
482
* fixup information, it can translate the fixups here. By default,
485
/*
 *   Translate any fixups the symbol keeps internally.  The base version
 *   does nothing.
 */
virtual void apply_internal_fixups()
{
    /* nothing to translate in the base class */
}
488
* Build dictionary entries for this symbol. Most symbols do
489
* nothing here; objects which can have associated vocabulary words
490
* should insert their vocabulary into the dictionary.
492
/*
 *   Add this symbol's vocabulary to the dictionary.  Most symbols have
 *   none, so the base version does nothing.
 */
virtual void build_dictionary()
{
    /* no vocabulary words to insert */
}
495
* Create a new "context variable" version of this symbol for use in
496
* an anonymous function. This is only needed for symbols that can
497
* exist in a local scope.
499
/*
 *   Create a "context variable" version of this symbol for use in an
 *   anonymous function.  Only symbols that can exist in a local scope
 *   need this; the base version returns null.
 */
virtual class CTcSymbol *new_ctx_var() const
{
    return 0;
}
502
* Apply context variable conversion. If this symbol has not been
503
* referenced, this should simply remove the symbol from the symbol
504
* table. Otherwise, this should apply the necessary conversions to
505
* the original symbol from which this symbol was created to ensure
506
* that the original and this symbol share a context variable slot.
508
* Returns true if a conversion was performed (i.e., the symbol was
509
* referenced), false if not.
511
virtual int apply_ctx_var_conv(class CTcPrsSymtab *,
512
class CTPNCodeBody *)
516
* Finalize context variable conversion. This should do nothing if
517
* the variable hasn't already been notified that it's a context
518
* variable (how this happens varies by symbol type - see locals in
519
* particular). This is called with the variable's own scope active
520
* in the parser, so the final variable assignments for the symbol
523
/*
 *   Finalize context variable conversion.  The base version does
 *   nothing.
 */
virtual void finish_ctx_var_conv()
{
    /* nothing to finalize in the base class */
}
526
* Check for local references. For variables that can exist in
527
* local scope, such as locals, this will be called when all of the
528
* code for the scope has been parsed; this should check to see if
529
* the symbol has been referenced in the scope, and display an
530
* appropriate warning message if not.
532
/*
 *   Check whether the symbol was referenced in its scope.  Variables
 *   that can exist in local scope override this to issue a warning;
 *   the base version does nothing.
 */
virtual void check_local_references()
{
    /* no check is made in the base class */
}
535
* Add an entry for this symbol to a "runtime symbol table," which is
536
* a symbol table that we can pass to the interpreter. This must be
537
* overridden by each symbol type for each target architecture,
538
* because the nature of the runtime symbol table varies by target
541
* By default, this does nothing. Symbol types that don't need to
542
* generate runtime symbol table entries don't need to override this.
544
/*
 *   Add an entry for this symbol to a runtime symbol table.  Symbol
 *   types that need no runtime entry keep this default, which does
 *   nothing.
 */
virtual void add_runtime_symbol(class CVmRuntimeSymbols *)
{
    /* no runtime symbol table entry is generated by default */
}
548
* Base routine to read from a symbol file - reads the symbol name.
549
* Returns a pointer to the symbol name (stored in tokenizer memory
550
* that will remain valid throughout the compilation) on success; on
551
* failure, logs an error and returns null.
553
/*
 *   Static helper: read the symbol name from the symbol file 'fp'.
 *   On success, returns a pointer to the name, held in tokenizer memory
 *   that stays valid throughout the compilation.  On failure, logs an
 *   error and returns null.
 */
static const char *base_read_from_sym_file(class CVmFile *fp);