3
% Copyright 2009-2010 Taco Hoekwater <taco@@luatex.org>
5
% This file is part of LuaTeX.
7
% LuaTeX is free software; you can redistribute it and/or modify it under
8
% the terms of the GNU General Public License as published by the Free
9
% Software Foundation; either version 2 of the License, or (at your
10
% option) any later version.
12
% LuaTeX is distributed in the hope that it will be useful, but WITHOUT
13
% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
% FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15
% License for more details.
17
% You should have received a copy of the GNU General Public License along
18
% with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
23
static const char _svn_version[] =
24
"$Id: expand.w 3587 2010-04-03 14:32:25Z taco $"
25
"$URL: http://foundry.supelec.fr/svn/luatex/tags/beta-0.60.1/source/texk/web2c/luatexdir/tex/expand.w $";
28
@ Only a dozen or so command codes |>max_command| can possibly be returned by
29
|get_next|; in increasing order, they are |undefined_cs|, |expand_after|,
30
|no_expand|, |input|, |if_test|, |fi_or_else|, |cs_name|, |convert|, |the|,
31
|top_bot_mark|, |call|, |long_call|, |outer_call|, |long_outer_call|, and
32
|end_template|.{\emergencystretch=40pt\par}
34
Sometimes, recursive calls to the following |expand| routine may
35
cause exhaustion of the run-time calling stack, resulting in
36
forced execution stops by the operating system. To diminish the chance
37
of this happening, a counter is used to keep track of the recursion
38
depth, in conjunction with a constant called |expand_depth|.
40
Note that this does not catch all possible infinite recursion loops,
41
just the ones that exhaust the application calling stack. The
42
actual maximum value of |expand_depth| is outside of our control, but
43
the initial setting of |100| should be enough to prevent problems.
44
@^system dependencies@>
47
static int expand_depth_count = 0; /* current nesting depth of |expand|; checked against |expand_depth| */
50
@ The |expand| subroutine is used when |cur_cmd>max_command|. It removes a
51
``call'' or a conditional or one of the other special operations just
52
listed. It follows that |expand| might invoke itself recursively. In all
53
cases, |expand| destroys the current token, but it sets things up so that
54
the next |get_next| will deliver the appropriate next token. The value of
55
|cur_tok| need not be known when |expand| is called.
57
Since several of the basic scanning routines communicate via global variables,
58
their values are saved as local variables of |expand| so that
59
recursive calls don't invalidate them.
63
boolean is_in_csname = false;
68
halfword t; /* token that is being ``expanded after'' */
69
halfword p; /* for list manipulation */
70
halfword cur_ptr; /* for a local token list pointer */
71
int cv_backup; /* to save the global quantity |cur_val| */
72
int cvl_backup, radix_backup, co_backup; /* to save |cur_val_level|, etc. */
73
halfword backup_backup; /* to save |link(backup_head)| */
74
int save_scanner_status; /* temporary storage of |scanner_status| */
75
incr(expand_depth_count);
76
if (expand_depth_count >= expand_depth)
77
overflow("expansion depth", (unsigned) expand_depth);
79
cvl_backup = cur_val_level;
81
co_backup = cur_order;
82
backup_backup = token_link(backup_head);
84
if (cur_cmd < call_cmd) {
85
/* Expand a nonmacro */
86
if (int_par(tracing_commands_code) > 1)
89
case top_bot_mark_cmd:
90
/* Insert the appropriate mark text into the scanner */
91
t = cur_chr % marks_code;
92
if (cur_chr >= marks_code)
98
cur_ptr = first_mark(cur_val);
101
cur_ptr = bot_mark(cur_val);
103
case split_first_mark_code:
104
cur_ptr = split_first_mark(cur_val);
106
case split_bot_mark_code:
107
cur_ptr = split_bot_mark(cur_val);
110
cur_ptr = top_mark(cur_val);
114
begin_token_list(cur_ptr, mark_text);
116
case expand_after_cmd:
118
/* Expand the token after the next token */
119
/* It takes only a little shuffling to do what \TeX\ calls \.{\\expandafter}. */
123
if (cur_cmd > max_command_cmd)
130
} else { /* \\unless */
131
/* Negate a boolean conditional and |goto reswitch| */
132
/* The result of a boolean condition is reversed when the conditional is
133
preceded by \.{\\unless}. */
135
if ((cur_cmd == if_test_cmd) && (cur_chr != if_case_code)) {
136
cur_chr = cur_chr + unless_code;
139
print_err("You can't use `\\unless' before `");
140
print_cmd_chr((quarterword) cur_cmd, cur_chr);
142
help1("Continue, and I'll forget that it ever happened.");
148
/* Suppress expansion of the next token */
149
/* The implementation of \.{\\noexpand} is a bit trickier, because it is
150
necessary to insert a special `|dont_expand|' marker into \TeX's reading
151
mechanism. This special marker is processed by |get_next|, but it does
152
not slow down the inner loop.
154
Since \.{\\outer} macros might arise here, we must also
155
clear the |scanner_status| temporarily.
158
save_scanner_status = scanner_status;
159
scanner_status = normal;
161
scanner_status = save_scanner_status;
163
back_input(); /* now |start| and |loc| point to the backed-up token |t| */
164
if (t >= cs_token_flag) {
166
set_token_info(p, cs_token_flag + frozen_dont_expand);
167
set_token_link(p, iloc);
173
/* Implement \.{\\primitive} */
175
The \.{\\primitive} handling. If the primitive meaning of the next
176
token is an expandable command, it suffices to replace the current
177
token with the primitive one and restart |expand|.
179
Otherwise, the token we just read has to be pushed back, as well
180
as a token matching the internal form of \.{\\primitive}, that is
181
sneaked in as an alternate form of |ignore_spaces|.
183
An implementation problem surfaces: There really is no |cur_cs|
184
attached to the inserted primitive command, so it is safer to set
185
|cur_cs| to zero. |cur_tok| has a similar problem. And for the
186
non-expanded branch, simply pushing back a token that matches the
187
correct internal command does not work, because that approach would
188
not survive roundtripping to a temporary file or even a token list.
190
In a next version, it would be smart to create |frozen_| versions of
191
all the primitives. Then, this problem would not happen, at the
192
expense of a few hundred extra control sequences.
194
save_scanner_status = scanner_status;
195
scanner_status = normal;
197
scanner_status = save_scanner_status;
198
cur_cs = prim_lookup(cs_text(cur_cs));
199
if (cur_cs != undefined_primitive) {
200
t = get_prim_eq_type(cur_cs);
201
if (t > max_command_cmd) {
203
cur_chr = get_prim_equiv(cur_cs);
204
cur_tok = token_val(cur_cmd, cur_chr);
208
back_input(); /* now |loc| and |start| point to a one-item list */
210
set_token_info(p, cs_token_flag + frozen_primitive);
211
set_token_link(p, iloc);
216
print_err("Missing primitive name");
218
("The control sequence marked <to be read again> does not",
219
"represent any known primitive.");
226
/* Manufacture a control sequence name; */
227
manufacture_csname();
230
conv_toks(); /* this procedure is discussed in Part 27 below */
233
ins_the_toks(); /* this procedure is discussed in Part 27 below */
236
conditional(); /* this procedure is discussed in Part 28 below */
239
/* Terminate the current conditional and skip to \.{\\fi} */
240
/* The processing of conditionals is complete except for the following
241
code, which is actually part of |expand|. It comes into play when
242
\.{\\or}, \.{\\else}, or \.{\\fi} is scanned. */
244
if (int_par(tracing_ifs_code) > 0)
245
if (int_par(tracing_commands_code) <= 1)
247
if (cur_chr > if_limit) {
248
if (if_limit == if_code) {
249
insert_relax(); /* condition not yet evaluated */
252
print_cmd_chr(fi_or_else_cmd, cur_chr);
253
help1("I'm ignoring this; it doesn't match any \\if.");
257
while (cur_chr != fi_code)
258
pass_text(); /* skip to \.{\\fi} */
259
pop_condition_stack();
264
/* Initiate or terminate input from a file */
267
else if (cur_chr == 2)
269
else if (cur_chr == 3) {
272
} else if (name_in_progress)
278
/* Complain about an undefined macro */
279
print_err("Undefined control sequence");
280
help5("The control sequence at the end of the top line",
281
"of your error message was never \\def'ed. If you have",
282
"misspelled it (e.g., `\\hobx'), type `I' and the correct",
283
"spelling (e.g., `I\\hbox'). Otherwise just continue,",
284
"and I'll forget about whatever was undefined.");
288
} else if (cur_cmd < end_template_cmd) {
291
/* Insert a token containing |frozen_endv| */
292
/* An |end_template| command is effectively changed to an |endv| command
293
by the following code. (The reason for this is discussed below; the
294
|frozen_end_template| at the end of the template has passed the
295
|check_outer_validity| test, so its mission of error detection has been
298
cur_tok = cs_token_flag + frozen_endv;
303
cur_val_level = cvl_backup;
304
radix = radix_backup;
305
cur_order = co_backup;
306
set_token_link(backup_head, backup_backup);
307
decr(expand_depth_count);
311
/* Complain about a missing \.{\\endcsname}: print the error headline and
   supply the two help lines that explain it. */
void complain_missing_csname(void)
313
print_err("Missing \\endcsname inserted");
314
help2("The control sequence marked <to be read again> should",
315
"not appear between \\csname and \\endcsname.");
320
/* Manufacture a control sequence name: collect tokens into the list |r|,
   complain if the scan did not stop at an |end_cs_name| command, look the
   collected characters up in the hash table, and leave the result in
   |cur_cs| and |cur_tok|. An undefined result is defined as \.{\\relax}. */
void manufacture_csname(void)
325
p = r; /* head of the list of characters */
330
store_new_token(cur_tok);
331
} while (cur_cs == 0); /* keep collecting while the token is not a control sequence */
332
if (cur_cmd != end_cs_name_cmd) {
333
/* Complain about missing \.{\\endcsname} */
334
complain_missing_csname();
336
is_in_csname = false;
337
/* Look up the characters of list |r| in the hash table, and set |cur_cs| */
339
ss = tokenlist_to_lstring(r, true);
341
no_new_control_sequence = false; /* NOTE(review): presumably permits |string_lookup| to create a new entry; confirm */
342
cur_cs = string_lookup((char *) ss->s, ss->l);
343
no_new_control_sequence = true; /* restore the flag after the lookup */
345
cur_cs = null_cs; /* the list is empty */
349
if (eq_type(cur_cs) == undefined_cs_cmd) {
350
eq_define(cur_cs, relax_cmd, too_big_char); /* N.B.: The |save_stack| might change */
351
}; /* the control sequence will now match `\.{\\relax}' */
352
cur_tok = cur_cs + cs_token_flag; /* token form of the control sequence just found */
357
@ Sometimes the expansion looks too far ahead, so we want to insert
358
a harmless \.{\\relax} into the user's input.
361
/* Insert a harmless \.{\\relax} into the input, for the cases where
   expansion has looked too far ahead. */
void insert_relax(void)
363
cur_tok = cs_token_flag + cur_cs; /* token for the control sequence currently in |cur_cs| */
365
cur_tok = cs_token_flag + frozen_relax; /* token for the frozen \.{\\relax} */
367
token_type = inserted; /* NOTE(review): presumably retags the token list just pushed; confirm */
371
@ Here is a recursive procedure that is \TeX's usual way to get the
372
next token of input. It has been slightly optimized to take account of
376
/* Get the next token of input, expanding as needed; the result is left in
   |cur_cmd|, |cur_chr| and |cur_tok|. */
void get_x_token(void)
377
{ /* sets |cur_cmd|, |cur_chr|, |cur_tok|, and expands macros */
380
if (cur_cmd <= max_command_cmd) /* not an expandable command */
382
if (cur_cmd >= call_cmd) { /* a macro call or |end_template| */
383
if (cur_cmd < end_template_cmd) { /* one of the four call commands */
386
cur_cs = frozen_endv; /* |end_template| is turned into the frozen |endv| control sequence */
388
goto DONE; /* |cur_chr=null_list| */
396
cur_tok = token_val(cur_cmd, cur_chr); /* token built from a command/character pair */
398
cur_tok = cs_token_flag + cur_cs; /* token for the control sequence in |cur_cs| */
402
@ The |get_x_token| procedure is equivalent to two consecutive
403
procedure calls: |get_next; x_token|.
407
{ /* |get_x_token| without the initial |get_next| */
408
while (cur_cmd > max_command_cmd) {
413
cur_tok = token_val(cur_cmd, cur_chr);
415
cur_tok = cs_token_flag + cur_cs;
419
@ A control sequence that has been \.{\\def}'ed by the user is expanded by
420
\TeX's |macro_call| procedure.
422
Before we get into the details of |macro_call|, however, let's consider the
423
treatment of primitives like \.{\\topmark}, since they are essentially
424
macros without parameters. The token lists for such marks are kept in five
425
global arrays of pointers; we refer to the individual entries of these
426
arrays by symbolic macros |top_mark|, etc. The value of |top_mark(x)|, etc.
427
is either |null| or a pointer to the reference count of a token list.
429
The variable |biggest_used_mark| is an aid to try and keep the code
430
somewhat efficient without too much extra work: it registers the
431
highest mark class ever instantiated by the user, so the loops
432
in |fire_up| and |vsplit| do not have to traverse the full range
436
/* Mark storage, one slot per mark class |0| through |biggest_mark|. Each
   entry is either |null| or a pointer to the reference count of a token
   list; entries are referred to through the macros |top_mark|, etc. */
halfword top_marks_array[(biggest_mark + 1)];
437
halfword first_marks_array[(biggest_mark + 1)];
438
halfword bot_marks_array[(biggest_mark + 1)];
439
halfword split_first_marks_array[(biggest_mark + 1)];
440
halfword split_bot_marks_array[(biggest_mark + 1)];
441
halfword biggest_used_mark; /* highest mark class instantiated so far */
444
/* Reset the mark bookkeeping: clear |biggest_used_mark| and set the mark
   pointers of every class from |0| through |biggest_mark| to |null|. */
void initialize_marks(void)
447
biggest_used_mark = 0; /* no mark class has been used yet */
448
for (i = 0; i <= biggest_mark; i++) { /* inclusive bound: the arrays hold |biggest_mark+1| entries */
450
first_mark(i) = null;
452
split_first_mark(i) = null;
453
split_bot_mark(i) = null;
458
@ Now let's consider |macro_call| itself, which is invoked when \TeX\ is
459
scanning a control sequence whose |cur_cmd| is either |call|, |long_call|,
460
|outer_call|, or |long_outer_call|. The control sequence definition
461
appears in the token list whose reference count is in location |cur_chr|
464
The global variable |long_state| will be set to |call| or to |long_call|,
465
depending on whether or not the control sequence disallows \.{\\par}
466
in its parameters. The |get_next| routine will set |long_state| to
467
|outer_call| and emit \.{\\par}, if a file ends or if an \.{\\outer}
468
control sequence occurs in the midst of an argument.
471
int long_state; /* governs the acceptance of \.{\\par} */
473
@ The parameters, if any, must be scanned before the macro is expanded.
474
Parameters are token lists without reference counts. They are placed on
475
an auxiliary stack called |pstack| while they are being scanned, since
476
the |param_stack| may be losing entries during the matching process.
477
(Note that |param_stack| can't be gaining entries, since |macro_call| is
478
the only routine that puts anything onto |param_stack|, and it
482
halfword pstack[9]; /* arguments supplied to a macro */
485
@ After parameter scanning is complete, the parameters are moved to the
486
|param_stack|. Then the macro body is fed to the scanner; in other words,
487
|macro_call| places the defined text of the control sequence at the
488
top of\/ \TeX's input stack, so that |get_next| will proceed to read it
491
The global variable |cur_cs| contains the |eqtb| address of the control sequence
492
being expanded, when |macro_call| begins. If this control sequence has not been
493
declared \.{\\long}, i.e., if its command code in the |eq_type| field is
494
not |long_call| or |long_outer_call|, its parameters are not allowed to contain
495
the control sequence \.{\\par}. If an illegal \.{\\par} appears, the macro
496
call is aborted, and the \.{\\par} will be rescanned.
499
void macro_call(void)
500
{ /* invokes a user-defined control sequence */
501
halfword r; /* current node in the macro's token list */
502
halfword p = null; /* current node in parameter token list being built */
503
halfword q; /* new node being put into the token list */
504
halfword s; /* backup pointer for parameter matching */
505
halfword t; /* cycle pointer for backup recovery */
506
halfword u, v; /* auxiliary pointers for backup recovery */
507
halfword rbrace_ptr = null; /* one step before the last |right_brace| token */
508
int n = 0; /* the number of parameters scanned */
509
halfword unbalance; /* unmatched left braces in current parameter */
510
halfword m = 0; /* the number of tokens or groups (usually) */
511
halfword ref_count; /* start of the token list */
512
int save_scanner_status = scanner_status; /* |scanner_status| upon entry */
513
halfword save_warning_index = warning_index; /* |warning_index| upon entry */
514
int match_chr = 0; /* character used in parameter */
515
warning_index = cur_cs;
517
r = token_link(ref_count);
518
if (int_par(tracing_macros_code) > 0) {
519
/* Show the text of the macro being expanded */
522
print_cs(warning_index);
523
token_show(ref_count);
524
end_diagnostic(false);
526
if (token_info(r) == protected_token)
528
if (token_info(r) != end_match_token) {
529
/* Scan the parameters and make |link(r)| point to the macro body; but
530
|return| if an illegal \.{\\par} is detected */
531
/* At this point, the reader will find it advisable to review the explanation
532
of token list format that was presented earlier, since many aspects of that
533
format are of importance chiefly in the |macro_call| routine.
535
The token list might begin with a string of compulsory tokens before the
536
first |match| or |end_match|. In that case the macro name is supposed to be
537
followed by those tokens; the following program will set |s=null| to
538
represent this restriction. Otherwise |s| will be set to the first token of
539
a string that will delimit the next parameter.
542
scanner_status = matching;
544
long_state = eq_type(cur_cs);
545
if (long_state >= outer_call_cmd)
546
long_state = long_state - 2;
548
set_token_link(temp_token_head, null);
549
if ((token_info(r) >= end_match_token)
550
|| (token_info(r) < match_token)) {
553
match_chr = token_info(r) - match_token;
559
/* Scan a parameter until its delimiter string has been found; or, if |s=null|,
560
simply scan the delimiter string; */
562
/* If |info(r)| is a |match| or |end_match| command, it cannot be equal to
563
any token found by |get_token|. Therefore an undelimited parameter---i.e.,
564
a |match| that is immediately followed by |match| or |end_match|---will
565
always fail the test `|cur_tok=info(r)|' in the following algorithm. */
567
get_token(); /* set |cur_tok| to the next token of input */
568
if (cur_tok == token_info(r)) {
569
/* Advance |r|; |goto found| if the parameter delimiter has been
570
fully matched, otherwise |goto continue| */
571
/* A slightly subtle point arises here: When the parameter delimiter ends
572
with `\.{\#\{}', the token list will have a left brace both before and
573
after the |end_match|\kern-.4pt. Only one of these should affect the
574
|align_state|, but both will be scanned, so we must make a correction.
577
if ((token_info(r) >= match_token)
578
&& (token_info(r) <= end_match_token)) {
579
if (cur_tok < left_brace_limit)
587
/* Contribute the recently matched tokens to the current parameter, and
588
|goto continue| if a partial match is still in effect; but abort if |s=null| */
590
/* When the following code becomes active, we have matched tokens from |s| to
591
the predecessor of |r|, and we have found that |cur_tok<>info(r)|. An
592
interesting situation now presents itself: If the parameter is to be
593
delimited by a string such as `\.{ab}', and if we have scanned `\.{aa}',
594
we want to contribute one `\.a' to the current parameter and resume
595
looking for a `\.b'. The program must account for such partial matches and
596
for others that can be quite complex. But most of the time we have |s=r|
597
and nothing needs to be done.
599
Incidentally, it is possible for \.{\\par} tokens to sneak in to certain
600
parameters of non-\.{\\long} macros. For example, consider a case like
601
`\.{\\def\\a\#1\\par!\{...\}}' where the first \.{\\par} is not followed
602
by an exclamation point. In such situations it does not seem appropriate
603
to prohibit the \.{\\par}, so \TeX\ keeps quiet about this bending of
608
/* Report an improper use of the macro and abort */
609
print_err("Use of ");
610
sprint_cs(warning_index);
611
tprint(" doesn't match its definition");
613
("If you say, e.g., `\\def\\a1{...}', then you must always",
614
"put `1' after `\\a', since control sequence names are",
615
"made up of letters only. The macro here has not been",
616
"followed by the required stuff, so I'm ignoring it.");
623
store_new_token(token_info(t));
629
if (cur_tok != token_info(v)) {
636
if (token_info(u) != token_info(v))
644
r = s; /* at this point, no tokens are recently matched */
648
if (cur_tok == par_token)
649
if (long_state != long_call_cmd)
650
if (!int_par(suppress_long_error_code)) {
653
if (cur_tok < right_brace_limit) {
654
if (cur_tok < left_brace_limit) {
655
/* Contribute an entire group to the current parameter */
658
fast_store_new_token(cur_tok);
660
if (cur_tok == par_token) {
661
if (long_state != long_call_cmd) {
662
if (!int_par(suppress_long_error_code)) {
668
if (cur_tok < right_brace_limit) {
669
if (cur_tok < left_brace_limit) {
679
store_new_token(cur_tok);
682
/* Report an extra right brace and |goto continue| */
684
print_err("Argument of ");
685
sprint_cs(warning_index);
686
tprint(" has an extra }");
688
("I've run across a `}' that doesn't seem to match anything.",
689
"For example, `\\def\\a#1{...}' and `\\a}' would produce",
690
"this error. If you simply proceed now, the `\\par' that",
691
"I've just inserted will cause me to report a runaway",
692
"argument that might be the root of the problem. But if",
693
"your `}' was spurious, just type `2' and it will go away.");
695
long_state = call_cmd;
699
/* a white lie; the \.{\\par} won't always trigger a runaway */
702
/* Store the current token, but |goto continue| if it is
703
a blank space that would become an undelimited parameter */
704
if (cur_tok == space_token)
705
if (token_info(r) <= end_match_token)
706
if (token_info(r) >= match_token)
708
store_new_token(cur_tok);
712
if (token_info(r) > end_match_token)
714
if (token_info(r) < match_token)
718
/* Tidy up the parameter just scanned, and tuck it away */
719
/* If the parameter consists of a single group enclosed in braces, we must
720
strip off the enclosing braces. That's why |rbrace_ptr| was introduced. */
721
if ((m == 1) && (token_info(p) < right_brace_limit)
722
&& (p != temp_token_head)) {
723
set_token_link(rbrace_ptr, null);
725
p = token_link(temp_token_head);
726
pstack[n] = token_link(p);
729
pstack[n] = token_link(temp_token_head);
732
if (int_par(tracing_macros_code) > 0) {
737
show_token_list(pstack[n - 1], null, 1000);
738
end_diagnostic(false);
743
/* now |info(r)| is a token whose command code is either |match| or |end_match| */
744
} while (token_info(r) != end_match_token);
747
/* Feed the macro body and its parameters to the scanner */
748
/* Before we put a new token list on the input stack, it is wise to clean off
749
all token lists that have recently been depleted. Then a user macro that ends
750
with a call to itself will not require unbounded stack space. */
751
while ((istate == token_list) && (iloc == null)
752
&& (token_type != v_template))
753
end_token_list(); /* conserve stack space */
754
begin_token_list(ref_count, macro);
755
iname = warning_index;
756
iloc = token_link(r);
758
if (param_ptr + n > max_param_stack) {
759
max_param_stack = param_ptr + n;
760
if (max_param_stack > param_size)
761
overflow("parameter stack size", (unsigned) param_size);
763
for (m = 0; m <= n - 1; m++)
764
param_stack[param_ptr + m] = pstack[m];
765
param_ptr = param_ptr + n;
769
/* Report a runaway argument and abort */
770
/* If |long_state=outer_call|, a runaway argument has already been reported. */
771
if (long_state == call_cmd) {
773
print_err("Paragraph ended before ");
774
sprint_cs(warning_index);
775
tprint(" was complete");
776
help3("I suspect you've forgotten a `}', causing me to apply this",
777
"control sequence to too much text. How can we recover?",
778
"My plan is to forget the whole thing and hope for the best.");
781
pstack[n] = token_link(temp_token_head);
782
align_state = align_state - unbalance;
783
for (m = 0; m <= n; m++)
784
flush_list(pstack[m]);
787
scanner_status = save_scanner_status;
788
warning_index = save_warning_index;