1
/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
3
* This program is free software; you can redistribute it and/or modify
4
* it under the terms of the GNU Library General Public License as published by
5
* the Free Software Foundation; either version 2, or (at your option)
8
* This program is distributed in the hope that it will be useful,
9
* but WITHOUT ANY WARRANTY; without even the implied warranty of
10
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
* GNU Library General Public License for more details.
13
* You should have received a copy of the GNU Library General Public License
14
* along with this software; see the file COPYING. If not, write to
15
* the Free Software Foundation, 59 Temple Place - Suite 330,
16
* Boston, MA 02111-1307, USA.
20
#include <sys/types.h>
22
#include "rxgnucomp.h"
23
#include "inst-rxposix.h"
29
/* Define the syntax basics for \<, \>, etc.
34
#define CHAR_SET_SIZE (1 << CHARBITS)
36
#define SYNTAX(c) re_syntax_table[c]
37
char re_syntax_table[CHAR_SET_SIZE];
41
init_syntax_once (void)
53
rx_bzero ((char *)re_syntax_table, sizeof re_syntax_table);
55
for (c = 'a'; c <= 'z'; c++)
56
re_syntax_table[c] = Sword;
58
for (c = 'A'; c <= 'Z'; c++)
59
re_syntax_table[c] = Sword;
61
for (c = '0'; c <= '9'; c++)
62
re_syntax_table[c] = Sword;
64
re_syntax_table['_'] = Sword;
68
#endif /* not emacs */
74
const char *rx_error_msg[] =
77
"No match", /* REG_NOMATCH */
78
"Invalid regular expression", /* REG_BADPAT */
79
"Invalid collation character", /* REG_ECOLLATE */
80
"Invalid character class name", /* REG_ECTYPE */
81
"Trailing backslash", /* REG_EESCAPE */
82
"Invalid back reference", /* REG_ESUBREG */
83
"Unmatched [ or [^", /* REG_EBRACK */
84
"Unmatched ( or \\(", /* REG_EPAREN */
85
"Unmatched \\{", /* REG_EBRACE */
86
"Invalid content of \\{\\}", /* REG_BADBR */
87
"Invalid range end", /* REG_ERANGE */
88
"Memory exhausted", /* REG_ESPACE */
89
"Invalid preceding regular expression", /* REG_BADRPT */
90
"Premature end of regular expression", /* REG_EEND */
91
"Regular expression too big", /* REG_ESIZE */
92
"Unmatched ) or \\)", /* REG_ERPAREN */
98
* Macros used while compiling patterns.
100
* By convention, PEND points just past the end of the uncompiled pattern,
101
* P points to the read position in the pattern. `translate' is the name
102
* of the translation table (`TRANSLATE' is the name of a macro that looks
103
* things up in `translate').
108
* Fetch the next character in the uncompiled pattern---translating it
109
* if necessary. *Also cast from a signed character in the constant
110
* string passed to us by the user to an unsigned char that we can use
111
* as an array index (in, e.g., `translate').
113
#define PATFETCH(c) \
114
do {if (p == pend) return REG_EEND; \
115
c = (unsigned char) *p++; \
120
* Fetch the next character in the uncompiled pattern, with no
123
#define PATFETCH_RAW(c) \
124
do {if (p == pend) return REG_EEND; \
125
c = (unsigned char) *p++; \
128
/* Go backwards one character in the pattern. */
129
#define PATUNFETCH p--
132
#define TRANSLATE(d) translate[(unsigned char) (d)]
134
typedef int regnum_t;
136
/* Since offsets can go either forwards or backwards, this type needs to
137
* be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
139
typedef int pattern_offset_t;
143
struct rexp_node ** top_expression;
144
struct rexp_node ** last_expression;
145
struct rexp_node ** last_non_regular_expression;
146
pattern_offset_t inner_group_offset;
148
} compile_stack_elt_t;
152
compile_stack_elt_t *stack;
154
unsigned avail; /* Offset of next open position. */
155
} compile_stack_type;
158
#define INIT_COMPILE_STACK_SIZE 32
160
#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
161
#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
163
/* The next available element. */
164
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
167
/* Set the bit for character C in a list. */
168
#define SET_LIST_BIT(c) \
169
(b[((unsigned char) (c)) / CHARBITS] \
170
|= 1 << (((unsigned char) c) % CHARBITS))
172
/* Get the next unsigned number in the uncompiled pattern. */
173
#define GET_UNSIGNED_NUMBER(num) \
177
while (isdigit (c)) \
181
num = num * 10 + c - '0'; \
189
#define CHAR_CLASS_MAX_LENGTH 64
191
#define IS_CHAR_CLASS(string) \
192
(!strcmp (string, "alpha") || !strcmp (string, "upper") \
193
|| !strcmp (string, "lower") || !strcmp (string, "digit") \
194
|| !strcmp (string, "alnum") || !strcmp (string, "xdigit") \
195
|| !strcmp (string, "space") || !strcmp (string, "print") \
196
|| !strcmp (string, "punct") || !strcmp (string, "graph") \
197
|| !strcmp (string, "cntrl") || !strcmp (string, "blank"))
200
/* These predicates are used in regex_compile. */
202
/* P points to just after a ^ in PATTERN. Return true if that ^ comes
203
* after an alternative or a begin-subexpression. We assume there is at
204
* least one character before the ^.
209
at_begline_loc_p (const char *pattern, const char * p, unsigned long syntax)
212
at_begline_loc_p (pattern, p, syntax)
215
unsigned long syntax;
218
const char *prev = p - 2;
219
int prev_prev_backslash = ((prev > pattern) && (prev[-1] == '\\'));
223
(/* After a subexpression? */
224
((*prev == '(') && ((syntax & RE_NO_BK_PARENS) || prev_prev_backslash))
226
/* After an alternative? */
227
((*prev == '|') && ((syntax & RE_NO_BK_VBAR) || prev_prev_backslash))
231
/* The dual of at_begline_loc_p. This one is for $. We assume there is
232
* at least one character after the $, i.e., `P < PEND'.
237
at_endline_loc_p (const char *p, const char *pend, int syntax)
240
at_endline_loc_p (p, pend, syntax)
246
const char *next = p;
247
int next_backslash = (*next == '\\');
248
const char *next_next = (p + 1 < pend) ? (p + 1) : 0;
252
/* Before a subexpression? */
253
((syntax & RE_NO_BK_PARENS)
255
: (next_backslash && next_next && (*next_next == ')')))
257
/* Before an alternative? */
258
((syntax & RE_NO_BK_VBAR)
260
: (next_backslash && next_next && (*next_next == '|')))
265
unsigned char rx_id_translation[256] =
267
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
268
10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
269
20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
270
30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
271
40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
272
50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
273
60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
274
70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
275
80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
276
90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
278
100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
279
110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
280
120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
281
130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
282
140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
283
150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
284
160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
285
170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
286
180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
287
190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
289
200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
290
210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
291
220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
292
230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
293
240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
294
250, 251, 252, 253, 254, 255
297
/* The compiler keeps an inverted translation table.
298
* This looks up/inititalize elements.
299
* VALID is an array of booleans that validate CACHE.
304
inverse_translation (int * n_members, int cset_size, char * valid, rx_Bitset cache, unsigned char * translate, int c)
307
inverse_translation (n_members, cset_size, valid, cache, translate, c)
312
unsigned char * translate;
317
cs = cache + c * rx_bitset_numb_subsets (cset_size);
326
rx_bitset_null (cset_size, cs);
328
for (x = 0; x < 256; ++x)
329
if (TRANSLATE(x) == c_tr)
331
RX_bitset_enjoin (cs, x);
335
n_members[c] = membs;
343
/* More subroutine declarations and macros for regex_compile. */
345
/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
351
group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
354
group_in_compile_stack (compile_stack, regnum)
355
compile_stack_type compile_stack;
361
for (this_element = compile_stack.avail - 1;
364
if (compile_stack.stack[this_element].regnum == regnum)
372
* Read the ending character of a range (in a bracket expression) from the
373
* uncompiled pattern *P_PTR (which ends at PEND). We assume the
374
* starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
375
* Then we set the translation of all bits between the starting and
376
* ending characters (inclusive) in the compiled pattern B.
378
* Return an error code.
380
* We use these short variable names so we can use the same macros as
381
* `regex_compile' itself.
386
compile_range (int * n_members, int cset_size, rx_Bitset cs, const char ** p_ptr, const char * pend, unsigned char * translate, unsigned long syntax, rx_Bitset inv_tr, char * valid_inv_tr)
389
compile_range (n_members, cset_size, cs, p_ptr, pend, translate, syntax, inv_tr, valid_inv_tr)
395
unsigned char * translate;
396
unsigned long syntax;
403
const char *p = *p_ptr;
405
unsigned char range_end;
406
unsigned char range_start = TRANSLATE(p[-2]);
411
PATFETCH (range_end);
415
if (range_start > range_end)
416
return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
418
for (this_char = range_start; this_char <= range_end; this_char++)
421
inverse_translation (n_members, cset_size, valid_inv_tr, inv_tr, translate, this_char);
422
rx_bitset_union (cset_size, cs, it);
431
pointless_if_repeated (struct rexp_node * node)
434
pointless_if_repeated (node)
435
struct rexp_node * node;
448
return (pointless_if_repeated (node->params.pair.left)
449
&& pointless_if_repeated (node->params.pair.right));
454
return pointless_if_repeated (node->params.pair.left);
456
switch (node->params.intval)
478
factor_string (struct rexp_node *** lastp, int cset_size)
481
factor_string (lastp, cset_size)
482
struct rexp_node *** lastp;
486
struct rexp_node ** expp;
487
struct rexp_node * exp;
489
struct rexp_node * cset_node;
492
exp = *expp; /* presumed r_string */
494
cs = rx_cset (cset_size);
497
RX_bitset_enjoin (cs, exp->params.cstr.contents[exp->params.cstr.len - 1]);
498
cset_node = rx_mk_r_cset (r_cset, cset_size, cs);
504
if (exp->params.cstr.len == 1)
508
/* lastp remains the same */
513
struct rexp_node * concat_node;
514
exp->params.cstr.len--;
515
concat_node = rx_mk_r_binop (r_concat, exp, cset_node);
518
rx_free_rexp (cset_node);
522
*lastp = &concat_node->params.pair.right;
529
#define isa_blank(C) (((C) == ' ') || ((C) == '\t'))
533
rx_parse (struct rexp_node ** rexp_p,
536
unsigned long syntax,
538
unsigned char *translate)
541
rx_parse (rexp_p, pattern, size, syntax, cset_size, translate)
542
struct rexp_node ** rexp_p;
545
unsigned long syntax;
547
unsigned char *translate;
552
inverse_translate [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
553
char validate_inv_tr [CHAR_SET_SIZE];
554
int n_members [CHAR_SET_SIZE];
556
/* We fetch characters from PATTERN here. Even though PATTERN is
557
* `char *' (i.e., signed), we declare these variables as unsigned, so
558
* they can be reliably used as array indices.
560
register unsigned char c;
561
register unsigned char c1;
563
/* A random tempory spot in PATTERN. */
566
/* Keeps track of unclosed groups. */
567
compile_stack_type compile_stack;
569
/* Points to the current (ending) position in the pattern. */
573
/* When parsing is done, this will hold the expression tree. */
574
struct rexp_node * rexp;
576
/* This and top_expression are saved on the compile stack. */
577
struct rexp_node ** top_expression;
578
struct rexp_node ** last_non_regular_expression;
579
struct rexp_node ** last_expression;
581
/* Parameter to `goto append_node' */
582
struct rexp_node * append;
584
/* Counts open-groups as they are encountered. This is the index of the
585
* innermost group being compiled.
589
/* True iff the sub-expression just started
590
* is purely syntactic. Otherwise, a regmatch data
591
* slot is allocated for the subexpression.
593
int syntax_only_parens;
595
/* Place in the uncompiled pattern (i.e., the {) to
596
* which to go back if the interval is invalid.
598
const char *beg_interval;
605
translate = rx_id_translation;
607
/* Points to the current (ending) position in the pattern. */
609
pend = pattern + size;
611
/* When parsing is done, this will hold the expression tree. */
614
/* In the midst of compilation, this holds onto the regexp
615
* first parst while rexp goes on to aquire additional constructs.
617
top_expression = &rexp;
618
last_non_regular_expression = top_expression;
619
last_expression = top_expression;
621
/* Counts open-groups as they are encountered. This is the index of the
622
* innermost group being compiled.
626
rx_bzero ((char *)validate_inv_tr, sizeof (validate_inv_tr));
629
/* Initialize the compile stack. */
630
compile_stack.stack = (( compile_stack_elt_t *) malloc ((INIT_COMPILE_STACK_SIZE) * sizeof ( compile_stack_elt_t)));
631
if (compile_stack.stack == 0)
634
compile_stack.size = INIT_COMPILE_STACK_SIZE;
635
compile_stack.avail = 0;
637
#if !defined (emacs) && !defined (SYNTAX_TABLE)
638
/* Initialize the syntax table. */
642
/* Loop through the uncompiled pattern until we're at the end. */
651
if ( /* If at start of pattern, it's an operator. */
653
/* If context independent, it's an operator. */
654
|| syntax & RE_CONTEXT_INDEP_ANCHORS
655
/* Otherwise, depends on what's come before. */
656
|| at_begline_loc_p (pattern, p, syntax))
659
= rx_mk_r_int (r_context, '^');
673
if ( /* If at end of pattern, it's an operator. */
675
/* If context independent, it's an operator. */
676
|| syntax & RE_CONTEXT_INDEP_ANCHORS
677
/* Otherwise, depends on what's next. */
678
|| at_endline_loc_p (p, pend, syntax))
681
= rx_mk_r_int (r_context, '$');
695
if ((syntax & RE_BK_PLUS_QM)
696
|| (syntax & RE_LIMITED_OPS))
701
/* If there is no previous pattern... */
702
if (pointless_if_repeated (*last_expression))
704
if (syntax & RE_CONTEXT_INVALID_OPS)
706
compile_error = REG_BADRPT;
709
else if (!(syntax & RE_CONTEXT_INDEP_OPS))
714
/* 1 means zero (many) matches is allowed. */
715
char zero_times_ok = 0, many_times_ok = 0;
717
/* If there is a sequence of repetition chars, collapse it
718
down to just one (the right one). We can't combine
719
interval operators with these because of, e.g., `a{2}*',
720
which should only match an even number of `a's. */
724
zero_times_ok |= c != '+';
725
many_times_ok |= c != '?';
733
|| (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
736
else if (syntax & RE_BK_PLUS_QM && c == '\\')
740
compile_error = REG_EESCAPE;
745
if (!(c1 == '+' || c1 == '?'))
760
/* If we get here, we found another repeat character. */
763
/* Now we know whether or not zero matches is allowed
764
* and also whether or not two or more matches is allowed.
768
struct rexp_node * inner_exp;
769
struct rexp_node * star;
771
if (*last_expression && ((*last_expression)->type == r_string))
772
if (factor_string (&last_expression, cset_size))
774
inner_exp = *last_expression;
775
star = rx_mk_r_monop ((many_times_ok
776
? (zero_times_ok ? r_star : r_plus)
781
*last_expression = star;
790
struct rexp_node * n;
791
cs = rx_cset (cset_size);
794
n = rx_mk_r_cset (r_cset, cset_size, cs);
800
rx_bitset_universe (cset_size, cs);
801
if (!(syntax & RE_DOT_NEWLINE))
802
RX_bitset_remove (cs, '\n');
803
if (syntax & RE_DOT_NOT_NULL)
804
RX_bitset_remove (cs, 0);
815
compile_error = REG_EBRACK;
821
struct rexp_node * node;
825
is_inverted = *p == '^';
826
cs = rx_cset (cset_size);
829
node = rx_mk_r_cset (r_cset, cset_size ,cs);
836
/* This branch of the switch is normally exited with
844
/* Remember the first position in the bracket expression. */
847
/* Read in characters and ranges, setting map bits. */
852
compile_error = REG_EBRACK;
858
/* \ might escape characters inside [...] and [^...]. */
859
if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
863
compile_error = REG_EESCAPE;
869
rx_Bitset it = inverse_translation (n_members,
875
rx_bitset_union (cset_size, cs, it);
880
/* Could be the end of the bracket expression. If it's
881
not (i.e., when the bracket expression is `[]' so
882
far), the ']' character bit gets set way below. */
883
if (c == ']' && p != p1 + 1)
884
goto finalize_class_and_append;
886
/* Look ahead to see if it's a range when the last thing
887
was a character class. */
888
if (had_char_class && c == '-' && *p != ']')
890
compile_error = REG_ERANGE;
894
/* Look ahead to see if it's a range when the last thing
895
was a character: if this is a hyphen not at the
896
beginning or the end of a list, then it's the range
899
&& !(p - 2 >= pattern && p[-2] == '[')
900
&& !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
904
= compile_range (n_members, cset_size, cs, &p, pend, translate, syntax,
905
inverse_translate, validate_inv_tr);
906
if (ret != REG_NOERROR)
913
else if (p[0] == '-' && p[1] != ']')
914
{ /* This handles ranges made up of characters only. */
917
/* Move past the `-'. */
920
ret = compile_range (n_members, cset_size, cs, &p, pend, translate, syntax,
921
inverse_translate, validate_inv_tr);
922
if (ret != REG_NOERROR)
929
/* See if we're at the beginning of a possible character
932
else if ((syntax & RE_CHAR_CLASSES)
933
&& (c == '[') && (*p == ':'))
935
char str[CHAR_CLASS_MAX_LENGTH + 1];
940
/* If pattern is `[[:'. */
943
compile_error = REG_EBRACK;
950
if (c == ':' || c == ']' || p == pend
951
|| c1 == CHAR_CLASS_MAX_LENGTH)
957
/* If isn't a word bracketed by `[:' and:`]':
958
undo the ending character, the letters, and leave
959
the leading `:' and `[' (but set bits for them). */
960
if (c == ':' && *p == ']')
962
if (!strncmp (str, "cut", 3))
965
if (1 != sscanf (str + 3, " %d", &val))
967
compile_error = REG_ECTYPE;
970
/* Throw away the ]] */
974
struct rexp_node * cut;
975
cut = rx_mk_r_int (r_cut, val);
980
else if (!strncmp (str, "(", 1))
982
/* Throw away the ]] */
985
syntax_only_parens = 1;
988
else if (!strncmp (str, ")", 1))
990
/* Throw away the ]] */
993
syntax_only_parens = 1;
999
int is_alnum = !strcmp (str, "alnum");
1000
int is_alpha = !strcmp (str, "alpha");
1001
int is_blank = !strcmp (str, "blank");
1002
int is_cntrl = !strcmp (str, "cntrl");
1003
int is_digit = !strcmp (str, "digit");
1004
int is_graph = !strcmp (str, "graph");
1005
int is_lower = !strcmp (str, "lower");
1006
int is_print = !strcmp (str, "print");
1007
int is_punct = !strcmp (str, "punct");
1008
int is_space = !strcmp (str, "space");
1009
int is_upper = !strcmp (str, "upper");
1010
int is_xdigit = !strcmp (str, "xdigit");
1012
if (!IS_CHAR_CLASS (str))
1014
compile_error = REG_ECTYPE;
1018
/* Throw away the ] at the end of the character
1022
if (p == pend) { compile_error = REG_EBRACK; goto error_return; }
1024
for (ch = 0; ch < 1 << CHARBITS; ch++)
1026
if ( (is_alnum && isalnum (ch))
1027
|| (is_alpha && isalpha (ch))
1028
|| (is_blank && isa_blank (ch))
1029
|| (is_cntrl && iscntrl (ch))
1030
|| (is_digit && isdigit (ch))
1031
|| (is_graph && isgraph (ch))
1032
|| (is_lower && islower (ch))
1033
|| (is_print && isprint (ch))
1034
|| (is_punct && ispunct (ch))
1035
|| (is_space && isspace (ch))
1036
|| (is_upper && isupper (ch))
1037
|| (is_xdigit && isxdigit (ch)))
1040
inverse_translation (n_members,
1046
rx_bitset_union (cset_size,
1060
inverse_translation (n_members,
1066
rx_bitset_union (cset_size,
1071
inverse_translation (n_members,
1077
rx_bitset_union (cset_size,
1087
rx_Bitset it = inverse_translation (n_members,
1093
rx_bitset_union (cset_size, cs, it);
1098
finalize_class_and_append:
1101
rx_bitset_complement (cset_size, cs);
1102
if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
1103
RX_bitset_remove (cs, '\n');
1111
if (syntax & RE_NO_BK_PARENS)
1113
syntax_only_parens = 0;
1121
if (syntax & RE_NO_BK_PARENS)
1123
syntax_only_parens = 0;
1131
if (syntax & RE_NEWLINE_ALT)
1138
if (syntax & RE_NO_BK_VBAR)
1145
if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1146
goto handle_interval;
1152
if (p == pend) { compile_error = REG_EESCAPE; goto error_return; }
1154
/* Do not translate the character after the \, so that we can
1155
distinguish, e.g., \B from \b, even if we normally would
1156
translate, e.g., B to b. */
1162
if (syntax & RE_NO_BK_PARENS)
1163
goto normal_backslash;
1165
syntax_only_parens = 0;
1168
if (!syntax_only_parens)
1171
if (COMPILE_STACK_FULL)
1174
= ((compile_stack_elt_t *)
1175
realloc (compile_stack.stack,
1176
(compile_stack.size << 1) * sizeof (compile_stack_elt_t)));
1177
if (compile_stack.stack == 0)
1179
compile_stack.size <<= 1;
1182
if (*last_non_regular_expression)
1184
struct rexp_node * concat;
1185
concat = rx_mk_r_binop (r_concat, *last_non_regular_expression, 0);
1188
*last_non_regular_expression = concat;
1189
last_non_regular_expression = &concat->params.pair.right;
1190
last_expression = last_non_regular_expression;
1194
* These are the values to restore when we hit end of this
1197
COMPILE_STACK_TOP.top_expression = top_expression;
1198
COMPILE_STACK_TOP.last_expression = last_expression;
1199
COMPILE_STACK_TOP.last_non_regular_expression = last_non_regular_expression;
1201
if (syntax_only_parens)
1202
COMPILE_STACK_TOP.regnum = -1;
1204
COMPILE_STACK_TOP.regnum = regnum;
1206
compile_stack.avail++;
1208
top_expression = last_non_regular_expression;
1213
if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1214
syntax_only_parens = 0;
1217
/* See similar code for backslashed left paren above. */
1218
if (COMPILE_STACK_EMPTY)
1219
if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1222
{ compile_error = REG_ERPAREN; goto error_return; }
1224
/* Since we just checked for an empty stack above, this
1229
/* We don't just want to restore into `regnum', because
1230
* later groups should continue to be numbered higher,
1231
* as in `(ab)c(de)' -- the second group is #2.
1233
regnum_t this_group_regnum;
1234
struct rexp_node ** inner;
1235
struct rexp_node * parens;
1237
inner = top_expression;
1238
compile_stack.avail--;
1240
if (!!syntax_only_parens != (COMPILE_STACK_TOP.regnum == -1))
1241
{ compile_error = REG_ERPAREN; goto error_return; }
1243
top_expression = COMPILE_STACK_TOP.top_expression;
1244
last_expression = COMPILE_STACK_TOP.last_expression;
1245
last_non_regular_expression = COMPILE_STACK_TOP.last_non_regular_expression;
1246
this_group_regnum = COMPILE_STACK_TOP.regnum;
1249
parens = rx_mk_r_monop (r_parens, *inner);
1252
parens->params.intval = this_group_regnum;
1258
case '|': /* `\|'. */
1259
if ((syntax & RE_LIMITED_OPS) || (syntax & RE_NO_BK_VBAR))
1260
goto normal_backslash;
1262
if (syntax & RE_LIMITED_OPS)
1266
struct rexp_node * alt;
1268
alt = rx_mk_r_binop (r_alternate, *top_expression, 0);
1271
*top_expression = alt;
1272
last_expression = &alt->params.pair.right;
1273
last_non_regular_expression = &alt->params.pair.right;
1279
/* If \{ is a literal. */
1280
if (!(syntax & RE_INTERVALS)
1281
/* If we're at `\{' and it's not the open-interval
1283
|| ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1284
|| (p - 2 == pattern && p == pend))
1285
goto normal_backslash;
1289
/* If got here, then the syntax allows intervals.
1292
/* At least (most) this many matches must be made.
1300
/* We're about to parse the bounds of the interval.
1301
* It may turn out that this isn't an interval after
1302
* all, in which case these same characters will have
1303
* to be reparsed as literals. This remembers
1304
* the backtrack point in the parse:
1306
beg_interval = p - 1;
1310
if (syntax & RE_NO_BK_BRACES)
1311
goto unfetch_interval;
1313
{ compile_error = REG_EBRACE; goto error_return; }
1316
GET_UNSIGNED_NUMBER (lower_bound);
1320
GET_UNSIGNED_NUMBER (upper_bound);
1321
if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1324
/* Interval such as `{n}' => match exactly n times.
1326
upper_bound = lower_bound;
1329
|| upper_bound > RE_DUP_MAX
1330
|| lower_bound > upper_bound)
1332
if (syntax & RE_NO_BK_BRACES)
1333
goto unfetch_interval;
1335
{ compile_error = REG_BADBR; goto error_return; }
1338
if (!(syntax & RE_NO_BK_BRACES))
1340
if (c != '\\') { compile_error = REG_EBRACE; goto error_return; }
1346
if (syntax & RE_NO_BK_BRACES)
1347
goto unfetch_interval;
1349
{ compile_error = REG_BADBR; goto error_return; }
1352
/* We just parsed a valid interval.
1353
* lower_bound and upper_bound are set.
1356
/* If it's invalid to have no preceding re.
1358
if (pointless_if_repeated (*last_expression))
1360
if (syntax & RE_CONTEXT_INVALID_OPS)
1361
{ compile_error = REG_BADRPT; goto error_return; }
1362
else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1363
/* treat the interval spec as literal chars. */
1364
goto unfetch_interval;
1368
struct rexp_node * interval;
1370
if (*last_expression && ((*last_expression)->type == r_string))
1371
if (factor_string (&last_expression, cset_size))
1373
interval = rx_mk_r_monop (r_interval, *last_expression);
1376
interval->params.intval = lower_bound;
1377
interval->params.intval2 = upper_bound;
1378
*last_expression = interval;
1379
last_non_regular_expression = last_expression;
1386
/* If an invalid interval, match the characters as literals. */
1390
/* normal_char and normal_backslash need `c'. */
1393
if (!(syntax & RE_NO_BK_BRACES))
1395
if ((p > pattern) && (p[-1] == '\\'))
1396
goto normal_backslash;
1401
/* There is no way to specify the before_dot and after_dot
1402
* operators. rms says this is ok. --karl
1406
goto add_side_effect;
1413
struct rexp_node * set;
1415
cs = rx_cset (&cset_size);
1418
set = rx_mk_r_cset (r_cset, cset_size, cs);
1425
rx_bitset_universe (cset_size, cs);
1430
enum syntaxcode code = syntax_spec_code [c];
1431
for (x = 0; x < 256; ++x)
1434
if (SYNTAX (x) == code)
1437
inverse_translation (n_members,
1438
cset_size, validate_inv_tr,
1441
rx_bitset_xor (cset_size, cs, it);
1456
struct rexp_node * n;
1458
cs = rx_cset (cset_size);
1459
n = (cs ? rx_mk_r_cset (r_cset, cset_size, cs) : 0);
1467
rx_bitset_universe (cset_size ,cs);
1470
for (x = cset_size - 1; x > 0; --x)
1471
if (SYNTAX(x) & Sword)
1472
RX_bitset_toggle (cs, x);
1481
goto add_side_effect;
1486
goto add_side_effect;
1491
goto add_side_effect;
1496
goto add_side_effect;
1501
goto add_side_effect;
1506
goto add_side_effect;
1511
struct rexp_node * se;
1512
se = rx_mk_r_int (r_context, side);
1520
case '1': case '2': case '3': case '4': case '5':
1521
case '6': case '7': case '8': case '9':
1522
if (syntax & RE_NO_BK_REFS)
1527
/* Can't back reference to a subexpression if inside of it. */
1528
if (group_in_compile_stack (compile_stack, c1))
1532
{ compile_error = REG_ESUBREG; goto error_return; }
1535
goto add_side_effect;
1540
if (syntax & RE_BK_PLUS_QM)
1543
goto normal_backslash;
1547
/* You might think it would be useful for \ to mean
1548
* not to translate; but if we don't translate it
1549
* it will never match anything.
1558
/* Expects the character in `c'. */
1562
struct rexp_node * match;
1565
it = inverse_translation (n_members,
1566
cset_size, validate_inv_tr,
1567
inverse_translate, translate, c);
1569
if (1 != n_members[c])
1571
cs = rx_cset (cset_size);
1572
match = (cs ? rx_mk_r_cset (r_cset, cset_size, cs) : 0);
1579
rx_bitset_union (CHAR_SET_SIZE, cs, it);
1585
if (*last_expression && (*last_expression)->type == r_string)
1587
if (rx_adjoin_string (&((*last_expression)->params.cstr), c))
1593
append = rx_mk_r_str (r_string, c);
1602
/* This genericly appends the rexp APPEND to *LAST_EXPRESSION
1603
* and then parses the next character normally.
1605
if (RX_regular_node_type (append->type))
1607
if (!*last_expression)
1608
*last_expression = append;
1611
struct rexp_node * concat;
1612
concat = rx_mk_r_binop (r_concat,
1613
*last_expression, append);
1616
*last_expression = concat;
1617
last_expression = &concat->params.pair.right;
1622
if (!*last_non_regular_expression)
1624
*last_non_regular_expression = append;
1625
last_expression = last_non_regular_expression;
1629
struct rexp_node * concat;
1630
concat = rx_mk_r_binop (r_concat,
1631
*last_non_regular_expression, append);
1634
*last_non_regular_expression = concat;
1635
last_non_regular_expression = &concat->params.pair.right;
1636
last_expression = last_non_regular_expression;
1641
} /* while p != pend */
1644
/* Through the pattern now. */
1646
if (!COMPILE_STACK_EMPTY)
1647
{ compile_error = REG_EPAREN; goto error_return; }
1648
free (compile_stack.stack);
1655
compile_error = REG_ESPACE;
1658
free (compile_stack.stack);
1659
/* Free expressions pushed onto the compile stack! */
1661
rx_free_rexp (rexp);
1662
return compile_error;