40
Re_node node; Re_Lit l;
42
l = (Re_Lit) new_node(l);
43
node = (Re_node) new_node(node);
44
if (l == NULL || node == NULL) return NULL;
46
lit_pos(l) = pos_cnt++;
47
if (type == C_SET) lit_cset(l) = cset;
48
else lit_char(l) = ch; /* type == C_LIT */
51
Nullable(node) = FALSE;
52
Firstpos(node) = create_pos(lit_pos(l));
53
Lastpos(node) = Firstpos(node);
45
new_node(Re_Lit, l, l);
46
new_node(Re_node, node, node);
47
if (l == NULL || node == NULL) {
48
if (l != NULL) free(l);
49
if (node != NULL) free(node);
53
lit_pos(l) = pos_cnt++;
54
if (type == C_SET) lit_cset(l) = cset;
55
else lit_char(l) = ch; /* type == C_LIT */
58
Nullable(node) = FALSE;
59
Firstpos(node) = create_pos(lit_pos(l));
60
Lastpos(node) = Firstpos(node);
57
64
/* parse_cset() takes a pointer to a pointer to a string and parses
61
68
Re_node parse_cset(s)
64
Ch_Set cs_ptr, curr_ptr, prev_ptr;
71
Ch_Set cs_ptr, curr_ptr, prev_ptr;
73
Ch_Range range = NULL;
68
if (Unexpected(s, ']')) return NULL;
69
curr_ptr = (Ch_Set) new_node(curr_ptr); cs_ptr = curr_ptr;
70
while (!Unexpected(s, ']')) {
71
range = (Ch_Range)new_node(range);
72
curr_ptr->elt = range;
74
if (ch == '-') return NULL; /* invalid range */
76
if (**s == NUL) return NULL;
77
else if (**s == '-') { /* character range */
79
if (Invalid_range(**s, ch)) return NULL;
80
else range->hi_bd = NextChar(s);
82
else range->hi_bd = ch;
84
curr_ptr = (Ch_Set) new_node(curr_ptr);
85
prev_ptr->rest = curr_ptr;
88
prev_ptr->rest = NULL;
89
return mk_leaf(LITERAL, C_SET, NUL, cs_ptr);
75
if (Unexpected(s, ']')) return NULL;
76
new_node(Ch_Set, curr_ptr, curr_ptr);
78
while (!Unexpected(s, ']')) {
79
new_node(Ch_Range, range, range);
80
curr_ptr->elt = range;
85
return NULL; /* invalid range */
93
else if (**s == '-') { /* character range */
95
if (Invalid_range(**s, ch)) {
100
else range->hi_bd = NextChar(s);
102
else range->hi_bd = ch;
104
new_node(Ch_Set, curr_ptr, curr_ptr);
105
prev_ptr->rest = curr_ptr;
108
prev_ptr->rest = NULL;
109
return mk_leaf(LITERAL, C_SET, NUL, cs_ptr);
112
if (range != NULL) free(range);
92
116
} /* parse_cset */
99
123
Re_node parse_wildcard()
101
Ch_Set s; Ch_Range r;
103
r = (Ch_Range) new_node(r);
104
r->low_bd = ASCII_MIN; /* smallest ASCII value */
105
r->hi_bd = ASCII_MAX; /* greatest ASCII value */
106
s = (Ch_Set) new_node(s);
109
return mk_leaf(LITERAL, C_SET, NUL, s);
128
new_node(Ch_Range, r, r);
129
r->low_bd = ASCII_MIN; /* smallest ASCII value */
130
r->hi_bd = ASCII_MAX; /* greatest ASCII value */
131
new_node(Ch_Set, s, s);
134
return mk_leaf(LITERAL, C_SET, NUL, s);
112
137
/* parse_chlit() parses a character literal. It is assumed that the
116
141
Re_node parse_chlit(ch)
119
if (ch == NUL) return NULL;
120
else return mk_leaf(LITERAL, C_LIT, ch, NULL);
144
if (ch == NUL) return NULL;
145
else return mk_leaf(LITERAL, C_LIT, ch, NULL);
148
/* routine to free the malloced token */
149
void free_tok(next_token)
152
if (next_token == NULL) return;
153
switch (tok_type(next_token)) {
155
free_re(tok_val(next_token));
124
168
/* get_token() returns the next token -- this may be a character
125
169
literal, a character set, an escaped character, a punctuation (i.e.
131
175
Tok_node get_token(s)
136
if (s == NULL || *s == NULL) return NULL; /* error */
137
rn = (Tok_node) new_node(rn);
138
if (**s == NUL) tok_type(rn) = EOS; /* end of string */
141
case '.': /* wildcard */
142
tok_type(rn) = LITERAL;
143
tok_val(rn) = parse_wildcard();
144
if (tok_val(rn) == NULL) return NULL;
146
case '[': /* character set literal */
148
tok_type(rn) = LITERAL;
149
tok_val(rn) = parse_cset(s);
150
if (tok_val(rn) == NULL) return NULL;
153
tok_type(rn) = LPAREN;
156
tok_type(rn) = RPAREN;
159
tok_type(rn) = OPSTAR;
162
tok_type(rn) = OPALT;
165
tok_type(rn) = OPOPT;
167
case '\\': /* escaped character */
169
default : /* must be ordinary character */
170
tok_type(rn) = LITERAL;
171
tok_val(rn) = parse_chlit(**s);
172
if (tok_val(rn) == NULL) return NULL;
180
if (s == NULL || *s == NULL) return NULL; /* error */
181
new_node(Tok_node, rn, rn);
182
if (**s == NUL) tok_type(rn) = EOS; /* end of string */
185
case '.': /* wildcard */
186
tok_type(rn) = LITERAL;
187
tok_val(rn) = parse_wildcard();
188
if (tok_val(rn) == NULL) {
193
case '[': /* character set literal */
195
tok_type(rn) = LITERAL;
196
tok_val(rn) = parse_cset(s);
197
if (tok_val(rn) == NULL) {
203
tok_type(rn) = LPAREN;
206
tok_type(rn) = RPAREN;
209
tok_type(rn) = OPSTAR;
212
tok_type(rn) = OPALT;
215
tok_type(rn) = OPOPT;
217
case '\\': /* escaped character */
219
default : /* must be ordinary character */
220
tok_type(rn) = LITERAL;
221
tok_val(rn) = parse_chlit(**s);
222
if (tok_val(rn) == NULL) {
180
233
/* cat2() takes a stack of RE-nodes and, if the stack contains
190
if (stk == NULL) return NULL;
191
if (*stk == NULL || (*stk)->next == NULL) return *stk;
192
r = (Re_node) new_node(r);
193
if (r == NULL) return NULL; /* can't allocate memory */
195
Rchild(r) = Pop(stk);
196
Lchild(r) = Pop(stk);
197
if (Push(stk, r) == NULL) return NULL;
198
Nullable(r) = Nullable(Lchild(r)) && Nullable(Rchild(r));
199
if (Nullable(Lchild(r)))
200
Firstpos(r) = pset_union(Firstpos(Lchild(r)), Firstpos(Rchild(r)));
201
else Firstpos(r) = Firstpos(Lchild(r));
202
if (Nullable(Rchild(r)))
203
Lastpos(r) = pset_union(Lastpos(Lchild(r)), Lastpos(Rchild(r)));
204
else Lastpos(r) = Lastpos(Rchild(r));
243
if (stk == NULL) return NULL;
244
if (*stk == NULL || (*stk)->next == NULL) return *stk;
245
new_node(Re_node, r, r);
246
if (r == NULL) return NULL; /* can't allocate memory */
248
Rchild(r) = Pop(stk);
249
Lchild(r) = Pop(stk);
250
if (Push(stk, r) == NULL) {
256
Nullable(r) = Nullable(Lchild(r)) && Nullable(Rchild(r));
257
if (Nullable(Lchild(r)))
258
Firstpos(r) = pset_union(Firstpos(Lchild(r)), Firstpos(Rchild(r)), 0);
259
else Firstpos(r) = pset_union(Firstpos(Lchild(r)), NULL, 0); /* added pset_union with NULL 26/Aug/1996 */
260
if (Nullable(Rchild(r)))
261
Lastpos(r) = pset_union(Lastpos(Lchild(r)), Lastpos(Rchild(r)), 0);
262
else Lastpos(r) = pset_union(Lastpos(Rchild(r)), NULL, 0); /* added pset_union with NULL 26/Aug/1996 */
208
266
/* wrap() takes a stack and an operator, takes the top element of the
218
if (s == NULL || *s == NULL) return NULL;
219
r = (Re_node) new_node(r);
220
if (r == NULL) return NULL;
223
if (Push(s, r) == NULL) return NULL;
225
Firstpos(r) = Firstpos(Child(r));
226
Lastpos(r) = Lastpos(Child(r));
276
if (s == NULL || *s == NULL) return NULL;
277
new_node(Re_node, r, r);
278
if (r == NULL) return NULL;
281
if (Push(s, r) == NULL) {
287
Firstpos(r) = pset_union(Firstpos(Child(r)), NULL, 0); /* added pset_union with NULL 26/Aug/1996 */
288
Lastpos(r) = pset_union(Lastpos(Child(r)), NULL, 0); /* added pset_union with NULL 26/Aug/1996 */
230
292
/* mk_alt() takes a stack and a regular expression, creates an ALT-node
240
if (s == NULL || *s == NULL || r == NULL) return NULL;
241
node = (Re_node) new_node(node);
242
if (node == NULL) return NULL;
244
Lchild(node) = Pop(s);
246
if (Push(s, node) == NULL) return NULL;
247
Nullable(node) = Nullable(Lchild(node)) || Nullable(Rchild(node));
248
Firstpos(node) = pset_union(Firstpos(Lchild(node)), Firstpos(Rchild(node)));
249
Lastpos(node) = pset_union(Lastpos(Lchild(node)), Lastpos(Rchild(node)));
302
if (s == NULL || *s == NULL || r == NULL) return NULL;
303
new_node(Re_node, node, node);
304
if (node == NULL) return NULL;
306
Lchild(node) = Pop(s);
308
if (Push(s, node) == NULL) return NULL;
309
Nullable(node) = Nullable(Lchild(node)) || Nullable(Rchild(node));
310
Firstpos(node) = pset_union(Firstpos(Lchild(node)), Firstpos(Rchild(node)), 0);
311
Lastpos(node) = pset_union(Lastpos(Lchild(node)), Lastpos(Rchild(node)), 0);
253
315
/* parse_re() takes a pointer to a string and traverses that string,
261
Stack stk = NULL, temp;
323
Stack stk = NULL, ret = NULL, top, temp;
324
Tok_node next_token, t1;
325
Re_node re = NULL, val;
265
if (s == NULL || *s == NULL) return NULL;
267
next_token = get_token(s);
268
if (next_token == NULL) return NULL;
269
switch (tok_type(next_token)) {
273
if (end == tok_type(next_token)) return Top(cat2(&stk));
276
re = parse_re(s, RPAREN);
277
if (Push(&stk, re) == NULL) return NULL;
278
if (tok_type(get_token(s)) != RPAREN || re == NULL) return NULL;
281
stk->next = cat2(&temp); /* condense CAT nodes */
282
if (stk->next == NULL) return NULL;
283
else stk->size = stk->next->size + 1;
287
if (wrap(&stk, OPSTAR) == NULL) return NULL;
290
if (wrap(&stk, OPOPT) == NULL) return NULL;
293
if (cat2(&stk) == NULL) return NULL;
294
re = parse_re(s, end);
295
if (re == NULL) return NULL;
296
if (mk_alt(&stk, re) == NULL) return NULL;
299
if (Push(&stk, tok_val(next_token)) == NULL) return NULL;
302
stk->next = cat2(&temp); /* condense CAT nodes */
303
if (stk->next == NULL) return NULL;
304
else stk->size = stk->next->size + 1;
308
printf("parse_re: unknown token type %d\n", tok_type(next_token));
327
if (s == NULL || *s == NULL) return NULL;
330
if ((next_token = get_token(s)) == NULL) return NULL;
331
switch (tok_type(next_token)) {
335
if (end == tok_type(next_token)) {
336
free_tok(next_token);
343
free_tok(next_token);
347
free_tok(next_token);
348
re = parse_re(s, RPAREN);
349
if ((ret = Push(&stk, re)) == NULL) {
350
free_re(re); /* ZZZZZZZZZZZZZZZZZZ */
353
if ((t1 = get_token(s)) == NULL) {
354
free_re(re); /* ZZZZZZZZZZZZZZZZZZ */
358
if ((tok_type(t1) != RPAREN) || (re == NULL)) {
359
free_re(re); /* ZZZZZZZZZZZZZZZZZZ */
367
stk->next = cat2(&temp); /* condense CAT nodes */
368
if (stk->next == NULL) {
369
free_re(re); /* ZZZZZZZZZZZZZZZZZZ */
373
else stk->size = stk->next->size + 1;
377
if ((ret = wrap(&stk, OPSTAR)) == NULL) {
378
free_tok(next_token);
381
free_tok(next_token); /* ZZZZZZZZZZZZZZZZZZ */
384
if ((ret = wrap(&stk, OPOPT)) == NULL) {
385
free_tok(next_token);
388
free_tok(next_token); /* ZZZZZZZZZZZZZZZZZZ */
391
if ((ret = cat2(&stk)) == NULL) {
392
free_tok(next_token);
395
re = parse_re(s, end);
398
free_tok(next_token);
401
if (mk_alt(&stk, re) == NULL) {
403
free_tok(next_token);
406
free_tok(next_token); /* ZZZZZZZZZZZZZZZZZZ */
409
if ((ret = Push(&stk, tok_val(next_token))) == NULL) {
410
free_tok(next_token);
416
stk->next = cat2(&temp); /* condense CAT nodes */
417
if (stk->next == NULL) {
421
else stk->size = stk->next->size + 1;
425
printf("parse_re: unknown token type %d\n", tok_type(next_token));
426
free_tok(next_token); /* ZZZZZZZZZZZZZZZZZZ */
429
/* free_tok(next_token); */
314
433
/* parse() essentially just calls parse_re(). Its purpose is to stick an
441
Re_node val, tree, temp;
442
Stack top, stk = NULL;
325
tree = parse_re(&s, NUL);
326
if (tree == NULL || Push(&stk, tree) == NULL) return NULL;
327
temp = mk_leaf(EOS, C_LIT, NUL, NULL);
328
if (temp == NULL || Push(&stk, temp) == NULL) return NULL;
329
final_pos = --pos_cnt;
330
return Top(cat2(&stk));
444
if ((tree = parse_re(&s, NUL)) == NULL) return NULL;
445
if (Push(&stk, tree) == NULL) return NULL;
446
temp = mk_leaf(EOS, C_LIT, NUL, NULL);
447
if (temp == NULL || Push(&stk, temp) == NULL) return NULL;
448
final_pos = --pos_cnt;