1
/* Copyright (C) 1995, 1996 Tom Lord
3
* This program is free software; you can redistribute it and/or modify
4
* it under the terms of the GNU Library General Public License as published by
5
* the Free Software Foundation; either version 2, or (at your option)
8
* This program is distributed in the hope that it will be useful,
9
* but WITHOUT ANY WARRANTY; without even the implied warranty of
10
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
* GNU Library General Public License for more details.
13
* You should have received a copy of the GNU Library General Public License
14
* along with this software; see the file COPYING. If not, write to
15
* the Free Software Foundation, 59 Temple Place - Suite 330,
16
* Boston, MA 02111-1307, USA.
21
* Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu)
30
#define remalloc(M, S) (M ? realloc (M, S) : malloc (S))
33
/* {Low Level Data Structure Code}
36
/* Constructs a new nfa node. */
39
rx_nfa_state (struct rx *rx)
46
struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n));
49
rx_bzero ((char *)n, sizeof (*n));
50
n->next = rx->nfa_states;
58
rx_free_nfa_state (struct rx_nfa_state * n)
62
struct rx_nfa_state * n;
69
/* This adds an edge between two nodes, but doesn't initialize the
75
rx_nfa_edge (struct rx *rx,
76
enum rx_nfa_etype type,
77
struct rx_nfa_state *start,
78
struct rx_nfa_state *dest)
81
rx_nfa_edge (rx, type, start, dest)
83
enum rx_nfa_etype type;
84
struct rx_nfa_state *start;
85
struct rx_nfa_state *dest;
88
struct rx_nfa_edge *e;
89
e = (struct rx_nfa_edge *)malloc (sizeof (*e));
92
e->next = start->edges;
102
rx_free_nfa_edge (struct rx_nfa_edge * e)
106
struct rx_nfa_edge * e;
113
/* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure
114
* of an NFA. These are added to an nfa automaticly by eclose_nfa.
118
static struct rx_possible_future *
119
rx_possible_future (struct rx * rx,
120
struct rx_se_list * effects)
122
static struct rx_possible_future *
123
rx_possible_future (rx, effects)
125
struct rx_se_list * effects;
128
struct rx_possible_future *ec;
129
ec = (struct rx_possible_future *) malloc (sizeof (*ec));
134
ec->effects = effects;
141
rx_free_possible_future (struct rx_possible_future * pf)
144
rx_free_possible_future (pf)
145
struct rx_possible_future * pf;
154
rx_free_nfa_graph (struct rx *rx)
157
rx_free_nfa_graph (rx)
161
while (rx->nfa_states)
163
while (rx->nfa_states->edges)
165
switch (rx->nfa_states->edges->type)
168
rx_free_cset (rx->nfa_states->edges->params.cset);
174
struct rx_nfa_edge * e;
175
e = rx->nfa_states->edges;
176
rx->nfa_states->edges = rx->nfa_states->edges->next;
177
rx_free_nfa_edge (e);
179
} /* while (rx->nfa_states->edges) */
181
/* Iterate over the partial epsilon closures of rx->nfa_states */
182
struct rx_possible_future * pf = rx->nfa_states->futures;
185
struct rx_possible_future * pft = pf;
187
rx_free_possible_future (pft);
191
struct rx_nfa_state *n;
193
rx->nfa_states = rx->nfa_states->next;
194
rx_free_nfa_state (n);
201
/* {Translating a Syntax Tree into an NFA}
206
/* This is the Thompson regexp->nfa algorithm.
207
* It is modified to allow for `side-effect epsilons.' Those are
208
* edges that are taken whenever a similarly placed epsilon edge
209
* would be, but which also imply that some side effect occurs
210
* when the edge is taken.
212
* Side effects are used to model parts of the pattern langauge
213
* that are not regular.
218
rx_build_nfa (struct rx *rx,
219
struct rexp_node *rexp,
220
struct rx_nfa_state **start,
221
struct rx_nfa_state **end)
224
rx_build_nfa (rx, rexp, start, end)
226
struct rexp_node *rexp;
227
struct rx_nfa_state **start;
228
struct rx_nfa_state **end;
231
struct rx_nfa_edge *edge;
233
/* Start & end nodes may have been allocated by the caller. */
234
*start = *start ? *start : rx_nfa_state (rx);
245
*end = *end ? *end : rx_nfa_state (rx);
249
rx_free_nfa_state (*start);
256
edge = rx_nfa_edge (rx, ne_cset, *start, *end);
257
(*start)->has_cset_edges = 1;
260
edge->params.cset = rx_copy_cset (rx->local_cset_size,
262
if (!edge->params.cset)
264
rx_free_nfa_edge (edge);
271
if (rexp->params.cstr.len == 1)
273
edge = rx_nfa_edge (rx, ne_cset, *start, *end);
274
(*start)->has_cset_edges = 1;
277
edge->params.cset = rx_cset (rx->local_cset_size);
278
if (!edge->params.cset)
280
rx_free_nfa_edge (edge);
283
RX_bitset_enjoin (edge->params.cset, rexp->params.cstr.contents[0]);
288
struct rexp_node copied;
289
struct rx_nfa_state * shared;
293
copied.params.cstr.len--;
294
copied.params.cstr.contents++;
295
if (!rx_build_nfa (rx, &copied, &shared, end))
297
copied.params.cstr.len = 1;
298
copied.params.cstr.contents--;
299
return rx_build_nfa (rx, &copied, start, &shared);
304
return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
305
&& rx_nfa_edge (rx, ne_epsilon, *start, *end));
309
struct rx_nfa_state * star_start = 0;
310
struct rx_nfa_state * star_end = 0;
311
struct rx_nfa_state * shared;
314
if (!rx_build_nfa (rx, rexp->params.pair.left, start, &shared))
316
return (rx_build_nfa (rx, rexp->params.pair.left,
317
&star_start, &star_end)
320
&& rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
321
&& rx_nfa_edge (rx, ne_epsilon, shared, star_start)
322
&& rx_nfa_edge (rx, ne_epsilon, star_end, *end)
323
&& rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
329
struct rx_nfa_state * star_start = 0;
330
struct rx_nfa_state * star_end = 0;
331
return (rx_build_nfa (rx, rexp->params.pair.left,
332
&star_start, &star_end)
335
&& rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
336
&& rx_nfa_edge (rx, ne_epsilon, *start, star_start)
337
&& rx_nfa_edge (rx, ne_epsilon, star_end, *end)
339
&& rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
344
struct rx_nfa_state * cut_end = 0;
346
cut_end = rx_nfa_state (rx);
347
if (!(cut_end && rx_nfa_edge (rx, ne_epsilon, *start, cut_end)))
349
rx_free_nfa_state (*start);
350
rx_free_nfa_state (*end);
352
rx_free_nfa_state (cut_end);
355
cut_end->is_final = rexp->params.intval;
360
return rx_build_nfa (rx, rexp->params.pair.left, start, end);
364
struct rx_nfa_state *shared = 0;
366
(rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
367
&& rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
372
struct rx_nfa_state *ls = 0;
373
struct rx_nfa_state *le = 0;
374
struct rx_nfa_state *rs = 0;
375
struct rx_nfa_state *re = 0;
376
return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
377
&& rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
378
&& rx_nfa_edge (rx, ne_epsilon, *start, ls)
379
&& rx_nfa_edge (rx, ne_epsilon, *start, rs)
380
&& rx_nfa_edge (rx, ne_epsilon, le, *end)
381
&& rx_nfa_edge (rx, ne_epsilon, re, *end));
385
edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
388
edge->params.side_effect = (void *)rexp->params.intval;
392
/* this should never happen */
397
/* {Low Level Data Structures for the Static NFA->SuperNFA Analysis}
399
* There are side effect lists -- lists of side effects occuring
400
* along an uninterrupted, acyclic path of side-effect epsilon edges.
401
* Such paths are collapsed to single edges in the course of computing
402
* epsilon closures. The resulting single edges are labled with a list
403
* of all the side effects from the original multi-edge path. Equivalent
404
* lists of side effects are made == by the constructors below.
406
* There are also nfa state sets. These are used to hold a list of all
407
* states reachable from a starting state for a given type of transition
408
* and side effect list. These are also hash-consed.
413
/* The next several functions compare, construct, etc. lists of side
414
* effects. See ECLOSE_NFA (below) for details.
417
/* Ordering of rx_se_list
418
* (-1, 0, 1 return value convention).
423
se_list_cmp (void * va, void * vb)
431
struct rx_se_list * a = (struct rx_se_list *)va;
432
struct rx_se_list * b = (struct rx_se_list *)vb;
440
: ((long)a->car < (long)b->car
442
: ((long)a->car > (long)b->car
444
: se_list_cmp ((void *)a->cdr, (void *)b->cdr))))));
450
se_list_equal (void * va, void * vb)
453
se_list_equal (va, vb)
458
return !(se_list_cmp (va, vb));
461
static struct rx_hash_rules se_list_hash_rules = { se_list_equal, 0, 0, 0, 0 };
465
static struct rx_se_list *
466
side_effect_cons (struct rx * rx,
467
void * se, struct rx_se_list * list)
469
static struct rx_se_list *
470
side_effect_cons (rx, se, list)
473
struct rx_se_list * list;
476
struct rx_se_list * l;
477
l = ((struct rx_se_list *) malloc (sizeof (*l)));
487
static struct rx_se_list *
488
hash_cons_se_prog (struct rx * rx,
489
struct rx_hash * memo,
490
void * car, struct rx_se_list * cdr)
492
static struct rx_se_list *
493
hash_cons_se_prog (rx, memo, car, cdr)
495
struct rx_hash * memo;
497
struct rx_se_list * cdr;
500
long hash = (long)car ^ (long)cdr;
501
struct rx_se_list template;
506
struct rx_hash_item * it = rx_hash_store (memo, hash,
508
&se_list_hash_rules);
511
if (it->data == (void *)&template)
513
struct rx_se_list * consed;
514
consed = (struct rx_se_list *) malloc (sizeof (*consed));
516
it->data = (void *)consed;
518
return (struct rx_se_list *)it->data;
524
static struct rx_se_list *
525
hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
527
static struct rx_se_list *
528
hash_se_prog (rx, memo, prog)
530
struct rx_hash * memo;
531
struct rx_se_list * prog;
534
struct rx_se_list * answer = 0;
537
answer = hash_cons_se_prog (rx, memo, prog->car, answer);
547
/* {Constructors, etc. for NFA State Sets}
552
nfa_set_cmp (void * va, void * vb)
560
struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va;
561
struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb;
569
: (a->car->id < b->car->id
571
: (a->car->id > b->car->id
573
: nfa_set_cmp ((void *)a->cdr, (void *)b->cdr))))));
578
nfa_set_equal (void * va, void * vb)
581
nfa_set_equal (va, vb)
586
return !nfa_set_cmp (va, vb);
589
static struct rx_hash_rules nfa_set_hash_rules = { nfa_set_equal, 0, 0, 0, 0 };
593
static struct rx_nfa_state_set *
594
nfa_set_cons (struct rx * rx,
595
struct rx_hash * memo, struct rx_nfa_state * state,
596
struct rx_nfa_state_set * set)
598
static struct rx_nfa_state_set *
599
nfa_set_cons (rx, memo, state, set)
601
struct rx_hash * memo;
602
struct rx_nfa_state * state;
603
struct rx_nfa_state_set * set;
606
struct rx_nfa_state_set template;
607
struct rx_hash_item * node;
608
template.car = state;
610
node = rx_hash_store (memo,
611
(((long)state) >> 8) ^ (long)set,
612
&template, &nfa_set_hash_rules);
615
if (node->data == &template)
617
struct rx_nfa_state_set * l;
618
l = (struct rx_nfa_state_set *) malloc (sizeof (*l));
619
node->data = (void *) l;
624
return (struct rx_nfa_state_set *)node->data;
629
static struct rx_nfa_state_set *
630
nfa_set_enjoin (struct rx * rx,
631
struct rx_hash * memo, struct rx_nfa_state * state,
632
struct rx_nfa_state_set * set)
634
static struct rx_nfa_state_set *
635
nfa_set_enjoin (rx, memo, state, set)
637
struct rx_hash * memo;
638
struct rx_nfa_state * state;
639
struct rx_nfa_state_set * set;
642
if (!set || (state->id < set->car->id))
643
return nfa_set_cons (rx, memo, state, set);
644
if (state->id == set->car->id)
648
struct rx_nfa_state_set * newcdr
649
= nfa_set_enjoin (rx, memo, state, set->cdr);
650
if (newcdr != set->cdr)
651
set = nfa_set_cons (rx, memo, set->car, newcdr);
657
/* {Computing Epsilon/Side-effect Closures.}
662
struct rx_se_list *prog_backwards;
666
/* This is called while computing closures for "outnode".
667
* The current node in the traversal is "node".
668
* "frame" contains a list of a all side effects between
669
* "outnode" and "node" from most to least recent.
671
* Explores forward from "node", adding new possible
672
* futures to outnode.
674
* Returns 0 on allocation failure.
679
eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
680
struct rx_nfa_state *node, struct eclose_frame *frame)
683
eclose_node (rx, outnode, node, frame)
685
struct rx_nfa_state *outnode;
686
struct rx_nfa_state *node;
687
struct eclose_frame *frame;
690
struct rx_nfa_edge *e = node->edges;
692
/* For each node, we follow all epsilon paths to build the closure.
693
* The closure omits nodes that have only epsilon edges.
694
* The closure is split into partial closures -- all the states in
695
* a partial closure are reached by crossing the same list of
696
* of side effects (though not necessarily the same path).
702
/* If "node" has more than just epsilon and
703
* and side-effect transitions (id >= 0), or is final,
704
* then it has to be added to the possible futures
707
if (node->id >= 0 || node->is_final)
709
struct rx_possible_future **ec;
710
struct rx_se_list * prog_in_order;
713
prog_in_order = ((struct rx_se_list *)hash_se_prog (rx,
715
frame->prog_backwards));
717
ec = &outnode->futures;
721
cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
727
if (!*ec || (cmp < 0))
729
struct rx_possible_future * pf;
730
pf = rx_possible_future (rx, prog_in_order);
738
(*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
739
node, (*ec)->destset);
745
/* Recurse on outgoing epsilon and side effect nodes.
752
if (!eclose_node (rx, outnode, e->dest, frame))
757
frame->prog_backwards = side_effect_cons (rx,
758
e->params.side_effect,
759
frame->prog_backwards);
760
if (!frame->prog_backwards)
762
if (!eclose_node (rx, outnode, e->dest, frame))
765
struct rx_se_list * dying = frame->prog_backwards;
766
frame->prog_backwards = frame->prog_backwards->cdr;
767
free ((char *)dying);
782
struct rx_possible_future *
783
rx_state_possible_futures (struct rx * rx, struct rx_nfa_state * n)
785
struct rx_possible_future *
786
rx_state_possible_futures (rx, n)
788
struct rx_nfa_state * n;
791
if (n->futures_computed)
795
struct eclose_frame frame;
796
frame.prog_backwards = 0;
797
if (!eclose_node (rx, n, n, &frame))
801
n->futures_computed = 1;
809
/* {Storing the NFA in a Contiguous Region of Memory}
816
se_memo_freer (struct rx_hash_item * node)
820
struct rx_hash_item * node;
823
free ((char *)node->data);
829
nfa_set_freer (struct rx_hash_item * node)
833
struct rx_hash_item * node;
836
free ((char *)node->data);
841
rx_free_nfa (struct rx *rx)
848
rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
849
rx_bzero ((char *)&rx->se_list_memo, sizeof (rx->se_list_memo));
850
rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
851
rx_bzero ((char *)&rx->set_list_memo, sizeof (rx->set_list_memo));
852
rx_free_nfa_graph (rx);