252
246
//charset conversion is disabled for now,
253
247
//it hinders tree optimization in some cases, so it need to be either
254
248
//done post optimization, or have extra factoring rules added
256
249
static Node *merge_charset(Node *a, Node *b)
258
if (dynamic_cast<CharNode *>(a) && dynamic_cast<CharNode *>(b)) {
260
chars.insert(dynamic_cast<CharNode *>(a)->c);
261
chars.insert(dynamic_cast<CharNode *>(b)->c);
262
CharSetNode *n = new CharSetNode(chars);
264
} else if (dynamic_cast<CharNode *>(a) &&
265
dynamic_cast<CharSetNode *>(b)) {
266
Chars *chars = &dynamic_cast<CharSetNode *>(b)->chars;
267
chars->insert(dynamic_cast<CharNode *>(a)->c);
269
} else if (dynamic_cast<CharSetNode *>(a) &&
270
dynamic_cast<CharSetNode *>(b)) {
271
Chars *from = &dynamic_cast<CharSetNode *>(a)->chars;
272
Chars *to = &dynamic_cast<CharSetNode *>(b)->chars;
273
for (Chars::iterator i = from->begin(); i != from->end(); i++)
251
Chars *from = &dynamic_cast<CharSetNode *>(b)->chars;
252
Chars *to = &dynamic_cast<CharSetNode *>(a)->chars;
253
for (Chars::iterator i = from->begin(); i != from->end(); i++)
280
static Node *alt_to_charsets(Node *t, int dir)
286
for (;dynamic_cast<AltNode *>(i);) {
287
if (dynamic_cast<CharNode *>(i->child[dir]) ||
288
dynamic_cast<CharNodeSet *>(i->child[dir])) {
294
first->child[dir] = merge_charset(first->child[dir],
296
p->child[!dir] = i->child[!dir];
298
i = tmp->child[!dir];
299
tmp->child[!dir] = NULL;
259
/* given a constructed alt vector do duplicate elimination and merging */
260
eliminate_dups_and_merge(vector<Node *> vec) {
261
std::sort(vec->begin(), vec->end(), ???);
264
if (vec->begin()->is_charset()) {
265
for (j = i+1; *j->is_charset() && j != vec->end();
267
merge_charset(*i, *j);
307
// last altnode of chain check other dir as well
308
if (first && (dynamic_cast<charNode *>(i) ||
309
dynamic_cast<charNodeSet *>(i))) {
315
if (dynamic_cast<CharNode *>(t->child[dir]) ||
316
dynamic_cast<CharSetNode *>(t->child[dir]))
319
(dynamic_cast<CharNode *>(i->child[dir]) ||
320
dynamic_cast<CharSetNode *>(i->child[dir])))) {
275
/* merged charsets, now eliminate other dups */
276
for (j = i + 1; ??; ???) {
281
flatten_altnode(Node *t) {
284
elimintated_alt_nodes++;
286
eliminate_dups_and_merge();
289
flatten_catnode(Node *t) {
292
eliminated_cat_nodes++;
294
/* only elimination to be done is accept nodes */
299
factor everything from right, then left
301
factor longest/most - look at both left and right, which is most?
302
to determine which dir to factor first
304
ab | abc | abcd | abcde
306
a (b | bc | bcd | bcde)
308
a ( b (E | c | cd | cde))
310
a ( b (E | c (E | d | de))
312
a ( b (E | c (E | d (E | e))))
314
so once flattened, work top to bottom
316
may actually want to flatten charsets into single chars in altnode
317
to make it easier to factor them
326
321
static Node *basic_alt_factor(Node *t, int dir)
556
556
if (flags & DFA_DUMP_TREE_STATS) {
557
struct node_counts counts = { 0, 0, 0, 0, 0, 0, 0, 0 };
557
struct node_counts counts = { 0, 0, 0, 0, 0, 0, 0 };
558
558
count_tree_nodes(t, &counts);
560
"expr tree: c %d, [] %d, [^] %d, | %d, + %d, * %d, . %d, cat %d\n",
561
counts.charnode, counts.charset, counts.notcharset,
560
"expr tree: [] %d, [^] %d, | %d, + %d, * %d, . %d, cat %d\n",
561
counts.charset, counts.notcharset,
562
562
counts.alt, counts.plus, counts.star, counts.any,
591
591
} while (update);
592
592
if (flags & DFA_DUMP_TREE_STATS) {
593
struct node_counts counts = { 0, 0, 0, 0, 0, 0, 0, 0 };
593
struct node_counts counts = { 0, 0, 0, 0, 0, 0, 0 };
594
594
count_tree_nodes(t, &counts);
596
"simplified expr tree: c %d, [] %d, [^] %d, | %d, + %d, * %d, . %d, cat %d\n",
597
counts.charnode, counts.charset, counts.notcharset,
596
"simplified expr tree: [] %d, [^] %d, | %d, + %d, * %d, . %d, cat %d\n",
597
counts.charset, counts.notcharset,
598
598
counts.alt, counts.plus, counts.star, counts.any,