2
* (C) 2006, 2007 Andreas Gruenbacher <agruen@suse.de>
3
* Copyright (c) 2003-2008 Novell, Inc. (All rights reserved)
4
* Copyright 2009-2010 Canonical Ltd.
6
* The libapparmor library is licensed under the terms of the GNU
7
* Lesser General Public License, version 2.1. Please see the file
10
* This library is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU Lesser General Public License for more details.
15
* You should have received a copy of the GNU Lesser General Public License
16
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19
* Functions to create/manipulate an expression tree for regular expressions
20
* that have been parsed.
22
* The expression tree can be used directly after the parse creates it, or
23
* it can be factored so that the set of important nodes is smaller.
24
* Having a reduced set of important nodes generally results in a dfa that
25
* is closer to minimum (fewer redundant states are created). It also
26
* results in fewer important nodes in a the state set during subset
27
* construction resulting in less memory used to create a dfa.
29
* Generally it is worth doing expression tree simplification before dfa
30
* construction, if the regular expression tree contains any alternations.
31
* Even if the regular expression doesn't simplification should be fast
32
* enough that it can be used with minimal overhead.
34
#ifndef __LIBAA_RE_EXPR_H
35
#define __LIBAA_RE_EXPR_H
44
#include "apparmor_re.h"
48
typedef unsigned char uchar;
49
typedef set<uchar> Chars;
51
ostream &operator<<(ostream &os, uchar c);
53
/* Compute the union of two sets. */
54
template<class T> set<T> operator+(const set<T> &a, const set<T> &b)
57
c.insert(b.begin(), b.end());
62
* When creating DFAs from regex trees, a DFA state is constructed from
63
* a set of important nodes in the syntax tree. This includes AcceptNodes,
64
* which indicate that when a match ends in a particular state, the
65
* regular expressions that the AcceptNode belongs to match.
69
typedef set<ImportantNode *> NodeSet;
72
* Text-dump a state (for debugging).
74
ostream &operator<<(ostream &os, const NodeSet &state);
77
* Out-edges from a state to another: we store the follow-set of Nodes
78
* for each input character that is not a default match in
79
* cases (i.e., following a CharNode or CharSetNode), and default
80
* matches in otherwise as well as in all matching explicit cases
81
* (i.e., following an AnyCharNode or NotCharSetNode). This avoids
82
* enumerating all the explicit tranitions for default matches.
84
typedef struct NodeCases {
85
typedef map<uchar, NodeSet *>::iterator iterator;
86
iterator begin() { return cases.begin(); }
87
iterator end() { return cases.end(); }
89
NodeCases(): otherwise(0) { }
90
map<uchar, NodeSet *> cases;
96
ostream &operator<<(ostream &os, Node &node);
98
/* An abstract node in the syntax tree. */
101
Node(): nullable(false) { child[0] = child[1] = 0; }
102
Node(Node *left): nullable(false)
107
Node(Node *left, Node *right): nullable(false)
121
* See the "Dragon Book" for an explanation of nullable, firstpos,
122
* lastpos, and followpos.
124
virtual void compute_nullable() { }
125
virtual void compute_firstpos() = 0;
126
virtual void compute_lastpos() = 0;
127
virtual void compute_followpos() { }
128
virtual int eq(Node *other) = 0;
129
virtual ostream &dump(ostream &os) = 0;
130
void dump_syntax_tree(ostream &os);
133
NodeSet firstpos, lastpos, followpos;
134
/* child 0 is left, child 1 is right */
137
unsigned int label; /* unique number for debug etc */
139
* We indirectly release Nodes through a virtual function because
140
* accept and Eps Nodes are shared, and must be treated specially.
141
* We could use full reference counting here but the indirect release
142
* is sufficient and has less overhead
144
virtual void release(void) { delete this; }
147
class InnerNode: public Node {
149
InnerNode(): Node() { };
150
InnerNode(Node *left): Node(left) { };
151
InnerNode(Node *left, Node *right): Node(left, right) { };
154
class OneChildNode: public InnerNode {
156
OneChildNode(Node *left): InnerNode(left) { };
159
class TwoChildNode: public InnerNode {
161
TwoChildNode(Node *left, Node *right): InnerNode(left, right) { };
164
class LeafNode: public Node {
166
LeafNode(): Node() { };
169
/* Match nothing (//). */
170
class EpsNode: public LeafNode {
172
EpsNode(): LeafNode()
179
/* don't delete Eps nodes because there is a single static
180
* instance shared by all trees. Look for epsnode in the code
184
void compute_firstpos() { }
185
void compute_lastpos() { }
188
if (dynamic_cast<EpsNode *>(other))
192
ostream &dump(ostream &os)
199
* Leaf nodes in the syntax tree are important to us: they describe the
200
* characters that the regular expression matches. We also consider
201
* AcceptNodes import: they indicate when a regular expression matches.
203
class ImportantNode: public LeafNode {
205
ImportantNode(): LeafNode() { }
206
void compute_firstpos() { firstpos.insert(this); }
207
void compute_lastpos() { lastpos.insert(this); }
208
virtual void follow(NodeCases &cases) = 0;
211
/* common base class for all the different classes that contain
212
* character information.
214
class CNode: public ImportantNode {
216
CNode(): ImportantNode() { }
219
/* Match one specific character (/c/). */
220
class CharNode: public CNode {
222
CharNode(uchar c): c(c) { }
223
void follow(NodeCases &cases)
225
NodeSet **x = &cases.cases[c];
228
*x = new NodeSet(*cases.otherwise);
232
(*x)->insert(followpos.begin(), followpos.end());
236
CharNode *o = dynamic_cast<CharNode *>(other);
242
ostream &dump(ostream &os)
250
/* Match a set of characters (/[abc]/). */
251
class CharSetNode: public CNode {
253
CharSetNode(Chars &chars): chars(chars) { }
254
void follow(NodeCases &cases)
256
for (Chars::iterator i = chars.begin(); i != chars.end(); i++) {
257
NodeSet **x = &cases.cases[*i];
260
*x = new NodeSet(*cases.otherwise);
264
(*x)->insert(followpos.begin(), followpos.end());
269
CharSetNode *o = dynamic_cast<CharSetNode *>(other);
270
if (!o || chars.size() != o->chars.size())
273
for (Chars::iterator i = chars.begin(), j = o->chars.begin();
274
i != chars.end() && j != o->chars.end(); i++, j++) {
280
ostream &dump(ostream &os)
283
for (Chars::iterator i = chars.begin(); i != chars.end(); i++)
291
/* Match all except one character (/[^abc]/). */
292
class NotCharSetNode: public CNode {
294
NotCharSetNode(Chars &chars): chars(chars) { }
295
void follow(NodeCases & cases)
297
if (!cases.otherwise)
298
cases.otherwise = new NodeSet;
299
for (Chars::iterator j = chars.begin(); j != chars.end(); j++) {
300
NodeSet **x = &cases.cases[*j];
302
*x = new NodeSet(*cases.otherwise);
304
/* Note: Add to the nonmatching characters after copying away
305
* the old otherwise state for the matching characters.
307
cases.otherwise->insert(followpos.begin(), followpos.end());
308
for (NodeCases::iterator i = cases.begin(); i != cases.end();
310
if (chars.find(i->first) == chars.end())
311
i->second->insert(followpos.begin(),
317
NotCharSetNode *o = dynamic_cast<NotCharSetNode *>(other);
318
if (!o || chars.size() != o->chars.size())
321
for (Chars::iterator i = chars.begin(), j = o->chars.begin();
322
i != chars.end() && j != o->chars.end(); i++, j++) {
328
ostream &dump(ostream &os)
331
for (Chars::iterator i = chars.begin(); i != chars.end(); i++)
339
/* Match any character (/./). */
340
class AnyCharNode: public CNode {
343
void follow(NodeCases &cases)
345
if (!cases.otherwise)
346
cases.otherwise = new NodeSet;
347
cases.otherwise->insert(followpos.begin(), followpos.end());
348
for (NodeCases::iterator i = cases.begin(); i != cases.end();
350
i->second->insert(followpos.begin(), followpos.end());
354
if (dynamic_cast<AnyCharNode *>(other))
358
ostream &dump(ostream &os) { return os << "."; }
362
* Indicate that a regular expression matches. An AcceptNode itself
363
* doesn't match anything, so it will never generate any transitions.
365
class AcceptNode: public ImportantNode {
370
/* don't delete AcceptNode via release as they are shared, and
371
* will be deleted when the table the are stored in is deleted
375
void follow(NodeCases &cases __attribute__ ((unused)))
377
/* Nothing to follow. */
380
/* requires accept nodes to be common by pointer */
383
if (dynamic_cast<AcceptNode *>(other))
384
return (this == other);
389
/* Match a node zero or more times. (This is a unary operator.) */
390
class StarNode: public OneChildNode {
392
StarNode(Node *left): OneChildNode(left) { nullable = true; }
393
void compute_firstpos() { firstpos = child[0]->firstpos; }
394
void compute_lastpos() { lastpos = child[0]->lastpos; }
395
void compute_followpos()
397
NodeSet from = child[0]->lastpos, to = child[0]->firstpos;
398
for (NodeSet::iterator i = from.begin(); i != from.end(); i++) {
399
(*i)->followpos.insert(to.begin(), to.end());
404
if (dynamic_cast<StarNode *>(other))
405
return child[0]->eq(other->child[0]);
408
ostream &dump(ostream &os)
416
/* Match a node one or more times. (This is a unary operator.) */
417
class PlusNode: public OneChildNode {
419
PlusNode(Node *left): OneChildNode(left) {
421
void compute_nullable() { nullable = child[0]->nullable; }
422
void compute_firstpos() { firstpos = child[0]->firstpos; }
423
void compute_lastpos() { lastpos = child[0]->lastpos; }
424
void compute_followpos()
426
NodeSet from = child[0]->lastpos, to = child[0]->firstpos;
427
for (NodeSet::iterator i = from.begin(); i != from.end(); i++) {
428
(*i)->followpos.insert(to.begin(), to.end());
431
int eq(Node *other) {
432
if (dynamic_cast<PlusNode *>(other))
433
return child[0]->eq(other->child[0]);
436
ostream &dump(ostream &os) {
443
/* Match a pair of consecutive nodes. */
444
class CatNode: public TwoChildNode {
446
CatNode(Node *left, Node *right): TwoChildNode(left, right) { }
447
void compute_nullable()
449
nullable = child[0]->nullable && child[1]->nullable;
451
void compute_firstpos()
453
if (child[0]->nullable)
454
firstpos = child[0]->firstpos + child[1]->firstpos;
456
firstpos = child[0]->firstpos;
458
void compute_lastpos()
460
if (child[1]->nullable)
461
lastpos = child[0]->lastpos + child[1]->lastpos;
463
lastpos = child[1]->lastpos;
465
void compute_followpos()
467
NodeSet from = child[0]->lastpos, to = child[1]->firstpos;
468
for (NodeSet::iterator i = from.begin(); i != from.end(); i++) {
469
(*i)->followpos.insert(to.begin(), to.end());
474
if (dynamic_cast<CatNode *>(other)) {
475
if (!child[0]->eq(other->child[0]))
477
return child[1]->eq(other->child[1]);
481
ostream &dump(ostream &os)
489
/* Match one of two alternative nodes. */
490
class AltNode: public TwoChildNode {
492
AltNode(Node *left, Node *right): TwoChildNode(left, right) { }
493
void compute_nullable()
495
nullable = child[0]->nullable || child[1]->nullable;
497
void compute_lastpos()
499
lastpos = child[0]->lastpos + child[1]->lastpos;
501
void compute_firstpos()
503
firstpos = child[0]->firstpos + child[1]->firstpos;
507
if (dynamic_cast<AltNode *>(other)) {
508
if (!child[0]->eq(other->child[0]))
510
return child[1]->eq(other->child[1]);
514
ostream &dump(ostream &os)
525
/* Traverse the syntax tree depth-first in an iterator-like manner. */
526
class depth_first_traversal {
528
void push_left(Node *node) {
531
while (dynamic_cast<InnerNode *>(node)) {
532
pos.push(node->child[0]);
533
node = node->child[0];
537
depth_first_traversal(Node *node) { push_left(node); }
538
Node *operator*() { return pos.top(); }
539
Node *operator->() { return pos.top(); }
540
operator bool() { return !pos.empty(); }
543
Node *last = pos.top();
547
/* no need to dynamic cast, as we just popped a node so
548
* the top node must be an inner node */
549
InnerNode *node = (InnerNode *) (pos.top());
550
if (node->child[1] && node->child[1] != last) {
551
push_left(node->child[1]);
568
extern EpsNode epsnode;
570
int debug_tree(Node *t);
571
Node *simplify_tree(Node *t, dfaflags_t flags);
572
void label_nodes(Node *root);
573
unsigned long hash_NodeSet(NodeSet *ns);
574
void flip_tree(Node *node);
576
/* Comparison operator for sets of <NodeSet *>.
577
* Compare set hashes, and if the sets have the same hash
578
* do compare pointer comparison on set of <Node *>, the pointer comparison
579
* allows us to determine which Sets of <Node *> we have seen already from
580
* new ones when constructing the DFA.
582
struct deref_less_than {
583
bool operator()(pair<unsigned long, NodeSet *>const &lhs,
584
pair<unsigned long, NodeSet *>const &rhs)const
586
if (lhs.first == rhs.first)
587
return *(lhs.second) < *(rhs.second);
589
return lhs.first < rhs.first;
593
class MatchFlag: public AcceptNode {
595
MatchFlag(uint32_t flag, uint32_t audit): flag(flag), audit(audit) { }
596
ostream &dump(ostream &os) { return os << '<' << flag << '>'; }
602
class ExactMatchFlag: public MatchFlag {
604
ExactMatchFlag(uint32_t flag, uint32_t audit): MatchFlag(flag, audit) {}
607
class DenyMatchFlag: public MatchFlag {
609
DenyMatchFlag(uint32_t flag, uint32_t quiet): MatchFlag(flag, quiet) {}
612
#endif /* __LIBAA_RE_EXPR */