59
52
#include "others.h"
60
53
#include "regex_list.h"
61
54
#include "matcher-ac.h"
65
enum token_op_t {OP_CHAR,OP_STDCLASS,OP_CUSTOMCLASS,OP_DOT,OP_LEAF,OP_ROOT,OP_PARCLOSE};
66
typedef unsigned char* char_bitmap_p;
69
* OP_CHAR: 1 character, c = character
71
* OP_STDCLASS: standard character class, c = char class, class: 1<<(index into std_class of class name)
72
* OP_CUSTOMCLASS: custom character class, first pointer in ptr array is a pointer to the bitmap table for this class
73
* OP_DOT: single . matching any character except \n
74
* OP_LEAF: this is a leaf node, reinterpret structure
77
struct tree_node* next;/* next regex/complex sibling, or parent, if no more siblings , can't be NULL except for root node*/
80
char alternatives;/* number of (non-regex) children of node, i.e. sizeof(children)*/
81
char listend;/* no more siblings, next pointer is pointer to parent*/
83
struct tree_node** children;/* alternatives nr. of children, followed by (a null pointer terminated) regex leaf node pointers) */
84
char_bitmap_p* bitmap;
85
struct leaf_info* leaf;
90
char* info;/* what does it mean that we reached the leaf...*/
91
regex_t* preg;/* this is NULL if leaf node, and non-regex*/
94
/* Character classes */
95
static const char* std_class[] = {
108
/* don't change the order of these strings, unless you change them in generate_tables.c too, and regenerate the tables*/
112
#define STD_CLASS_CNT sizeof(std_class)/sizeof(std_class[0])
114
/* generated by contrib/phishing/generate_tables.c */
115
static const unsigned char char_class_bitmap[STD_CLASS_CNT][32] = {
116
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x03,
117
0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07,
118
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
119
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
121
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x03,
122
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
123
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
124
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
126
{0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0x00, 0xfc,
127
0x01, 0x00, 0x00, 0xf8, 0x01, 0x00, 0x00, 0x78,
128
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
129
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
131
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
132
0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07,
133
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
134
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
136
{0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0xff,
137
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f,
138
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
139
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
141
{0x00, 0x3e, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
142
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
143
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
144
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
146
{0x00, 0x02, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
147
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
148
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
149
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
151
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
152
0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07,
153
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
154
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
156
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
157
0xfe, 0xff, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00,
158
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
159
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
161
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
162
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
163
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
164
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
166
{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
167
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f,
168
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
169
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
171
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x03,
172
0x7e, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00,
173
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
174
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
177
static const unsigned short int char_class[256] = {
178
0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x260, 0x220, 0x220, 0x220, 0x220, 0x200, 0x200,
179
0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200,
180
0x460, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414,
181
0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414,
182
0x414, 0xd19, 0xd19, 0xd19, 0xd19, 0xd19, 0xd19, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519,
183
0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x414, 0x414, 0x414, 0x414, 0x414,
184
0x414, 0xc99, 0xc99, 0xc99, 0xc99, 0xc99, 0xc99, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499,
185
0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x414, 0x414, 0x414, 0x414, 0x200,
186
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
187
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
188
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
189
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
190
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
191
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
192
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
193
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000
196
static const size_t std_class_cnt = sizeof(std_class)/sizeof(std_class[0]);
58
#include "jsparse/textbuf.h"
59
#include "regex_suffix.h"
199
static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,const char* info,int hostOnly);
200
static int match_node(struct tree_node* node,const unsigned char* c,size_t len,const char** info);
201
static void destroy_tree(struct regex_matcher* matcher);
202
static struct tree_node* tree_root_alloc(void);
203
static int build_regex_list(struct regex_matcher* matcher);
204
static void stack_destroy(struct node_stack* stack);
207
void dump_tree(struct tree_node* root);
210
#define MATCH_SUCCESS 0
61
static regex_t *new_preg(struct regex_matcher *matcher);
62
static size_t reverse_string(char *pattern);
63
static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *regex);
64
static int add_static_pattern(struct regex_matcher *matcher, char* pattern);
67
/* ----- shift-or filtering -------------- */
69
#define BITMAP_CONTAINS(bmap, val) ((bmap)[(val) >> 5] & (1 << ((val) & 0x1f)))
70
#define BITMAP_INSERT(bmap, val) ((bmap)[(val) >> 5] |= (1 << ((val) & 0x1f)))
72
static void SO_init(struct filter *m)
74
memset(m->B, ~0, sizeof(m->B));
75
memset(m->end, ~0, sizeof(m->end));
76
memset(m->end_fast, ~0, sizeof(m->end_fast));
79
/* because we use uint32_t */
80
#define MAXSOPATLEN 32
82
/* merge another pattern into the filter
83
* add('abc'); add('bcd'); will match [ab][bc][cd] */
84
static int SO_preprocess_add(struct filter *m, const unsigned char *pattern, size_t len)
89
/* cut length, and make it modulo 2 */
90
if(len > MAXSOPATLEN) {
93
/* we use 2-grams, must be multiple of 2 */
99
/* Shift-Or like preprocessing */
100
for(j=0;j < len-1;j++) {
101
/* use overlapping 2-grams. We need them overlapping because matching can start at any position */
102
q = cli_readint16( &pattern[j] );
103
m->B[q] &= ~(1 << j);
105
/* we use variable length patterns, use last character to mark pattern end,
106
* can lead to false positives.*/
107
/* mark that at state j, the q-gram q can end the pattern */
110
m->end[q] &= ~(1 << j);
111
m->end_fast[pattern[j+1]] &= ~(1<<j);
116
/* this is like a FSM, with multiple active states at the same time.
117
* each bit in "state" means an active state, when a char is encountered
118
* we determine what states can remain active.
119
* The FSM transition rules are expressed as bit-masks */
120
long SO_search(const struct filter *m, const unsigned char *data, unsigned long len)
124
const uint32_t *B = m->B;
125
const uint32_t *End = m->end;
126
const uint32_t *EndFast = m->end_fast;
128
/* cut length, and make it modulo 2 */
129
if(len > MAXSOPATLEN) {
132
/* we use 2-grams, must be multiple of 2 */
136
/* Shift-Or like search algorithm */
137
for(j=0;j < len-1; j++) {
138
const uint16_t q0 = cli_readint16( &data[j] );
140
state = (state << 1) | B[q0];
141
/* state marks with a 0 bit all active states
142
* End[q0] marks with a 0 bit all states where the q-gram 'q' can end a pattern
143
* if we got two 0's at matching positions, it means we encountered a pattern's end */
144
match_end = state | EndFast[data[j+1]];
145
if((match_end != 0xffffffff) && (state | End[q0]) != 0xffffffff) {
146
/* note: we rely on short-circuit eval here, we only evaluate and fetch End[q0], if
147
* end_fast has matched. This reduces cache pressure on End[], and allows us to keep the working
150
/* if state is reachable, and this character can finish a pattern, assume match */
151
/* to reduce false positives check if qgram can finish the pattern */
152
/* return position of probable match */
153
/* find first 0 starting from MSB, the position of that bit as counted from LSB, is the length of the
154
* longest pattern that could match */
155
return j >= MAXSOPATLEN ? j - MAXSOPATLEN : 0;
162
/* ----------------------------------------------------------- */
165
#define MATCH_SUCCESS 0
211
166
#define MATCH_FAILED -1
678
559
/* Done with this matcher, free resources */
679
560
void regex_list_done(struct regex_matcher* matcher)
683
regex_list_cleanup(matcher);
684
if(matcher->list_loaded) {
685
if(matcher->root_hosts) {
687
for(i=0;i<matcher->root_hosts_cnt;i++)
688
cli_ac_free(&matcher->root_hosts[i]);
689
free(matcher->root_hosts);
690
matcher->root_hosts=NULL;
693
matcher->root_hosts_cnt=0;
694
matcher->list_built=0;
695
destroy_tree(matcher);
696
matcher->list_loaded=0;
698
564
if(matcher->list_inited) {
699
matcher->list_inited=0;
701
stack_destroy(&matcher->node_stack);
702
stack_destroy(&matcher->node_stack_alt);
705
/* Tree matcher algorithm */
709
const unsigned char* start;
710
char_bitmap_p bitmap;
717
enum {TOKEN_CHAR,TOKEN_DOT,TOKEN_PAR_OPEN,TOKEN_PAR_CLOSE,TOKEN_BRACKET,TOKEN_ALT,TOKEN_REGEX,TOKEN_DONE};
719
static const unsigned char* getNextToken(const unsigned char* pat,struct token_t* token)
726
token->type=TOKEN_CHAR;
727
token->u.c = *(++pat);
728
if(islower(token->u.c)) {
729
/* handle \n, \t, etc. */
730
char fmt[3] = {'\\', '\0', '\0'};
734
if(snprintf(&c,1,fmt)!=1) {
735
token->type=TOKEN_REGEX;
736
token->u.start = pat;
744
token->type=TOKEN_ALT;
751
token->type=TOKEN_REGEX;
756
/*see if it is something simple like a list of characters, a range, or negated ...*/
757
const unsigned char* old=pat++;/* save this in case we change our mind and decide this is too complicated for us to handle*/
758
unsigned char range_start=0;
760
char_bitmap_p bitmap = cli_malloc(32);
764
memset(bitmap,0xFF,32);/*match chars not in brackets*/
768
memset(bitmap,0x00,32);
770
/* literal ] can be first character, so test for it at the end of the loop, for example: []] */
771
if (*pat=='-' && hasprev) {
773
unsigned char range_end;
775
massert(range_start);
779
if(pat[2]=='-' && pat[3]=='.' && pat[4]==']')
782
/* this is getting complicated, bail out */
783
cli_warnmsg("confused about collating sequences in regex,bailing out");
785
token->type=TOKEN_REGEX;
793
for(c=range_start+1;c<=range_end;c++)
794
bitmap[c>>3] ^= 1<<(c&0x7);
797
else if (pat[0]=='[' && pat[1]==':') {
798
const unsigned char* end;
803
end=(unsigned char*)strstr((const char*)pat,":]");
805
cli_warnmsg("confused about std char class syntax regex,bailing out");
807
token->type=TOKEN_REGEX;
812
for(i=0;i<std_class_cnt;i++)
813
if(!strncmp((const char*)pat,std_class[i],len)) {
819
if(char_class[i]&(1<<found))
820
bitmap[i>>3] ^= 1<<(i&0x7);
824
cli_warnmsg("confused about regex bracket expression, bailing out");
826
token->type=TOKEN_REGEX;
831
bitmap[*pat>>3] ^= 1<<(*pat&0x7);
837
/*TODO: see if this bitmap already exists, then reuse*/
838
token->type = TOKEN_BRACKET;
839
token->u.bitmap = bitmap;
843
massert(0 && "Encountered ] without matching [");
847
token->type=TOKEN_DOT;
850
token->type=TOKEN_PAR_OPEN;
853
token->type=TOKEN_PAR_CLOSE;
856
token->type=TOKEN_CHAR;
864
#define INITIAL_ALT_STACK 10
865
#define ALT_STACK_GROW 20
867
static const unsigned char* find_regex_start(const unsigned char* pat)
869
struct token_t token;
870
/*TODO: find where the regex part begins, for ex:
871
* abcd+, regex begins at 'd'
873
const unsigned char* last=NULL;
874
const unsigned char* tmp=NULL;
875
const unsigned char** altpositions = cli_malloc(INITIAL_ALT_STACK*sizeof(*altpositions));
876
size_t altpositions_capacity = INITIAL_ALT_STACK;
877
size_t altpositions_cnt = 0;
883
/* Try to parse pattern till special regex chars are encountered, that the tree-matcher doesn't handle, like: +,*,{}.
884
* The tricky part is that once we encounter these, the previous 'atom' has to be passed on to the regex matcher, so we have to
885
* back up to the last known good position
886
* Example, if we have: abc(defg)+, then only abc can be handled by tree parser, so we have to return the position of (.
887
* Another example: abc(defg|xyz|oz+|pdo), the last known good position is |, after xyz
888
* TODO: what about open parantheses? maybe once we found a special char, we have top back out before the first (?
892
pat = getNextToken(pat,&token);
893
if(token.type!=TOKEN_REGEX) {
895
lasttype = token.type;
896
if(token.type==TOKEN_BRACKET && token.u.bitmap)
897
free(token.u.bitmap);
898
if(token.type==TOKEN_ALT || token.type==TOKEN_PAR_OPEN) {
899
/* save this position on stack, succesfully parsed till here*/
900
if(altpositions_cnt && altpositions[altpositions_cnt-1][0]=='|')
901
/* encountered another alternate (|) operator, override previous | position stored */
902
altpositions[altpositions_cnt-1]=last;
904
altpositions[altpositions_cnt++] = last;
905
if(altpositions_cnt == altpositions_capacity) {
906
altpositions_capacity += ALT_STACK_GROW;
907
altpositions = cli_realloc2(altpositions,altpositions_capacity*sizeof(*altpositions));
912
} else if (lasttype==TOKEN_PAR_CLOSE) {
913
/* remove last stored position from stack, succesfully this last group */
915
massert(altpositions_cnt>0);
920
last = altpositions[0 /*altpositions_cnt-1*/];/*TODO: which index here?, see above TODO... */
921
/*last stored 'safe' position where no special (+,*,{}) regex chars were encountered*/
923
} while(*pat && token.type!=TOKEN_REGEX);
925
return *pat ? last : last+1;
928
static struct tree_node* tree_node_alloc(struct tree_node* next,char listend)
930
struct tree_node* node = cli_malloc(sizeof(*node));
932
node->alternatives=0;
934
node->listend=listend;
935
node->u.children=NULL;
940
static struct tree_node* tree_root_alloc(void)
942
struct tree_node* root=tree_node_alloc(NULL,1);
952
static struct tree_node* tree_node_char_binsearch(const struct tree_node* node,const char csearch,int* left)
955
struct tree_node **children;
959
children = tree_node_get_children(node);
960
right = node->alternatives-1;
962
if(!node->alternatives)
965
while(*left<=right) {
966
int mid = *left+(right-*left)/2;
967
if(children[mid]->c == csearch)
968
return children[mid];
969
else if(children[mid]->c < csearch)
977
static struct tree_node* tree_get_next(struct tree_node* node)
979
struct tree_node** children;
981
children = tree_node_get_children(node);
983
if(!node->alternatives && children && children[0])
985
else if(node->alternatives<=1)
988
return children[0]->next;
991
static size_t tree_node_get_array_size(const struct tree_node* node)
994
/* if op is CUSTOMCLASS, then first pointer is pointer to bitmap, so array size is +1 */
995
return (node->alternatives + (node->op==OP_CUSTOMCLASS ? 1 : 0)) * sizeof(node->u.children[0]);
998
static struct tree_node* tree_node_char_insert(struct tree_node* node,const char c,int left)
1000
struct tree_node* new, *alt = tree_get_next(node);
1001
struct tree_node **children;
1002
node->alternatives++;
1003
node->u.children = cli_realloc2(node->u.children,tree_node_get_array_size(node));
1004
if(!node->u.children)
1007
children = node->op==OP_CUSTOMCLASS ? node->u.children+1 : node->u.children;
1009
new = tree_node_alloc(alt , node == alt );
1015
if(node->alternatives-left-1>0)
1016
memmove(&children[left+1],&children[left],(node->alternatives-left-1)*sizeof(node->u.children[0]));
1017
children[left] = new;
1022
static void tree_node_insert_nonbin(struct tree_node* node, struct tree_node* new)
1024
struct tree_node **children;
1028
children = tree_node_get_children(node);
1029
if(node->alternatives) {
1031
if(children[0]->next == node) {
1034
for(i=0;i<node->alternatives;i++) {
1035
children[i]->next = new;
1036
children[i]->listend = 0;
1040
struct tree_node* p;
1041
for(p = children[0]->next ; p->next != node ; p = p->next)
1042
massert(!p->listend);
1049
int idx = node->op==OP_CUSTOMCLASS ? 1 : 0;
1050
if(node->u.children)
1051
if(node->u.children[idx]) {
1052
node = node->u.children[idx];
1053
while(node->next && !node->listend)
1056
new->next = node->next;
1061
node->u.children = cli_realloc2(node->u.children,sizeof(node->u.children[0])*(2));
1062
if(node->u.children) {
1063
node->u.children[idx] = new;
1068
static unsigned char char_getclass(const unsigned char* bitmap)
1073
for(i=0;i<std_class_cnt;i++)
1074
if(!memcmp(bitmap,char_class_bitmap[i],256>>3))
1076
return std_class_cnt;
1079
static void stack_destroy(struct node_stack* stack)
1085
stack->capacity = 0;
1088
/* call this after whitelist load is complete, and the tree is no longer going to be modified */
1089
void regex_list_cleanup(struct regex_matcher* matcher)
1093
stack_destroy(&matcher->node_stack);
1094
stack_destroy(&matcher->node_stack_alt);
1095
stack_init(&matcher->node_stack);
1096
stack_init(&matcher->node_stack_alt);
566
cli_ac_free(&matcher->suffixes);
567
if(matcher->suffix_regexes) {
568
for(i=0;i<matcher->suffix_cnt;i++) {
569
struct regex_list *r = matcher->suffix_regexes[i].head;
571
struct regex_list *q = r;
577
free(matcher->suffix_regexes);
578
matcher->suffix_regexes = NULL;
580
if(matcher->all_pregs) {
581
for(i=0;i<matcher->regex_cnt;i++) {
582
regex_t *r = matcher->all_pregs[i];
586
free(matcher->all_pregs);
588
hashtab_free(&matcher->suffix_hash);
589
cli_bm_free(&matcher->md5_hashes);
1099
593
int is_regex_ok(struct regex_matcher* matcher)
1102
596
return (!matcher->list_inited || matcher->list_inited!=-1);/* either we don't have a regexlist, or we initialized it successfully */
1105
/* returns 0 on success, regexec error code otherwise */
1106
static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,const char* info, int hostonly)
1109
const unsigned char* pat_end = find_regex_start(pat);
1110
struct token_t token;
1111
struct tree_node* node;
1115
node = hostonly ? matcher->root_regex_hostonly : matcher->root_regex;
1117
stack_reset(&matcher->node_stack);
1118
stack_reset(&matcher->node_stack_alt);
1119
stack_push(&matcher->node_stack,node);
1121
for(;node->op!=OP_LEAF;){
1123
pat = getNextToken(pat,&token);
1125
token.type = TOKEN_REGEX;
1129
token.type = TOKEN_DONE;
1131
switch(token.type) {
1134
/* search for char in tree */
1136
struct tree_node* newnode = tree_node_char_binsearch(node,token.u.c,&left);
1140
/* not found, insert it */
1141
node = tree_node_char_insert(node,token.u.c,left);
1146
case TOKEN_PAR_OPEN:
1147
stack_push(&matcher->node_stack_alt,NULL);/* marker */
1148
stack_push(&matcher->node_stack,node);
1151
case TOKEN_PAR_CLOSE: {
1152
/*TODO: test this!!!*/
1153
struct tree_node* node_alt = node;
1154
node = tree_node_alloc(NULL,1);
1155
node->op=OP_PARCLOSE;
1158
tree_node_insert_nonbin(node_alt,node);
1159
while (( node_alt = stack_pop(&matcher->node_stack_alt) )) {
1160
tree_node_insert_nonbin(node_alt,node);
1162
stack_pop(&matcher->node_stack);
1167
stack_push(&matcher->node_stack_alt,node);
1168
node = stack_pop(&matcher->node_stack);
1169
stack_push(&matcher->node_stack,node);
1174
struct tree_node* new = tree_node_alloc(tree_get_next(node),1);
1175
unsigned char charclass = char_getclass(token.u.bitmap);
1176
if(charclass == std_class_cnt) {/*not a std char class*/
1177
new->op = OP_CUSTOMCLASS;
1178
new->u.children = cli_malloc(sizeof(new->u.children[0])*2);
1179
if(!new->u.children)
1181
new->u.bitmap[0] = token.u.bitmap;
1182
new->u.bitmap[1] = NULL;
1183
tree_node_insert_nonbin(node,new);
1187
new->op = OP_STDCLASS;
1189
tree_node_insert_nonbin(node,new);
1197
struct tree_node* new = tree_node_alloc(tree_get_next(node),1);
1199
tree_node_insert_nonbin(node,new);
1206
struct leaf_info* leaf=cli_malloc(sizeof(*leaf));
1209
leaf->info = cli_strdup(info);
1210
if(token.type==TOKEN_REGEX) {
1212
struct tree_node* new;
1214
preg=cli_malloc(sizeof(*preg));
1217
rc = cli_regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL));
1221
new=cli_malloc(sizeof(*new));
1226
new->alternatives=0;
1229
tree_node_insert_nonbin(node,new);
1233
node->alternatives=0;
1246
/* c has to be unsigned char here!! */
1247
static int match_node(struct tree_node* node,const unsigned char* c,size_t len,const char** info)
1249
struct tree_node** children;
1256
if(!node->u.children)
1257
return MATCH_FAILED;/* tree empty */
1263
children = node->u.children;
1269
/*this isn't a real character, so don't move*/
1275
massert(*c==node->c && "We know this has to match");
1276
rc = 1;/* *c==node->c;- we know it has matched */
1282
rc = char_class[*c]&(node->c);
1284
case OP_CUSTOMCLASS:
1286
char_bitmap_p bitmap;
1288
bitmap = (char_bitmap_p)node->u.bitmap[0];
1290
rc = bitmap[*c>>3]&(1<<(*c&0x7));
1295
const struct leaf_info* leaf = node->u.leaf;
1298
rc = !cli_regexec(leaf->preg,(const char*)c,0,NULL,0);
1301
massert(*c==node->c && "We know this has to match[2]");
1306
return MATCH_SUCCESS;
1312
cli_errmsg("Encountered invalid operator in tree:%d\n",node->op);
1319
const char csearch = *c;
1320
int left = 0,right = node->alternatives-1;
1322
/*matched so far, go deeper*/
1323
/*do a binary search between children */
1325
while(left<=right) {
1326
mid = left+(right-left)/2;
1327
if (children[mid]->c == csearch)
1329
else if(children[mid]->c < csearch)
1335
node = children[mid];
1339
if(node->alternatives) {
1340
if(!children[0]->listend) {
1345
while(node && node->listend) {
1346
node = node->next;/* climb up */
1350
if(!node || !node->next)
1351
return MATCH_FAILED;/* reached root node */
1356
else if(node->u.children) {
1357
struct tree_node* rewrite_next = NULL;
1358
if(node->op==OP_PARCLOSE)
1359
rewrite_next = node;
1362
massert(node->op!=OP_CHAR);
1364
node->next = rewrite_next;/* this node is pointed to by several parent nodes,
1366
from which one we came, so we can find out way back
1367
should we fail to match somewhere deeper*/
1372
/* this node didn't match, try sibling, or parent (if no more siblings) */
1373
while(node && node->listend) {
1374
node = node->next;/* sibling of parent */
1378
if(!node || !node->next) /* reached root node, it has no next */
1379
return MATCH_FAILED;
1387
return MATCH_FAILED;
1390
/* push node on stack, only if it isn't there already */
1391
static void stack_push_once(struct node_stack* stack,struct tree_node* node)
1397
for(i=0;i < stack->cnt;i++)
1398
if(stack->data[i]==node)
1400
stack_push(stack,node);
1403
static void destroy_tree_internal(struct regex_matcher* matcher,struct tree_node* node)
1405
struct tree_node **children;
1409
children = tree_node_get_children(node);
1410
if(node->op==OP_LEAF) {
1411
struct leaf_info* leaf = node->u.leaf;
1412
if(node->next && !node->listend)
1413
destroy_tree_internal(matcher,node->next);
1414
stack_push_once(&matcher->node_stack,(struct tree_node*)node->u.leaf);/* cast to make compiler happy, and to not make another stack implementation for storing void* */
1415
stack_push_once(&matcher->node_stack,node);
1417
cli_regfree(leaf->preg);
1427
if(node->alternatives) {
1429
struct tree_node* p;
1431
p = children[0]->op==OP_LEAF ? NULL : children[0]->next;
1432
for(i=0;i<node->alternatives;i++)
1433
destroy_tree_internal(matcher,children[i]);
1435
destroy_tree_internal(matcher,p);/*?? is this ok, or without _internal?*/
1440
destroy_tree_internal(matcher,children[0]);
1443
if(node->op!=OP_LEAF && node->next && !node->listend)
1444
destroy_tree_internal(matcher,node->next);
1445
if(node->u.children)
1446
stack_push_once(&matcher->node_stack,(struct tree_node*)node->u.children);/* cast to make compiler happy, it isn't really a tree_node* */
1447
if(node->op==OP_CUSTOMCLASS && node->u.children[0]) {
1448
free(node->u.children[0]);
1449
node->u.children[0]=NULL;
1451
stack_push_once(&matcher->node_stack,node);
1454
static void destroy_tree(struct regex_matcher* matcher)
1456
/* we might have the same node linked by different nodes, so a recursive walk&free doesn't work in all situations,
1457
* i.e. it might double-free, so instead of freeing, just push the nodes on a stack, and later free the nodes in that stack,
1458
* (and push to stack only if it doesn't contain it already*/
1461
stack_reset(&matcher->node_stack);
1462
destroy_tree_internal(matcher,matcher->root_regex);
1463
destroy_tree_internal(matcher,matcher->root_regex_hostonly);
1464
while (matcher->node_stack.cnt) {
1465
struct tree_node* node = stack_pop(&matcher->node_stack);
1471
static void dump_node(struct tree_node* node)
1474
struct tree_node* p,**children;
1476
if(node->op==OP_LEAF) {
1477
if(node->u.leaf->preg)
1478
printf("n%p [label=\"regex\\nleaf\"]",(void*)node);
1480
printf("n%p [label=\"%c\\nleaf\"];\n",(void*)node,node->c);
1481
if(node->next && !node->listend) {
1482
printf("n%p -> n%p;\n",(void*)node,(void*)node->next);
1483
dump_node(node->next);
1487
printf("n%p [label=\"%c\\n%d\\nlistend:%d\"];\n",(void*)node,(node->op==OP_ROOT||node->op==OP_PARCLOSE) ?'@' :node->c,node->op,node->listend);
1489
printf("n%p -> n%p;\n",(void*)node,(void*)node->next);
1490
printf("n%p -> {",(void*)node);/*using address of node as id*/
1491
children = tree_node_get_children(node);
1492
if(node->alternatives)
1494
for(i=0;i<node->alternatives;i++)
1495
printf("n%p ",(void*)children[i]);
1496
if(node->alternatives && children[0]->op!=OP_LEAF)
1497
for(p=children[0]->next;p!=node;p=p->next)
1500
printf("n%p ",(void*)p);
1501
if(p->op==OP_LEAF || p->listend)
1504
if(!node->alternatives && children && children[0])
1505
printf("n%p ",(void*)children[0]);
1507
printf("{rank=same;");
1508
for(i=0;i<node->alternatives;i++)
1509
printf("n%p ",(void*)node->u.children[i]);
1510
if(node->alternatives && children[0]->op!=OP_LEAF)
1511
for(p=children[0]->next;p!=node;p=p->next)
1513
printf("n%p ",(void*)p);
1514
if(p->op==OP_LEAF || p->listend)
1517
if(!node->alternatives && children && children[0])
1518
printf("n%p ",(void*)children[0]);
1520
for(i=0;i<node->alternatives;i++)
1521
dump_node(children[i]);
1522
if(node->alternatives && children[0]->op!=OP_LEAF)
1523
for(p=children[0]->next;p!=node;p=p->next)
1526
if(p->op==OP_LEAF || p->listend)
1529
if(!node->alternatives && children && children[0])
1530
dump_node(children[0]);
1533
void dump_tree(struct tree_node* root)
1535
/*use dot/dotty from graphviz to view it*/
1537
printf("digraph tree {\n");
1545
/*------------------------New version of regex_list.c------------------------*/
1548
* A scalable, trie-based implementation for matching against
1549
* a list of regular expressions.
1551
* A trivial way to implement matching against a list of regular expressions
1552
* would have been to construct a single regular expression, by concatenating
1553
* the list with the alternate (|) operator.
1554
* BUT a usual DFA implementation of regular expression matching (eg.: GNU libc)
1555
* leads to "state explosion" when there are many (5000+) alternate (|) operators.
1556
* This is the reason for using a trie-based implementation.
1559
* Design considerations:
1561
* Recursive call points: there are situations when match has to be retried on a different sub-trie, or with a different repeat count.
1562
* Alternate operators, and repeat/range operators (+,*,{}) are recursiv call points. When a failure is encountered during a match,
1563
* the function simply returns from the recursive call, and ends up at a failure point (recursive call point).
1565
* "go to parent" below actually means, return from recursive call.
1567
* fail_action: we need to return to closest failure point (recursive call point),
1568
* and switch current node to node pointed by fail_action
1571
* OP_ROOT: contains information that applies to the entire trie.
1572
* it can only appear as root node, and not as child node.
1573
* On child fail: match has failed
1574
* This is NOT a recursive call point
1575
* OP_CHAR_BINARY_SEARCH: chooses a sub-trie, based on current character;
1576
* using binary-search
1577
* On fail: go to node indicated by fail_action, or if
1578
* fail_action is NULL, to parent
1579
* On child fail: execute fail of current node
1580
* OP_ALTERNATIVES: try matching each sub-trie, if all fails execute fail
1581
* action of current node. This is a recursive call point
1582
* OP_CHAR_REPEAT: repeat specified character a number of times in range:
1583
* [min_range, max_range];
1584
* min_range: 0 for * operator
1586
* max_range: remaining length of current string for *,+ operator
1587
* OR: min_range, max_range as specified by the {min,max} operator
1588
* On fail: fail_action, or parent if NULL
1589
* On child fail: reduce match repeat count, try again on child, if
1590
* repeat count<min_range, execute fail of current node
1591
* Also has a bitmap on what characters are accepted beyond it,
1592
* as an optimizations for the case, when a maximum match isn't possible
1593
* Not recomended to use this when min_range=max_range=1
1594
* This is a recursive call point
1595
* OP_DOT_REPEAT: like OP_CHAR_REPEAT but accept any character
1596
* Not recomended to use this when min_range=max_range=1
1597
* This is a recursive call point
1598
* OP_GROUP_START: start of a group "(", also specifies group flags:
1599
* repeat: is_repeat, min_range, max_range
1600
* This is a recursive call point if is_repeat
1601
* OP_GROUP_END: end of group ")"
1602
* OP_STRCMP: compare with specified string,
1603
* it has an array of fail actions, one for each character
1604
* default fail action: go to parent
1605
* This was introduced from memory- and speed-efficiency
1607
* OP_CHAR_CLASS_REPEAT: match character with character class
1608
* min_range, max_range
1609
* For a single character class min_range=max_range=1
1610
* OP_MATCH_OK: match has succeeded
1612
* TODO: maybe we'll need a more efficient way to choose between character classes.
1613
* OP_DOT_REPEAT/OP_CHAR_REPEAT needs a more efficient specification of its failure function, instead of using
1614
* backtracking approach.
1616
* The failure function/action is just for optimization, the match algorithms works even without it.
1617
* TODO:In this first draft fail action will always be NULL, in a later version I'll implement fail actions too.
1623
#include "cltypes.h"
1626
/* offsetof is not ANSI C */
1628
# define offsetof(type,memb) ((size_t)&((type*)0)->memb)
1631
#define container_of(ptr, type, member) ( (type *) ((char *)ptr - offsetof(type, member)) )
1632
#define container_of_const(ptr, type, member) ( (type *) ((const char *)ptr - offsetof(type, member)) )
1634
enum trie_node_type {
1636
OP_CHAR_BINARY_SEARCH,
1640
OP_CHAR_CLASS_REPEAT,
1648
/* the comon definition of a trie node */
1651
enum trie_node_type type;
1654
struct trie_node_match {
1655
struct trie_node node;
1656
/* additional match info */
1659
struct trie_node_root
1661
struct trie_node node;
1662
struct trie_node* child;
1665
struct trie_node_binary_search
1667
struct trie_node node;
1668
uint8_t children_count;/* number of children to search among -1! 255 = 256 children*/
1669
struct trie_node* fail_action;
1670
unsigned char* char_choices;/* children_count elements */
1671
struct trie_node** children;/*children_count elements */
1674
struct trie_node_alternatives
1676
struct trie_node node;
1677
uint32_t alternatives_count;
1678
/* need to support node with lots of alternatives,
1679
* for a worst-case scenario where each line ends up as a sub-trie of OP_ALTERNATIVES*/
1680
struct trie_node* fail_action;
1681
struct trie_node** children;
1684
struct trie_node_char_repeat
1686
struct trie_node node;
1687
unsigned char character;
1688
uint8_t range_min, range_max;/* according to POSIX we need not support more than 255 repetitions*/
1689
struct char_bitmap* bitmap_accept_after;/* bitmap of characters accepted after this,
1690
to optimize repeat < max_range case; if its NULL
1691
there is no optimization*/
1692
struct trie_node* child;
1693
struct trie_node* fail_action;
1696
struct trie_node_dot_repeat
1698
struct trie_node node;
1699
uint8_t range_min, range_max;/* according to POSIX we need not support more than 255 repetitions*/
1700
struct char_bitmap* bitmap_accept_after;/* bitmap of characters accepted after this,
1701
to optimize repeat < max_range case; if its NULL
1702
there is no optimization*/
1703
struct trie_node* child;
1704
struct trie_node* fail_action;
1707
struct trie_node_group_start
1709
struct trie_node node;
1710
uint8_t range_min, range_max;/* if range_min==range_max==1, then this is NOT a repeat, thus not a recursive call point*/
1711
struct trie_node* child;
1712
struct trie_node* fail_action;
1715
struct trie_node_group_end
1717
struct trie_node node;
1718
struct trie_node* child;
1721
struct trie_node_strcmp
1723
struct trie_node node;
1724
uint8_t string_length;/* for longer strings a sequence of node_strcmp should be used */
1725
unsigned char* string;
1726
struct trie_node* child;
1727
struct trie_node** fail_actions;/* this has string_length elements, or NULL if no fail_actions are computed */
1730
struct trie_node_char_class_repeat
1732
struct trie_node node;
1733
struct char_bitmap* bitmap;
1734
struct char_bitmap* bitmap_accept_after;
1735
uint8_t range_min, range_max;
1736
struct trie_node* child;
1737
struct trie_node* fail_action;
1740
static inline int bitmap_accepts(const struct char_bitmap* bitmap, const char c)
1742
/* TODO: check if c is accepted by bitmap */
1746
#define MATCH_FAILED 0
1749
#define FAIL_ACTION( fail_node ) (*fail_action = (fail_node), MATCH_FAILED)
1753
#define MIN(a,b) ((a)<(b) ? (a) : (b))
1756
static int match_node(const struct trie_node* node, const unsigned char* text, const unsigned char* text_end, const struct trie_node** fail_action);
1758
static int match_repeat(const unsigned char* text, const unsigned char* text_end, const size_t range_min, const size_t repeat_start,
1759
const struct char_bitmap* bitmap_accept_after, const struct trie_node* child, const struct trie_node** fail_action,
1760
const struct trie_node* this_fail_action)
1763
for(i = repeat_start;i > range_min;i--) {
1764
if(!bitmap_accept_after || bitmap_accepts( bitmap_accept_after, text[i-1])) {
1765
int rc = match_node(child, &text[i], text_end, fail_action);
1766
/* ignore fail_action for now, we have the bitmap_accepts_after optimization */
1773
/* this match is optional, try child only */
1774
int rc = match_node(child, text, text_end, fail_action);
1779
return FAIL_ACTION(this_fail_action);
1782
/* text_end points to \0 in text */
1783
static int match_node(const struct trie_node* node, const unsigned char* text, const unsigned char* text_end, const struct trie_node** fail_action)
1785
while(node && text < text_end) {
1786
switch(node->type) {
1789
const struct trie_node_root* root_node = container_of_const(node, const struct trie_node_root, node);
1790
node = root_node->child;
1793
case OP_CHAR_BINARY_SEARCH:
1795
const struct trie_node_binary_search* bin_node = container_of_const(node, const struct trie_node_binary_search, node);
1796
const unsigned char csearch = *text;
1797
size_t mid, left = 0, right = bin_node->children_count-1;
1798
while(left<=right) {
1799
mid = left+(right-left)/2;
1800
if(bin_node->char_choices[mid] == csearch)
1802
else if(bin_node->char_choices[mid] < csearch)
1808
/* match successful */
1809
node = bin_node->children[mid];
1813
return FAIL_ACTION( bin_node->fail_action );
1817
case OP_ALTERNATIVES:
1819
const struct trie_node_alternatives* alt_node = container_of_const(node, const struct trie_node_alternatives, node);
1821
*fail_action = NULL;
1822
for(i=0;i < alt_node->alternatives_count;i++) {
1823
int rc = match_node(alt_node->children[i], text, text_end, fail_action);
1827
/* supporting fail_actions is tricky,
1828
* if we just go to the node specified, what happens if the match fails, and no
1829
* further fail_action is specified? We should know where to continue the search.
1830
* For now fail_action isn't supported for OP_ALTERNATIVES*/
1834
case OP_CHAR_REPEAT:
1836
const struct trie_node_char_repeat* char_rep_node = container_of_const(node, const struct trie_node_char_repeat, node);
1837
const size_t max_len = MIN( text_end - text, char_rep_node->range_max-1);
1838
/* todo: what about the 8 bit limitation of range_max, and what about inf (+,*)? */
1839
const char caccept = char_rep_node->character;
1842
if(max_len < char_rep_node->range_min)
1843
return FAIL_ACTION(char_rep_node->fail_action);
1845
for(rep=0;rep < max_len;rep++) {
1846
if(text[rep] != caccept) {
1851
return match_repeat(text, text_end, char_rep_node->range_min, rep,
1852
char_rep_node->bitmap_accept_after, char_rep_node->child, fail_action,
1853
char_rep_node->fail_action);
1857
const struct trie_node_dot_repeat* dot_rep_node = container_of_const(node, const struct trie_node_dot_repeat, node);
1858
const size_t max_len = MIN( text_end - text, dot_rep_node->range_max-1);
1859
/* todo: what about the 8 bit limitation of range_max, and what about inf (+,*)? */
1861
if(max_len < dot_rep_node->range_min)
1862
return FAIL_ACTION(dot_rep_node->fail_action);
1864
return match_repeat(text, text_end, dot_rep_node->range_min, max_len,
1865
dot_rep_node->bitmap_accept_after, dot_rep_node->child, fail_action,
1866
dot_rep_node->fail_action);
1868
case OP_CHAR_CLASS_REPEAT:
1870
const struct trie_node_char_class_repeat* class_rep_node = container_of_const(node, const struct trie_node_char_class_repeat, node);
1871
const size_t max_len = MIN( text_end - text, class_rep_node->range_max-1);
1872
/* todo: what about the 8 bit limitation of range_max, and what about inf (+,*)? */
1875
if(max_len < class_rep_node->range_min)
1876
return FAIL_ACTION(class_rep_node->fail_action);
1878
for(rep=0;rep < max_len;rep++) {
1879
if(!bitmap_accepts( class_rep_node->bitmap, text[rep])) {
1884
return match_repeat(text, text_end, class_rep_node->range_min, rep,
1885
class_rep_node->bitmap_accept_after, class_rep_node->child, fail_action,
1886
class_rep_node->fail_action);
1891
const struct trie_node_strcmp* strcmp_node = container_of_const(node, const struct trie_node_strcmp, node);
1893
if(strcmp_node->fail_actions) {
1894
const size_t max_len = MIN(strcmp_node->string_length, text_end-text);
1895
/* we don't use strncmp, because we need the exact match-fail point */
1896
for(i=0;i < max_len;i++) {
1897
if(text[i] != strcmp_node->string[i]) {
1898
return FAIL_ACTION( strcmp_node->fail_actions[i] );
1901
if(max_len < strcmp_node->string_length) {
1902
/* failed, because text was shorter */
1903
return FAIL_ACTION( strcmp_node->fail_actions[max_len] );
1907
/* no fail_actions computed, some shortcuts possible on compare */
1908
if((text_end - text < strcmp_node->string_length) ||
1909
strncmp((const char*)text, (const char*)strcmp_node->string, strcmp_node->string_length)) {
1911
return FAIL_ACTION( NULL );
1914
/* match successful */
1915
node = strcmp_node->child;
1916
text += strcmp_node->string_length;
1919
case OP_GROUP_START:
1921
const struct trie_node_group_start* group_start_node = container_of_const(node, const struct trie_node_group_start, node);
1922
/* TODO: implement */
1927
const struct trie_node_group_end* group_end_node = container_of_const(node, const struct trie_node_group_end, node);
1928
/* TODO: implement */
1937
/* if fail_action was NULL, or text ended*/
1938
return MATCH_FAILED;
599
static int add_newsuffix(struct regex_matcher *matcher, struct regex_list *info, const char *suffix, size_t len)
601
struct cli_matcher *root = &matcher->suffixes;
602
struct cli_ac_patt *new = cli_calloc(1,sizeof(*new));
608
assert(root && suffix);
620
new->ch[0] = new->ch[1] |= CLI_MATCH_IGNORE;
621
if(new->length > root->maxpatlen)
622
root->maxpatlen = new->length;
624
new->pattern = cli_malloc(sizeof(new->pattern[0])*len);
630
new->pattern[i] = suffix[i];/*new->pattern is short int* */
632
new->customdata = info;
634
if((ret = cli_ac_addpatt(root,new))) {
639
SO_preprocess_add(&matcher->filter, (const unsigned char*)suffix, len);
643
#define MODULE "regex_list: "
644
/* ------ load a regex, determine suffix, determine suffix2regexlist map ---- */
646
static void list_add_tail(struct regex_list_ht *ht, struct regex_list *regex)
651
ht->tail->nxt = regex;
656
/* returns 0 on success, clamav error code otherwise */
657
static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *iregex)
659
struct regex_matcher *matcher = cbdata;
660
struct regex_list *regex = cli_malloc(sizeof(*regex));
661
const struct element *el;
666
regex->pattern = iregex->pattern ? cli_strdup(iregex->pattern) : NULL;
667
regex->preg = iregex->preg;
669
el = hashtab_find(&matcher->suffix_hash, suffix, suffix_len);
670
/* TODO: what if suffixes are prefixes of eachother and only one will
673
/* existing suffix */
674
assert((size_t)el->data < matcher->suffix_cnt);
675
list_add_tail(&matcher->suffix_regexes[el->data], regex);
676
cli_dbgmsg(MODULE "added new regex to existing suffix %s: %s\n", suffix, regex->pattern);
679
size_t n = matcher->suffix_cnt++;
680
el = hashtab_insert(&matcher->suffix_hash, suffix, suffix_len, n);
681
matcher->suffix_regexes = cli_realloc(matcher->suffix_regexes, (n+1)*sizeof(*matcher->suffix_regexes));
682
if(!matcher->suffix_regexes)
684
matcher->suffix_regexes[n].tail = regex;
685
matcher->suffix_regexes[n].head = regex;
686
add_newsuffix(matcher, regex, suffix, suffix_len);
687
cli_dbgmsg(MODULE "added new suffix %s, for regex: %s\n", suffix, regex->pattern);
692
static size_t reverse_string(char *pattern)
694
size_t len = strlen(pattern);
696
for(i=0; i < (len/2); i++) {
697
char aux = pattern[i];
698
pattern[i] = pattern[len-i-1];
699
pattern[len-i-1] = aux;
704
static regex_t *new_preg(struct regex_matcher *matcher)
707
matcher->all_pregs = cli_realloc(matcher->all_pregs, ++matcher->regex_cnt * sizeof(*matcher->all_pregs));
708
if(!matcher->all_pregs)
710
r = cli_malloc(sizeof(*r));
713
matcher->all_pregs[matcher->regex_cnt-1] = r;
717
static int add_static_pattern(struct regex_matcher *matcher, char* pattern)
720
struct regex_list regex;
723
len = reverse_string(pattern);
725
regex.pattern = cli_strdup(pattern);
727
rc = add_pattern_suffix(matcher, pattern, len, ®ex);
732
int regex_list_add_pattern(struct regex_matcher *matcher, char *pattern)
737
/* we only match the host, so remove useless stuff */
738
const char remove_end[] = "([/?].*)?/";
739
const char remove_end2[] = "([/?].*)/";
741
len = strlen(pattern);
742
if(len > sizeof(remove_end)) {
743
if(strncmp(&pattern[len - sizeof(remove_end)+1], remove_end, sizeof(remove_end)-1) == 0) {
744
len -= sizeof(remove_end) - 1;
747
if(strncmp(&pattern[len - sizeof(remove_end2)+1], remove_end2, sizeof(remove_end2)-1) == 0) {
748
len -= sizeof(remove_end2) - 1;
754
preg = new_preg(matcher);
758
rc = cli_regex2suffix(pattern, preg, add_pattern_suffix, (void*)matcher);