33
147
#include <stdio.h>
34
148
#include <stdlib.h>
35
150
#include <string.h>
151
#ifdef HAVE_STRINGS_H
36
154
#include <ctype.h>
39
156
#include <limits.h>
40
157
#include <sys/types.h>
44
#include "regex/regex.h"
164
#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
47
168
#include "clamav.h"
48
169
#include "others.h"
170
#include "defaults.h"
172
#include "filetypes.h"
49
174
#include "regex_list.h"
50
175
#include "matcher-ac.h"
54
#include "jsparse/textbuf.h"
55
#include "regex_suffix.h"
179
/* Operation kinds that a tree node's `op` field is compared against
 * (for example, OP_CUSTOMCLASS changes how the children array is laid out). */
enum token_op_t {OP_CHAR,OP_STDCLASS,OP_CUSTOMCLASS,OP_DOT,OP_LEAF,OP_ROOT,OP_PARCLOSE};
180
/* Pointer to a character-class bitmap. The bracket-expression parser allocates
 * these as 32 bytes and addresses them as bitmap[c>>3] with bit (c&0x7),
 * i.e. one bit per possible byte value. */
typedef unsigned char* char_bitmap_p;
183
* OP_CHAR: 1 character, c = character
185
* OP_STDCLASS: standard character class, c = char class, class: 1<<(index into std_class of class name)
186
* OP_CUSTOMCLASS: custom character class, first pointer in ptr array is a pointer to the bitmap table for this class
187
* OP_DOT: single . matching any character except \n
188
* OP_LEAF: this is a leaf node, reinterpret structure
191
struct tree_node* next;/* next regex/complex sibling, or parent, if no more siblings , can't be NULL except for root node*/
194
char alternatives;/* number of (non-regex) children of node, i.e. sizeof(children)*/
195
char listend;/* no more siblings, next pointer is pointer to parent*/
197
struct tree_node** children;/* alternatives nr. of children, followed by (a null pointer terminated) regex leaf node pointers) */
198
char_bitmap_p* bitmap;
199
struct leaf_info* leaf;
204
char* info;/* what does it mean that we reached the leaf...*/
205
regex_t* preg;/* this is NULL if leaf node, and non-regex*/
208
/* Character classes */
209
static const char* std_class[] = {
222
/* don't change the order of these strings, unless you change them in generate_tables.c too, and regenerate the tables*/
226
/* Number of entries in std_class[]. The replacement list is parenthesized so
 * the macro behaves as a single operand inside larger expressions
 * (e.g. `x / STD_CLASS_CNT`). */
#define STD_CLASS_CNT (sizeof(std_class)/sizeof(std_class[0]))
228
/* generated by contrib/phishing/generate_tables.c */
229
static const unsigned char char_class_bitmap[STD_CLASS_CNT][32] = {
230
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x03,
231
0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07,
232
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
233
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
235
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x03,
236
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
237
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
238
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
240
{0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0x00, 0xfc,
241
0x01, 0x00, 0x00, 0xf8, 0x01, 0x00, 0x00, 0x78,
242
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
243
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
245
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
246
0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07,
247
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
248
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
250
{0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0xff,
251
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f,
252
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
253
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
255
{0x00, 0x3e, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
256
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
257
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
258
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
260
{0x00, 0x02, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
261
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
262
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
263
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
265
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
266
0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07,
267
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
268
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
270
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
271
0xfe, 0xff, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00,
272
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
273
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
275
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
276
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
277
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
278
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
280
{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
281
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f,
282
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
283
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
285
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x03,
286
0x7e, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00,
287
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
288
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
291
static const unsigned short int char_class[256] = {
292
0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x260, 0x220, 0x220, 0x220, 0x220, 0x200, 0x200,
293
0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200,
294
0x460, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414,
295
0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414,
296
0x414, 0xd19, 0xd19, 0xd19, 0xd19, 0xd19, 0xd19, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519,
297
0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x414, 0x414, 0x414, 0x414, 0x414,
298
0x414, 0xc99, 0xc99, 0xc99, 0xc99, 0xc99, 0xc99, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499,
299
0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x414, 0x414, 0x414, 0x414, 0x200,
300
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
301
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
302
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
303
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
304
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
305
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
306
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
307
0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000
310
static const size_t std_class_cnt = sizeof(std_class)/sizeof(std_class[0]);
62
static regex_t *new_preg(struct regex_matcher *matcher);
63
static size_t reverse_string(char *pattern);
64
static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *regex);
65
static int add_static_pattern(struct regex_matcher *matcher, char* pattern);
68
#define MATCH_SUCCESS 0
313
static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,const char* info);
314
static int match_node(struct tree_node* node,const unsigned char* c,size_t len,const char** info);
315
static void destroy_tree(struct regex_matcher* matcher);
316
static struct tree_node* tree_root_alloc(void);
317
static int build_regex_list(struct regex_matcher* matcher);
318
static void stack_destroy(struct node_stack* stack);
321
void dump_tree(struct tree_node* root);
324
#define MATCH_SUCCESS 0
69
325
#define MATCH_FAILED -1
149
347
* Do not send NULL pointers to this function!!
152
int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup,int hostOnly,const char **info, int is_whitelist)
350
int regex_list_match(struct regex_matcher* matcher,const char* real_url,const char* display_url,int hostOnly,const char** info,int is_whitelist)
154
char* orig_real_url = real_url;
155
struct regex_list *regex;
156
size_t real_len, display_len, buffer_len;
354
massert(display_url);
162
356
if(!matcher->list_inited)
164
assert(matcher->list_built);
165
/* skip initial '.' inserted by get_host */
166
if(real_url[0] == '.') real_url++;
167
if(display_url[0] == '.') display_url++;
168
real_len = strlen(real_url);
169
display_len = strlen(display_url);
170
buffer_len = (hostOnly && !is_whitelist) ? real_len + 1 : real_len + display_len + 1 + 1;
172
/* too short, no match possible */
358
massert(matcher->list_built);
176
char *buffer = cli_malloc(buffer_len+1);
360
size_t real_len = strlen(real_url);
361
size_t display_len = strlen(display_url);
362
size_t buffer_len = (hostOnly && !is_whitelist) ? real_len : real_len + display_len + 1 + (is_whitelist ? 1 : 0);
363
char* buffer = cli_malloc(buffer_len+1);
179
366
struct cli_ac_data mdata;
180
struct cli_ac_result *res = NULL;
185
371
strncpy(buffer,real_url,real_len);
186
buffer[real_len]= (!is_whitelist && hostOnly) ? '/' : ':';
372
buffer[real_len]= (!is_whitelist && hostOnly) ? '\0' : ':';
187
373
if(!hostOnly || is_whitelist) {
188
374
strncpy(buffer+real_len+1,display_url,display_len);
376
buffer[buffer_len - 1] = '/';
377
buffer[buffer_len]=0;
190
buffer[buffer_len - 1] = '/';
191
buffer[buffer_len]=0;
192
379
cli_dbgmsg("Looking up in regex_list: %s\n", buffer);
194
if((rc = cli_ac_initdata(&mdata, 0, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
197
bufrev = cli_strdup(buffer);
200
reverse_string(bufrev);
201
rc = filter_search(&matcher->filter, (const unsigned char*)bufrev, buffer_len) != -1;
205
/* filter says this suffix doesn't match.
206
* The filter has false positives, but no false
210
rc = cli_ac_scanbuff((const unsigned char*)bufrev,buffer_len, NULL, (void*)®ex, &res, &matcher->suffixes,&mdata,0,0,NULL,AC_SCAN_VIR,NULL);
212
cli_ac_freedata(&mdata);
215
root = matcher->root_regex_idx;
217
struct cli_ac_result *q;
219
regex = matcher->suffix_regexes[root].head;
222
regex = res->customdata;
224
while(!rc && regex) {
225
/* loop over multiple regexes corresponding to
228
/* we matched a static pattern */
229
rc = validate_subdomain(regex, pre_fixup, buffer, buffer_len, real_url, real_len, orig_real_url);
231
rc = !cli_regexec(regex->preg, buffer, 0, NULL, 0);
233
if(rc) *info = regex->pattern;
382
if((rc = cli_ac_initdata(&mdata, 0, AC_DEFAULT_TRACKLEN)))
386
for(i = 0; i < matcher->root_hosts_cnt; i++) {
387
if(( rc = cli_ac_scanbuff((unsigned char*)buffer,buffer_len,info, &matcher->root_hosts[i] ,&mdata,0,0,0,-1,NULL) ))
394
rc = match_node(matcher->root_regex,(unsigned char*)buffer,buffer_len,info) == MATCH_SUCCESS ? CL_VIRUS : CL_SUCCESS;
244
cli_dbgmsg("Lookup result: not in regex list\n");
246
cli_dbgmsg("Lookup result: in regex list\n");
397
cli_dbgmsg("not in regex list\n");
403
#define NODE_STACK_INITIAL 1024
404
#define NODE_STACK_GROW 4096
405
/* Initialize @stack */
406
static int stack_init(struct node_stack* stack)
411
stack->capacity = NODE_STACK_INITIAL;
412
stack->data = cli_malloc(stack->capacity * sizeof(*stack->data));
419
/* Reset @stack pointer, but don't realloc */
420
static void stack_reset(struct node_stack* stack)
427
/* Push @node on @stack, growing it if necessary */
428
static inline int stack_push(struct node_stack* stack,struct tree_node* node)
431
massert(stack->data);
433
if(stack->cnt == stack->capacity) {
434
stack->capacity += NODE_STACK_GROW;
435
stack->data = cli_realloc(stack->data,stack->capacity*sizeof(*stack->data));
439
stack->data[stack->cnt++] = node;
443
/* Pops node from @stack, doesn't realloc */
444
static inline struct tree_node* stack_pop(struct node_stack* stack)
447
massert(stack->data);
448
massert(stack->cnt);/*don't pop from empty stack */
450
return stack->cnt ? stack->data[--stack->cnt] : NULL;
252
453
/* Initialization & loading */
253
455
/* Initializes @matcher, allocating necessary substructures */
254
int init_regex_list(struct regex_matcher* matcher, uint8_t dconf_prefiltering)
456
int init_regex_list(struct regex_matcher* matcher)
257
mpool_t *mp = matcher->mempool;
262
memset(matcher, 0, sizeof(*matcher));
461
cli_dbgmsg("Matcher engine not initialized\n");
467
matcher->list_inited = 0;
468
matcher->root_hosts_cnt = 0;
469
matcher->root_hosts = NULL;
470
matcher->root_hosts_cnt = 0;
472
matcher->root_regex = tree_root_alloc();
473
if(!matcher->root_regex) {
477
if(( rc = stack_init(&matcher->node_stack) )) {
478
free(matcher->root_regex);
481
if(( rc = stack_init(&matcher->node_stack_alt) )) {
482
free(matcher->root_regex);
483
stack_destroy(&matcher->node_stack);
264
487
matcher->list_inited=1;
265
matcher->list_built=0;
488
matcher->list_built=1;/* its empty, but pretend its built, so that load_ will realloc root_hosts */
266
489
matcher->list_loaded=0;
267
cli_hashtab_init(&matcher->suffix_hash, 512);
269
matcher->mempool = mp;
270
matcher->suffixes.mempool = mp;
271
assert(mp && "mempool must be initialized");
273
if((rc = cli_ac_init(&matcher->suffixes, 2, 32, dconf_prefiltering))) {
277
matcher->sha256_hashes.mempool = mp;
278
matcher->hostkey_prefix.mempool = mp;
280
if((rc = cli_bm_init(&matcher->sha256_hashes))) {
283
if((rc = cli_bm_init(&matcher->hostkey_prefix))) {
286
filter_init(&matcher->filter);
287
491
return CL_SUCCESS;
494
/* inserts @pattern into @root, using ac-matcher
495
* although the name might be confusing, @pattern is not a regex!*/
496
static int add_regex_list_element(struct cli_matcher* root,const char* pattern,char* info)
499
struct cli_ac_patt *new = cli_calloc(1,sizeof(*new));
507
len = strlen(pattern);
517
if(new->length > root->maxpatlen)
518
root->maxpatlen = new->length;
520
new->pattern = cli_malloc(sizeof(new->pattern[0])*len);
526
new->pattern[i]=pattern[i];/*new->pattern is short int* */
528
new->virname = strdup(info);
529
if((ret = cli_ac_addpatt(root,new))) {
290
538
static int functionality_level_check(char* line)
327
575
return CL_EMALFDB;
329
577
return CL_SUCCESS;
333
static int add_hash(struct regex_matcher *matcher, char* pattern, const char fl, int is_prefix)
336
struct cli_bm_patt *pat = mpool_calloc(matcher->mempool, 1, sizeof(*pat));
337
struct cli_matcher *bm;
338
const char *vname = NULL;
341
pat->pattern = (unsigned char*)cli_mpool_hex2str(matcher->mempool, pattern);
347
bm = &matcher->hostkey_prefix;
349
bm = &matcher->sha256_hashes;
352
if (!matcher->sha256_pfx_set.keys) {
353
if((rc = cli_hashset_init(&matcher->sha256_pfx_set, 1048576, 90))) {
358
if (fl != 'W' && pat->length == 32 &&
359
cli_hashset_contains(&matcher->sha256_pfx_set, cli_readint32(pat->pattern)) &&
360
cli_bm_scanbuff(pat->pattern, 32, &vname, NULL, &matcher->sha256_hashes,0,NULL,NULL) == CL_VIRUS) {
362
/* hash is whitelisted in local.gdb */
363
cli_dbgmsg("Skipping hash %s\n", pattern);
364
mpool_free(matcher->mempool, pat->pattern);
365
mpool_free(matcher->mempool, pat);
369
pat->virname = mpool_malloc(matcher->mempool, 1);
375
cli_hashset_addkey(&matcher->sha256_pfx_set, cli_readint32(pat->pattern));
376
if((rc = cli_bm_addpatt(bm, pat, "*"))) {
377
cli_errmsg("add_hash: failed to add BM pattern\n");
387
582
/* Load patterns/regexes from file */
388
int load_regex_matcher(struct cl_engine *engine,struct regex_matcher* matcher,FILE* fd,unsigned int *signo,unsigned int options,int is_whitelist,struct cli_dbio *dbio, uint8_t dconf_prefiltering)
583
int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist)
390
int rc,line=0,entry=0;
391
586
char buffer[FILEBUFF];
395
591
if(matcher->list_inited==-1)
396
592
return CL_EMALFDB; /* already failed to load */
593
/* if(matcher->list_loaded) {
594
cli_warnmsg("Regex list has already been loaded, ignoring further requests for load\n");
398
598
cli_errmsg("Unable to load regex list (null file)\n");
402
602
cli_dbgmsg("Loading regex_list\n");
403
603
if(!matcher->list_inited) {
404
rc = init_regex_list(matcher, dconf_prefiltering);
604
rc = init_regex_list(matcher);
405
605
if (!matcher->list_inited) {
406
606
cli_errmsg("Regex list failed to initialize!\n");
407
607
fatal_error(matcher);
610
/*atexit(regex_list_done); TODO: destroy this in manager.c */
412
613
* Regexlist db format (common to .wdb (whitelist) and .pdb (domainlist) files):
413
614
* Multiple lines of form, (empty lines are skipped):
414
615
* Flags RealURL DisplayedURL
419
* R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing
617
* Flags: R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing
420
618
* flags that should be filtered.
421
619
* [i.e. phishcheck urls.flags that we don't want to be done for this particular host]
425
* Y - host-only regex
426
* M - host simple pattern
620
* Note:Flag filtering only makes sense in .pdb files.
428
622
* If a line in the file doesn't conform to this format, loading fails
431
while(cli_dbgets(buffer, FILEBUFF, fd, dbio)) {
625
while(fgets(buffer,FILEBUFF,fd)) {
436
628
cli_chomp(buffer);
439
630
continue;/* skip empty lines */
441
if(functionality_level_check(buffer))
444
if(engine->cb_sigload && engine->cb_sigload("phishing", buffer, engine->cb_sigload_ctx)) {
445
cli_dbgmsg("load_regex_matcher: skipping %s due to callback\n", buffer);
632
if(functionality_level_check(buffer))
450
636
pattern = strchr(buffer,':');
452
638
cli_errmsg("Malformed regex list line %d\n",line);
453
639
fatal_error(matcher);
454
640
return CL_EMALFDB;
457
643
flags = buffer+1;
460
pattern_len = strlen(pattern);
461
if(pattern_len < FILEBUFF) {
462
pattern[pattern_len] = '/';
463
pattern[pattern_len+1] = '\0';
647
const size_t pattern_len = strlen(pattern);
648
if(pattern_len < FILEBUFF) {
649
pattern[pattern_len] = '/';
650
pattern[pattern_len+1] = '\0';
653
cli_errmsg("Overlong regex line %d\n",line);
654
fatal_error(matcher);
659
if((buffer[0] == 'R' && !is_whitelist) || (buffer[0] == 'X' && is_whitelist)) {/*regex*/
660
if(( rc = add_pattern(matcher,(const unsigned char*)pattern,flags) ))
661
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
663
else if( ( buffer[0] == 'H' && !is_whitelist) || (buffer[0] == 'M' && is_whitelist)) {/*matches displayed host*/
664
if(matcher->list_built) {
665
struct cli_matcher* old_hosts = matcher->root_hosts;
666
matcher->root_hosts_cnt++;
668
matcher->root_hosts = cli_realloc(matcher->root_hosts, matcher->root_hosts_cnt * sizeof(*matcher->root_hosts));
669
if(!matcher->root_hosts) {
670
matcher->root_hosts = old_hosts;/* according to manpage this must still be valid*/
673
memset(&matcher->root_hosts[matcher->root_hosts_cnt-1], 0, sizeof(struct cli_matcher));
674
matcher->root_hosts[matcher->root_hosts_cnt-1].ac_root = cli_calloc(1, sizeof(struct cli_ac_node));
675
if(!matcher->root_hosts[matcher->root_hosts_cnt-1].ac_root) {
676
matcher->root_hosts_cnt--;
679
cli_dbgmsg("Increased number of root_hosts in regex_list.c\n");
680
matcher->list_built = 0;
682
if(( rc = add_regex_list_element(&matcher->root_hosts[matcher->root_hosts_cnt-1],pattern,flags) ))
683
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
466
cli_errmsg("Overlong regex line %d\n",line);
467
fatal_error(matcher);
471
if((buffer[0] == 'R' && !is_whitelist) || ((buffer[0] == 'X' || buffer[0] == 'Y') && is_whitelist)) {
472
/* regex for hostname*/
473
if (( rc = regex_list_add_pattern(matcher, pattern) ))
474
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
476
else if( ( buffer[0] == 'H' && !is_whitelist) || (buffer[0] == 'M' && is_whitelist)) {
477
/*matches displayed host*/
478
if (( rc = add_static_pattern(matcher, pattern) ))
479
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
480
} else if (buffer[0] == 'S' && (!is_whitelist || pattern[0]=='W')) {
481
pattern[pattern_len] = '\0';
484
if((pattern[0]=='W' || pattern[0]=='F' || pattern[0]=='P') && pattern[1]==':') {
486
if (( rc = add_hash(matcher, pattern, flags[0], pattern[-2] == 'P') )) {
487
cli_errmsg("Error loading at line: %d\n", line);
488
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
491
cli_errmsg("Error loading line: %d, %c\n", line, *pattern);
687
/* this is useless, we have host, and regex matches
688
if(( rc = add_regex_list_element(matcher->root_urls,pattern,flags) ))
689
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;*/
498
692
matcher->list_loaded = 1;
693
if(( rc = build_regex_list(matcher) ))
697
/* dump_tree(matcher->root_regex);*/
699
if(!matcher->list_built) {
700
cli_errmsg("Regex list not loaded: build failed!\n");
701
fatal_error(matcher);
704
regex_list_cleanup(matcher);
502
705
return CL_SUCCESS;
709
static void tree_node_merge_nonbin(struct tree_node* into,const struct tree_node* node)
714
if(node->alternatives){
715
if(node->u.children[0]->next == node) {
716
*no non-bin alternatives here*
720
for(p = node->u.children[0]->next; p->next != node; p = p->next)
721
tree_node_insert_nonbin(into,p);
725
tree_node_insert_nonbin(into,node->u.children[0]);
728
static void tree_node_merge_bin(struct tree_node* into,const struct tree_node* node)
730
if(node->u.children && node->alternatives) {
731
if(!into->alternatives) {
732
* into has no bin part, just copy+link the node there*
734
struct tree_node* next = into->u.children[0];
735
into->u.children = node->u.children;
736
into->alternatives = node->alternatives;
737
for(i=0;i < into->alternatives;i++) {
738
if(into->u.children[i]->next == node) {
739
into->u.children[i]->next = next;
740
into->u.children[i]->listend = 0;
744
for(p = into->u.children[0]->next; p->next != node; p = p->next);
750
const size_t new_size = tree_node_get_array_size(into) + tree_node_get_array_size(node);
751
struct tree_node** new_children = cli_malloc(sizeof(
753
* else: no bin part to merge *
757
static struct tree_node ** tree_node_get_children(const struct tree_node* node)
759
return node->op==OP_CUSTOMCLASS ? (node->u.children[1] ? node->u.children+1 : NULL) :node->u.children;
761
/* don't do this, it wastes too much memory, and has no benefit
762
static void regex_list_dobuild(struct tree_node* called_from,struct tree_node* node)
764
struct tree_node **children;
767
children = tree_node_get_children(node);
768
if(node->op!=OP_ROOT)
769
massert(called_from);
770
if(node->op==OP_TMP_PARCLOSE) {
771
const size_t array_size = (node->alternatives +(called_from->op==OP_CUSTOMCLASS ? 1:0))*sizeof(*called_from->u.children);
773
return;* already processed this common node*
776
* copy children to called_from from this node
777
* called_from should have 0 alternatives, and a link to this node via ->u.children[0]
779
massert(called_from->alternatives == 0);
780
massert(called_from->u.children);
781
massert(called_from->u.children[0] == node);
782
called_from->u.children = cli_realloc(called_from->u.children,array_size);
783
called_from->u.children = node->u.children;
784
called_from->alternatives = node->alternatives;
785
if(called_from->alternatives) {
786
* fix parent pointers *
787
int i;TODO: do a deep copy of children here
788
struct tree_node **from_children = tree_node_get_children(called_from);
789
massert(from_children);
790
for(i=0;i < called_from->alternatives;i++) {
792
for(p=from_children[i];p->next != node; p = p->next);
793
p->next = called_from;
798
if(node->op==OP_LEAF)
800
else if (node->alternatives) {
804
p = children[0]->op==OP_LEAF ? NULL : children[0]->next;
805
for(i=0;i<node->alternatives;i++)
806
regex_list_dobuild(node,children[i]);
808
regex_list_dobuild(node,p);
812
regex_list_dobuild(node,children[0]);
814
if(node->next && !node->listend)
815
regex_list_dobuild(node,node->next);
816
if(node->op==OP_TMP_PARCLOSE)
506
822
/* Build the matcher list */
507
int cli_build_regex_list(struct regex_matcher* matcher)
823
static int build_regex_list(struct regex_matcher* matcher)
512
826
if(!matcher->list_inited || !matcher->list_loaded) {
513
827
cli_errmsg("Regex list not loaded!\n");
514
828
return -1;/*TODO: better error code */
516
830
cli_dbgmsg("Building regex list\n");
517
cli_hashtab_free(&matcher->suffix_hash);
518
if(( rc = cli_ac_buildtrie(&matcher->suffixes) ))
831
if(matcher->root_hosts)
832
if(( rc = cli_ac_buildtrie(&matcher->root_hosts[matcher->root_hosts_cnt-1]) ))
520
834
matcher->list_built=1;
521
cli_hashset_destroy(&matcher->sha256_pfx_set);
523
836
return CL_SUCCESS;
526
839
/* Done with this matcher, free resources */
527
840
void regex_list_done(struct regex_matcher* matcher)
531
if(matcher->list_inited == 1) {
533
cli_ac_free(&matcher->suffixes);
534
if(matcher->suffix_regexes) {
535
for(i=0;i<matcher->suffix_cnt;i++) {
536
struct regex_list *r = matcher->suffix_regexes[i].head;
538
struct regex_list *q = r;
544
free(matcher->suffix_regexes);
545
matcher->suffix_regexes = NULL;
547
if(matcher->all_pregs) {
548
for(i=0;i<matcher->regex_cnt;i++) {
549
regex_t *r = matcher->all_pregs[i];
551
mpool_free(matcher->mempool, r);
553
mpool_free(matcher->mempool, matcher->all_pregs);
555
cli_hashtab_free(&matcher->suffix_hash);
556
cli_bm_free(&matcher->sha256_hashes);
557
cli_bm_free(&matcher->hostkey_prefix);
844
regex_list_cleanup(matcher);
845
if(matcher->list_loaded) {
846
if(matcher->root_hosts) {
848
for(i=0;i<matcher->root_hosts_cnt;i++)
849
cli_ac_free(&matcher->root_hosts[i]);
850
free(matcher->root_hosts);
851
matcher->root_hosts=NULL;
854
matcher->root_hosts_cnt=0;
855
matcher->list_built=0;
856
destroy_tree(matcher);
857
matcher->list_loaded=0;
859
if(matcher->list_inited) {
860
matcher->list_inited=0;
862
stack_destroy(&matcher->node_stack);
863
stack_destroy(&matcher->node_stack_alt);
866
/* Tree matcher algorithm */
870
const unsigned char* start;
871
char_bitmap_p bitmap;
878
enum {TOKEN_CHAR,TOKEN_DOT,TOKEN_PAR_OPEN,TOKEN_PAR_CLOSE,TOKEN_BRACKET,TOKEN_ALT,TOKEN_REGEX,TOKEN_DONE};
880
static const unsigned char* getNextToken(const unsigned char* pat,struct token_t* token)
887
token->type=TOKEN_CHAR;
888
token->u.c = *(++pat);
889
if(islower(token->u.c)) {
890
/* handle \n, \t, etc. */
891
char fmt[3] = {'\\', '\0', '\0'};
895
if(snprintf(&c,1,fmt)!=1) {
896
token->type=TOKEN_REGEX;
897
token->u.start = pat;
905
token->type=TOKEN_ALT;
912
token->type=TOKEN_REGEX;
913
/* massert(0 && "find_regex_start should have forbidden us from finding regex special chars");*/
918
/*see if it is something simple like a list of characters, a range, or negated ...*/
919
const unsigned char* old=pat++;/* save this in case we change our mind and decide this is too complicated for us to handle*/
920
unsigned char range_start=0;
922
char_bitmap_p bitmap = cli_malloc(32);
926
memset(bitmap,0xFF,32);/*match chars not in brackets*/
930
memset(bitmap,0x00,32);
932
/* literal ] can be first character, so test for it at the end of the loop, for example: []] */
933
if (*pat=='-' && hasprev) {
935
unsigned char range_end;
937
massert(range_start);
941
if(pat[2]=='-' && pat[3]=='.' && pat[4]==']')
944
/* this is getting complicated, bail out */
945
cli_warnmsg("confused about collating sequences in regex,bailing out");
947
token->type=TOKEN_REGEX;
955
for(c=range_start+1;c<=range_end;c++)
956
bitmap[c>>3] ^= 1<<(c&0x7);
959
else if (pat[0]=='[' && pat[1]==':') {
960
const unsigned char* end;
965
end=(unsigned char*)strstr((const char*)pat,":]");
967
cli_warnmsg("confused about std char class syntax regex,bailing out");
969
token->type=TOKEN_REGEX;
974
for(i=0;i<std_class_cnt;i++)
975
if(!strncmp((const char*)pat,std_class[i],len)) {
981
if(char_class[i]&(1<<found))
982
bitmap[i>>3] ^= 1<<(i&0x7);
986
cli_warnmsg("confused about regex bracket expression, bailing out");
988
token->type=TOKEN_REGEX;
993
bitmap[*pat>>3] ^= 1<<(*pat&0x7);
999
/*TODO: see if this bitmap already exists, then reuse*/
1000
token->type = TOKEN_BRACKET;
1001
token->u.bitmap = bitmap;
1005
massert(0 && "Encountered ] without matching [");
1009
token->type=TOKEN_DOT;
1012
token->type=TOKEN_PAR_OPEN;
1015
token->type=TOKEN_PAR_CLOSE;
1018
token->type=TOKEN_CHAR;
1026
#define INITIAL_ALT_STACK 10
1027
#define ALT_STACK_GROW 20
1029
static const unsigned char* find_regex_start(const unsigned char* pat)
1031
struct token_t token;
1032
/*TODO: find where the regex part begins, for ex:
1033
* abcd+, regex begins at 'd'
1035
const unsigned char* last=NULL;
1036
const unsigned char* tmp=NULL;
1037
const unsigned char** altpositions = cli_malloc(INITIAL_ALT_STACK*sizeof(*altpositions));
1038
size_t altpositions_capacity = INITIAL_ALT_STACK;
1039
size_t altpositions_cnt = 0;
1045
/* Try to parse pattern till special regex chars are encountered, that the tree-matcher doesn't handle, like: +,*,{}.
1046
* The tricky part is that once we encounter these, the previous 'atom' has to be passed on to the regex matcher, so we have to
1047
* back up to the last known good position
1048
* Example, if we have: abc(defg)+, then only abc can be handled by tree parser, so we have to return the position of (.
1049
* Another example: abc(defg|xyz|oz+|pdo), the last known good position is |, after xyz
1050
* TODO: what about open parentheses? maybe once we found a special char, we have to back out before the first (?
1054
pat = getNextToken(pat,&token);
1055
if(token.type!=TOKEN_REGEX) {
1057
lasttype = token.type;
1058
if(token.type==TOKEN_BRACKET && token.u.bitmap)
1059
free(token.u.bitmap);
1060
if(token.type==TOKEN_ALT || token.type==TOKEN_PAR_OPEN) {
1061
/* save this position on stack, successfully parsed till here */
1062
if(altpositions_cnt && altpositions[altpositions_cnt-1][0]=='|')
1063
/* encountered another alternate (|) operator, override previous | position stored */
1064
altpositions[altpositions_cnt-1]=last;
1066
altpositions[altpositions_cnt++] = last;
1067
if(altpositions_cnt == altpositions_capacity) {
1068
altpositions_capacity += ALT_STACK_GROW;
1069
altpositions = cli_realloc(altpositions,altpositions_capacity*sizeof(*altpositions));
1074
} else if (lasttype==TOKEN_PAR_CLOSE) {
1075
/* remove last stored position from stack, successfully parsed this last group */
1077
massert(altpositions_cnt>0);
1081
if(altpositions_cnt)
1082
last = altpositions[0 /*altpositions_cnt-1*/];/*TODO: which index here?, see above TODO... */
1083
/*last stored 'safe' position where no special (+,*,{}) regex chars were encountered*/
1085
} while(*pat && token.type!=TOKEN_REGEX);
1087
return *pat ? last : last+1;
1090
static struct tree_node* tree_node_alloc(struct tree_node* next,char listend)
1092
struct tree_node* node = cli_malloc(sizeof(*node));
1094
node->alternatives=0;
1096
node->listend=listend;
1097
node->u.children=NULL;
1102
static struct tree_node* tree_root_alloc(void)
1104
struct tree_node* root=tree_node_alloc(NULL,1);
1114
static inline struct tree_node* tree_node_char_binsearch(const struct tree_node* node,const char csearch,int* left)
1117
struct tree_node **children;
1121
children = tree_node_get_children(node);
1122
right = node->alternatives-1;
1124
if(!node->alternatives)
1127
while(*left<=right) {
1128
int mid = *left+(right-*left)/2;
1129
if(children[mid]->c == csearch)
1130
return children[mid];
1131
else if(children[mid]->c < csearch)
1139
static inline struct tree_node* tree_get_next(struct tree_node* node)
1141
struct tree_node** children;
1143
children = tree_node_get_children(node);
1145
if(!node->alternatives && children && children[0])
1147
else if(node->alternatives<=1)
1150
return children[0]->next;
1153
static inline size_t tree_node_get_array_size(const struct tree_node* node)
1156
/* if op is CUSTOMCLASS, then first pointer is pointer to bitmap, so array size is +1 */
1157
return (node->alternatives + (node->op==OP_CUSTOMCLASS ? 1 : 0)) * sizeof(node->u.children[0]);
1160
static inline struct tree_node* tree_node_char_insert(struct tree_node* node,const char c,int left)
1162
struct tree_node* new, *alt = tree_get_next(node);
1163
struct tree_node **children;
1164
node->alternatives++;
1165
node->u.children = cli_realloc(node->u.children,tree_node_get_array_size(node));
1166
if(!node->u.children)
1169
children = node->op==OP_CUSTOMCLASS ? node->u.children+1 : node->u.children;
1171
new = tree_node_alloc(alt , node == alt );
1177
if(node->alternatives-left-1>0)
1178
memmove(&children[left+1],&children[left],(node->alternatives-left-1)*sizeof(node->u.children[0]));
1179
children[left] = new;
1184
/*
 * Attach `new` to `node` as a child that is not part of the binary-searched
 * alternatives array (add_pattern() uses this for character classes,
 * parenthesis-close nodes and leaves).
 * Two situations are handled:
 *   - node has alternatives: `new` is linked into the sibling chain reached
 *     through the alternatives' `next` pointers;
 *   - node has no alternatives: `new` goes into (or after) the single child
 *     slot of node->u.children.
 * NOTE(review): several statements of both branches are missing from this
 * excerpt; the inline comments below describe only the visible lines.
 */
static inline void tree_node_insert_nonbin(struct tree_node* node, struct tree_node* new)
1186
struct tree_node **children;
1190
children = tree_node_get_children(node);
1191
if(node->alternatives) {
1193
/* the alternatives currently point straight back to `node`:
 * redirect every one of them to `new` and clear its list-end flag */
if(children[0]->next == node) {
1196
for(i=0;i<node->alternatives;i++) {
1197
children[i]->next = new;
1198
children[i]->listend = 0;
1202
struct tree_node* p;
1203
/* otherwise walk the existing chain up to the element whose next is `node` */
for(p = children[0]->next ; p->next != node ; p = p->next)
1204
massert(!p->listend);
1211
/* slot 0 of a custom class node holds the bitmap, so its child slot is 1 */
int idx = node->op==OP_CUSTOMCLASS ? 1 : 0;
1212
if(node->u.children)
1213
if(node->u.children[idx]) {
1214
/* a child already exists: continue from it along its sibling chain */
node = node->u.children[idx];
1215
while(node->next && !node->listend)
1221
/* room for two pointers, whatever the value of idx */
node->u.children = cli_realloc(node->u.children,sizeof(node->u.children[0])*(2));
1222
if(node->u.children) {
1223
node->u.children[idx] = new;
1228
/*
 * Find which standard character class a 256-bit bitmap corresponds to by
 * comparing it with each row of char_class_bitmap (256>>3 = 32 bytes each).
 * Returns std_class_cnt when no row matches.
 * NOTE(review): the return statement of the matching case is missing from
 * this excerpt; presumably it returns the index `i` -- confirm.
 */
static inline unsigned char char_getclass(const unsigned char* bitmap)
1233
for(i=0;i<std_class_cnt;i++)
1234
if(!memcmp(bitmap,char_class_bitmap[i],256>>3))
1236
return std_class_cnt;
1239
/*
 * Tear down a node stack. The only statement visible in this excerpt resets
 * the capacity to 0.
 * NOTE(review): releasing the backing storage and resetting the other
 * fields presumably happens on the lines missing here -- confirm.
 */
static void stack_destroy(struct node_stack* stack)
1245
stack->capacity = 0;
1248
/*
 * Call this after whitelist load is complete, and the tree is no longer
 * going to be modified.
 * Destroys both helper stacks of the matcher (node_stack and
 * node_stack_alt) and immediately re-initialises them.
 */
1249
void regex_list_cleanup(struct regex_matcher* matcher)
1253
stack_destroy(&matcher->node_stack);
1254
stack_destroy(&matcher->node_stack_alt);
1255
stack_init(&matcher->node_stack);
1256
stack_init(&matcher->node_stack_alt);
561
1259
/*
 * Report whether the matcher is usable.
 * Returns non-zero unless matcher->list_inited is -1. The first half of the
 * condition (list_inited == 0) is already covered by the second half, so
 * the whole expression is equivalent to `list_inited != -1`.
 */
int is_regex_ok(struct regex_matcher* matcher)
564
1262
return (!matcher->list_inited || matcher->list_inited!=-1);/* either we don't have a regexlist, or we initialized it successfully */
567
/*
 * Register a new suffix with the matcher's suffix pattern set
 * (matcher->suffixes) and with its filter.
 *   matcher - owner of the memory pool, the suffix matcher and the filter
 *   info    - stored as the pattern's customdata
 *   suffix  - the suffix bytes
 *   len     - number of bytes of `suffix` to use
 * On a cli_ac_addpatt() failure the pattern buffer and the pattern
 * structure are returned to the pool.
 * NOTE(review): the NULL checks, the initialisation of new->length and the
 * return statements are missing from this excerpt.
 */
static int add_newsuffix(struct regex_matcher *matcher, struct regex_list *info, const char *suffix, size_t len)
569
struct cli_matcher *root = &matcher->suffixes;
570
/* zero-initialised pattern descriptor taken from the matcher's pool */
struct cli_ac_patt *new = mpool_calloc(matcher->mempool,1,sizeof(*new));
576
assert(root && suffix);
585
new->offset_min = CLI_OFF_ANY;
588
/* OR the flag into ch[1], then copy the resulting value into ch[0] */
new->ch[0] = new->ch[1] |= CLI_MATCH_IGNORE;
589
/* keep track of the longest pattern seen by this matcher */
if(new->length > root->maxpatlen)
590
root->maxpatlen = new->length;
592
/* one pattern element per suffix byte */
new->pattern = mpool_malloc(matcher->mempool, sizeof(new->pattern[0])*len);
594
/* NOTE(review): presumably the failure path of the allocation above */
mpool_free(matcher->mempool, new);
598
new->pattern[i] = suffix[i];/*new->pattern is short int* */
600
new->customdata = info;
602
if((ret = cli_ac_addpatt(root,new))) {
603
mpool_free(matcher->mempool, new->pattern);
604
mpool_free(matcher->mempool, new);
607
filter_add_static(&matcher->filter, (const unsigned char*)suffix, len, "regex");
611
#define MODULE "regex_list: "
612
/* ------ load a regex, determine suffix, determine suffix2regexlist map ---- */
614
/*
 * Append `regex` to the list described by `ht`. The statement visible in
 * this excerpt links the new element after the current tail.
 * NOTE(review): handling of an empty list and the update of ht->tail are
 * presumably on the lines missing here -- confirm.
 */
static void list_add_tail(struct regex_list_ht *ht, struct regex_list *regex)
619
ht->tail->nxt = regex;
624
/*
 * Callback that records one (suffix, regex) association in the matcher.
 * Returns 0 on success, clamav error code otherwise.
 *   cbdata     - the struct regex_matcher to update
 *   suffix     - suffix bytes, suffix_len long
 *   iregex     - source of the pattern text (duplicated) and of the
 *                compiled regex pointer (shared, not copied)
 * If the suffix is already in suffix_hash, the new entry is appended to
 * that suffix's regex list; otherwise a new slot is created in
 * suffix_regexes and the suffix is registered through add_newsuffix().
 * NOTE(review): allocation-failure handling and the return statements are
 * missing from this excerpt.
 */
625
static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *iregex)
627
struct regex_matcher *matcher = cbdata;
628
struct regex_list *regex = cli_malloc(sizeof(*regex));
629
const struct cli_element *el;
634
/* own copy of the pattern string; the compiled regex itself is shared */
regex->pattern = iregex->pattern ? cli_strdup(iregex->pattern) : NULL;
635
regex->preg = iregex->preg;
637
el = cli_hashtab_find(&matcher->suffix_hash, suffix, suffix_len);
638
/* TODO: what if suffixes are prefixes of each other and only one will
641
/* existing suffix */
642
assert((size_t)el->data < matcher->suffix_cnt);
643
list_add_tail(&matcher->suffix_regexes[el->data], regex);
646
/* new suffix: `n` is its index, suffix_cnt is bumped for the next one */
size_t n = matcher->suffix_cnt++;
647
el = cli_hashtab_insert(&matcher->suffix_hash, suffix, suffix_len, n);
648
/* NOTE(review): the result overwrites matcher->suffix_regexes directly, so
 * the old array pointer is lost if the reallocation fails -- confirm how
 * cli_realloc() behaves on failure */
matcher->suffix_regexes = cli_realloc(matcher->suffix_regexes, (n+1)*sizeof(*matcher->suffix_regexes));
649
if(!matcher->suffix_regexes)
651
/* a one-element list: head and tail are the same entry */
matcher->suffix_regexes[n].tail = regex;
652
matcher->suffix_regexes[n].head = regex;
653
/* remember the slot of the bare "/" suffix */
if (suffix[0] == '/' && suffix[1] == '\0')
654
matcher->root_regex_idx = n;
655
add_newsuffix(matcher, regex, suffix, suffix_len);
1265
/*
 * Add one pattern to the matcher's tree, starting at matcher->root_regex.
 * Returns 0 on success, regexec error code otherwise.
 *   pat  - the pattern text; it is consumed token by token with
 *          getNextToken() until a leaf node has been created
 *   info - text attached to the leaf (duplicated with strdup)
 * Plain characters are inserted into the binary-searched alternatives of
 * the current node; character classes, parenthesis-close nodes and leaves
 * are attached with tree_node_insert_nonbin(). matcher->node_stack and
 * matcher->node_stack_alt are used as scratch stacks for parenthesised
 * groups.
 * NOTE(review): most case labels, conditions and error paths are missing
 * from this excerpt; the inline comments describe only the visible lines.
 */
1266
static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,const char* info)
1269
const unsigned char* pat_end = find_regex_start(pat);
1270
struct token_t token;
1271
struct tree_node* node;
1275
node = matcher->root_regex;
1277
/* start with empty scratch stacks, the root being the first entry */
stack_reset(&matcher->node_stack);
1278
stack_reset(&matcher->node_stack_alt);
1279
stack_push(&matcher->node_stack,node);
1281
/* loop until the current node is a leaf */
for(;node->op!=OP_LEAF;){
1283
pat = getNextToken(pat,&token);
1285
/* NOTE(review): the conditions guarding these two overrides are not visible */
token.type = TOKEN_REGEX;
1289
token.type = TOKEN_DONE;
1291
switch(token.type) {
1294
/* search for char in tree */
1296
struct tree_node* newnode = tree_node_char_binsearch(node,token.u.c,&left);
1300
/* not found, insert it */
1301
node = tree_node_char_insert(node,token.u.c,left);
1306
case TOKEN_PAR_OPEN:
1307
stack_push(&matcher->node_stack_alt,NULL);/* marker */
1308
stack_push(&matcher->node_stack,node);
1311
case TOKEN_PAR_CLOSE: {
1312
/*TODO: test this!!!*/
1313
struct tree_node* node_alt = node;
1314
node = tree_node_alloc(NULL,1);
1315
node->op=OP_PARCLOSE;
1318
/* the current branch, and every branch saved on node_stack_alt up to the
 * NULL marker, is joined to the same OP_PARCLOSE node */
tree_node_insert_nonbin(node_alt,node);
1319
while (( node_alt = stack_pop(&matcher->node_stack_alt) )) {
1320
tree_node_insert_nonbin(node_alt,node);
1322
stack_pop(&matcher->node_stack);
1327
/* save the end of the branch just finished, then go back to the node on
 * top of node_stack (it is popped and pushed again, i.e. peeked) */
stack_push(&matcher->node_stack_alt,node);
1328
node = stack_pop(&matcher->node_stack);
1329
stack_push(&matcher->node_stack,node);
1334
struct tree_node* new = tree_node_alloc(tree_get_next(node),1);
1335
unsigned char charclass = char_getclass(token.u.bitmap);
1336
if(charclass == std_class_cnt) {/*not a std char class*/
1337
new->op = OP_CUSTOMCLASS;
1338
/* two slots: [0] the bitmap pointer, [1] the (still empty) child */
new->u.children = cli_malloc(sizeof(new->u.children[0])*2);
1339
if(!new->u.children)
1341
new->u.bitmap[0] = token.u.bitmap;
1342
new->u.bitmap[1] = NULL;
1343
tree_node_insert_nonbin(node,new);
1347
new->op = OP_STDCLASS;
1349
tree_node_insert_nonbin(node,new);
1357
struct tree_node* new = tree_node_alloc(tree_get_next(node),1);
1359
tree_node_insert_nonbin(node,new);
1366
struct leaf_info* leaf=cli_malloc(sizeof(*leaf));
1369
/* NOTE(review): plain strdup() here, while other code in this file uses
 * cli_strdup() -- confirm this is intentional */
leaf->info=strdup(info);
1370
if(token.type==TOKEN_REGEX) {
1372
struct tree_node* new;
1374
preg=cli_malloc(sizeof(*preg));
1377
/* compile the remaining regex part as an extended regular expression */
rc = regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL));
1381
new=cli_malloc(sizeof(*new));
1386
new->alternatives=0;
1389
tree_node_insert_nonbin(node,new);
1393
node->alternatives=0;
660
/*
 * Reverse the NUL-terminated string `pattern` in place.
 * The length is taken once with strlen().
 * NOTE(review): the return statement is not visible in this excerpt;
 * add_static_pattern() stores the result in `len`, so presumably the string
 * length is returned -- confirm.
 */
static size_t reverse_string(char *pattern)
662
size_t len = strlen(pattern);
1406
/*
 * Walk the tree from `node`, matching the input bytes starting at `c`.
 * c has to be unsigned char here!! (it is used directly as an index into
 * char_class[] and into the class bitmaps).
 * Returns MATCH_SUCCESS when a leaf accepts the input and MATCH_FAILED when
 * the tree is empty or the walk climbs back to the root without a match.
 * NOTE(review): the enclosing loop, the switch header, most case labels and
 * the use of `len` and `info` are missing from this excerpt; the inline
 * comments describe only the visible lines.
 */
1407
static int match_node(struct tree_node* node,const unsigned char* c,size_t len,const char** info)
1409
struct tree_node** children;
1416
if(!node->u.children)
1417
return MATCH_FAILED;/* tree empty */
1423
children = node->u.children;
1429
/*this isn't a real character, so don't move*/
1435
massert(*c==node->c && "We know this has to match");
1436
rc = 1;/* *c==node->c;- we know it has matched */
1442
/* standard class: node->c is a mask tested against the table entry of *c */
rc = char_class[*c]&(node->c);
1444
case OP_CUSTOMCLASS:
1446
char_bitmap_p bitmap;
1448
bitmap = (char_bitmap_p)node->u.bitmap[0];
1450
/* test bit (*c & 7) of byte (*c >> 3) in the class bitmap */
rc = bitmap[*c>>3]&(1<<(*c&0x7));
1455
const struct leaf_info* leaf = node->u.leaf;
1458
/* regexec() returns 0 on a match, hence the negation */
rc = !regexec(leaf->preg,(const char*)c,0,NULL,0);
1461
massert(*c==node->c && "We know this has to match[2]");
1466
return MATCH_SUCCESS;
1472
cli_errmsg("Encountered invalid operator in tree:%d\n",node->op);
1479
const char csearch = *c;
1480
int left = 0,right = node->alternatives-1;
1482
/*matched so far, go deeper*/
1483
/*do a binary search between children */
1485
while(left<=right) {
1486
mid = left+(right-left)/2;
1487
if (children[mid]->c == csearch)
1489
else if(children[mid]->c < csearch)
1495
node = children[mid];
1499
if(node->alternatives) {
1500
if(!children[0]->listend) {
1505
while(node && node->listend) {
1506
node = node->next;/* climb up */
1510
if(!node || !node->next)
1511
return MATCH_FAILED;/* reached root node */
1516
else if(node->u.children) {
1517
struct tree_node* rewrite_next = NULL;
1518
if(node->op==OP_PARCLOSE)
1519
rewrite_next = node;
1522
massert(node->op!=OP_CHAR);
1524
node->next = rewrite_next;/* this node is pointed to by several parent nodes,
1526
from which one we came, so we can find our way back
1527
should we fail to match somewhere deeper*/
1532
/* this node didn't match, try sibling, or parent (if no more siblings) */
1533
while(node && node->listend) {
1534
node = node->next;/* sibling of parent */
1538
if(!node || !node->next) /* reached root node, it has no next */
1539
return MATCH_FAILED;
1547
return MATCH_FAILED;
1550
/*
 * Push node on stack, only if it isn't there already.
 * NOTE(review): in this excerpt the body of the function is separated from
 * this signature by unrelated lines (see the loop over stack->data further
 * down).
 */
1551
static inline void stack_push_once(struct node_stack* stack,struct tree_node* node)
664
/* swap loop of reverse_string(): exchange pattern[i] with its mirror
 * position pattern[len-i-1] for the first half of the string; with an odd
 * length the middle character stays where it is */
for(i=0; i < (len/2); i++) {
665
char aux = pattern[i];
666
pattern[i] = pattern[len-i-1];
667
pattern[len-i-1] = aux;
672
/*
 * Allocate a new regex_t from the matcher's memory pool and record it in
 * matcher->all_pregs, growing that array by one element first.
 * matcher->regex_cnt is incremented as part of the size computation, so the
 * new entry lives at index regex_cnt-1.
 * NOTE(review): the failure branches and the return statement are missing
 * from this excerpt; presumably the new regex_t is returned -- confirm.
 */
static regex_t *new_preg(struct regex_matcher *matcher)
675
/* NOTE(review): regex_cnt is already incremented and all_pregs overwritten
 * even if this reallocation fails -- confirm the failure handling */
matcher->all_pregs = mpool_realloc(matcher->mempool, matcher->all_pregs, ++matcher->regex_cnt * sizeof(*matcher->all_pregs));
676
if(!matcher->all_pregs)
678
r = mpool_malloc(matcher->mempool, sizeof(*r));
681
matcher->all_pregs[matcher->regex_cnt-1] = r;
685
/*
 * Add a literal (non-regex) pattern: the string is reversed in place, a
 * copy of the reversed text is stored in a temporary struct regex_list and
 * the whole reversed string is registered as a suffix through
 * add_pattern_suffix().
 * Note that `pattern` is modified by the in-place reversal.
 * NOTE(review): the initialisation of the remaining fields of `regex`, the
 * release of the cli_strdup() copy and the return statement are missing
 * from this excerpt.
 */
static int add_static_pattern(struct regex_matcher *matcher, char* pattern)
688
struct regex_list regex;
691
len = reverse_string(pattern);
693
regex.pattern = cli_strdup(pattern);
695
/* NOTE(review): the last argument below looks like a mis-encoded `&regex`
 * (an HTML entity was substituted during extraction); it is left untouched
 * here because this edit changes comments only */
rc = add_pattern_suffix(matcher, pattern, len, ®ex);
700
/*
 * Add one regular-expression pattern to the matcher.
 * A trailing "([/?].*)?/" or "([/?].*)/" is detected at the end of the
 * pattern and `len` is reduced by its length, a regex_t is obtained from
 * new_preg() and the pattern is handed to cli_regex2suffix() with
 * add_pattern_suffix() as the callback.
 * NOTE(review): the statement that actually truncates `pattern` to `len`,
 * the error handling and the return statement are missing from this
 * excerpt.
 */
int regex_list_add_pattern(struct regex_matcher *matcher, char *pattern)
705
/* we only match the host, so remove useless stuff */
706
const char remove_end[] = "([/?].*)?/";
707
const char remove_end2[] = "([/?].*)/";
709
len = strlen(pattern);
710
/* sizeof() of the arrays includes the terminating NUL, hence the +1 / -1
 * adjustments in the index and length computations below */
if(len > sizeof(remove_end)) {
711
if(strncmp(&pattern[len - sizeof(remove_end)+1], remove_end, sizeof(remove_end)-1) == 0) {
712
len -= sizeof(remove_end) - 1;
715
if(strncmp(&pattern[len - sizeof(remove_end2)+1], remove_end2, sizeof(remove_end2)-1) == 0) {
716
len -= sizeof(remove_end2) - 1;
722
preg = new_preg(matcher);
726
rc = cli_regex2suffix(pattern, preg, add_pattern_suffix, (void*)matcher);
1557
/* body of stack_push_once(): linear scan of the entries already on the
 * stack for the same pointer, followed by the actual push.
 * NOTE(review): the statement executed when the pointer is found is not
 * visible in this excerpt; presumably it returns without pushing. */
for(i=0;i < stack->cnt;i++)
1558
if(stack->data[i]==node)
1560
stack_push(stack,node);
1563
/*
 * Recursive helper of destroy_tree(): walks the subtree rooted at `node`
 * and pushes every pointer that has to be released onto
 * matcher->node_stack with stack_push_once(), so that each pointer is
 * recorded a single time even when it is reachable through several paths.
 * Besides tree nodes, leaf_info structures and children arrays are pushed
 * as well (cast to struct tree_node*).
 * NOTE(review): several statements and all closing braces are missing from
 * this excerpt; the inline comments describe only the visible lines.
 */
static void destroy_tree_internal(struct regex_matcher* matcher,struct tree_node* node)
1565
struct tree_node **children;
1569
children = tree_node_get_children(node);
1570
if(node->op==OP_LEAF) {
1571
struct leaf_info* leaf = node->u.leaf;
1572
/* a leaf that is not the end of its list has a sibling to visit */
if(node->next && !node->listend)
1573
destroy_tree_internal(matcher,node->next);
1574
stack_push_once(&matcher->node_stack,(struct tree_node*)node->u.leaf);/* cast to make compiler happy, and to not make another stack implementation for storing void* */
1575
stack_push_once(&matcher->node_stack,node);
1577
/* release the compiled regex owned by the leaf */
regfree(leaf->preg);
1587
if(node->alternatives) {
1589
struct tree_node* p;
1591
/* head of the non-alternative sibling chain, if there is one */
p = children[0]->op==OP_LEAF ? NULL : children[0]->next;
1592
for(i=0;i<node->alternatives;i++)
1593
destroy_tree_internal(matcher,children[i]);
1595
destroy_tree_internal(matcher,p);/*?? is this ok, or without _internal?*/
1600
destroy_tree_internal(matcher,children[0]);
1603
if(node->op!=OP_LEAF && node->next && !node->listend)
1604
destroy_tree_internal(matcher,node->next);
1605
if(node->u.children)
1606
stack_push_once(&matcher->node_stack,(struct tree_node*)node->u.children);/* cast to make compiler happy, it isn't really a tree_node* */
1607
/* slot 0 of a custom class node is its bitmap: free it right away */
if(node->op==OP_CUSTOMCLASS && node->u.children[0]) {
1608
free(node->u.children[0]);
1609
node->u.children[0]=NULL;
1611
stack_push_once(&matcher->node_stack,node);
1614
/*
 * Release the whole tree of the matcher in two phases: first
 * destroy_tree_internal() collects the pointers to release on
 * matcher->node_stack, then the stack is popped until it is empty.
 * NOTE(review): what is done with each popped pointer is not visible in
 * this excerpt; presumably it is freed -- confirm.
 */
static void destroy_tree(struct regex_matcher* matcher)
1616
/* we might have the same node linked by different nodes, so a recursive walk&free doesn't work in all situations,
1617
* i.e. it might double-free, so instead of freeing, just push the nodes on a stack, and later free the nodes in that stack,
1618
* (and push to stack only if it doesn't contain it already*/
1621
stack_reset(&matcher->node_stack);
1622
destroy_tree_internal(matcher,matcher->root_regex);
1623
while (matcher->node_stack.cnt) {
1624
struct tree_node* node = stack_pop(&matcher->node_stack);
1630
/*
 * Debug helper: print the subtree rooted at `node` to stdout as graph
 * statements, using the address of each node (formatted with %p) as its
 * identifier. Leaves are printed first and handled separately; for other
 * nodes the label, the edges to the children, a "rank=same" group and
 * finally the children themselves (recursively) are printed.
 * NOTE(review): several statements, loop bodies and all closing braces are
 * missing from this excerpt; the inline comments describe only the visible
 * lines.
 */
static void dump_node(struct tree_node* node)
1633
struct tree_node* p,**children;
1635
if(node->op==OP_LEAF) {
1636
if(node->u.leaf->preg)
1637
printf("n%p [label=\"regex\\nleaf\"]",(void*)node);
1639
printf("n%p [label=\"%c\\nleaf\"];\n",(void*)node,node->c);
1640
if(node->next && !node->listend) {
1641
printf("n%p -> n%p;\n",(void*)node,(void*)node->next);
1642
dump_node(node->next);
1646
/* root and parenthesis-close nodes carry no character: shown as '@' */
printf("n%p [label=\"%c\\n%d\\nlistend:%d\"];\n",(void*)node,(node->op==OP_ROOT||node->op==OP_PARCLOSE) ?'@' :node->c,node->op,node->listend);
1648
printf("n%p -> n%p;\n",(void*)node,(void*)node->next);
1649
printf("n%p -> {",(void*)node);/*using address of node as id*/
1650
children = tree_node_get_children(node);
1651
if(node->alternatives)
1653
for(i=0;i<node->alternatives;i++)
1654
printf("n%p ",(void*)children[i]);
1655
/* then the chain of non-alternative siblings hanging off the first child */
if(node->alternatives && children[0]->op!=OP_LEAF)
1656
for(p=children[0]->next;p!=node;p=p->next)
1659
printf("n%p ",(void*)p);
1660
if(p->op==OP_LEAF || p->listend)
1663
if(!node->alternatives && children && children[0])
1664
printf("n%p ",(void*)children[0]);
1666
printf("{rank=same;");
1667
for(i=0;i<node->alternatives;i++)
1668
/* NOTE(review): this loop indexes node->u.children directly while the
 * other loops use `children`; tree_node_char_insert() shows that for
 * OP_CUSTOMCLASS nodes the two differ by one slot -- confirm which one is
 * intended */
printf("n%p ",(void*)node->u.children[i]);
1669
if(node->alternatives && children[0]->op!=OP_LEAF)
1670
for(p=children[0]->next;p!=node;p=p->next)
1672
printf("n%p ",(void*)p);
1673
if(p->op==OP_LEAF || p->listend)
1676
if(!node->alternatives && children && children[0])
1677
printf("n%p ",(void*)children[0]);
1679
/* finally recurse into every child that was listed above */
for(i=0;i<node->alternatives;i++)
1680
dump_node(children[i]);
1681
if(node->alternatives && children[0]->op!=OP_LEAF)
1682
for(p=children[0]->next;p!=node;p=p->next)
1685
if(p->op==OP_LEAF || p->listend)
1688
if(!node->alternatives && children && children[0])
1689
dump_node(children[0]);
1692
void dump_tree(struct tree_node* root)
1694
/*use dot/dotty from graphviz to view it*/
1696
printf("digraph tree {\n");