65
65
/* Forward declaration; the definition lies outside this excerpt. */
static int add_static_pattern(struct regex_matcher *matcher, char *pattern);
68
/* ----- shift-or filtering -------------- */
70
/* Evaluates to nonzero when bit `val` is set in `bmap`, a bitmap indexed as
 * 32 bits per array element (element = val >> 5, bit = val & 0x1f).
 * The mask is built from an unsigned 1 so that selecting bit 31 never shifts
 * into the sign bit of an int. */
#define BITMAP_CONTAINS(bmap, val) ((bmap)[(val) >> 5] & (1u << ((val) & 0x1f)))
71
/* Sets bit `val` in `bmap`, a bitmap indexed as 32 bits per array element
 * (element = val >> 5, bit = val & 0x1f).
 * The mask is built from an unsigned 1 so that setting bit 31 never shifts
 * into the sign bit of an int. */
#define BITMAP_INSERT(bmap, val) ((bmap)[(val) >> 5] |= (1u << ((val) & 0x1f)))
73
static void SO_init(struct filter *m)
75
memset(m->B, ~0, sizeof(m->B));
76
memset(m->end, ~0, sizeof(m->end));
77
memset(m->end_fast, ~0, sizeof(m->end_fast));
80
/* each pattern position occupies one state bit of a uint32_t, so at most 32 positions fit */
81
#define MAXSOPATLEN 32
83
/* merge another pattern into the filter
84
* add('abc'); add('bcd'); will match [ab][bc][cd] */
85
static int SO_preprocess_add(struct filter *m, const unsigned char *pattern, size_t len)
90
/* cut length, and make it modulo 2 */
91
if(len > MAXSOPATLEN) {
94
/* we use 2-grams, must be multiple of 2 */
100
/* Shift-Or like preprocessing */
101
for(j=0;j < len-1;j++) {
102
/* use overlapping 2-grams. We need them overlapping because matching can start at any position */
103
q = cli_readint16( &pattern[j] );
104
m->B[q] &= ~(1 << j);
106
/* we use variable length patterns, use last character to mark pattern end,
107
* can lead to false positives.*/
108
/* mark that at state j, the q-gram q can end the pattern */
111
m->end[q] &= ~(1 << j);
112
m->end_fast[pattern[j+1]] &= ~(1<<j);
117
/* this is like a FSM, with multiple active states at the same time.
118
* each bit in "state" means an active state, when a char is encountered
119
* we determine what states can remain active.
120
* The FSM transition rules are expressed as bit-masks */
121
long SO_search(const struct filter *m, const unsigned char *data, unsigned long len)
125
const uint32_t *B = m->B;
126
const uint32_t *End = m->end;
127
const uint32_t *EndFast = m->end_fast;
129
/* cut length, and make it modulo 2 */
130
if(len > MAXSOPATLEN) {
133
/* we use 2-grams, must be multiple of 2 */
137
/* Shift-Or like search algorithm */
138
for(j=0;j < len-1; j++) {
139
const uint16_t q0 = cli_readint16( &data[j] );
141
state = (state << 1) | B[q0];
142
/* state marks with a 0 bit all active states
143
* End[q0] marks with a 0 bit all states where the q-gram 'q' can end a pattern
144
* if we got two 0's at matching positions, it means we encountered a pattern's end */
145
match_end = state | EndFast[data[j+1]];
146
if((match_end != 0xffffffff) && (state | End[q0]) != 0xffffffff) {
147
/* note: we rely on short-circuit eval here, we only evaluate and fetch End[q0], if
148
* end_fast has matched. This reduces cache pressure on End[], and allows us to keep the working
151
/* if state is reachable, and this character can finish a pattern, assume match */
152
/* to reduce false positives check if qgram can finish the pattern */
153
/* return position of probable match */
154
/* find first 0 starting from MSB, the position of that bit as counted from LSB, is the length of the
155
* longest pattern that could match */
156
return j >= MAXSOPATLEN ? j - MAXSOPATLEN : 0;
163
/* ----------------------------------------------------------- */
166
68
/* Presumably the status code for a successful match; its users are outside
 * this excerpt -- NOTE(review): confirm against the callers. */
#define MATCH_SUCCESS 0
167
69
/* Presumably the status code for a failed match; its users are outside this
 * excerpt -- NOTE(review): confirm against the callers.
 * Parenthesized so the negative value stays a single operand wherever the
 * macro is expanded. */
#define MATCH_FAILED (-1)
289
191
buffer[buffer_len]=0;
290
192
cli_dbgmsg("Looking up in regex_list: %s\n", buffer);
292
if((rc = cli_ac_initdata(&mdata, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
194
if((rc = cli_ac_initdata(&mdata, 0, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
295
197
bufrev = cli_strdup(buffer);
298
200
reverse_string(bufrev);
299
rc = SO_search(&matcher->filter, (const unsigned char*)bufrev, buffer_len) != -1;
201
rc = filter_search(&matcher->filter, (const unsigned char*)bufrev, buffer_len) != -1;
308
rc = cli_ac_scanbuff((const unsigned char*)bufrev,buffer_len, NULL, (void*)®ex, &res, &matcher->suffixes,&mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL);
210
rc = cli_ac_scanbuff((const unsigned char*)bufrev,buffer_len, NULL, (void*)®ex, &res, &matcher->suffixes,&mdata,0,0,NULL,AC_SCAN_VIR,NULL);
310
212
cli_ac_freedata(&mdata);
456
358
if (fl != 'W' && pat->length == 32 &&
457
359
cli_hashset_contains(&matcher->sha256_pfx_set, cli_readint32(pat->pattern)) &&
458
cli_bm_scanbuff(pat->pattern, 32, &vname, NULL, &matcher->sha256_hashes,0,0,-1) == CL_VIRUS) {
360
cli_bm_scanbuff(pat->pattern, 32, &vname, NULL, &matcher->sha256_hashes,0,NULL,NULL) == CL_VIRUS) {
459
361
if (*vname == 'W') {
460
362
/* hash is whitelisted in local.gdb */
461
363
cli_dbgmsg("Skipping hash %s\n", pattern);