2
* C implementation of the Aho-Corasick pattern matching algorithm. It's based
3
* on the ScannerDaemon's version (coded in Java) by Kurt Huwig and
4
* http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html
5
* Thanks to Kurt Huwig for pointing me to this page.
7
* Copyright (C) 2002 - 2005 Tomasz Kojm <tkojm@clamav.net>
9
* This program is free software; you can redistribute it and/or modify
10
* it under the terms of the GNU General Public License as published by
11
* the Free Software Foundation; either version 2 of the License, or
12
* (at your option) any later version.
14
* This program is distributed in the hope that it will be useful,
15
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
* GNU General Public License for more details.
19
* You should have received a copy of the GNU General Public License
20
* along with this program; if not, write to the Free Software
21
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25
#include "clamav-config.h"
36
#include "matcher-ac.h"
38
#include "filetypes.h"
40
#define AC_MIN_LENGTH 2
43
struct cli_ac_node *node;
44
struct nodelist *next;
47
int cli_ac_addpatt(struct cl_node *root, struct cli_ac_patt *pattern)
49
struct cli_ac_node *pos, *next;
52
if(pattern->length < AC_MIN_LENGTH)
57
for(i = 0; i < AC_MIN_LENGTH; i++) {
58
next = pos->trans[((unsigned char) pattern->pattern[i]) & 0xff];
61
next = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
63
cli_dbgmsg("Unable to allocate pattern node (%d)\n", sizeof(struct cl_node));
68
root->ac_nodetable = (struct cli_ac_node **) cli_realloc(root->ac_nodetable, (root->ac_nodes) * sizeof(struct cli_ac_node *));
69
if(root->ac_nodetable == NULL) {
70
cli_dbgmsg("Unable to realloc nodetable (%d)\n", (root->ac_nodes) * sizeof(struct cl_node *));
73
root->ac_nodetable[root->ac_nodes - 1] = next;
75
pos->trans[((unsigned char) pattern->pattern[i]) & 0xff] = next;
83
pattern->next = pos->list;
89
static int cli_enqueue(struct nodelist **bfs, struct cli_ac_node *n)
93
new = (struct nodelist *) cli_calloc(1, sizeof(struct nodelist));
95
cli_dbgmsg("Unable to allocate node list (%d)\n", sizeof(struct nodelist));
105
static struct cli_ac_node *cli_dequeue(struct nodelist **bfs)
107
struct nodelist *handler, *prev = NULL;
108
struct cli_ac_node *pt;
112
while(handler && handler->next) {
114
handler = handler->next;
131
static int cli_maketrans(struct cl_node *root)
133
struct nodelist *bfs = NULL;
134
struct cli_ac_node *ac_root = root->ac_root, *child, *node;
138
ac_root->fail = NULL;
139
if((ret = cli_enqueue(&bfs, ac_root)) != 0) {
143
while((node = cli_dequeue(&bfs))) {
147
for(i = 0; i < 256; i++) {
148
child = node->trans[i];
151
node->trans[i] = (node->fail)->trans[i];
153
node->trans[i] = ac_root;
156
child->fail = (node->fail)->trans[i];
158
child->fail = ac_root;
160
if((ret = cli_enqueue(&bfs, child)) != 0) {
169
int cli_ac_buildtrie(struct cl_node *root)
177
cli_dbgmsg("Pattern matcher not initialised\n");
181
if((ret = cli_addtypesigs(root)))
184
return cli_maketrans(root);
187
static void cli_freepatt(struct cli_ac_patt *list)
189
struct cli_ac_patt *handler, *prev;
196
free(handler->pattern);
197
free(handler->virname);
198
if(handler->offset && (!handler->sigid || handler->partno == 1))
199
free(handler->offset);
202
for(i = 0; i < handler->alt; i++)
203
free(handler->altc[i]);
207
handler = handler->next;
212
void cli_ac_free(struct cl_node *root)
217
for(i = 0; i < root->ac_nodes; i++) {
218
cli_freepatt(root->ac_nodetable[i]->list);
219
free(root->ac_nodetable[i]);
222
if(root->ac_nodetable)
223
free(root->ac_nodetable);
229
inline static int cli_findpos(const char *buffer, int offset, int length, const struct cli_ac_patt *pattern)
231
int bufferpos = offset + AC_MIN_LENGTH;
232
int postfixend = offset + length;
233
unsigned int i, j, alt = 0, found = 0;
236
if(bufferpos >= length)
239
for(i = AC_MIN_LENGTH; i < pattern->length; i++) {
241
if(bufferpos == postfixend)
244
if(pattern->pattern[i] == CLI_ALT) {
245
for(j = 0; j < pattern->altn[alt]; j++) {
246
if(pattern->altc[alt][j] == buffer[bufferpos])
254
} else if(pattern->pattern[i] != CLI_IGN && (char) pattern->pattern[i] != buffer[bufferpos])
259
if(bufferpos == length)
266
int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, short otfrec, unsigned long int offset, unsigned long int *partoff, unsigned short ftype, int fd)
268
struct cli_ac_node *current;
269
struct cli_ac_patt *pt;
270
int position, type = CL_CLEAN, dist, t;
277
if(!partcnt || !partoff) {
278
cli_dbgmsg("cli_ac_scanbuff(): partcnt == NULL || partoff == NULL\n");
282
current = root->ac_root;
284
for(i = 0; i < length; i++) {
285
current = current->trans[(unsigned char) buffer[i] & 0xff];
287
if(current->islast) {
288
position = i - AC_MIN_LENGTH + 1;
292
if(cli_findpos(buffer, position, length, pt)) {
293
if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) {
294
if(ftype == CL_TYPE_UNKNOWN_TEXT)
299
if((fd == -1 && !t) || !cli_validatesig(pt->target, t, pt->offset, offset + position, fd, pt->virname)) {
305
if(pt->sigid) { /* it's a partial signature */
306
if(partcnt[pt->sigid] + 1 == pt->partno) {
309
if(offset + i - partoff[pt->sigid] > pt->maxdist)
312
if(dist && pt->mindist)
313
if(offset + i - partoff[pt->sigid] < pt->mindist)
317
partoff[pt->sigid] = offset + i + pt->length;
319
if(++partcnt[pt->sigid] == pt->parts) { /* the last one */
322
if(pt->type > type) {
323
cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
329
*virname = pt->virname;
337
} else { /* old type signature */
340
if(pt->type > type) {
341
cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
348
*virname = pt->virname;
358
current = current->fail;
362
return otfrec ? type : CL_CLEAN;