~ubuntu-branches/ubuntu/feisty/clamav/feisty

« back to all changes in this revision

Viewing changes to libclamav/matcher-bm.c

  • Committer: Bazaar Package Importer
  • Author(s): Kees Cook
  • Date: 2007-02-20 10:33:44 UTC
  • mto: This revision was merged to the branch mainline in revision 16.
  • Revision ID: james.westby@ubuntu.com-20070220103344-zgcu2psnx9d98fpa
Tags: upstream-0.90
ImportĀ upstreamĀ versionĀ 0.90

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*
2
 
 *  Copyright (C) 2007-2009 Sourcefire, Inc.
3
 
 *
4
 
 *  Authors: Tomasz Kojm
 
2
 *  Copyright (C) 2004 - 2005 Tomasz Kojm <tkojm@clamav.net>
5
3
 *
6
4
 *  This program is free software; you can redistribute it and/or modify
7
 
 *  it under the terms of the GNU General Public License version 2 as
8
 
 *  published by the Free Software Foundation.
 
5
 *  it under the terms of the GNU General Public License as published by
 
6
 *  the Free Software Foundation; either version 2 of the License, or
 
7
 *  (at your option) any later version.
9
8
 *
10
9
 *  This program is distributed in the hope that it will be useful,
11
10
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18
17
 *  MA 02110-1301, USA.
19
18
 */
20
19
 
21
 
#if HAVE_CONFIG_H
22
 
#include "clamav-config.h"
23
 
#endif
24
 
 
25
 
#include <stdio.h>
26
 
#include <assert.h>
27
20
#include "clamav.h"
28
21
#include "memory.h"
29
22
#include "others.h"
31
24
#include "matcher.h"
32
25
#include "matcher-bm.h"
33
26
#include "filetypes.h"
34
 
#include "filtering.h"
35
27
 
36
 
#include "mpool.h"
 
28
/* TODO: Check prefix regularity and automatically transfer some signatures
 
29
 *       to AC
 
30
 */
37
31
 
38
32
#define BM_MIN_LENGTH   3
 
33
/* #define BM_TEST_OFFSET       5 */
39
34
#define BM_BLOCK_SIZE   3
 
35
 
40
36
#define HASH(a,b,c) (211 * a + 37 * b + c)
41
37
 
42
 
int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern, const char *offset)
 
38
 
 
39
int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern)
43
40
{
44
 
        uint16_t idx, i;
 
41
        int i;
 
42
        uint16_t idx;
45
43
        const unsigned char *pt = pattern->pattern;
46
44
        struct cli_bm_patt *prev, *next = NULL;
47
 
        int ret;
48
45
 
49
46
 
50
47
    if(pattern->length < BM_MIN_LENGTH) {
51
 
        cli_errmsg("cli_bm_addpatt: Signature for %s is too short\n", pattern->virname);
52
 
        return CL_EMALFDB;
53
 
    }
54
 
 
55
 
    if((ret = cli_caloff(offset, NULL, root->type, pattern->offdata, &pattern->offset_min, &pattern->offset_max))) {
56
 
        cli_errmsg("cli_bm_addpatt: Can't calculate offset for signature %s\n", pattern->virname);
57
 
        return ret;
58
 
    }
59
 
    if(pattern->offdata[0] != CLI_OFF_ANY) {
60
 
        if(pattern->offdata[0] == CLI_OFF_ABSOLUTE)
61
 
            root->bm_absoff_num++;
62
 
        else
63
 
            root->bm_reloff_num++;
64
 
    }
65
 
 
66
 
    /* bm_offmode doesn't use the prefilter for BM signatures anyway, so
67
 
     * don't add these to the filter. */
68
 
    if(root->filter && !root->bm_offmode) {
69
 
        /* the bm_suffix load balancing below can shorten the sig,
70
 
         * we want to see the entire signature! */
71
 
        if (filter_add_static(root->filter, pattern->pattern, pattern->length, pattern->virname) == -1) {
72
 
            cli_warnmsg("cli_bm_addpatt: cannot use filter for trie\n");
73
 
            mpool_free(root->mempool, root->filter);
74
 
            root->filter = NULL;
75
 
        }
76
 
        /* TODO: should this affect maxpatlen? */
77
 
    }
78
 
 
79
 
#if BM_MIN_LENGTH == BM_BLOCK_SIZE
80
 
    /* try to load balance bm_suffix (at the cost of bm_shift) */
81
 
    for(i = 0; i < pattern->length - BM_BLOCK_SIZE + 1; i++) {
82
 
        idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
83
 
        if(!root->bm_suffix[idx]) {
84
 
            if(i) {
85
 
                pattern->prefix = pattern->pattern;
86
 
                pattern->prefix_length = i;
87
 
                pattern->pattern = &pattern->pattern[i];
88
 
                pattern->length -= i;
89
 
                pt = pattern->pattern;
90
 
            }
91
 
            break;
92
 
        }
93
 
    }
94
 
#endif
95
 
 
96
 
    for(i = 0; i <= BM_MIN_LENGTH - BM_BLOCK_SIZE; i++) {
 
48
        cli_errmsg("Signature for %s is too short\n", pattern->virname);
 
49
        return CL_EPATSHORT;
 
50
    }
 
51
 
 
52
    for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i >= 0; i--) {
97
53
        idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
98
54
        root->bm_shift[idx] = MIN(root->bm_shift[idx], BM_MIN_LENGTH - BM_BLOCK_SIZE - i);
99
55
    }
100
56
 
 
57
    i = BM_MIN_LENGTH - BM_BLOCK_SIZE;
 
58
    idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
 
59
 
101
60
    prev = next = root->bm_suffix[idx];
 
61
 
102
62
    while(next) {
103
 
        if(pt[0] >= next->pattern0)
 
63
        if(pt[0] >= next->pattern[0])
104
64
            break;
105
65
        prev = next;
106
66
        next = next->next;
108
68
 
109
69
    if(next == root->bm_suffix[idx]) {
110
70
        pattern->next = root->bm_suffix[idx];
111
 
        if(root->bm_suffix[idx])
112
 
            pattern->cnt = root->bm_suffix[idx]->cnt;
113
71
        root->bm_suffix[idx] = pattern;
114
72
    } else {
115
73
        pattern->next = prev->next;
116
74
        prev->next = pattern;
117
75
    }
118
 
    pattern->pattern0 = pattern->pattern[0];
119
 
    root->bm_suffix[idx]->cnt++;
120
 
 
121
 
    if(root->bm_offmode) {
122
 
        root->bm_pattab = (struct cli_bm_patt **) mpool_realloc2(root->mempool, root->bm_pattab, (root->bm_patterns + 1) * sizeof(struct cli_bm_patt *));
123
 
        if(!root->bm_pattab) {
124
 
            cli_errmsg("cli_bm_addpatt: Can't allocate memory for root->bm_pattab\n");
125
 
            return CL_EMEM;
126
 
        }
127
 
        root->bm_pattab[root->bm_patterns] = pattern;
128
 
        if(pattern->offdata[0] != CLI_OFF_ABSOLUTE)
129
 
            pattern->offset_min = root->bm_patterns;
130
 
    }
131
 
 
132
 
    root->bm_patterns++;
133
 
    return CL_SUCCESS;
 
76
 
 
77
    return 0;
134
78
}
135
79
 
136
80
int cli_bm_init(struct cli_matcher *root)
137
81
{
138
 
        uint16_t i, size = HASH(255, 255, 255) + 1;
139
 
#ifdef USE_MPOOL
140
 
    assert (root->mempool && "mempool must be initialized");
141
 
#endif
142
 
 
143
 
    if(!(root->bm_shift = (uint8_t *) mpool_calloc(root->mempool, size, sizeof(uint8_t))))
 
82
        unsigned int i;
 
83
        unsigned int size = HASH(256, 256, 256);
 
84
 
 
85
 
 
86
    cli_dbgmsg("in cli_bm_init()\n");
 
87
    cli_dbgmsg("BM: Number of indexes = %d\n", size);
 
88
 
 
89
    if(!(root->bm_shift = (int *) cli_malloc(size * sizeof(int))))
144
90
        return CL_EMEM;
145
91
 
146
 
    if(!(root->bm_suffix = (struct cli_bm_patt **) mpool_calloc(root->mempool, size, sizeof(struct cli_bm_patt *)))) {
147
 
        mpool_free(root->mempool, root->bm_shift);
 
92
    if(!(root->bm_suffix = (struct cli_bm_patt **) cli_calloc(size, sizeof(struct cli_bm_patt *)))) {
 
93
        free(root->bm_shift);
148
94
        return CL_EMEM;
149
95
    }
150
96
 
151
97
    for(i = 0; i < size; i++)
152
98
        root->bm_shift[i] = BM_MIN_LENGTH - BM_BLOCK_SIZE + 1;
153
99
 
154
 
    return CL_SUCCESS;
155
 
}
156
 
 
157
 
int cli_bm_initoff(const struct cli_matcher *root, struct cli_bm_off *data, const struct cli_target_info *info)
158
 
{
159
 
        int ret;
160
 
        unsigned int i;
161
 
        struct cli_bm_patt *patt;
162
 
 
163
 
 
164
 
    if(!root->bm_patterns) {
165
 
        data->offtab = data->offset = NULL;
166
 
        data->cnt = data->pos = 0;
167
 
        return CL_SUCCESS;
168
 
    }
169
 
 
170
 
    data->cnt = data->pos = 0;
171
 
    data->offtab = (uint32_t *) cli_malloc(root->bm_patterns * sizeof(uint32_t));
172
 
    if(!data->offtab) {
173
 
        cli_errmsg("cli_bm_initoff: Can't allocate memory for data->offtab\n");
174
 
        return CL_EMEM;
175
 
    }
176
 
    data->offset = (uint32_t *) cli_malloc(root->bm_patterns * sizeof(uint32_t));
177
 
    if(!data->offset) {
178
 
        cli_errmsg("cli_bm_initoff: Can't allocate memory for data->offset\n");
179
 
        free(data->offtab);
180
 
        return CL_EMEM;
181
 
    }
182
 
    for(i = 0; i < root->bm_patterns; i++) {
183
 
        patt = root->bm_pattab[i];
184
 
        if(patt->offdata[0] == CLI_OFF_ABSOLUTE) {
185
 
            data->offtab[data->cnt] = patt->offset_min + patt->prefix_length;
186
 
            if(data->offtab[data->cnt] >= info->fsize)
187
 
                continue;
188
 
            data->cnt++;
189
 
        } else if((ret = cli_caloff(NULL, info, root->type, patt->offdata, &data->offset[patt->offset_min], NULL))) {
190
 
            cli_errmsg("cli_bm_initoff: Can't calculate relative offset in signature for %s\n", patt->virname);
191
 
            free(data->offtab);
192
 
            free(data->offset);
193
 
            return ret;
194
 
        } else if((data->offset[patt->offset_min] != CLI_OFF_NONE) && (data->offset[patt->offset_min] + patt->length <= info->fsize)) {
195
 
            if(!data->cnt || (data->offset[patt->offset_min] + patt->prefix_length != data->offtab[data->cnt - 1])) {
196
 
                data->offtab[data->cnt] = data->offset[patt->offset_min] + patt->prefix_length;
197
 
                if(data->offtab[data->cnt] >= info->fsize)
198
 
                    continue;
199
 
                data->cnt++;
200
 
            }
201
 
        }
202
 
    }
203
 
 
204
 
    cli_qsort(data->offtab, data->cnt, sizeof(uint32_t), NULL);
205
 
    return CL_SUCCESS;
206
 
}
207
 
 
208
 
void cli_bm_freeoff(struct cli_bm_off *data)
209
 
{
210
 
    free(data->offset);
211
 
    data->offset = NULL;
212
 
    free(data->offtab);
213
 
    data->offtab = NULL;
 
100
    return 0;
214
101
}
215
102
 
216
103
void cli_bm_free(struct cli_matcher *root)
217
104
{
218
 
        struct cli_bm_patt *patt, *prev;
219
 
        uint16_t i, size = HASH(255, 255, 255) + 1;
 
105
        struct cli_bm_patt *b1, *b2;
 
106
        unsigned int i;
 
107
        unsigned int size = HASH(256, 256, 256);
220
108
 
221
109
 
222
110
    if(root->bm_shift)
223
 
        mpool_free(root->mempool, root->bm_shift);
224
 
 
225
 
    if(root->bm_pattab)
226
 
        mpool_free(root->mempool, root->bm_pattab);
 
111
        free(root->bm_shift);
227
112
 
228
113
    if(root->bm_suffix) {
229
114
        for(i = 0; i < size; i++) {
230
 
            patt = root->bm_suffix[i];
231
 
            while(patt) {
232
 
                prev = patt;
233
 
                patt = patt->next;
234
 
                if(prev->prefix)
235
 
                    mpool_free(root->mempool, prev->prefix);
236
 
                else
237
 
                    mpool_free(root->mempool, prev->pattern);
238
 
                if(prev->virname)
239
 
                    mpool_free(root->mempool, prev->virname);
240
 
                mpool_free(root->mempool, prev);
 
115
            b1 = root->bm_suffix[i];
 
116
            while(b1) {
 
117
                b2 = b1;
 
118
                b1 = b1->next;
 
119
                if(b2->virname)
 
120
                    free(b2->virname);
 
121
                if(b2->offset)
 
122
                    free(b2->offset);
 
123
                if(b2->pattern)
 
124
                    free(b2->pattern);
 
125
                free(b2);
241
126
            }
242
127
        }
243
 
        mpool_free(root->mempool, root->bm_suffix);
 
128
        free(root->bm_suffix);
244
129
    }
245
130
}
246
131
 
247
 
int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata)
 
132
int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, cli_file_t ftype, int fd)
248
133
{
249
 
        uint32_t i, j, off, off_min, off_max;
250
 
        uint8_t found, pchain, shift;
251
 
        uint16_t idx, idxchk;
 
134
        unsigned int i, j, shift, off, found = 0;
 
135
        int idxtest;
 
136
        uint16_t idx;
252
137
        struct cli_bm_patt *p;
253
 
        const unsigned char *bp, *pt;
 
138
        const unsigned char *bp;
254
139
        unsigned char prefix;
255
 
        int ret;
256
 
 
257
 
    if(!root || !root->bm_shift)
 
140
        struct cli_target_info info;
 
141
 
 
142
 
 
143
    if(!root->bm_shift)
258
144
        return CL_CLEAN;
259
145
 
260
146
    if(length < BM_MIN_LENGTH)
261
147
        return CL_CLEAN;
262
148
 
263
 
    i = BM_MIN_LENGTH - BM_BLOCK_SIZE;
264
 
    if(offdata) {
265
 
        if(!offdata->cnt)
266
 
            return CL_CLEAN;
267
 
        if(offdata->pos == offdata->cnt)
268
 
            offdata->pos--;
269
 
        for(; offdata->pos && offdata->offtab[offdata->pos] > offset; offdata->pos--);
270
 
        if(offdata->offtab[offdata->pos] < offset)
271
 
            offdata->pos++;
272
 
        if(offdata->pos >= offdata->cnt)
273
 
            return CL_CLEAN;
274
 
        i += offdata->offtab[offdata->pos] - offset;
275
 
    }
276
 
    for(; i < length - BM_BLOCK_SIZE + 1; ) {
 
149
    memset(&info, 0, sizeof(info));
 
150
 
 
151
    for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i < length - BM_BLOCK_SIZE + 1; ) {
277
152
        idx = HASH(buffer[i], buffer[i + 1], buffer[i + 2]);
 
153
 
278
154
        shift = root->bm_shift[idx];
279
155
 
280
156
        if(shift == 0) {
 
157
 
281
158
            prefix = buffer[i - BM_MIN_LENGTH + BM_BLOCK_SIZE];
282
159
            p = root->bm_suffix[idx];
283
 
            if(p && p->cnt == 1 && p->pattern0 != prefix) {
284
 
                if(offdata) {
285
 
                    off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
286
 
                    for(; offdata->pos < offdata->cnt && off >= offdata->offtab[offdata->pos]; offdata->pos++);
287
 
                    if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos])
288
 
                        return CL_CLEAN;
289
 
                    i += offdata->offtab[offdata->pos] - off;
290
 
                } else {
291
 
                    i++;
292
 
                }
293
 
                continue;
294
 
            }
295
 
            pchain = 0;
296
 
            while(p) {
297
 
                if(p->pattern0 != prefix) {
298
 
                    if(pchain)
299
 
                        break;
300
 
                    p = p->next;
301
 
                    continue;
302
 
                } else pchain = 1;
303
 
 
 
160
 
 
161
            while(p && p->pattern[0] != prefix)
 
162
                p = p->next;
 
163
 
 
164
            while(p && p->pattern[0] == prefix) {
304
165
                off = i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
305
166
                bp = buffer + off;
306
167
 
307
 
                if((off + p->length > length) || (p->prefix_length > off)) {
 
168
#ifdef BM_TEST_OFFSET
 
169
                if(bp[BM_TEST_OFFSET] != p->pattern[BM_TEST_OFFSET]) {
308
170
                    p = p->next;
309
171
                    continue;
310
172
                }
311
 
 
312
 
                if(offdata) {
313
 
                    if(p->offdata[0] == CLI_OFF_ABSOLUTE) {
314
 
                        if(p->offset_min != offset + off - p->prefix_length) {
315
 
                            p = p->next;
316
 
                            continue;
317
 
                        }
318
 
                    } else if((offdata->offset[p->offset_min] == CLI_OFF_NONE) || (offdata->offset[p->offset_min] != offset + off - p->prefix_length)) {
319
 
                        p = p->next;
320
 
                        continue;
321
 
                    }
322
 
                }
323
 
 
324
 
                idxchk = MIN(p->length, length - off) - 1;
325
 
                if(idxchk) {
326
 
                    if((bp[idxchk] != p->pattern[idxchk]) ||  (bp[idxchk / 2] != p->pattern[idxchk / 2])) {
327
 
                        p = p->next;
328
 
                        continue;
329
 
                    }
330
 
                }
331
 
 
332
 
                if(p->prefix_length) {
333
 
                    off -= p->prefix_length;
334
 
                    bp -= p->prefix_length;
335
 
                    pt = p->prefix;
336
 
                } else {
337
 
                    pt = p->pattern;
 
173
#endif
 
174
 
 
175
                idxtest = MIN (p->length, length - off ) - 1;
 
176
                if(idxtest >= 0) {
 
177
                    if(bp[idxtest] != p->pattern[idxtest]) {
 
178
                        p = p->next;
 
179
                        continue;
 
180
                    }
338
181
                }
339
182
 
340
183
                found = 1;
341
 
                for(j = 0; j < p->length + p->prefix_length && off < length; j++, off++) {
342
 
                    if(bp[j] != pt[j]) {
 
184
                for(j = 0; j < p->length && off < length; j++, off++) {
 
185
                    if(bp[j] != p->pattern[j]) {
343
186
                        found = 0;
344
187
                        break;
345
188
                    }
346
189
                }
347
190
 
348
 
                if(found && (p->boundary & BM_BOUNDARY_EOL)) {
349
 
                    if(off != length) {
350
 
                        p = p->next;
351
 
                        continue;
352
 
                    }
353
 
                }
354
 
 
355
 
                if(found && p->length + p->prefix_length == j) {
356
 
                    if(!offdata && (p->offset_min != CLI_OFF_ANY)) {
357
 
                        if(p->offdata[0] != CLI_OFF_ABSOLUTE) {
358
 
                            if(!info) {
359
 
                                p = p->next;
360
 
                                continue;
361
 
                            }
362
 
                            ret = cli_caloff(NULL, info, root->type, p->offdata, &off_min, &off_max);
363
 
                            if(ret != CL_SUCCESS) {
364
 
                                cli_errmsg("cli_bm_scanbuff: Can't calculate relative offset in signature for %s\n", p->virname);
365
 
                                return ret;
366
 
                            }
367
 
                        } else {
368
 
                            off_min = p->offset_min;
369
 
                            off_max = p->offset_max;
370
 
                        }
371
 
                        off = offset + i - p->prefix_length - BM_MIN_LENGTH + BM_BLOCK_SIZE;
372
 
                        if(off_min == CLI_OFF_NONE || off_max < off || off_min > off) {
 
191
                if(found && p->length == j) {
 
192
 
 
193
                    if(p->target || p->offset) {
 
194
                        off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
 
195
 
 
196
                        if((fd == -1 && !ftype) || !cli_validatesig(ftype, p->offset, off, &info, fd, p->virname)) {
373
197
                            p = p->next;
374
198
                            continue;
375
199
                        }
376
200
                    }
 
201
 
377
202
                    if(virname)
378
203
                        *virname = p->virname;
379
 
                    if(patt)
380
 
                        *patt = p;
 
204
 
 
205
                    if(info.exeinfo.section)
 
206
                        free(info.exeinfo.section);
 
207
 
381
208
                    return CL_VIRUS;
382
209
                }
 
210
 
383
211
                p = p->next;
384
212
            }
 
213
 
385
214
            shift = 1;
386
215
        }
387
216
 
388
 
        if(offdata) {
389
 
            off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
390
 
            for(; offdata->pos < offdata->cnt && off >= offdata->offtab[offdata->pos]; offdata->pos++);
391
 
            if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos])
392
 
                return CL_CLEAN;
393
 
            i += offdata->offtab[offdata->pos] - off;
394
 
        } else {
395
 
            i += shift;
396
 
        }
397
 
 
 
217
        i += shift;
398
218
    }
399
219
 
 
220
    if(info.exeinfo.section)
 
221
        free(info.exeinfo.section);
 
222
 
400
223
    return CL_CLEAN;
401
224
}