~ubuntu-branches/ubuntu/raring/clamav/raring

« back to all changes in this revision

Viewing changes to libclamav/regex_suffix.c

  • Committer: Bazaar Package Importer
  • Author(s): Stephen Gran
  • Date: 2008-09-05 17:25:34 UTC
  • mfrom: (0.35.1 lenny)
  • Revision ID: james.westby@ubuntu.com-20080905172534-yi3f8fkye1o7u1r3
* New upstream version (closes: #497662, #497773)
  - lots of new options for clamd.conf
  - fixes CVEs CVE-2008-3912, CVE-2008-3913, CVE-2008-3914, and
    CVE-2008-1389
* No longer supports --unzip option, so typo is gone (closes: #496276)
* Translations:
  - sv (thanks Martin Bagge <brother@bsnet.se>) (closes: #491760)

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 *  Parse a regular expression, and extract a static suffix.
 
3
 *
 
4
 *  Copyright (C) 2007-2008 Sourcefire, Inc.
 
5
 *
 
6
 *  Authors: Török Edvin
 
7
 *
 
8
 *  This program is free software; you can redistribute it and/or modify
 
9
 *  it under the terms of the GNU General Public License version 2 as
 
10
 *  published by the Free Software Foundation.
 
11
 *
 
12
 *  This program is distributed in the hope that it will be useful,
 
13
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
14
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
15
 *  GNU General Public License for more details.
 
16
 *
 
17
 *  You should have received a copy of the GNU General Public License
 
18
 *  along with this program; if not, write to the Free Software
 
19
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 
20
 *  MA 02110-1301, USA.
 
21
 */
 
22
#if HAVE_CONFIG_H
 
23
#include "clamav-config.h"
 
24
#endif
 
25
 
 
26
#ifndef CL_DEBUG
 
27
#define NDEBUG
 
28
#endif
 
29
 
 
30
#include <stdio.h>
 
31
#include <stdlib.h>
 
32
#include <string.h>
 
33
#include <assert.h>
 
34
 
 
35
#include "others.h"
 
36
#include "jsparse/textbuf.h"
 
37
#include "regex_suffix.h"
 
38
#define MODULE "regex_suffix: "
 
39
 
 
40
 
 
41
enum node_type {
 
42
        root=0,
 
43
        concat,
 
44
        alternate, /* | */
 
45
        optional,/* ?, * */
 
46
        leaf, /* a character */
 
47
        leaf_class /* character class */
 
48
        /* (x)+ is transformed into (x)*(x) */
 
49
};
 
50
 
 
51
struct node {
 
52
        enum node_type type;
 
53
        struct node *parent;
 
54
        union {
 
55
                struct {
 
56
                        struct node* left;
 
57
                        struct node* right;
 
58
                } children;
 
59
                uint8_t*    leaf_class_bitmap;
 
60
                uint8_t     leaf_char;
 
61
        } u;
 
62
};
 
63
 
 
64
/* --- Prototypes --*/
 
65
static int build_suffixtree_descend(struct node *n, struct text_buffer *buf, suffix_callback cb, void *cbdata, struct regex_list *regex);
 
66
/* -----------------*/
 
67
 
 
68
static uint8_t dot_bitmap[32] = {
 
69
        0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
 
70
        0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
 
71
        0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
 
72
        0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
 
73
};
 
74
 
 
75
static struct node* make_node(enum node_type type, struct node *left, struct node *right)
 
76
{
 
77
        struct node *n;
 
78
        if(type == concat) {
 
79
                if(left == NULL)
 
80
                        return right;
 
81
                if(right == NULL)
 
82
                        return left;
 
83
        }
 
84
        n = cli_malloc(sizeof(*n));
 
85
        if(!n)
 
86
                return NULL;
 
87
        n->type = type;
 
88
        n->parent = NULL;
 
89
        n->u.children.left = left;
 
90
        n->u.children.right = right;
 
91
        if(left)
 
92
                left->parent = n;
 
93
        if(right)
 
94
                right->parent = n;
 
95
        return n;
 
96
}
 
97
 
 
98
static struct node *dup_node(struct node *p)
 
99
{
 
100
        struct node *node_left, *node_right;
 
101
        struct node *d;
 
102
 
 
103
        if(!p)
 
104
                return NULL;
 
105
        d = cli_malloc(sizeof(*d));
 
106
        if(!d)
 
107
                return NULL;
 
108
        d->type = p->type;
 
109
        d->parent = NULL;
 
110
        switch(p->type) {
 
111
                case leaf:
 
112
                        d->u.leaf_char = p->u.leaf_char;
 
113
                        break;
 
114
                case leaf_class:
 
115
                        d->u.leaf_class_bitmap = cli_malloc(32);
 
116
                        if(!d->u.leaf_class_bitmap)
 
117
                                return NULL;
 
118
                        memcpy(d->u.leaf_class_bitmap, p->u.leaf_class_bitmap, 32);
 
119
                        break;
 
120
                default:
 
121
                        node_left = dup_node(p->u.children.left);
 
122
                        node_right = dup_node(p->u.children.right);
 
123
                        d->u.children.left = node_left;
 
124
                        d->u.children.right = node_right;
 
125
                        if(node_left)
 
126
                                node_left->parent = d;
 
127
                        if(node_right)
 
128
                                node_right->parent = d;
 
129
                        break;
 
130
        }
 
131
        return d;
 
132
}
 
133
 
 
134
static struct node *make_charclass(uint8_t *bitmap)
 
135
{
 
136
        struct node *v = cli_malloc(sizeof(*v));
 
137
        if(!v)
 
138
                return NULL;
 
139
        v->type = leaf_class;
 
140
        v->parent = NULL;
 
141
        v->u.leaf_class_bitmap = bitmap;
 
142
        return v;
 
143
}
 
144
 
 
145
static struct node *make_leaf(char c)
 
146
{
 
147
        struct node *v = cli_malloc(sizeof(*v));
 
148
        if(!v)
 
149
                return NULL;
 
150
        v->type = leaf;
 
151
        v->parent = NULL;
 
152
        v->u.leaf_char = c;
 
153
        return v;
 
154
}
 
155
 
 
156
static void destroy_tree(struct node *n)
 
157
{
 
158
        if(!n)
 
159
                return;
 
160
        switch(n->type) {
 
161
                case concat:
 
162
                case alternate:
 
163
                case optional:
 
164
                        destroy_tree(n->u.children.left);
 
165
                        destroy_tree(n->u.children.right);
 
166
                        break;
 
167
                case leaf_class:
 
168
                        if(n->u.leaf_class_bitmap != dot_bitmap)
 
169
                          free(n->u.leaf_class_bitmap);
 
170
                        break;
 
171
                case root:
 
172
                case leaf:
 
173
                        break;
 
174
        }
 
175
        free(n);
 
176
}
 
177
 
 
178
static uint8_t* parse_char_class(const char *pat, size_t *pos)
 
179
{
 
180
        unsigned char range_start=0;
 
181
        int hasprev = 0;
 
182
        uint8_t* bitmap = cli_malloc(32);
 
183
        if(!bitmap)
 
184
                return NULL;
 
185
        if (pat[*pos]=='^') {
 
186
                memset(bitmap,0xFF,32);/*match chars not in brackets*/
 
187
                ++*pos;
 
188
        }
 
189
        else
 
190
                memset(bitmap,0x00,32);
 
191
        do {
 
192
                /* literal ] can be first character, so test for it at the end of the loop, for example: []] */
 
193
                if (pat[*pos]=='-' && hasprev) {
 
194
                        /* it is a range*/
 
195
                        unsigned char range_end;
 
196
                        unsigned int c;
 
197
                        assert(range_start);
 
198
                        ++*pos;
 
199
                        if (pat[*pos]=='[')
 
200
                                if (pat[*pos+1]=='.') {
 
201
                                        /* collating sequence not handled */
 
202
                                        free(bitmap);
 
203
                                        /* we are parsing the regex for a
 
204
                                         * filter, be conservative and
 
205
                                         * tell the filter that anything could
 
206
                                         * match here */
 
207
                                        while(pat[*pos] != ']') ++*pos;
 
208
                                        ++*pos;
 
209
                                        while(pat[*pos] != ']') ++*pos;
 
210
                                        return dot_bitmap;
 
211
                                }
 
212
                                else
 
213
                                        range_end = pat[*pos];
 
214
                        else
 
215
                                range_end = pat[*pos];
 
216
                        for(c=range_start+1;c<=range_end;c++)
 
217
                                bitmap[c>>3] ^= 1<<(c&0x7);
 
218
                        hasprev = 0;
 
219
                }
 
220
                else if (pat[*pos]=='[' && pat[*pos]==':') {
 
221
                        /* char class */
 
222
                        free(bitmap);
 
223
                        while(pat[*pos] != ']') ++*pos;
 
224
                        ++*pos;
 
225
                        while(pat[*pos] != ']') ++*pos;
 
226
                        return dot_bitmap;
 
227
                } else {
 
228
                        bitmap[pat[*pos]>>3] ^= 1<<(pat[*pos]&0x7);
 
229
                        range_start = pat[*pos];
 
230
                        ++*pos;
 
231
                        hasprev = 1;
 
232
                }
 
233
        } while(pat[*pos]!=']');
 
234
        return bitmap;
 
235
}
 
236
 
 
237
static struct node* parse_regex(const char *p, size_t *last)
 
238
{
 
239
        struct node *v = NULL;
 
240
        struct node *right;
 
241
        struct node *tmp;
 
242
 
 
243
        while(p[*last] != '$' && p[*last] != '\0') {
 
244
                switch(p[*last]) {
 
245
                        case '|':
 
246
                                ++*last;
 
247
                                right = parse_regex(p, last);
 
248
                                v = make_node(alternate, v, right);
 
249
                                if(!v)
 
250
                                        return NULL;
 
251
                                break;
 
252
                        case '*':
 
253
                        case '?':
 
254
                                v = make_node(optional, v, NULL);
 
255
                                if(!v)
 
256
                                        return NULL;
 
257
                                ++*last;
 
258
                                break;
 
259
                        case '+':
 
260
                                /* (x)* */
 
261
                                tmp = make_node(optional, v, NULL);
 
262
                                if(!tmp)
 
263
                                        return NULL;
 
264
                                /* (x) */
 
265
                                right = dup_node(v);
 
266
                                if(!right)
 
267
                                        return NULL;
 
268
                                /* (x)*(x) => (x)+ */
 
269
                                v = make_node(concat, tmp, right);
 
270
                                if(!v)
 
271
                                        return NULL;
 
272
                                ++*last;
 
273
                                break;
 
274
                        case '(':
 
275
                                ++*last;
 
276
                                right = parse_regex(p, last);
 
277
                                if(!right)
 
278
                                        return NULL;
 
279
                                ++*last;
 
280
                                v = make_node(concat, v, right);
 
281
                                break;
 
282
                        case ')':
 
283
                                return v;
 
284
                        case '.':
 
285
                                right = make_charclass(dot_bitmap);
 
286
                                if(!right)
 
287
                                        return NULL;
 
288
                                v = make_node(concat, v, right);
 
289
                                if(!v)
 
290
                                        return NULL;
 
291
                                ++*last;
 
292
                                break;
 
293
                        case '[':
 
294
                                ++*last;
 
295
                                right = make_charclass( parse_char_class(p, last) );
 
296
                                if(!right)
 
297
                                        return NULL;
 
298
                                v = make_node(concat, v, right);
 
299
                                if(!v)
 
300
                                        return NULL;
 
301
                                ++*last;
 
302
                                break;
 
303
                        case '\\':
 
304
                                /* next char is escaped, advance pointer
 
305
                                 * and let fall-through handle it */
 
306
                                ++*last;
 
307
                        default:
 
308
                                right = make_leaf(p[*last]);
 
309
                                v = make_node(concat, v, right);
 
310
                                if(!v)
 
311
                                        return NULL;
 
312
                                ++*last;
 
313
                                break;
 
314
                }
 
315
        }
 
316
        return v;
 
317
}
 
318
 
 
319
#define BITMAP_HASSET(b, i) (b[i>>3] & (1<<(i&7)))
 
320
 
 
321
static int build_suffixtree_ascend(struct node *n, struct text_buffer *buf, struct node *prev, suffix_callback cb, void *cbdata, struct regex_list *regex)
 
322
{
 
323
        size_t i, cnt;
 
324
        while(n) {
 
325
                struct node *q = n;
 
326
                switch(n->type) {
 
327
                        case root:
 
328
                                textbuffer_putc(buf, '\0');
 
329
                                if(cb(cbdata, buf->data, buf->pos-1, regex) < 0)
 
330
                                        return CL_EMEM;
 
331
                                return 0;
 
332
                        case leaf:
 
333
                                textbuffer_putc(buf, n->u.leaf_char);
 
334
                                n = n->parent;
 
335
                                break;
 
336
                        case leaf_class:
 
337
                                cnt = 0;
 
338
                                for(i=0;i<255;i++)
 
339
                                        if (BITMAP_HASSET(n->u.leaf_class_bitmap, i))
 
340
                                                cnt++;
 
341
                                if (cnt > 16) {
 
342
                                        textbuffer_putc(buf, '\0');
 
343
                                        if(cb(cbdata, buf->data, buf->pos-1, regex) < 0)
 
344
                                                return CL_EMEM;
 
345
                                        return 0;
 
346
                                }
 
347
                                /* handle small classes by expanding */
 
348
                                for(i=0;i<255;i++) {
 
349
                                        if(BITMAP_HASSET(n->u.leaf_class_bitmap, i)) {
 
350
                                                size_t pos;
 
351
                                                pos = buf->pos;
 
352
                                                textbuffer_putc(buf, i);
 
353
                                                if(build_suffixtree_ascend(n->parent, buf, n, cb, cbdata, regex) < 0)
 
354
                                                        return CL_EMEM;
 
355
                                                buf->pos = pos;
 
356
                                        }
 
357
                                }
 
358
                                return 0;
 
359
                        case concat:
 
360
                                if(prev != n->u.children.left) {
 
361
                                        if(build_suffixtree_descend(n->u.children.left, buf, cb, cbdata, regex) < 0)
 
362
                                                return CL_EMEM;
 
363
                                        /* we're done here, descend will call
 
364
                                         * ascend if needed */
 
365
                                        return 0;
 
366
                                } else {
 
367
                                        n = n->parent;
 
368
                                }
 
369
                                break;
 
370
                        case alternate:
 
371
                                n = n->parent;
 
372
                                break;
 
373
                        case optional:
 
374
                                textbuffer_putc(buf, '\0');
 
375
                                if(cb(cbdata, buf->data, buf->pos-1, regex) < 0)
 
376
                                        return CL_EMEM;
 
377
                                return 0;
 
378
                }
 
379
                prev = q;
 
380
        }
 
381
        return 0;
 
382
}
 
383
 
 
384
static int build_suffixtree_descend(struct node *n, struct text_buffer *buf, suffix_callback cb, void *cbdata, struct regex_list *regex)
 
385
{
 
386
        size_t pos;
 
387
        while(n && n->type == concat) {
 
388
                n = n->u.children.right;
 
389
        }
 
390
        if(!n)
 
391
                return 0;
 
392
        /* find out end of the regular expression,
 
393
         * if it ends with a static pattern */
 
394
        switch(n->type) {
 
395
                case alternate:
 
396
                        /* save pos as restart point */
 
397
                        pos = buf->pos;
 
398
                        if(build_suffixtree_descend(n->u.children.left, buf, cb, cbdata, regex) < 0)
 
399
                                return CL_EMEM;
 
400
                        buf->pos = pos;
 
401
                        if(build_suffixtree_descend(n->u.children.right, buf, cb, cbdata, regex) < 0)
 
402
                                return CL_EMEM;
 
403
                        buf->pos = pos;
 
404
                        break;
 
405
                case optional:
 
406
                        textbuffer_putc(buf, '\0');
 
407
                        if(cb(cbdata, buf->data, buf->pos-1, regex) < 0)
 
408
                                return CL_EMEM;
 
409
                        return 0;
 
410
                case leaf:
 
411
                case leaf_class:
 
412
                        if(build_suffixtree_ascend(n, buf, NULL, cb, cbdata, regex) < 0)
 
413
                                return CL_EMEM;
 
414
                        return 0;
 
415
                default:
 
416
                        break;
 
417
        }
 
418
        return 0;
 
419
}
 
420
 
 
421
int cli_regex2suffix(const char *pattern, regex_t *preg, suffix_callback cb, void *cbdata)
 
422
{
 
423
        struct regex_list regex;
 
424
        struct text_buffer buf;
 
425
        struct node root_node;
 
426
        struct node *n;
 
427
        size_t last = 0;
 
428
        int rc;
 
429
 
 
430
        assert(pattern);
 
431
 
 
432
        regex.preg = preg;
 
433
        rc = cli_regcomp(regex.preg, pattern, REG_EXTENDED);
 
434
        if(rc) {
 
435
                size_t buflen = cli_regerror(rc, regex.preg, NULL, 0);
 
436
                char *errbuf = cli_malloc(buflen);
 
437
                if(errbuf) {
 
438
                        cli_regerror(rc, regex.preg, errbuf, buflen);
 
439
                        cli_errmsg(MODULE "Error compiling regular expression %s: %s\n", pattern, errbuf);
 
440
                        free(errbuf);
 
441
                } else {
 
442
                        cli_errmsg(MODULE "Error compiling regular expression: %s\n", pattern);
 
443
                }
 
444
                return rc;
 
445
        }
 
446
        regex.nxt = NULL;
 
447
        regex.pattern = cli_strdup(pattern);
 
448
 
 
449
        n = parse_regex(pattern, &last);
 
450
        if(!n)
 
451
                return REG_ESPACE;
 
452
        memset(&buf, 0, sizeof(buf));
 
453
        memset(&root_node, 0, sizeof(buf));
 
454
        n->parent = &root_node;
 
455
 
 
456
        rc = build_suffixtree_descend(n, &buf, cb, cbdata, &regex);
 
457
        free(regex.pattern);
 
458
        free(buf.data);
 
459
        destroy_tree(n);
 
460
        return rc;
 
461
}
 
462
 
 
463