/*
 * Copyright (C) 2010 Sourcefire, Inc.
 *
 * Authors: aCaB <acab@clamav.net>, Török Edvin <edwin@clamav.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301, USA.
 */
22
#include "clamav-config.h"
37
/* The number of root trees and the chooser function
38
Each tree is protected by a mutex against concurrent access */
40
/* static inline unsigned int getkey(uint8_t *hash) { return 0; } */
42
/* Picks which root tree a digest belongs to: keyed by its first byte. */
static inline unsigned int getkey(uint8_t *hash)
{
    return (unsigned int)hash[0];
}
43
/* #define TREES 4096 */
44
/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | ((unsigned int)(hash[1] & 0xf)<<8) ; } */
45
/* #define TREES 65536 */
46
/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | (((unsigned int)hash[1])<<8) ; } */
48
/* The number of nodes in each tree */
52
/* The replacement policy algorithm to use */
53
/* #define USE_LRUHASHCACHE */
56
/* LRUHASHCACHE --------------------------------------------------------------------- */
57
#ifdef USE_LRUHASHCACHE
60
uint32_t size; /* 0 is used to mark an empty hash slot! */
61
struct cache_key *lru_next, *lru_prev;
65
struct cache_key *data;
66
size_t maxelements; /* considering load factor */
70
struct cache_key *lru_head, *lru_tail;
73
#define CACHE_KEY_DELETED ~0u
74
#define CACHE_KEY_EMPTY 0
76
static void cacheset_lru_remove(struct cache_set *map, size_t howmany)
79
struct cache_key *old;
80
assert(map->lru_head);
81
assert(!old->lru_prev);
82
/* Remove a key from the head of the list */
84
map->lru_head = old->lru_next;
85
old->size = CACHE_KEY_DELETED;
86
/* This slot is now deleted, it is not empty,
87
* because previously we could have inserted a key that has seen this
88
* slot as occupied, to find that key we need to ensure that all keys
89
* that were occupied when the key was inserted, are seen as occupied
91
* Of course when inserting a new value, we treat deleted slots as
93
* We only replace old values with new values, but there is no guarantee
94
* that the newly inserted value would hash to same place as the value
95
* we remove due to LRU! */
96
if (old == map->lru_tail)
103
static inline int cacheset_lookup_internal(struct cache_set *map,
104
const char *md5, size_t size,
105
uint32_t *insert_pos, int deletedok)
107
const struct cache_key*data = map->data;
108
uint32_t capmask = NODES - 1;
109
const struct cache_key *k;
110
uint32_t idx, tries = 0;
111
uint64_t md5_0, md5_1;
114
memcpy(&md5a, md5, 16);
117
idx = md5_1 & capmask;
119
while (k->size != CACHE_KEY_EMPTY && tries <= capmask) {
120
if (k->digest[0] == md5_0 &&
121
k->digest[1] == md5_1 &&
127
if (deletedok && k->size == CACHE_KEY_DELETED) {
128
/* treat deleted slot as empty */
132
idx = (idx + tries++) & capmask;
135
/* found empty pos */
140
static inline void lru_remove(struct cache_set *map, struct cache_key *newkey)
142
if (newkey->lru_next)
143
newkey->lru_next->lru_prev = newkey->lru_prev;
144
if (newkey->lru_prev)
145
newkey->lru_prev->lru_next = newkey->lru_next;
146
if (newkey == map->lru_head)
147
map->lru_head = newkey->lru_next;
150
static inline void lru_addtail(struct cache_set *map, struct cache_key *newkey)
153
map->lru_head = newkey;
155
map->lru_tail->lru_next = newkey;
156
newkey->lru_next = NULL;
157
newkey->lru_prev = map->lru_tail;
158
map->lru_tail = newkey;
161
static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER;
163
static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size, mpool_t *mempool);
164
static int cacheset_init(struct cache_set *map, mpool_t *mempool);
166
static void cacheset_rehash(struct cache_set *map, mpool_t *mempool)
170
struct cache_set tmp_set;
171
struct cache_key *key;
172
pthread_mutex_lock(&pool_mutex);
173
ret = cacheset_init(&tmp_set, mempool);
174
pthread_mutex_unlock(&pool_mutex);
179
for (i=0;key && i < tmp_set.maxelements/2;i++) {
180
cacheset_add(&tmp_set, (unsigned char*)&key->digest, key->size, mempool);
183
pthread_mutex_lock(&pool_mutex);
184
mpool_free(mempool, map->data);
185
pthread_mutex_unlock(&pool_mutex);
186
memcpy(map, &tmp_set, sizeof(tmp_set));
189
static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size, mpool_t *mempool)
193
struct cache_key *newkey;
195
if (map->elements >= map->maxelements) {
196
cacheset_lru_remove(map, 1);
197
if (map->deleted >= map->maxdeleted) {
198
cacheset_rehash(map, mempool);
201
assert(map->elements < map->maxelements);
203
ret = cacheset_lookup_internal(map, md5, size, &pos, 1);
204
newkey = &map->data[pos];
205
if (newkey->size == CACHE_KEY_DELETED)
208
/* was already added, remove from LRU list */
209
lru_remove(map, newkey);
211
/* add new key to tail of LRU list */
212
memcpy(&map->data[pos].digest, md5, sizeof(map->data[pos].digest));
213
map->data[pos].size = size;
214
lru_addtail(map, newkey);
218
assert(pos < map->maxelements);
221
static int cacheset_lookup(struct cache_set *map, unsigned char *md5, size_t size)
223
struct cache_key *newkey;
227
ret = cacheset_lookup_internal(map, md5, size, &pos, 0);
230
newkey = &map->data[pos];
231
/* update LRU position: move to tail */
232
lru_remove(map, newkey);
233
lru_addtail(map, newkey);
237
static int cacheset_init(struct cache_set *map, mpool_t *mempool) {
238
map->data = mpool_calloc(mempool, NODES, sizeof(*map->data));
241
map->maxelements = 80 * NODES / 100;
242
map->maxdeleted = NODES - map->maxelements - 1;
244
map->lru_head = map->lru_tail = NULL;
248
static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool) {
249
mpool_free(mempool, cs->data);
253
#endif /* USE_LRUHASHCACHE */
255
/* SPLAY --------------------------------------------------------------------- */
258
struct node { /* a node */
269
struct cache_set { /* a tree */
276
/* Allocates all the nodes and sets up the replacement chain */
277
static int cacheset_init(struct cache_set *cs, mpool_t *mempool) {
279
cs->data = mpool_calloc(mempool, NODES, sizeof(*cs->data));
285
for(i=1; i<NODES; i++) {
286
cs->data[i-1].next = &cs->data[i];
287
cs->data[i].prev = &cs->data[i-1];
290
cs->first = cs->data;
291
cs->last = &cs->data[NODES-1];
296
/* Frees all the nodes */
297
static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool) {
298
mpool_free(mempool, cs->data);
302
/* The left/right cooser for the splay tree */
303
/* Total-order comparator for (digest, size) pairs used by the splay tree.
 * Compares the high 8 bytes of the md5 first, then the low 8 bytes, then
 * the file size; returns <0, 0 or >0 like strcmp.
 * NOTE(review): the trailing "return 0;" and closing brace were missing in
 * this copy (extraction truncation); restored — they are forced by the six
 * early returns above (equal keys must compare equal). */
static inline int cmp(int64_t *a, ssize_t sa, int64_t *b, ssize_t sb) {
    if(a[1] < b[1]) return -1;
    if(a[1] > b[1]) return 1;
    if(a[0] < b[0]) return -1;
    if(a[0] > b[0]) return 1;
    if(sa < sb) return -1;
    if(sa > sb) return 1;
    return 0;
}
314
/* #define PRINT_TREE */
321
/* Debug function to print the tree and check its consistency */
322
/* #define CHECK_TREE */
324
static int printtree(struct cache_set *cs, struct node *n, int d) {
327
if (n == NULL) return 0;
328
if(n == cs->root) { ptree("--------------------------\n"); }
329
ab |= printtree(cs, n->right, d+1);
331
if(cmp(n->digest, n->size, n->right->digest, n->right->size) >= 0) {
332
for (i=0; i<d; i++) ptree(" ");
333
ptree("^^^^ %lld >= %lld\n", n->digest[1], n->right->digest[1]);
337
for (i=0; i<d; i++) ptree(" ");
338
ptree("%08x(%02u)\n", n->digest[1]>>48, n - cs->data);
340
if(cmp(n->digest, n->size, n->left->digest, n->left->size) <= 0) {
341
for (i=0; i<d; i++) ptree(" ");
342
ptree("vvvv %lld <= %lld\n", n->digest[1], n->left->digest[1]);
348
ptree("no parent!\n");
351
if(n->up->left != n && n->up->right != n) {
352
ptree("broken parent\n");
358
ptree("root with a parent!\n");
362
ab |= printtree(cs, n->left, d+1);
366
#define printtree(a,b,c) (0)
369
/* Looks up a node and splays it up to the root of the tree */
370
static int splay(int64_t *md5, size_t len, struct cache_set *cs) {
371
struct node next = {{0, 0}, NULL, NULL, NULL, NULL, NULL, 0, 0}, *right = &next, *left = &next, *temp, *root = cs->root;
378
comp = cmp(md5, len, root->digest, root->size);
380
if(!root->left) break;
381
if(cmp(md5, len, root->left->digest, root->left->size) < 0) {
383
root->left = temp->right;
384
if(temp->right) temp->right->up = root;
388
if(!root->left) break;
394
} else if(comp > 0) {
395
if(!root->right) break;
396
if(cmp(md5, len, root->right->digest, root->right->size) > 0) {
398
root->right = temp->left;
399
if(temp->left) temp->left->up = root;
403
if(!root->right) break;
415
left->right = root->left;
416
if(root->left) root->left->up = left;
417
right->left = root->right;
418
if(root->right) root->right->up = right;
419
root->left = next.right;
420
if(next.right) next.right->up = root;
421
root->right = next.left;
422
if(next.left) next.left->up = root;
429
/* Looks up an hash in the tree and maintains the replacement chain */
430
static inline int cacheset_lookup(struct cache_set *cs, unsigned char *md5, size_t size, uint32_t reclevel) {
433
memcpy(hash, md5, 16);
434
if(splay(hash, size, cs)) {
435
struct node *o = cs->root->prev, *p = cs->root, *q = cs->root->next;
437
printf("promoting %02d\n", p - cs->data);
439
struct node *x = cs->first;
442
printf("%02d,", x - cs->data);
448
printf("%02d,", x - cs->data);
467
struct node *x = cs->first;
470
printf("%02d,", x - cs->data);
476
printf("%02d,", x - cs->data);
482
if(reclevel >= p->minrec)
488
/* If the hash is present nothing happens.
489
Otherwise a new node is created for the hash picking one from the begin of the chain.
490
Used nodes are moved to the end of the chain */
491
static inline void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size, uint32_t reclevel) {
492
struct node *newnode;
495
memcpy(hash, md5, 16);
496
if(splay(hash, size, cs)) {
497
if(cs->root->minrec > reclevel)
498
cs->root->minrec = reclevel;
499
return; /* Already there */
503
if(printtree(cs, cs->root, 0)) {
504
cli_errmsg("cacheset_add: inconsistent tree before choosing newnode, good luck\n");
510
if(!newnode->right && !newnode->left)
512
newnode = newnode->next;
515
cli_errmsg("cacheset_add: tree has got no end nodes\n");
519
if(newnode->up->left == newnode)
520
newnode->up->left = NULL;
522
newnode->up->right = NULL;
525
newnode->prev->next = newnode->next;
527
newnode->next->prev = newnode->prev;
528
if(cs->first == newnode)
529
cs->first = newnode->next;
531
newnode->prev = cs->last;
532
newnode->next = NULL;
533
cs->last->next = newnode;
537
if(printtree(cs, cs->root, 0)) {
538
cli_errmsg("cacheset_add: inconsistent tree before adding newnode, good luck\n");
543
newnode->left = NULL;
544
newnode->right = NULL;
546
if(cmp(hash, size, cs->root->digest, cs->root->size) < 0) {
547
newnode->left = cs->root->left;
548
newnode->right = cs->root;
549
cs->root->left = NULL;
551
newnode->right = cs->root->right;
552
newnode->left = cs->root;
553
cs->root->right = NULL;
555
if(newnode->left) newnode->left->up = newnode;
556
if(newnode->right) newnode->right->up = newnode;
558
newnode->digest[0] = hash[0];
559
newnode->digest[1] = hash[1];
561
newnode->size = size;
562
newnode->minrec = reclevel;
565
ptree("3: %lld\n", hash[1]);
566
if(printtree(cs, cs->root, 0)) {
567
cli_errmsg("cacheset_add: inconsistent tree after adding newnode, good luck\n");
571
#endif /* USE_SPLAY */
574
/* COMMON STUFF --------------------------------------------------------------------- */
577
struct cache_set cacheset;
578
pthread_mutex_t mutex;
581
/* Allocates the trees for the engine cache */
582
int cli_cache_init(struct cl_engine *engine) {
583
static struct CACHE *cache;
587
cli_errmsg("cli_cache_init: mpool malloc fail\n");
591
if(!(cache = mpool_malloc(engine->mempool, sizeof(struct CACHE) * TREES))) {
592
cli_errmsg("cli_cache_init: mpool malloc fail\n");
596
for(i=0; i<TREES; i++) {
597
if(pthread_mutex_init(&cache[i].mutex, NULL)) {
598
cli_errmsg("cli_cache_init: mutex init fail\n");
599
for(j=0; j<i; j++) cacheset_destroy(&cache[j].cacheset, engine->mempool);
600
for(j=0; j<i; j++) pthread_mutex_destroy(&cache[j].mutex);
601
mpool_free(engine->mempool, cache);
604
if(cacheset_init(&cache[i].cacheset, engine->mempool)) {
605
for(j=0; j<i; j++) cacheset_destroy(&cache[j].cacheset, engine->mempool);
606
for(j=0; j<=i; j++) pthread_mutex_destroy(&cache[j].mutex);
607
mpool_free(engine->mempool, cache);
611
engine->cache = cache;
615
/* Frees the engine cache */
616
void cli_cache_destroy(struct cl_engine *engine) {
617
static struct CACHE *cache;
620
if(!engine || !(cache = engine->cache))
623
for(i=0; i<TREES; i++) {
624
cacheset_destroy(&cache[i].cacheset, engine->mempool);
625
pthread_mutex_destroy(&cache[i].mutex);
627
mpool_free(engine->mempool, cache);
630
/* Looks up an hash in the proper tree */
631
static int cache_lookup_hash(unsigned char *md5, size_t len, struct CACHE *cache, uint32_t reclevel) {
632
unsigned int key = getkey(md5);
637
if(pthread_mutex_lock(&c->mutex)) {
638
cli_errmsg("cache_lookup_hash: cache_lookup_hash: mutex lock fail\n");
642
ret = (cacheset_lookup(&c->cacheset, md5, len, reclevel)) ? CL_CLEAN : CL_VIRUS;
643
pthread_mutex_unlock(&c->mutex);
644
/* if(ret == CL_CLEAN) cli_warnmsg("cached\n"); */
648
/* Adds an hash to the cache */
649
void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx) {
650
unsigned int key = getkey(md5);
654
if(!ctx || !ctx->engine || !ctx->engine->cache)
657
level = (*ctx->fmap && (*ctx->fmap)->dont_cache_flag) ? ctx->recursion : 0;
658
if (ctx->found_possibly_unwanted && (level || !ctx->recursion))
660
c = &ctx->engine->cache[key];
661
if(pthread_mutex_lock(&c->mutex)) {
662
cli_errmsg("cli_add: mutex lock fail\n");
666
#ifdef USE_LRUHASHCACHE
667
cacheset_add(&c->cacheset, md5, size, ctx->engine->mempool);
670
cacheset_add(&c->cacheset, md5, size, level);
672
#error #define USE_SPLAY or USE_LRUHASHCACHE
676
pthread_mutex_unlock(&c->mutex);
677
cli_dbgmsg("cache_add: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x (level %u)\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15], level);
681
/* Hashes a file onto the provided buffer and looks it up the cache.
682
Returns CL_VIRUS if found, CL_CLEAN if not FIXME or an error */
683
int cache_check(unsigned char *hash, cli_ctx *ctx) {
689
if(!ctx || !ctx->engine || !ctx->engine->cache)
698
size_t readme = todo < FILEBUFF ? todo : FILEBUFF;
699
if(!(buf = fmap_need_off_once(map, at, readme)))
703
cli_md5_update(&md5, buf, readme);
705
cli_md5_final(hash, &md5);
706
ret = cache_lookup_hash(hash, map->len, ctx->engine->cache, ctx->recursion);
707
cli_dbgmsg("cache_check: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x is %s\n", hash[0], hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7], hash[8], hash[9], hash[10], hash[11], hash[12], hash[13], hash[14], hash[15], (ret == CL_VIRUS) ? "negative" : "positive");