2
* Unit tests for regular expression processing.
4
* Copyright (C) 2008 Sourcefire, Inc.
8
* This program is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License version 2 as
10
* published by the Free Software Foundation.
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
* GNU General Public License for more details.
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
23
#include "clamav-config.h"
32
#include "../libclamav/clamav.h"
33
#include "../libclamav/others.h"
34
#include "../libclamav/mbox.h"
35
#include "../libclamav/message.h"
36
#include "../libclamav/htmlnorm.h"
37
#include "../libclamav/phishcheck.h"
38
#include "../libclamav/regex_suffix.h"
39
#include "../libclamav/regex_list.h"
40
#include "../libclamav/phish_domaincheck_db.h"
41
#include "../libclamav/phish_whitelist.h"
44
/* Callback counter: the tests compare it against the number of suffix
 * callbacks they expect cli_regex2suffix to make. */
static size_t cb_called = 0;
46
/* Suffix callback for patterns that must not yield any suffix: being called
 * at all fails the running test. The check does not look at any argument. */
static int cb_fail(void *cbdata, const char *suffix, size_t len, const struct regex_list *regex)
48
fail("this pattern is not supposed to have a suffix");
52
/* Suffix callback that expects one specific suffix.
 * cbdata is the expected string; the test fails when suffix is NULL or
 * differs from it. len and regex are not examined by this check. */
static int cb_expect_single(void *cbdata, const char *suffix, size_t len, const struct regex_list *regex)
54
const char *expected = cbdata;
56
fail_unless(suffix && strcmp(suffix, expected) == 0,
57
"suffix mismatch, was: %s, expected: %s\n", suffix, expected);
61
/* Operand of sizeof(*regex.preg) where the tests allocate their preg buffer. */
static struct regex_list regex;
64
/* Empty-pattern case: cli_regex2suffix must return REG_EMPTY and must not
 * invoke the callback (cb_fail would fail the test if it did). */
const char pattern[] = "";
69
preg = malloc(sizeof(*regex.preg));
70
fail_unless(!!preg, "malloc");
71
rc = cli_regex2suffix(pattern, preg, cb_fail, NULL);
73
fail_unless(rc == REG_EMPTY, "empty pattern");
74
fail_unless(cb_called == 0, "callback shouldn't be called");
84
/* The pattern is handed to cb_expect_single as the expected suffix, so the
 * callback must report the pattern itself; cli_regex2suffix must return 0
 * and the callback must have run exactly once. */
preg = malloc(sizeof(*regex.preg));
85
fail_unless(!!preg, "malloc");
86
rc = cli_regex2suffix(pattern, preg, cb_expect_single, pattern);
87
fail_unless(rc == 0, "single character pattern");
90
fail_unless(cb_called == 1, "callback should be called once");
95
/* Each table is { pattern, expected suffix..., NULL }. The expected strings
 * are the pattern's alternatives spelled back to front ("com" -> "moc"). */
static const char *ex1[] =
96
{"com|de","moc","ed",NULL};
97
static const char *ex2[] =
98
{"xd|(a|e)bc","dx","cba","cbe",NULL};
100
/* Tables consumed by the test_suffix loop test; tests[_i][0] is the pattern. */
static const char **tests[] = {
106
/*
 * Suffix callback that checks every reported suffix against an expected list.
 *
 * cbdata  array of expected strings (const char **); the entry at index
 *         cb_called is the one the current call must match.
 * suffix  suffix reported by cli_regex2suffix.
 * len     length reported for suffix; must equal strlen(suffix).
 * r       not examined by these checks.
 */
static int cb_expect_multi(void *cbdata, const char *suffix, size_t len, const struct regex_list *r)
108
const char **exp = cbdata;
109
fail_unless(!!exp, "expected data");
111
fail_unless(!!*exp, "expected no suffix, got: %s\n",suffix);
112
/* cb_called is a size_t: cast it so the argument matches the conversion */
fail_unless(!!exp[cb_called], "expected less suffixes, but already got: %lu\n", (unsigned long)cb_called);
113
fail_unless(strcmp(exp[cb_called], suffix) == 0,
114
"suffix mismatch, was: %s, expected: %s\n",suffix, exp[cb_called]);
115
/* both lengths are size_t values, printed through unsigned long */
fail_unless(strlen(suffix) == len, "incorrect suffix len, expected: %lu, got: %lu\n", (unsigned long)strlen(suffix), (unsigned long)len);
120
#ifdef CHECK_HAVE_LOOPS
121
/* Loop test over tests[]: runs cli_regex2suffix on the table's pattern with
 * cb_expect_multi as callback (the whole table is the callback data), then
 * compares the number of callbacks made with the expected count n. */
START_TEST (test_suffix)
125
const char *pattern = tests[_i][0];
127
const char **p=tests[_i];
129
fail_unless(!!pattern, "test pattern");
130
preg = malloc(sizeof(*regex.preg));
131
fail_unless(!!preg, "malloc");
132
rc = cli_regex2suffix(pattern, preg, cb_expect_multi, tests[_i]);
133
fail_unless(rc == 0, "cli_regex2suffix");
138
/* counters are cast so the arguments match the %lu conversions */
fail_unless(cb_called == n,
139
"suffix number mismatch, expected: %lu, was: %lu\n", (unsigned long)n, (unsigned long)cb_called);
142
#endif /* CHECK_HAVE_LOOPS */
144
/* Registered as the checked-fixture setup of the "cli_regex2suffix" test case. */
static void setup(void)
149
/* Registered as the checked-fixture teardown of the "cli_regex2suffix" test case. */
static void teardown(void)
153
/* Matcher used by the "regex_list" test case; rsetup initialises it and
 * rteardown hands it to regex_list_done. */
static struct regex_matcher matcher;
155
/* Checked-fixture setup: initialise matcher, failing the test on a non-zero
 * return from init_regex_list. */
static void rsetup(void)
157
int rc = init_regex_list(&matcher);
158
fail_unless(rc == 0, "init_regex_list");
161
/* Checked-fixture teardown: calls regex_list_done on matcher. */
static void rteardown(void)
163
regex_list_done(&matcher);
166
/* Table-driven cases shared by the regex_list_match_test and phishingScan_test
 * loop tests. Initialisers are written in the order
 * pattern, real URL, display URL, expected result. */
static const struct rtest {
167
const char *pattern;/* NULL if not meant for whitelist testing */
169
/* text shown for the link; do_phishing_test stores it as the tag contents */
const char *displayurl;
170
int result;/* 0 - phish, 1 - whitelisted, 2 - clean,
171
3 - blacklisted if 2nd db is loaded,
174
{NULL,"http://fake.example.com","=====key.com",0},
175
{NULL,"http://key.com","=====key.com",2},
176
{NULL,"http://key.com@fake.example.com","key.com",0},
177
/* entry taken from .wdb with a / appended */
178
{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
179
"http://srx.main.ebayrtm.com",
181
1 /* should be whitelisted */},
182
{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
183
"http://srx.main.ebayrtm.com.evil.example.com",
186
{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
187
"www.www.ebayrtm.com?somecgi",
188
"www.ebay.com/something",1},
190
"http://key.com","go to key.com",2
192
{NULL, "http://somefakeurl.example.com","someotherdomain-key.com",2},
193
{NULL, "http://somefakeurl.example.com","someotherdomain.key.com",0},
194
{NULL, "http://1.test.example.com/something","test",3},
195
{NULL, "http://1.test.example.com/2","test",3},
196
{NULL, "http://user@1.test.example.com/2","test",3},
197
{NULL, "http://user@1.test.example.com/2/test","test",3},
198
{NULL, "http://user@1.test.example.com/","test",3},
199
{NULL, "http://x.exe","http:///x.exe",2},
200
{".+\\.ebayrtm\\.com([/?].*)?:[^.]+\\.ebay\\.(de|com|co\\.uk)/",
201
"http://srx.main.ebayrtm.com",
203
1 /* should be whitelisted */},
204
{".+\\.ebayrtm\\.com([/?].*)?:.+[r-t]\\.ebay\\.(de|com|co\\.uk)/",
205
"http://srx.main.ebayrtm.com",
207
1 /* should be whitelisted */},
208
{".+\\.ebayrtm\\.com([/?].*)?:.+[r-t]\\.ebay\\.(de|com|co\\.uk)/",
209
"http://srx.main.ebayrtm.com",
211
1 /* should be whitelisted */},
213
{NULL,"http://co.uk","http:// co.uk",2},
214
{NULL,"http://co.uk"," ",2},
215
{NULL,"127.0.0.1","pages.ebay.de",2},
216
{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
217
"http://pages.ebay.de@fake.example.com","pages.ebay.de",0},
218
{NULL,"http://key.com","https://key.com",0},
219
{NULL,"http://key.com%00fake.example.com","https://key.com",0},
222
#ifdef CHECK_HAVE_LOOPS
223
/* Loop test over rtests[]: entries with a pattern add it to matcher, build
 * the list, and check that regex_list_match on (realurl, displayurl) returns
 * the entry's expected result without altering realurl. Entries without a
 * pattern only have their expected result validated. */
START_TEST (regex_list_match_test)
226
const struct rtest *rtest = &rtests[_i];
227
char *pattern, *realurl;
230
if(!rtest->pattern) {
231
fail_unless(rtest->result != 1,
232
"whitelist test must have pattern set");
233
/* this test entry is not meant for whitelist testing */
237
fail_unless(rtest->result == 0 || rtest->result == 1 || rtest->result==4,
238
"whitelist test result must be either 0 or 1 or 4");
239
pattern = cli_strdup(rtest->pattern);
240
fail_unless(!!pattern, "cli_strdup");
242
rc = regex_list_add_pattern(&matcher, pattern);
243
/* result 4 marks a pattern that regex_list_add_pattern must reject */
if(rtest->result == 4) {
244
fail_unless(rc, "regex_list_add_pattern should return error");
248
fail_unless(rc == 0,"regex_list_add_pattern");
251
matcher.list_loaded = 1;
253
rc = cli_build_regex_list(&matcher);
254
fail_unless(rc == 0,"cli_build_regex_list");
256
fail_unless(is_regex_ok(&matcher),"is_regex_ok");
258
realurl = cli_strdup(rtest->realurl);
/* the copy is passed to regex_list_match and strcmp below, so it must not be NULL */
fail_unless(!!realurl, "cli_strdup");
259
rc = regex_list_match(&matcher, realurl, rtest->displayurl, NULL, 1, &info, 1);
260
fail_unless(rc == rtest->result,"regex_list_match");
261
/* regex_list_match is not supposed to modify realurl in this case */
262
fail_unless(!strcmp(realurl, rtest->realurl), "realurl altered");
266
#endif /* CHECK_HAVE_LOOPS */
268
static struct cl_engine *engine;
269
static int loaded_2 = 0;
271
static void psetup_impl(int load2)
274
struct phishcheck *pchk;
276
rc = cli_initengine(&engine, 0);
277
fail_unless(rc == 0, "cl_initengine");
279
rc = phishing_init(engine);
280
fail_unless(rc == 0,"phishing_init");
281
pchk = engine->phishcheck;
282
fail_unless(!!pchk, "engine->phishcheck");
284
rc = init_domainlist(engine);
285
fail_unless(rc == 0,"init_domainlist");
287
f = fdopen(open_testfile("input/daily.pdb"),"r");
288
fail_unless(!!f, "fopen daily.pdb");
290
rc = load_regex_matcher(engine->domainlist_matcher, f, 0, 0, NULL);
291
fail_unless(rc == 0, "load_regex_matcher");
295
f = fdopen(open_testfile("input/daily.pdb2"),"r");
296
fail_unless(!!f, "fopen daily.pdb2");
298
rc = load_regex_matcher(engine->domainlist_matcher, f, 0, 0, NULL);
299
fail_unless(rc == 0, "load_regex_matcher");
304
rc = init_whitelist(engine);
305
fail_unless(rc == 0,"init_whitelist");
307
f = fdopen(open_testfile("input/daily.wdb"),"r");
308
rc = load_regex_matcher(engine->whitelist_matcher, f, 0, 1, NULL);
309
fail_unless(rc == 0,"load_regex_matcher");
312
rc = cli_build_regex_list(engine->whitelist_matcher);
313
fail_unless(rc == 0,"cli_build_regex_list");
315
rc = cli_build_regex_list(engine->domainlist_matcher);
316
fail_unless(rc == 0,"cli_build_regex_list");
318
fail_unless(is_regex_ok(engine->whitelist_matcher),"is_regex_ok");
319
fail_unless(is_regex_ok(engine->domainlist_matcher),"is_regex_ok");
322
/* Registered as the checked-fixture setup of the "phishingScan" test case. */
static void psetup(void)
327
/* Registered as the checked-fixture setup of the "phishingScan with 2 dbs" test case. */
static void psetup2(void)
333
/* Checked-fixture teardown of both phishing test cases: calls phishing_done on engine. */
static void pteardown(void)
336
phishing_done(engine);
343
/*
 * Runs phishingScan on a single synthetic link built from rtest and checks
 * ctx.found_possibly_unwanted against rtest->result.
 * The link is described by a one-element tag_arguments_t: tag "href",
 * value = copy of rtest->realurl, contents = blob holding rtest->displayurl.
 */
static void do_phishing_test(const struct rtest *rtest)
348
tag_arguments_t hrefs;
351
memset(&ctx, 0, sizeof(ctx));
353
realurl = cli_strdup(rtest->realurl);
354
fail_unless(!!realurl, "cli_strdup");
357
hrefs.value = cli_malloc(sizeof(*hrefs.value));
358
fail_unless(!!hrefs.value, "cli_malloc");
359
hrefs.value[0] = (unsigned char*)realurl;
360
hrefs.contents = cli_malloc(sizeof(*hrefs.contents));
361
fail_unless(!!hrefs.contents, "cli_malloc");
/* NOTE(review): the results of blobCreate() and cli_strdup("href") are not
 * checked here, unlike the other allocations -- confirm they cannot fail. */
362
hrefs.contents[0] = blobCreate();
363
hrefs.tag = cli_malloc(sizeof(*hrefs.tag));
364
fail_unless(!!hrefs.tag, "cli_malloc");
365
hrefs.tag[0] = (unsigned char*)cli_strdup("href");
/* the +1 copies the terminating NUL into the blob as well */
366
blobAddData(hrefs.contents[0], (const unsigned char*) rtest->displayurl, strlen(rtest->displayurl)+1);
369
ctx.virname = &virname;
371
rc = phishingScan(NULL, NULL, &ctx, &hrefs);
373
html_tag_arg_free(&hrefs);
/* the scan itself must return CL_CLEAN; the verdict is read from ctx below */
374
fail_unless(rc == CL_CLEAN,"phishingScan");
375
switch(rtest->result) {
377
fail_unless(ctx.found_possibly_unwanted,
378
"this should be phishing, realURL: %s, displayURL: %s",
379
rtest->realurl, rtest->displayurl);
382
fail_unless(!ctx.found_possibly_unwanted,
383
"this should be whitelisted, realURL: %s, displayURL: %s",
384
rtest->realurl, rtest->displayurl);
387
fail_unless(!ctx.found_possibly_unwanted,
388
"this should be clean, realURL: %s, displayURL: %s",
389
rtest->realurl, rtest->displayurl);
393
fail_unless(!ctx.found_possibly_unwanted,
394
"this should be clean, realURL: %s, displayURL: %s",
395
rtest->realurl, rtest->displayurl);
397
fail_unless(ctx.found_possibly_unwanted,
398
"this should be blacklisted, realURL: %s, displayURL: %s",
399
rtest->realurl, rtest->displayurl);
/* NOTE(review): ctx.virname was set to &virname above; if virname is itself a
 * string pointer, this cast makes strstr (and %s) read the pointer object
 * rather than the name. The leading '!' also passes when "Blacklisted" is
 * absent, which does not match the message -- confirm the intended check. */
400
fail_unless(!strstr((const char*)ctx.virname,"Blacklisted"),
401
"should be blacklisted, but is: %s\n", ctx.virname);
407
#ifdef CHECK_HAVE_LOOPS
408
/* Loop test: runs do_phishing_test on rtests[_i]. */
START_TEST (phishingScan_test)
410
do_phishing_test(&rtests[_i]);
415
/* Reads input/daily.pdb line by line; each line must contain a ':' and is
 * turned into an rtest that is run through do_phishing_test twice, the
 * second time with realurl replaced by "http://fake.example.com". */
START_TEST(phishing_fake_test)
418
FILE *f = fdopen(open_testfile("input/daily.pdb"),"r");
419
fail_unless(!!f,"fopen daily.pdb");
420
while(fgets(buf, sizeof(buf), f)) {
422
const char *pdb = strchr(buf,':');
423
fail_unless(!!pdb, "missing : in pdb");
425
rtest.displayurl = pdb;
427
do_phishing_test(&rtest);
428
rtest.realurl = "http://fake.example.com";
430
do_phishing_test(&rtest);
436
Suite *test_regex_suite(void)
438
Suite *s = suite_create("regex");
439
TCase *tc_api, *tc_matching, *tc_phish, *tc_phish2;
441
tc_api = tcase_create("cli_regex2suffix");
442
suite_add_tcase(s, tc_api);
443
tcase_add_checked_fixture (tc_api, setup, teardown);
444
tcase_add_test(tc_api, empty);
445
tcase_add_test(tc_api, one);
446
#ifdef CHECK_HAVE_LOOPS
447
tcase_add_loop_test(tc_api, test_suffix, 0, sizeof(tests)/sizeof(tests[0]));
449
tc_matching = tcase_create("regex_list");
450
suite_add_tcase(s, tc_matching);
451
tcase_add_checked_fixture (tc_matching, rsetup, rteardown);
452
#ifdef CHECK_HAVE_LOOPS
453
tcase_add_loop_test(tc_matching, regex_list_match_test, 0, sizeof(rtests)/sizeof(rtests[0]));
455
tc_phish = tcase_create("phishingScan");
456
suite_add_tcase(s, tc_phish);
457
tcase_add_checked_fixture(tc_phish, psetup, pteardown);
458
#ifdef CHECK_HAVE_LOOPS
459
tcase_add_loop_test(tc_phish, phishingScan_test, 0, sizeof(rtests)/sizeof(rtests[0]));
461
tcase_add_test(tc_phish, phishing_fake_test);
464
tc_phish2 = tcase_create("phishingScan with 2 dbs");
465
suite_add_tcase(s, tc_phish2);
466
tcase_add_checked_fixture(tc_phish2, psetup2, pteardown);
467
#ifdef CHECK_HAVE_LOOPS
468
tcase_add_loop_test(tc_phish2, phishingScan_test, 0, sizeof(rtests)/sizeof(rtests[0]));
470
tcase_add_test(tc_phish2, phishing_fake_test);