2
* Unit tests for JS normalizer.
4
* Copyright (C) 2008 Sourcefire, Inc.
8
* This program is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License version 2 as
10
* published by the Free Software Foundation.
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
* GNU General Public License for more details.
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
23
#include "clamav-config.h"
34
#include "../libclamav/clamav.h"
35
#include "../libclamav/others.h"
36
#include "../libclamav/dconf.h"
37
#include "../libclamav/htmlnorm.h"
38
#include "../libclamav/jsparse/js-norm.h"
39
#include "../libclamav/jsparse/lexglobal.h"
40
#include "../libclamav/jsparse/textbuf.h"
41
#include "../libclamav/jsparse/generated/keywords.h"
42
#include "../libclamav/jsparse/generated/operators.h"
50
/* Sample strings for the keyword lookup test; test_keywords indexes this table by _i. */
static struct test kw_test[] = {
58
/* Sample strings for the operator lookup test; test_operators indexes this table by _i. */
static struct test op_test[] = {
63
#ifdef CHECK_HAVE_LOOPS
64
/*
 * Loop test: looks up kw_test[_i].str with in_word_set() and checks the result.
 * One assertion expects a hit whose name equals the input string, the other
 * expects no hit at all.
 * NOTE(review): the condition selecting between the two assertions is not
 * visible here - confirm against the table's fields.
 */
START_TEST (test_keywords)
66
const struct keyword *kw = in_word_set(kw_test[_i].str, strlen(kw_test[_i].str));
68
fail_unless(kw && !strcmp(kw->name, kw_test[_i].str), "keyword mismatch");
70
fail_unless(!kw, "non-keyword detected as keyword");
75
/*
 * Loop test: looks up op_test[_i].str with in_op_set() and checks the result.
 * One assertion expects a hit whose name equals the input string, the other
 * expects no hit at all.
 * NOTE(review): the condition selecting between the two assertions is not
 * visible here - confirm against the table's fields.
 */
START_TEST (test_operators)
77
const struct operator *op = in_op_set(op_test[_i].str, strlen(op_test[_i].str));
79
fail_unless(op && !strcmp(op->name, op_test[_i].str), "operator mismatch");
81
fail_unless(!op, "non-operator detected as operator");
84
#endif /* CHECK_HAVE_LOOPS */
86
/*
 * Stores a value in a zeroed token through the "string" accessor and checks
 * what each getter reports afterwards: string and cstring both return the
 * stored pointer, scope returns NULL and ival returns -1.
 */
START_TEST (test_token_string)
90
/* start from an all-zero token so no earlier state leaks into the checks */
memset(&tok, 0, sizeof(tok));
92
TOKEN_SET(&tok, string, str);
93
fail_unless(TOKEN_GET(&tok, string) == str, "token string get/set");
94
/* a string token is also expected to be readable as a cstring */
fail_unless(TOKEN_GET(&tok, cstring) == str, "token string->cstring");
95
fail_unless(TOKEN_GET(&tok, scope) == NULL, "token string->scope");
96
fail_unless(TOKEN_GET(&tok, ival) == -1, "token string->ival");
100
/*
 * Stores a constant string in a zeroed token through the "cstring" accessor
 * and checks what each getter reports afterwards: cstring returns the stored
 * pointer, while string and scope return NULL and ival returns -1.
 *
 * The failure messages are labelled "token cstring->..." so that a failing
 * assertion identifies this test rather than test_token_string.
 */
START_TEST (test_token_cstring)
102
const char *str = "test";
104
/* start from an all-zero token so no earlier state leaks into the checks */
memset(&tok, 0, sizeof(tok));
106
TOKEN_SET(&tok, cstring, str);
107
/* unlike the string->cstring direction, a cstring must not read back as string */
fail_unless(TOKEN_GET(&tok, string) == NULL, "token cstring->string");
108
fail_unless(TOKEN_GET(&tok, cstring) == str, "token cstring->cstring");
109
fail_unless(TOKEN_GET(&tok, scope) == NULL, "token cstring->scope");
110
fail_unless(TOKEN_GET(&tok, ival) == -1, "token cstring->ival");
114
/*
 * Stores a scope pointer in a zeroed token and checks what each getter
 * reports afterwards: scope returns the stored pointer, string and cstring
 * return NULL and ival returns -1.
 */
START_TEST (test_token_scope)
116
/* sentinel address: only compared for equality below, never dereferenced here */
struct scope *sc = (struct scope*)0xdeadbeef;
118
memset(&tok, 0, sizeof(tok));
120
TOKEN_SET(&tok, scope, sc);
121
fail_unless(TOKEN_GET(&tok, string) == NULL, "token scope->string");
122
fail_unless(TOKEN_GET(&tok, cstring) == NULL, "token scope->cstring");
123
fail_unless(TOKEN_GET(&tok, scope) == sc, "token scope->scope");
124
fail_unless(TOKEN_GET(&tok, ival) == -1, "token scope->ival");
128
/*
 * Stores an integer in a zeroed token and checks what each getter reports
 * afterwards: ival returns the stored value, the pointer getters return NULL,
 * and the dval getter is compared against -1.
 */
START_TEST (test_token_ival)
132
memset(&tok, 0, sizeof(tok));
134
TOKEN_SET(&tok, ival, val);
135
fail_unless(TOKEN_GET(&tok, string) == NULL, "token ival->string");
136
fail_unless(TOKEN_GET(&tok, cstring) == NULL, "token ival->cstring");
137
fail_unless(TOKEN_GET(&tok, scope) == NULL, "token ival->scope");
138
/*
 * NOTE(review): this comparison is one-sided - any dval below -1 + 1e-9
 * passes. Confirm what the getter returns for a mismatched type before
 * tightening it to a two-sided tolerance.
 */
fail_unless(TOKEN_GET(&tok, dval) - -1 < 1e-9, "token ival->dval");
139
fail_unless(TOKEN_GET(&tok, ival) == val, "token ival->ival");
143
/*
 * Stores a double in a zeroed token and checks what each getter reports
 * afterwards: dval returns the stored value (within 1e-9), the pointer
 * getters return NULL and ival returns -1.
 */
START_TEST (test_token_dval)
147
memset(&tok, 0, sizeof(tok));
149
TOKEN_SET(&tok, dval, val);
150
fail_unless(TOKEN_GET(&tok, string) == NULL, "token dval->string");
151
fail_unless(TOKEN_GET(&tok, cstring) == NULL, "token dval->cstring");
152
fail_unless(TOKEN_GET(&tok, scope) == NULL, "token dval->scope");
153
/*
 * Two-sided tolerance: the difference must be small in both directions.
 * A single "got - val < eps" check would also accept any value below val.
 */
fail_unless(TOKEN_GET(&tok, dval) - val < 1e-9 && val - TOKEN_GET(&tok, dval) < 1e-9, "token dval->dval");
154
fail_unless(TOKEN_GET(&tok, ival) == -1, "token dval->ival");
158
/*
 * Creates a parser state with cli_js_init(), asserts it is non-NULL and
 * destroys it again without feeding any input.
 */
START_TEST (test_init_destroy)
160
struct parser_state *state = cli_js_init();
161
fail_unless(!!state, "cli_js_init()");
162
cli_js_destroy(state);
163
/* presumably verifies that destroying a NULL state is tolerated */
cli_js_destroy(NULL);
167
/*
 * Full lifecycle on a small script: init, process the buffer, finish parsing,
 * destroy. No output is inspected in the visible lines.
 */
START_TEST (test_init_parse_destroy)
169
const char buf[] = "function (p) { return \"anonymous\";}";
170
struct parser_state *state = cli_js_init();
171
fail_unless(!!state, "cli_js_init()");
172
/* the same buffer is fed twice before parsing is finished */
cli_js_process_buffer(state, buf, strlen(buf));
173
cli_js_process_buffer(state, buf, strlen(buf));
174
cli_js_parse_done(state);
175
cli_js_destroy(state);
179
/*
 * Runs html_normalise_mem() over a 16384-byte buffer that begins with a stray
 * "</script>" and has a "<script language='javascript'>" opening placed at
 * offset 8192, and expects the call to return 1.
 */
START_TEST (js_begin_end)
181
char buf[16384] = "</script>";
183
struct cli_dconf *dconf = cli_dconf_init();
185
fail_unless(!!dconf, "failed to init dconf");
186
/* walks the rest of the first half of the buffer; the loop body is not visible here */
for(p=strlen(buf); p < 8191; p++) {
190
/* strncpy pads the remainder of the 8192-byte second half with NUL bytes */
strncpy(buf + 8192, " stuff stuff <script language='javascript'> function () {}", 8192);
191
fail_unless(html_normalise_mem((unsigned char*)buf, sizeof(buf), NULL, NULL, dconf) == 1, "normalise");
196
/*
 * Runs html_normalise_mem() over a buffer holding a stray "</script>" followed
 * by two complete <script> elements, and expects the call to return 1.
 * The normalized output itself is not inspected (see the TODO below).
 */
START_TEST (multiple_scripts)
198
char buf[] = "</script> stuff"\
199
"<script language='Javascript'> function foo() {} </script>"\
200
"<script language='Javascript'> function bar() {} </script>";
201
struct cli_dconf *dconf = cli_dconf_init();
203
fail_unless(!!dconf, "failed to init dconf");
204
/* sizeof(buf) is passed, so the terminating NUL is part of the input */
fail_unless(html_normalise_mem((unsigned char*)buf, sizeof(buf), NULL, NULL, dconf) == 1, "normalise");
205
/* TODO: test that both scripts have been normalized */
210
/* Parser state shared by the tokenizer tests; created in jstest_setup(). */
static struct parser_state *state;
211
/* Path of the per-test temporary directory; generated and created in jstest_setup(). */
static char *tmpdir = NULL;
213
/*
 * Fixture setup for the tokenizer tests: creates the shared parser state,
 * generates a temporary directory name with cli_gentemp() and creates that
 * directory with mode 0700. Each step is asserted.
 */
static void jstest_setup(void)
215
state = cli_js_init();
216
fail_unless(!!state, "js init");
217
tmpdir = cli_gentemp(NULL);
218
fail_unless(!!tmpdir,"js tmp dir");
219
fail_unless(mkdir(tmpdir, 0700) == 0, "tempdir mkdir");
222
/*
 * Fixture teardown for the tokenizer tests: destroys the shared parser state.
 * NOTE(review): cleanup of tmpdir is not visible here - confirm it is removed
 * and freed.
 */
static void jstest_teardown(void)
228
cli_js_destroy(state);
232
/*
 * Feeds `in` to the shared parser state, writes the normalized result under
 * tmpdir with cli_js_output(), and compares the file "<tmpdir>/javascript"
 * with `expected`.
 *
 * in       - JavaScript source to normalize
 * expected - exact text the output file must contain
 * split    - presumably selects between feeding the input in two halves and
 *            feeding it in one call; the branch itself is not visible here
 *
 * NOTE(review): several "%lu" specifiers below are used with `len`, which is
 * a signed ssize_t - confirm the types of p/p2 and align the specifiers.
 */
static void tokenizer_test(const char *in, const char *expected, int split)
237
ssize_t len = strlen(expected);
238
size_t inlen = strlen(in);
242
/* split feeding: first half, then the remainder (covers odd lengths too) */
cli_js_process_buffer(state, in, inlen/2);
243
cli_js_process_buffer(state, in + inlen/2, inlen - inlen/2);
245
/* whole-buffer feeding */
cli_js_process_buffer(state, in, inlen);
248
cli_js_parse_done(state);
249
cli_js_output(state, tmpdir);
250
snprintf(filename, 1023, "%s/javascript", tmpdir);
252
/* one byte more than the expected length is allocated */
buf = cli_malloc(len + 1);
255
fail("malloc buffer");
258
fd = open(filename, O_RDONLY);
261
fail("failed to open output file: %s", filename);
264
/* read at most `len` bytes; a shorter file is reported below */
p = read(fd, buf, len);
268
fail("file is smaller: %lu, expected: %lu", p, len);
270
/* the current file offset must equal the number of bytes expected */
p = lseek(fd, 0, SEEK_CUR);
271
fail_unless(p == len, "lseek position incorrect: %ld != %ld", p, len);
274
char c1 = expected[p];
279
fail("file contents mismatch at byte: %lu, was: %c, expected: %c", p, c2, c1);
285
/* seek to the end to detect bytes beyond the expected length */
p2 = lseek(fd, 0, SEEK_END);
289
fail("trailing garbage, file size: %ld, expected: %ld", p2, p);
294
/*
 * Tokenizer test vectors 0-3. Each jstest_bufN is an input script and
 * jstest_expectedN is the normalized text expected for it.
 *
 * Vector 0: a function containing a float literal with an exponent, a
 * single-quoted string with an escaped quote, a line comment and block
 * comments. The visible expected text shows identifiers renamed to
 * n000, n001, ..., "1.9e2" written as 190, "4." written as 4, and strings
 * re-quoted with double quotes.
 */
static const char jstest_buf0[] =
295
"function foo(a, b) {\n"\
296
"var x = 1.9e2*2*a/ 4.;\n"\
297
"var y = 'test\\'tst';//var foo=5\n"\
298
"x=b[5],/* multiline\nvar z=6;\nsome*some/other**/"\
299
"z=x/y;/* multiline oneline */var t=z/a;\n"\
301
"document.writeln('something');}";
303
static const char jstest_expected0[] =
304
"function n000(n001,n002){"\
305
"var n003=190*2*n001/4;"\
306
"var n004=\"test\'tst\";"\
308
"z=n003/n004;var n005=z/n001;"\
310
"document.writeln(\"something\");}";
312
/* Vector 1: an identifier containing a \u escape is renamed like any other identifier. */
static const char jstest_buf1[] =
313
"function () { var id\\u1234tx;}";
315
static const char jstest_expected1[] =
316
"function(){var n000;}";
318
/* Vector 2: adjacent string literals joined with '+' are expected as one string. */
static const char jstest_buf2[] =
319
"function () { var tst=\"a\"+'bc'+ 'd'; }";
321
static const char jstest_expected2[] =
322
"function(){var n000=\"abcd\";}";
324
/* Vector 3: a dF() call with a percent-encoded argument; its expected text is not visible here. */
static const char jstest_buf3[] =
325
"dF('bmfsu%2639%2638x11u%2638%263%3A%264C1');";
327
static const char jstest_expected3[] =
330
#define B64 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
332
/*
 * Vector 4: document.write(unescape(...)) of a percent-encoded <script>
 * element, followed by eval(dF(...)). The expected text carries the unescaped
 * script inside document.write(), then "eval();" and an alert("w00t") call.
 */
/* TODO: document.write should be normalized too */
static const char jstest_buf4[] =
334
"document.write(unescape('%3C%73%63%72%69%70%74%20%6C%61%6E%67%75%61%67%65%3D%22%6A%61%76%61%73%63%72%69%70%74%22%3E%66%75%6E%63%74%69%6F%6E%20%64%46%28%73%29%7B%76%61%72%20%73%31%3D%75%6E%65%73%63%61%70%65%28%73%2E%73%75%62%73%74%72%28%30%2C%73%2E%6C%65%6E%67%74%68%2D%31%29%29%3B%20%76%61%72%20%74%3D%27%27%3B%66%6F%72%28%69%3D%30%3B%69%3C%73%31%2E%6C%65%6E%67%74%68%3B%69%2B%2B%29%74%2B%3D%53%74%72%69%6E%67%2E%66%72%6F%6D%43%68%61%72%43%6F%64%65%28%73%31%2E%63%68%61%72%43%6F%64%65%41%74%28%69%29%2D%73%2E%73%75%62%73%74%72%28%73%2E%6C%65%6E%67%74%68%2D%31%2C%31%29%29%3B%64%6F%63%75%6D%65%6E%74%2E%77%72%69%74%65%28%75%6E%65%73%63%61%70%65%28%74%29%29%3B%7D%3C%2F%73%63%72%69%70%74%3E'));eval(dF('gI%285%3B%285%3Afqjwy%28585%3A7%28586D%28585%3A7%3C%7C55y%28585%3A7%3C%28585%3A7%28586E%28585%3A8G5%285%3A%285%3C%286E3'));";
336
static const char jstest_expected4[] =
337
"document.write(\"<script language=\"javascript\">function df(s){var s1=unescape(s.substr(0,s.length-1)); var t='';for(i=0;i<s1.length;i++)t+=string.fromcharcode(s1.charcodeat(i)-s.substr(s.length-1,1));document.write(unescape(t));}</script>\");eval();alert(\"w00t\");";
339
/*
 * Tokenizer test vectors 5-8.
 *
 * Vectors 5 and 6: a function taking (p,a,c,k,e,r) / (p,a,c,k,e,d) invoked
 * with a template string and an 'alert|w00t' word list; the expected text
 * shows the call arguments replaced by alert("w00t");.
 */
static const char jstest_buf5[] =
340
"function (p,a,c,k,e,r){}('0(\\'1\\');',2,2,'alert|w00t'.split('|'),0,{});";
342
static const char jstest_expected5[] =
343
"function(n000,n001,n002,n003,n004,n005){}(alert(\"w00t\"););";
345
static const char jstest_buf6[] =
346
"function $(p,a,c,k,e,d){} something(); $('0(\\'1\\');',2,2,'alert|w00t'.split('|'),0,{});";
348
static const char jstest_expected6[] =
349
"function n000(n001,n002,n003,n004,n005,n006){}something();$(alert(\"w00t\"););";
351
/*
 * Vectors 7 and 8: a string literal with the B64 filler spliced into its
 * middle, written with double quotes (7) and single quotes (8). Both are
 * expected to come out double-quoted.
 */
static const char jstest_buf7[] =
352
"var z=\"tst" B64 "tst\";";
354
static const char jstest_expected7[] =
355
"var n000=\"tst" B64 "tst\";";
357
static const char jstest_buf8[] =
358
"var z=\'tst" B64 "tst\';";
360
static const char jstest_expected8[] =
361
"var n000=\"tst" B64 "tst\";";
363
/*
 * Tokenizer test vectors 9-13.
 * NOTE(review): the initializers of jstest_expected9, jstest_buf11,
 * jstest_buf13 and jstest_expected13 are not visible here.
 *
 * Vector 9: eval(unescape(...)) of a percent-encoded argument.
 */
static const char jstest_buf9[] =
364
"eval(unescape('%61%6c%65%72%74%28%27%74%65%73%74%27%29%3b'));";
366
static const char jstest_expected9[] =
369
static const char jstest_buf10[] =
370
"function $ $() dF(x); function (p,a,c,k,e,r){function $(){}";
372
static const char jstest_expected10[] =
373
"function n000 n000()n001(x);function(n002,n003,n004,n005,n006,n007){function n008(){}";
375
static const char jstest_buf11[] =
378
static const char jstest_expected11[] =
379
"var n000=123456789;";
381
static const char jstest_buf12[] =
382
"var x='test\\u0000test';";
384
static const char jstest_expected12[] =
385
"var n000=\"test\x1test\";";
387
static const char jstest_buf13[] =
390
static const char jstest_expected13[] =
396
/* expected normalized text for the matching input */
const char *expected;
398
/* one {input, expected} pair per test vector; indexed by _i in the tokenizer loop tests */
{jstest_buf0, jstest_expected0},
399
{jstest_buf1, jstest_expected1},
400
{jstest_buf2, jstest_expected2},
401
{jstest_buf3, jstest_expected3},
402
{jstest_buf4, jstest_expected4},
403
{jstest_buf5, jstest_expected5},
404
{jstest_buf6, jstest_expected6},
405
{jstest_buf7, jstest_expected7},
406
{jstest_buf8, jstest_expected8},
407
{jstest_buf9, jstest_expected9},
408
{jstest_buf10, jstest_expected10},
409
{jstest_buf11, jstest_expected11},
410
{jstest_buf12, jstest_expected12},
411
{jstest_buf13, jstest_expected13}
414
#ifdef CHECK_HAVE_LOOPS
415
/* Loop test: runs tokenizer_test() on js_tests[_i] with split = 0. */
START_TEST (tokenizer_basic)
417
tokenizer_test(js_tests[_i].in, js_tests[_i].expected, 0);
421
/* Loop test: runs tokenizer_test() on js_tests[_i] with split = 1. */
START_TEST (tokenizer_split)
423
tokenizer_test(js_tests[_i].in, js_tests[_i].expected, 1);
426
#endif /* CHECK_HAVE_LOOPS */
428
/*
 * Large-input test: builds a 512 KiB script of the form x="aaaa...a" and
 * runs it through tokenizer_test() with split = 1, expecting the output to be
 * identical to the input.
 */
START_TEST (js_buffer)
430
const size_t len = 512*1024;
431
const char s[] = "x=\"";
432
const char e[] = "\"";
433
char *tst = malloc(len);
435
fail_unless(!!tst, "malloc");
437
memset(tst, 'a', len);
438
/* overwrite the start with x=" ; strlen(s) bytes are copied, so no NUL is written */
strncpy(tst, s, strlen(s));
439
/* sizeof(e) is 2: the closing quote and the terminating NUL become the last two bytes */
strncpy(tst + len - sizeof(e), e, sizeof(e));
441
tokenizer_test(tst,tst,1);
446
/*
 * Runs html_normalise_mem() over a 24700-byte buffer that starts with a
 * 'jscript.encode' language tag and has the marker "#@~^ " placed at offset
 * 24626, and expects the call to return 1. Going by the test name, this
 * presumably guards against an infinite loop in script-encoder handling.
 */
START_TEST (screnc_infloop)
448
char buf[24700] = "<%@ language='jscript.encode'>";
449
struct cli_dconf *dconf = cli_dconf_init();
452
fail_unless(!!dconf, "failed to init dconf");
453
/* two fill passes over the buffer; the loop bodies are not visible here */
for(p = strlen(buf); p < 16384; p++) {
456
for(; p < 24625; p++) {
459
/* strncpy with n = 10 pads the bytes after the marker with NULs */
strncpy(buf+24626,"#@~^ ", 10);
460
fail_unless(html_normalise_mem((unsigned char*)buf, sizeof(buf), NULL, NULL, dconf) == 1, "normalise");
465
/*
 * Builds the "jsnorm" Check suite. It creates six test cases (gperf lookups,
 * token accessors, API lifecycle, tokenizer, bug regressions, screnc infloop)
 * and registers the tests defined above with them. Loop tests are registered
 * only when CHECK_HAVE_LOOPS is defined.
 */
Suite *test_jsnorm_suite(void)
467
Suite *s = suite_create("jsnorm");
468
TCase *tc_jsnorm_gperf, *tc_jsnorm_token, *tc_jsnorm_api,
469
*tc_jsnorm_tokenizer, *tc_jsnorm_bugs, *tc_screnc_infloop;
471
/* keyword / operator lookup tables */
tc_jsnorm_gperf = tcase_create("jsnorm gperf");
472
suite_add_tcase (s, tc_jsnorm_gperf);
473
#ifdef CHECK_HAVE_LOOPS
474
/* one iteration per table entry; _i runs from 0 to the element count */
tcase_add_loop_test(tc_jsnorm_gperf, test_keywords, 0, sizeof(kw_test)/sizeof(kw_test[0]));
475
tcase_add_loop_test(tc_jsnorm_gperf, test_operators, 0, sizeof(op_test)/sizeof(op_test[0]));
477
/* TOKEN_SET / TOKEN_GET accessor checks */
tc_jsnorm_token = tcase_create("jsnorm token functions");
478
suite_add_tcase (s, tc_jsnorm_token);
479
tcase_add_test(tc_jsnorm_token, test_token_string);
480
tcase_add_test(tc_jsnorm_token, test_token_cstring);
481
tcase_add_test(tc_jsnorm_token, test_token_scope);
482
tcase_add_test(tc_jsnorm_token, test_token_ival);
483
tcase_add_test(tc_jsnorm_token, test_token_dval);
485
/* parser state lifecycle */
tc_jsnorm_api = tcase_create("jsnorm api functions");
486
suite_add_tcase (s, tc_jsnorm_api);
487
tcase_add_test(tc_jsnorm_api, test_init_destroy);
488
tcase_add_test(tc_jsnorm_api, test_init_parse_destroy);
490
/* tokenizer tests share the jstest_setup / jstest_teardown fixture */
tc_jsnorm_tokenizer = tcase_create("jsnorm tokenizer");
491
suite_add_tcase (s, tc_jsnorm_tokenizer);
492
tcase_add_checked_fixture (tc_jsnorm_tokenizer, jstest_setup, jstest_teardown);
493
#ifdef CHECK_HAVE_LOOPS
494
tcase_add_loop_test(tc_jsnorm_tokenizer, tokenizer_basic, 0, sizeof(js_tests)/sizeof(js_tests[0]));
495
tcase_add_loop_test(tc_jsnorm_tokenizer, tokenizer_split, 0, sizeof(js_tests)/sizeof(js_tests[0]));
497
tcase_add_test(tc_jsnorm_tokenizer, js_buffer);
499
/* regression tests that go through html_normalise_mem() */
tc_jsnorm_bugs = tcase_create("jsnorm bugs");
500
suite_add_tcase (s, tc_jsnorm_bugs);
501
tcase_add_test(tc_jsnorm_bugs, js_begin_end);
502
tcase_add_test(tc_jsnorm_bugs, multiple_scripts);
504
tc_screnc_infloop = tcase_create("screnc infloop bug");
505
suite_add_tcase (s, tc_screnc_infloop);
506
tcase_add_test(tc_screnc_infloop, screnc_infloop);