3
* Decode MS Script Encoder protection.
5
* Copyright (C) 2004 trog@uncon.org
7
* The ScrEnc decoder was initially based upon an analysis by Andreas Marx.
9
* This program is free software; you can redistribute it and/or modify
10
* it under the terms of the GNU General Public License as published by
11
* the Free Software Foundation; either version 2 of the License, or
12
* (at your option) any later version.
14
* This program is distributed in the hope that it will be useful,
15
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
* GNU General Public License for more details.
19
* You should have received a copy of the GNU General Public License
20
* along with this program; if not, write to the Free Software
21
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26
#include <sys/types.h>
35
#include "clamav-config.h"
41
#else /* HAVE_SYS_MMAN_H */
49
/* Capacity of the tag, tag_arg and tag_val scratch strings used by
 * cli_html_normalise(); each array is declared with one extra byte. */
#define HTML_STR_LENGTH 1024
69
/* Parser state in which encoded script bytes are translated through
 * decrypt_tables (see the matching case in cli_html_normalise()). */
HTML_JSDECODE_DECRYPT,
85
/* In-memory view of the input. cli_readline() reads from buffer + offset
 * up to buffer + length (offset/length members are not shown in this extract). */
typedef struct m_area_tag {
86
unsigned char *buffer;
91
/* Size in bytes of the staging buffer embedded in every file_buff_t. */
#define HTML_FILE_BUFF_LEN 8192
93
/* Buffered output file. The helpers in this file also use its fd and
 * length members (declarations not shown in this extract). */
typedef struct file_buff_tag {
95
unsigned char buffer[HTML_FILE_BUFF_LEN];
99
/* Base64 lookup table indexed by byte value.
 * 'A'-'Z' map to 0-25, 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62 and
 * '/' to 63; every other byte maps to -1.
 * NOTE(review): the length-decoding code visible in this file shifts the
 * looked-up values without testing for -1 first - confirm that invalid
 * input is rejected somewhere in the lines not shown. */
static const int base64_chars[256] = {
100
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
101
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
102
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
103
52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
104
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
105
15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
106
-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
107
41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,
108
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
109
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
110
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
111
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
112
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
113
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
114
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
115
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
118
/* 64-entry schedule selecting which of the three decrypt_tables rows is
 * used for each successive encoded byte; the decoders index it with
 * table_pos, which they advance modulo 64.
 * The entries are written with a leading zero (octal literals); for the
 * values 0, 1 and 2 that makes no difference.
 * The array has external linkage (it is not declared static). */
int table_order[] = {
119
00, 02, 01, 00, 02, 01, 02, 01, 01, 02, 01, 02, 00, 01, 02, 01,
120
00, 01, 02, 01, 00, 00, 02, 01, 01, 02, 00, 01, 02, 01, 01, 02,
121
00, 00, 01, 02, 01, 02, 01, 00, 01, 00, 00, 02, 01, 00, 01, 02,
122
00, 01, 02, 01, 00, 00, 02, 01, 01, 00, 00, 02, 01, 00, 01, 02
125
/* Three 128-entry substitution tables mapping an encoded byte to its
 * decoded value; the row is chosen per byte through table_order.
 * An entry of 0xFF (index 0x40 in every row) is tested for by the decoders
 * and treated as the marker for a special character.
 * NOTE(review): the decoders index a row directly with an unsigned char
 * taken from the input; no check that the byte is below 128 is visible in
 * this extract - confirm one exists.
 * NOTE(review): in the third row index 0x0D holds 0x06, whereas the first
 * two rows hold 0x0D at that index - possibly a typo; verify against a
 * reference table.
 * The array has external linkage (it is not declared static). */
int decrypt_tables[3][128] = {
126
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x57, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
127
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
128
0x2E, 0x47, 0x7A, 0x56, 0x42, 0x6A, 0x2F, 0x26, 0x49, 0x41, 0x34, 0x32, 0x5B, 0x76, 0x72, 0x43,
129
0x38, 0x39, 0x70, 0x45, 0x68, 0x71, 0x4F, 0x09, 0x62, 0x44, 0x23, 0x75, 0x3C, 0x7E, 0x3E, 0x5E,
130
0xFF, 0x77, 0x4A, 0x61, 0x5D, 0x22, 0x4B, 0x6F, 0x4E, 0x3B, 0x4C, 0x50, 0x67, 0x2A, 0x7D, 0x74,
131
0x54, 0x2B, 0x2D, 0x2C, 0x30, 0x6E, 0x6B, 0x66, 0x35, 0x25, 0x21, 0x64, 0x4D, 0x52, 0x63, 0x3F,
132
0x7B, 0x78, 0x29, 0x28, 0x73, 0x59, 0x33, 0x7F, 0x6D, 0x55, 0x53, 0x7C, 0x3A, 0x5F, 0x65, 0x46,
133
0x58, 0x31, 0x69, 0x6C, 0x5A, 0x48, 0x27, 0x5C, 0x3D, 0x24, 0x79, 0x37, 0x60, 0x51, 0x20, 0x36},
135
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x7B, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
136
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
137
0x32, 0x30, 0x21, 0x29, 0x5B, 0x38, 0x33, 0x3D, 0x58, 0x3A, 0x35, 0x65, 0x39, 0x5C, 0x56, 0x73,
138
0x66, 0x4E, 0x45, 0x6B, 0x62, 0x59, 0x78, 0x5E, 0x7D, 0x4A, 0x6D, 0x71, 0x3C, 0x60, 0x3E, 0x53,
139
0xFF, 0x42, 0x27, 0x48, 0x72, 0x75, 0x31, 0x37, 0x4D, 0x52, 0x22, 0x54, 0x6A, 0x47, 0x64, 0x2D,
140
0x20, 0x7F, 0x2E, 0x4C, 0x5D, 0x7E, 0x6C, 0x6F, 0x79, 0x74, 0x43, 0x26, 0x76, 0x25, 0x24, 0x2B,
141
0x28, 0x23, 0x41, 0x34, 0x09, 0x2A, 0x44, 0x3F, 0x77, 0x3B, 0x55, 0x69, 0x61, 0x63, 0x50, 0x67,
142
0x51, 0x49, 0x4F, 0x46, 0x68, 0x7C, 0x36, 0x70, 0x6E, 0x7A, 0x2F, 0x5F, 0x4B, 0x5A, 0x2C, 0x57},
144
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x6E, 0x0A, 0x0B, 0x0C, 0x06, 0x0E, 0x0F,
145
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
146
0x2D, 0x75, 0x52, 0x60, 0x71, 0x5E, 0x49, 0x5C, 0x62, 0x7D, 0x29, 0x36, 0x20, 0x7C, 0x7A, 0x7F,
147
0x6B, 0x63, 0x33, 0x2B, 0x68, 0x51, 0x66, 0x76, 0x31, 0x64, 0x54, 0x43, 0x3C, 0x3A, 0x3E, 0x7E,
148
0xFF, 0x45, 0x2C, 0x2A, 0x74, 0x27, 0x37, 0x44, 0x79, 0x59, 0x2F, 0x6F, 0x26, 0x72, 0x6A, 0x39,
149
0x7B, 0x3F, 0x38, 0x77, 0x67, 0x53, 0x47, 0x34, 0x78, 0x5D, 0x30, 0x23, 0x5A, 0x5B, 0x6C, 0x48,
150
0x55, 0x70, 0x69, 0x2E, 0x4C, 0x21, 0x24, 0x4E, 0x50, 0x09, 0x56, 0x73, 0x35, 0x61, 0x4B, 0x58,
151
0x3B, 0x57, 0x22, 0x6D, 0x4D, 0x25, 0x28, 0x46, 0x4A, 0x32, 0x41, 0x3D, 0x5F, 0x4F, 0x42, 0x65}
154
/* Read the next line of input into a freshly allocated buffer.
 *
 * stream  - stdio stream used when reading via fgets().
 * m_area  - in-memory input; read from buffer + offset, with offset
 *           advanced by the number of bytes consumed.
 * max_len - size of the buffer allocated for the line; at most
 *           max_len-1 bytes of data are stored, followed by '\0'.
 *
 * The buffer comes from cli_malloc(max_len). The callers visible in this
 * file treat a NULL result as "no more input".
 * NOTE(review): this extract omits the branch conditions, so which of
 * m_area/stream takes priority when both are supplied cannot be stated
 * from here beyond the original "memory buffer first" comment. */
static unsigned char *cli_readline(FILE *stream, m_area_t *m_area, unsigned int max_len)
156
unsigned char *line, *ptr, *start, *end;
157
unsigned int line_len, count;
159
line = (unsigned char *) cli_malloc(max_len);
164
/* Try to use the memory buffer first */
166
start = ptr = m_area->buffer + m_area->offset;
167
end = m_area->buffer + m_area->length;
173
/* Scan forward until end of data, a newline, or the buffer limit. */
while ((ptr < end) && (*ptr != '\n') && (line_len < (max_len-1))) {
179
memcpy(line, start, line_len);
180
line[line_len] = '\0';
181
} else if (*ptr == '\n') {
182
memcpy(line, start, line_len);
183
line[line_len] = '\0';
186
/* Store the current line end and length */
188
/* No newline within the limit: back up to a whitespace character so the
 * line is not split in the middle of a word. */
while (!isspace(*ptr) && (line_len > 1)) {
195
memcpy(line, start, line_len);
196
line[line_len] = '\0';
198
/* Remember how much of the memory area has been consumed. */
m_area->offset += line_len;
201
cli_dbgmsg("No HTML stream\n");
205
if (fgets(line, max_len, stream) == NULL) {
210
line_len=strlen(line);
215
/* fgets() filled the whole buffer, so the line may have been truncated. */
if (line_len == max_len-1) {
216
/* didn't find a whole line - rewind to a space */
218
while (!isspace(line[--line_len])) {
224
/* Reposition the stream by count so the cut-off tail is read again next
 * time (count is computed in lines not shown here). */
fseek(stream, count, SEEK_CUR);
225
line[line_len+1] = '\0';
231
/* Write any bytes pending in fbuff->buffer to fbuff->fd using cli_writen().
 * Does nothing when fbuff is NULL or holds no data. */
static void html_output_flush(file_buff_t *fbuff)
233
if (fbuff && (fbuff->length > 0)) {
234
cli_writen(fbuff->fd, fbuff->buffer, fbuff->length);
239
/* Append the single byte c to fbuff1 and to fbuff2, flushing a buffer
 * first when it is already full (length == HTML_FILE_BUFF_LEN).
 * NOTE(review): callers in this file pass NULL for fbuff2; the NULL guards
 * are presumably in the lines omitted from this extract - confirm. */
static void html_output_c(file_buff_t *fbuff1, file_buff_t *fbuff2, unsigned char c)
242
if (fbuff1->length == HTML_FILE_BUFF_LEN) {
243
html_output_flush(fbuff1);
245
fbuff1->buffer[fbuff1->length++] = c;
248
if (fbuff2->length == HTML_FILE_BUFF_LEN) {
249
html_output_flush(fbuff2);
251
fbuff2->buffer[fbuff2->length++] = c;
255
/* Append len bytes from str to fbuff.
 * The buffer is flushed first when the new data would not fit. Data at
 * least as large as the whole buffer bypasses it and is written straight
 * to fbuff->fd with cli_writen(). */
static void html_output_str(file_buff_t *fbuff, unsigned char *str, int len)
258
if ((fbuff->length + len) >= HTML_FILE_BUFF_LEN) {
259
html_output_flush(fbuff);
261
if (len >= HTML_FILE_BUFF_LEN) {
262
html_output_flush(fbuff);
263
cli_writen(fbuff->fd, str, len);
265
memcpy(fbuff->buffer + fbuff->length, str, len);
266
fbuff->length += len;
271
/* Look up an argument by exact name (strcmp) in tags and return its stored
 * value. The returned pointer is the entry held in tags->value, not a
 * copy, so the caller must not free it. */
static char *html_tag_arg_value(tag_arguments_t *tags, char *tag)
275
for (i=0; i < tags->count; i++) {
276
if (strcmp(tags->tag[i], tag) == 0) {
277
return tags->value[i];
283
/* Replace the value of the argument named tag: the old value is freed and
 * a strdup() copy of value is stored in its place.
 * NOTE(review): the strdup() result is not checked in the visible lines. */
static void html_tag_arg_set(tag_arguments_t *tags, char *tag, char *value)
287
for (i=0; i < tags->count; i++) {
288
if (strcmp(tags->tag[i], tag) == 0) {
289
free(tags->value[i]);
290
tags->value[i] = strdup(value);
296
/* Append a (tag, value) pair to tags.
 * Both parallel arrays are resized with cli_realloc() to hold tags->count
 * pointers, and the strings are stored as strdup() copies. Callers in this
 * file pass NULL as value for arguments that have no value.
 * The conditions that select between the three value branches below are
 * not part of this extract. */
static void html_tag_arg_add(tag_arguments_t *tags,
297
unsigned char *tag, unsigned char *value)
301
tags->tag = (unsigned char **) cli_realloc(tags->tag,
302
tags->count * sizeof(char *));
306
tags->value = (unsigned char **) cli_realloc(tags->value,
307
tags->count * sizeof(char *));
311
tags->tag[tags->count-1] = strdup(tag);
314
/* Copy the value without its first character, then cut off the last
 * character of the copy - presumably stripping surrounding quotes. */
tags->value[tags->count-1] = strdup(value+1);
315
len = strlen(value+1);
317
tags->value[tags->count-1][len-1] = '\0';
320
/* Store the value unchanged. */
tags->value[tags->count-1] = strdup(value);
323
/* No value stored for this argument. */
tags->value[tags->count-1] = NULL;
328
/* Bad error - can't do 100% recovery */
330
for (i=0; i < tags->count; i++) {
335
free(tags->value[i]);
344
tags->tag = tags->value = NULL;
349
/* Write a start tag to fbuff in the form <tag name="value" ...>.
 * Argument names are written as stored; argument values are passed through
 * tolower() byte by byte. Arguments whose value is NULL are written as a
 * bare name. */
static void html_output_tag(file_buff_t *fbuff, char *tag, tag_arguments_t *tags)
353
html_output_c(fbuff, NULL, '<');
354
html_output_str(fbuff, tag, strlen(tag));
355
for (i=0; i < tags->count; i++) {
356
html_output_c(fbuff, NULL, ' ');
357
html_output_str(fbuff, tags->tag[i], strlen(tags->tag[i]));
358
if (tags->value[i]) {
359
html_output_str(fbuff, "=\"", 2);
360
len = strlen(tags->value[i]);
361
for (j=0 ; j<len ; j++) {
362
html_output_c(fbuff, NULL, tolower(tags->value[i][j]));
364
html_output_c(fbuff, NULL, '"');
367
html_output_c(fbuff, NULL, '>');
370
/* Release the strings stored in tags and reset both array pointers to
 * NULL. Values may be NULL (arguments without a value), hence the check
 * before freeing each one. */
void html_tag_arg_free(tag_arguments_t *tags)
374
for (i=0; i < tags->count; i++) {
376
if (tags->value[i]) {
377
free(tags->value[i]);
386
tags->tag = tags->value = NULL;
390
/* Core HTML normaliser.
 *
 * Input is read one line at a time through cli_readline(), either from a
 * stdio stream opened with fdopen() or from the memory area m_area, and
 * fed byte by byte through a state machine (state / next_state).
 *
 * Output files created under dirname:
 *   comment.html    (file_buff_o1)     - normalised text
 *   nocomment.html  (file_buff_o2)     - same, with comment openers rewound
 *   script.html     (file_buff_script) - script-related output
 *   rfc2397/<tmp>   (file_tmp_o1)      - one file per inline "data:" value
 *
 * fd      - input descriptor; the wrappers in this file pass -1 when
 *           m_area is supplied.
 * m_area  - in-memory input, or NULL when reading from fd.
 * dirname - directory receiving the output files.
 * hrefs   - receives link-like arguments (a/href, img/src, img/dynsrc,
 *           iframe/src) found while processing tags.
 *
 * Returns retval, which starts out as FALSE; the line setting it on
 * success is not part of this extract.
 *
 * NOTE(review): many lines (braces, case labels, guards) are missing from
 * this extract; the comments below describe only what the visible lines
 * show. */
static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag_arguments_t *hrefs)
392
int fd_tmp, tag_length, tag_arg_length, binary;
393
int retval=FALSE, escape, value, hex, tag_val_length, table_pos, in_script=FALSE;
395
html_state state=HTML_NORM, next_state=HTML_BAD_STATE;
396
char filename[1024], tag[HTML_STR_LENGTH+1], tag_arg[HTML_STR_LENGTH+1];
397
char tag_val[HTML_STR_LENGTH+1], *tmp_file;
398
unsigned char *line, *ptr, *arg_value;
399
tag_arguments_t tag_args;
401
unsigned long length;
402
file_buff_t *file_buff_o1, *file_buff_o2, *file_buff_script;
403
file_buff_t *file_tmp_o1;
407
cli_dbgmsg("Invalid HTML fd\n");
410
/* Start reading from the beginning of the descriptor. */
lseek(fd, 0, SEEK_SET);
415
stream_in = fdopen(fd_tmp, "r");
424
tag_args.value = NULL;
427
/* Sub-directory that will hold extracted RFC2397 data files. */
snprintf(filename, 1024, "%s/rfc2397", dirname);
428
if (mkdir(filename, 0700)) {
429
file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
432
/* Allocate the three output buffers; each failure path resets all three
 * pointers to NULL. */
file_buff_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
434
file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
438
file_buff_o2 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
441
file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
445
file_buff_script = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
446
if (!file_buff_script) {
449
file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
453
snprintf(filename, 1024, "%s/comment.html", dirname);
454
file_buff_o1->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
455
/* NOTE(review): open() reports failure with -1, so this test (repeated
 * for the other two files below) only fires for descriptor 0 - confirm
 * and consider comparing against -1. */
if (!file_buff_o1->fd) {
456
cli_dbgmsg("open failed: %s\n", filename);
459
free(file_buff_script);
460
file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
464
snprintf(filename, 1024, "%s/nocomment.html", dirname);
465
file_buff_o2->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
466
if (!file_buff_o2->fd) {
467
cli_dbgmsg("open failed: %s\n", filename);
468
close(file_buff_o1->fd);
471
free(file_buff_script);
472
file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
476
snprintf(filename, 1024, "%s/script.html", dirname);
477
file_buff_script->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
478
if (!file_buff_script->fd) {
479
cli_dbgmsg("open failed: %s\n", filename);
480
close(file_buff_o1->fd);
481
close(file_buff_o2->fd);
484
free(file_buff_script);
485
file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
489
/* All three buffers start empty. */
file_buff_o1->length = 0;
490
file_buff_o2->length = 0;
491
file_buff_script->length = 0;
495
file_buff_script = NULL;
500
/* Fetch the first input line (line buffer size 8192). */
ptr = line = cli_readline(stream_in, m_area, 8192);
502
/* Skip leading whitespace. */
while (*ptr && isspace(*ptr)) {
506
if (!binary && *ptr == '\n') {
507
/* Convert it to a space and re-process */
511
if (!binary && *ptr == '\r') {
517
/* An engine error has occurred */
518
cli_dbgmsg("HTML Engine Error\n");
520
case HTML_SKIP_LENGTH:
532
next_state = HTML_BAD_STATE;
539
html_output_c(file_buff_o1, file_buff_o2, ' ');
541
next_state = HTML_BAD_STATE;
546
/* Plain-text handling (the case label is not in this extract): '<' opens
 * a tag, whitespace is trimmed, '&' starts a character reference, and any
 * other byte is written lower-cased. */
html_output_c(file_buff_o1, file_buff_o2, '<');
548
html_output_c(file_buff_script, NULL, '<');
551
state = HTML_SKIP_WS;
553
next_state = HTML_TAG;
554
} else if (isspace(*ptr)) {
555
state = HTML_TRIM_WS;
556
next_state = HTML_NORM;
557
} else if (*ptr == '&') {
558
state = HTML_CHAR_REF;
559
next_state = HTML_NORM;
562
html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
564
html_output_c(file_buff_script, NULL, tolower(*ptr));
570
/* '!' as the first character of a tag name starts a comment. */
if ((tag_length == 0) && (*ptr == '!')) {
572
html_output_c(file_buff_o1, NULL, '!');
574
html_output_c(file_buff_script, NULL, '!');
576
/* Need to rewind in the no-comment output stream */
577
if (file_buff_o2 && (file_buff_o2->length > 0)) {
578
file_buff_o2->length--;
580
state = HTML_COMMENT;
581
next_state = HTML_BAD_STATE;
583
} else if (*ptr == '>') {
584
html_output_c(file_buff_o1, file_buff_o2, '>');
586
html_output_c(file_buff_script, NULL, '>');
589
tag[tag_length] = '\0';
590
state = HTML_SKIP_WS;
591
next_state = HTML_PROCESS_TAG;
592
} else if (!isspace(*ptr)) {
593
html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
595
html_output_c(file_buff_script, NULL, tolower(*ptr));
597
/* Tag names are stored lower-cased and truncated at HTML_STR_LENGTH. */
if (tag_length < HTML_STR_LENGTH) {
598
tag[tag_length++] = tolower(*ptr);
602
tag[tag_length] = '\0';
603
state = HTML_SKIP_WS;
605
next_state = HTML_TAG_ARG;
610
html_output_c(file_buff_o1, file_buff_o2, '=');
611
tag_arg[tag_arg_length] = '\0';
613
state = HTML_SKIP_WS;
617
next_state = HTML_TAG_ARG_VAL;
618
} else if (isspace(*ptr)) {
620
tag_arg[tag_arg_length] = '\0';
621
state = HTML_SKIP_WS;
622
next_state = HTML_TAG_ARG_EQUAL;
623
} else if (*ptr == '>') {
624
html_output_c(file_buff_o1, file_buff_o2, '>');
625
/* End of tag: record a pending argument name that has no value. */
if (tag_arg_length > 0) {
626
tag_arg[tag_arg_length] = '\0';
627
html_tag_arg_add(&tag_args, tag_arg, NULL);
630
state = HTML_PROCESS_TAG;
631
next_state = HTML_BAD_STATE;
633
if (tag_arg_length == 0) {
634
/* Start of new tag - add space */
635
html_output_c(file_buff_o1, file_buff_o2,' ');
637
html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
638
if (tag_arg_length < HTML_STR_LENGTH) {
639
tag_arg[tag_arg_length++] = tolower(*ptr);
644
case HTML_TAG_ARG_EQUAL:
646
html_output_c(file_buff_o1, file_buff_o2, '=');
648
state = HTML_SKIP_WS;
652
next_state = HTML_TAG_ARG_VAL;
654
/* Argument without a value: store the bare name. */
if (tag_arg_length > 0) {
655
tag_arg[tag_arg_length] = '\0';
656
html_tag_arg_add(&tag_args, tag_arg, NULL);
659
state = HTML_TAG_ARG;
660
next_state = HTML_BAD_STATE;
663
case HTML_TAG_ARG_VAL:
/* A value that begins with data: (unquoted) or "data: (quoted) switches
 * to the RFC2397 states so the inline payload can be extracted. */
664
if ((tag_val_length == 5) && (strncmp(tag_val, "data:", 5) == 0)) {
665
/* RFC2397 inline data */
667
/* Rewind one byte so we don't recurse */
668
if (file_buff_o1 && (file_buff_o1->length > 0)) {
669
file_buff_o1->length--;
671
if (file_buff_o2 && (file_buff_o2->length > 0)) {
672
file_buff_o2->length--;
675
if (quoted != NOT_QUOTED) {
676
html_output_c(file_buff_o1, file_buff_o2, '"');
679
state = HTML_RFC2397_TYPE;
680
next_state = HTML_TAG_ARG;
681
} else if ((tag_val_length == 6) && (strncmp(tag_val, "\"data:", 6) == 0)) {
682
/* RFC2397 inline data */
684
/* Rewind one byte so we don't recurse */
685
if (file_buff_o1 && (file_buff_o1->length > 0)) {
686
file_buff_o1->length--;
688
if (file_buff_o2 && (file_buff_o2->length > 0)) {
689
file_buff_o2->length--;
692
if (quoted != NOT_QUOTED) {
693
html_output_c(file_buff_o1, file_buff_o2, '"');
697
state = HTML_RFC2397_TYPE;
698
next_state = HTML_TAG_ARG;
699
} else if (*ptr == '&') {
700
state = HTML_CHAR_REF;
701
next_state = HTML_TAG_ARG_VAL;
703
} else if (*ptr == '\'') {
/* Single quotes delimiting a value are written out and stored as double
 * quotes. */
704
if (tag_val_length == 0) {
705
quoted = SINGLE_QUOTED;
706
html_output_c(file_buff_o1, file_buff_o2, '"');
707
if (tag_val_length < HTML_STR_LENGTH) {
708
tag_val[tag_val_length++] = '"';
712
/* An unescaped matching quote closes the value. */
if (!escape && (quoted==SINGLE_QUOTED)) {
713
html_output_c(file_buff_o1, file_buff_o2, '"');
714
if (tag_val_length < HTML_STR_LENGTH) {
715
tag_val[tag_val_length++] = '"';
717
tag_val[tag_val_length] = '\0';
718
html_tag_arg_add(&tag_args, tag_arg, tag_val);
720
state = HTML_SKIP_WS;
722
next_state = HTML_TAG_ARG;
724
html_output_c(file_buff_o1, file_buff_o2, '"');
725
if (tag_val_length < HTML_STR_LENGTH) {
726
tag_val[tag_val_length++] = '"';
731
} else if (*ptr == '"') {
732
if (tag_val_length == 0) {
733
quoted = DOUBLE_QUOTED;
734
html_output_c(file_buff_o1, file_buff_o2, '"');
735
if (tag_val_length < HTML_STR_LENGTH) {
736
tag_val[tag_val_length++] = '"';
740
if (!escape && (quoted==DOUBLE_QUOTED)) {
741
html_output_c(file_buff_o1, file_buff_o2, '"');
742
if (tag_val_length < HTML_STR_LENGTH) {
743
tag_val[tag_val_length++] = '"';
745
tag_val[tag_val_length] = '\0';
746
html_tag_arg_add(&tag_args, tag_arg, tag_val);
748
state = HTML_SKIP_WS;
750
next_state = HTML_TAG_ARG;
752
html_output_c(file_buff_o1, file_buff_o2, '"');
753
if (tag_val_length < HTML_STR_LENGTH) {
754
tag_val[tag_val_length++] = '"';
759
} else if (isspace(*ptr) || (*ptr == '>')) {
/* Whitespace or '>' ends an unquoted value; inside quotes it is kept as
 * part of the value. */
760
if (quoted == NOT_QUOTED) {
761
tag_val[tag_val_length] = '\0';
762
html_tag_arg_add(&tag_args, tag_arg, tag_val);
763
state = HTML_SKIP_WS;
765
next_state = HTML_TAG_ARG;
767
html_output_c(file_buff_o1, file_buff_o2, *ptr);
768
if (tag_val_length < HTML_STR_LENGTH) {
770
tag_val[tag_val_length++] = ' ';
772
tag_val[tag_val_length++] = '>';
775
state = HTML_SKIP_WS;
778
next_state = HTML_TAG_ARG_VAL;
782
/* Ordinary value byte: the output is lower-cased, the stored copy keeps
 * its original case. */
html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
783
if (tag_val_length < HTML_STR_LENGTH) {
784
tag_val[tag_val_length++] = *ptr;
796
html_output_c(file_buff_o1, NULL, tolower(*ptr));
798
html_output_c(file_buff_script, NULL, tolower(*ptr));
801
state = HTML_SKIP_WS;
802
next_state = HTML_NORM;
806
case HTML_PROCESS_TAG:
808
/* Default to no action for this tag */
809
state = HTML_SKIP_WS;
810
next_state = HTML_NORM;
813
state = HTML_SKIP_WS;
814
next_state = HTML_NORM;
815
if (strcmp(tag, "/script") == 0) {
817
html_output_c(file_buff_script, NULL, '\n');
819
} else if (strcmp(tag, "script") == 0) {
/* Encoded script languages are renamed to their plain equivalents and the
 * body is routed through the HTML_JSDECODE state. */
820
arg_value = html_tag_arg_value(&tag_args, "language");
821
if (arg_value && (strcasecmp(arg_value, "jscript.encode") == 0)) {
822
html_tag_arg_set(&tag_args, "language", "javascript");
823
state = HTML_SKIP_WS;
824
next_state = HTML_JSDECODE;
825
} else if (arg_value && (strcasecmp(arg_value, "vbscript.encode") == 0)) {
826
html_tag_arg_set(&tag_args, "language", "vbscript");
827
state = HTML_SKIP_WS;
828
next_state = HTML_JSDECODE;
832
html_output_tag(file_buff_script, tag, &tag_args);
834
/* Collect non-empty link targets into hrefs. */
if (strcmp(tag, "a") == 0) {
835
arg_value = html_tag_arg_value(&tag_args, "href");
836
if (arg_value && strlen(arg_value) > 0) {
837
html_tag_arg_add(hrefs, "href", arg_value);
839
} else if (strcmp(tag, "img") == 0) {
840
arg_value = html_tag_arg_value(&tag_args, "src");
841
if (arg_value && strlen(arg_value) > 0) {
842
html_tag_arg_add(hrefs, "src", arg_value);
844
arg_value = html_tag_arg_value(&tag_args, "dynsrc");
845
if (arg_value && strlen(arg_value) > 0) {
846
html_tag_arg_add(hrefs, "dynsrc", arg_value);
848
} else if (strcmp(tag, "iframe") == 0) {
849
arg_value = html_tag_arg_value(&tag_args, "src");
850
if (arg_value && strlen(arg_value) > 0) {
851
/* The iframe source is recorded under the name "iframe", not "src". */
html_tag_arg_add(hrefs, "iframe", arg_value);
855
html_tag_arg_free(&tag_args);
861
state = HTML_CHAR_REF_DECODE;
864
html_output_c(file_buff_o1, file_buff_o2, '&');
866
next_state = HTML_BAD_STATE;
869
case HTML_CHAR_REF_DECODE:
/* Accumulate a numeric character reference in value: a leading x/X
 * selects hexadecimal, and ';' ends the reference and emits the decoded
 * byte. */
870
if ((value==0) && ((*ptr == 'x') || (*ptr == 'X'))) {
873
} else if (*ptr == ';') {
874
html_output_c(file_buff_o1, file_buff_o2, value);
876
next_state = HTML_BAD_STATE;
878
} else if (isdigit(*ptr) || (hex && isxdigit(*ptr))) {
885
value += (*ptr - '0');
887
value += (tolower(*ptr) - 'a' + 10);
891
html_output_c(file_buff_o1, file_buff_o2, value);
893
next_state = HTML_BAD_STATE;
897
/* Check for start marker */
898
if (strncmp(ptr, "#@~^", 4) == 0) {
900
state = HTML_JSDECODE_LENGTH;
901
next_state = HTML_BAD_STATE;
903
html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
904
html_output_c(file_buff_script, NULL, tolower(*ptr));
908
case HTML_JSDECODE_LENGTH:
/* The encoded length follows the start marker; at least 8 more characters
 * must be available on this line. */
909
if (strlen(ptr) < 8) {
911
next_state = HTML_BAD_STATE;
914
/* Rebuild the length from six base64 characters.
 * NOTE(review): base64_chars yields -1 for non-base64 bytes and the
 * values are shifted unchecked here; the shifts by 24 are also applied to
 * int operands - confirm behaviour on invalid or large input. */
length = base64_chars[ptr[0]] << 2;
915
length += base64_chars[ptr[1]] >> 4;
916
length += (base64_chars[ptr[1]] & 0x0f) << 12;
917
length += (base64_chars[ptr[2]] >> 2) << 8;
918
length += (base64_chars[ptr[2]] & 0x03) << 22;
919
length += base64_chars[ptr[3]] << 16;
920
length += (base64_chars[ptr[4]] << 2) << 24;
921
length += (base64_chars[ptr[5]] >> 4) << 24;
923
state = HTML_JSDECODE_DECRYPT;
924
next_state = HTML_BAD_STATE;
927
case HTML_JSDECODE_DECRYPT:
929
html_output_str(file_buff_script, "</script>\n", 10);
931
state = HTML_SKIP_LENGTH;
932
next_state = HTML_NORM;
936
/* Translate one encoded byte with the table scheduled for this position.
 * NOTE(review): *ptr is an unsigned char and each table row has 128
 * entries; no bound check is visible in this extract - confirm. */
value = decrypt_tables[table_order[table_pos]][*ptr];
937
if (value == 0xFF) { /* special character */
942
/* Fixup for end of line */
946
/* Special characters are emitted as one of '<', CR, '@', LF or '>'. */
html_output_c(file_buff_o1, file_buff_o2, 0x3c);
947
html_output_c(file_buff_script, NULL, 0x3c);
950
html_output_c(file_buff_o1, file_buff_o2, 0x0d);
951
html_output_c(file_buff_script, NULL, 0x0d);
954
html_output_c(file_buff_o1, file_buff_o2, 0x40);
955
html_output_c(file_buff_script, NULL, 0x40);
958
html_output_c(file_buff_o1, file_buff_o2, 0x0a);
959
html_output_c(file_buff_script, NULL, 0x0a);
962
html_output_c(file_buff_o1, file_buff_o2, 0x3e);
963
html_output_c(file_buff_script, NULL, 0x3e);
967
/* Decoded bytes are lower-cased only in the script output. */
html_output_c(file_buff_o1, file_buff_o2, value);
968
html_output_c(file_buff_script, NULL, tolower(value));
971
table_pos = (table_pos + 1) % 64;
976
case HTML_RFC2397_TYPE:
/* Collect the media-type part of a data: value into tag_val until the ','
 * that starts the payload. */
978
if (!escape && (quoted==SINGLE_QUOTED)) {
979
/* Early end of data detected. Error */
981
state = HTML_SKIP_WS;
983
next_state = HTML_TAG_ARG;
985
if (tag_val_length < HTML_STR_LENGTH) {
986
tag_val[tag_val_length++] = '"';
990
} else if (*ptr == '"') {
991
if (!escape && (quoted==DOUBLE_QUOTED)) {
992
/* Early end of data detected. Error */
994
state = HTML_SKIP_WS;
996
next_state = HTML_TAG_ARG;
998
if (tag_val_length < HTML_STR_LENGTH) {
999
tag_val[tag_val_length++] = '"';
1003
} else if (isspace(*ptr) || (*ptr == '>')) {
1004
if (quoted == NOT_QUOTED) {
1005
/* Early end of data detected. Error */
1006
state = HTML_SKIP_WS;
1008
next_state = HTML_TAG_ARG;
1010
if (tag_val_length < HTML_STR_LENGTH) {
1011
if (isspace(*ptr)) {
1012
tag_val[tag_val_length++] = ' ';
1014
tag_val[tag_val_length++] = '>';
1017
state = HTML_SKIP_WS;
1019
quoted = NOT_QUOTED;
1020
next_state = HTML_RFC2397_TYPE;
1023
} else if (*ptr == ',') {
1024
/* Beginning of data */
1025
tag_val[tag_val_length] = '\0';
1026
state = HTML_RFC2397_INIT;
1028
next_state = HTML_BAD_STATE;
1032
if (tag_val_length < HTML_STR_LENGTH) {
1033
tag_val[tag_val_length++] = tolower(*ptr);
1043
case HTML_RFC2397_INIT:
/* Open a temp file under dirname/rfc2397 and write a mail-style header
 * (From line, Content-type, optional base64 transfer encoding) ahead of
 * the payload. */
1044
file_tmp_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
1048
snprintf(filename, 1024, "%s/rfc2397", dirname);
1049
tmp_file = cli_gentemp(filename);
1050
cli_dbgmsg("RFC2397 data file: %s\n", tmp_file);
1051
file_tmp_o1->fd = open(tmp_file, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
1053
/* NOTE(review): same open() check as above (failure is -1, not 0); the
 * message also prints filename (the directory) rather than tmp_file. */
if (!file_tmp_o1->fd) {
1054
cli_dbgmsg("open failed: %s\n", filename);
1058
file_tmp_o1->length = 0;
1060
html_output_str(file_tmp_o1, "From html-normalise\n", 20);
1061
html_output_str(file_tmp_o1, "Content-type: ", 14);
1062
/* NOTE(review): tag_val is terminated at index tag_val_length before this
 * state is entered, so with a length of 0 its first byte would be NUL and
 * this test looks unsatisfiable - confirm whether || was intended. */
if ((tag_val_length == 0) && (*tag_val == ';')) {
1063
html_output_str(file_tmp_o1, "text/plain\n", 11);
1065
html_output_str(file_tmp_o1, tag_val, tag_val_length);
1066
html_output_c(file_tmp_o1, NULL, '\n');
1067
if (strstr(tag_val, ";base64") != NULL) {
1068
html_output_str(file_tmp_o1, "Content-transfer-encoding: base64\n", 34);
1070
html_output_c(file_tmp_o1, NULL, '\n');
1071
state = HTML_RFC2397_DATA;
1074
case HTML_RFC2397_DATA:
/* Copy payload bytes to the temp file, decoding character references and
 * %-escapes, until the closing delimiter for the current quoting mode. */
1076
state = HTML_CHAR_REF;
1077
next_state = HTML_RFC2397_DATA;
1079
} else if (*ptr == '%') {
1082
state = HTML_ESCAPE_CHAR;
1083
next_state = HTML_RFC2397_ESC;
1085
} else if (*ptr == '\'') {
1086
if (!escape && (quoted==SINGLE_QUOTED)) {
1087
state = HTML_RFC2397_FINISH;
1090
html_output_c(file_tmp_o1, NULL, *ptr);
1093
} else if (*ptr == '\"') {
1094
/* NOTE(review): quoted=DOUBLE_QUOTED is an assignment, unlike the ==
 * comparison used in the single-quote branch above - confirm whether a
 * comparison was intended. */
if (!escape && (quoted=DOUBLE_QUOTED)) {
1095
state = HTML_RFC2397_FINISH;
1098
html_output_c(file_tmp_o1, NULL, *ptr);
1101
} else if (isspace(*ptr) || (*ptr == '>')) {
1102
if (quoted == NOT_QUOTED) {
1103
state = HTML_RFC2397_FINISH;
1106
html_output_c(file_tmp_o1, NULL, *ptr);
1110
html_output_c(file_tmp_o1, NULL, *ptr);
1119
case HTML_RFC2397_FINISH:
/* Payload complete: flush and close the temp file, then resume argument
 * parsing. */
1120
html_output_flush(file_tmp_o1);
1121
close(file_tmp_o1->fd);
1123
state = HTML_SKIP_WS;
1125
quoted = NOT_QUOTED;
1126
next_state = HTML_TAG_ARG;
1129
case HTML_RFC2397_ESC:
/* Emit the result of a %-escape; when the escape was incomplete the '%'
 * is written back literally (with the one digit read, if any). */
1131
html_output_c(file_tmp_o1, NULL, value);
1132
} else if (length == 1) {
1133
html_output_c(file_tmp_o1, NULL, '%');
1134
html_output_c(file_tmp_o1, NULL, value+'0');
1136
html_output_c(file_tmp_o1, NULL, '%');
1138
state = HTML_RFC2397_DATA;
1140
case HTML_ESCAPE_CHAR:
/* Accumulate the hexadecimal digits of a %-escape into value. */
1143
if (isxdigit(*ptr)) {
1144
if (isdigit(*ptr)) {
1145
value += (*ptr - '0');
1147
value += (tolower(*ptr) - 'a' + 10);
1160
/* Line consumed: fetch the next one. */
ptr = line = cli_readline(stream_in, m_area, 8192);
1165
html_tag_arg_free(&tag_args);
1170
/* Flush and close every output file. */
html_output_flush(file_buff_o1);
1171
close(file_buff_o1->fd);
1175
html_output_flush(file_buff_o2);
1176
close(file_buff_o2->fd);
1179
if (file_buff_script) {
1180
html_output_flush(file_buff_script);
1181
close(file_buff_script->fd);
1182
free(file_buff_script);
1187
/* Normalise HTML held in memory: wraps in_buff/in_size in an m_area_t and
 * runs cli_html_normalise() with fd set to -1.
 * Returns whatever cli_html_normalise() returns. */
int html_normalise_mem(unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs)
1191
m_area.buffer = in_buff;
1192
m_area.length = in_size;
1195
return cli_html_normalise(-1, &m_area, dirname, hrefs);
1198
/* Normalise HTML read from the descriptor fd.
 * The file is mapped read-only with mmap() and processed from memory when
 * possible; if fstat() or mmap() fails, processing falls back to reading
 * through the descriptor.
 * Returns whatever cli_html_normalise() returns. */
int html_normalise_fd(int fd, const char *dirname, tag_arguments_t *hrefs)
1203
struct stat statbuf;
1205
if (fstat(fd, &statbuf) == 0) {
1206
m_area.length = statbuf.st_size;
1207
m_area.buffer = (unsigned char *) mmap(NULL, m_area.length, PROT_READ, MAP_PRIVATE, fd, 0);
1209
if (m_area.buffer == MAP_FAILED) {
1210
cli_dbgmsg("mmap HTML failed\n");
1211
retval = cli_html_normalise(fd, NULL, dirname, hrefs);
1213
cli_dbgmsg("mmap'ed file\n");
1214
retval = cli_html_normalise(-1, &m_area, dirname, hrefs);
1215
/* Release the mapping once normalisation has finished. */
munmap(m_area.buffer, m_area.length);
1218
cli_dbgmsg("fstat HTML failed\n");
1219
retval = cli_html_normalise(fd, NULL, dirname, hrefs);
1223
/* Presumably the variant compiled when mmap() is unavailable (the
 * preprocessor lines are not in this extract) - confirm. */
return cli_html_normalise(fd, NULL, dirname, hrefs);
1227
/* Decode a Script Encoder protected file.
 * Reads fd line by line, looks for the "#@~^" start marker, decodes the
 * length that follows it, then translates the encoded bytes through
 * decrypt_tables and writes the result to <dirname>/screnc.html.
 *
 * Returns retval, which starts out as FALSE; the line setting it on
 * success is not part of this extract.
 * NOTE(review): many lines are missing from this extract; comments
 * describe only what is visible. */
int html_screnc_decode(int fd, const char *dirname)
1229
int fd_tmp, table_pos=0, result, count, state, retval=FALSE;
1230
unsigned char *line, tmpstr[6];
1231
unsigned long length;
1232
unsigned char *ptr, filename[1024];
1234
file_buff_t file_buff;
1236
/* Start reading from the beginning of the descriptor. */
lseek(fd, 0, SEEK_SET);
1241
stream_in = fdopen(fd_tmp, "r");
1247
snprintf(filename, 1024, "%s/screnc.html", dirname);
1248
file_buff.fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
1249
file_buff.length = 0;
1251
/* NOTE(review): open() reports failure with -1, so this test only fires
 * for descriptor 0 - confirm and consider comparing against -1. */
if (!file_buff.fd) {
1252
cli_dbgmsg("open failed: %s\n", filename);
1257
/* Scan lines until one contains the start marker. */
while ((line = cli_readline(stream_in, NULL, 8192)) != NULL) {
1258
ptr = strstr(line, "#@~^");
1268
/* Calculate the length of the encoded string */
1274
ptr = line = cli_readline(stream_in, NULL, 8192);
1279
/* Gather the six length characters, which may span lines. */
tmpstr[count++] = *ptr;
1281
} while (count < 6);
1283
/* NOTE(review): base64_chars yields -1 for non-base64 bytes and the
 * values are shifted unchecked here - confirm invalid input is handled. */
length = base64_chars[tmpstr[0]] << 2;
1284
length += base64_chars[tmpstr[1]] >> 4;
1285
length += (base64_chars[tmpstr[1]] & 0x0f) << 12;
1286
length += (base64_chars[tmpstr[2]] >> 2) << 8;
1287
length += (base64_chars[tmpstr[2]] & 0x03) << 22;
1288
length += base64_chars[tmpstr[3]] << 16;
1289
length += (base64_chars[tmpstr[4]] << 2) << 24;
1290
length += (base64_chars[tmpstr[5]] >> 4) << 24;
1292
/* Move forward 2 bytes */
1294
state = HTML_SKIP_LENGTH;
1296
/* Decode until the announced length is used up or input runs out. */
while (length && line) {
1297
while (length && *ptr) {
1298
if ((*ptr == '\n') || (*ptr == '\r')) {
1303
case HTML_SKIP_LENGTH:
1310
case HTML_SPECIAL_CHAR:
1313
/* Special characters are emitted as one of '<', CR, '@', LF or '>'. */
html_output_c(&file_buff, NULL, 0x3c);
1316
html_output_c(&file_buff, NULL, 0x0d);
1319
html_output_c(&file_buff, NULL, 0x40);
1322
html_output_c(&file_buff, NULL, 0x0a);
1325
html_output_c(&file_buff, NULL, 0x3e);
1334
/* NOTE(review): *ptr is an unsigned char and each table row has 128
 * entries; no bound check is visible in this extract - confirm. */
result = decrypt_tables[table_order[table_pos]][*ptr];
1335
if (result == 0xFF) { /* special character */
1336
state = HTML_SPECIAL_CHAR;
1338
html_output_c(&file_buff, NULL, (char)result);
1343
table_pos = (table_pos + 1) % 64;
1349
ptr = line = cli_readline(stream_in, NULL, 8192);
1356
/* Write out whatever is still buffered and close the output file. */
html_output_flush(&file_buff);
1357
close(file_buff.fd);