~ubuntu-branches/ubuntu/trusty/clamav/trusty-proposed

« back to all changes in this revision

Viewing changes to libclamav/htmlnorm.c

  • Committer: Bazaar Package Importer
  • Author(s): Stephen Gran
  • Date: 2005-09-19 09:05:59 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20050919090559-hikpqduq8yx5qxo2
Tags: 0.87-1
* New upstream version
  - Fixes CAN-2005-2920 and CAN-2005-2919 (closes: #328660)
* New logcheck line for clamav-daemon (closes: #323132)
* relibtoolize and apply kfreebsd patch (closes: #327707)
* Make sure init.d script starts freshclam up again after upgrade when run
  from if-up.d (closes: #328912)

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 *  Normalise HTML text.
 
3
 *  Decode MS Script Encoder protection. 
 
4
 *
 
5
 *  Copyright (C) 2004 trog@uncon.org
 
6
 *
 
7
 *  The ScrEnc decoder was initially based upon an analysis by Andreas Marx.
 
8
 *
 
9
 *  This program is free software; you can redistribute it and/or modify
 
10
 *  it under the terms of the GNU General Public License as published by
 
11
 *  the Free Software Foundation; either version 2 of the License, or
 
12
 *  (at your option) any later version.
 
13
 *
 
14
 *  This program is distributed in the hope that it will be useful,
 
15
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
16
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
17
 *  GNU General Public License for more details.
 
18
 *
 
19
 *  You should have received a copy of the GNU General Public License
 
20
 *  along with this program; if not, write to the Free Software
 
21
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 
22
 */
 
23
 
 
24
#include <stdio.h>
 
25
#include <unistd.h>
 
26
#include <sys/types.h>
 
27
#include <sys/stat.h>
 
28
#include <fcntl.h>
 
29
#include <string.h>
 
30
#include <errno.h>
 
31
#include <stdio.h>
 
32
#include <ctype.h>
 
33
 
 
34
#if HAVE_CONFIG_H
 
35
#include "clamav-config.h"
 
36
#endif
 
37
 
 
38
#if HAVE_MMAP
 
39
#if HAVE_SYS_MMAN_H
 
40
#include <sys/mman.h>
 
41
#else /* HAVE_SYS_MMAN_H */
 
42
#undef HAVE_MMAP
 
43
#endif
 
44
#endif
 
45
 
 
46
#include "others.h"
 
47
#include "htmlnorm.h"
 
48
 
 
49
#define HTML_STR_LENGTH 1024
 
50
#define FALSE (0)
 
51
#define TRUE (1)
 
52
 
 
53
/*
 * States of the HTML normaliser's state machine (see the switch on
 * 'state' in cli_html_normalise). HTML_BAD_STATE is also used as the
 * "no follow-up state" marker for next_state and is reported as an
 * engine error if it is ever entered.
 */
typedef enum {
    HTML_BAD_STATE = 0,
    HTML_NORM,
    HTML_COMMENT,
    HTML_CHAR_REF,
    HTML_SKIP_WS,
    HTML_TRIM_WS,
    HTML_TAG,
    HTML_TAG_ARG,
    HTML_TAG_ARG_VAL,
    HTML_TAG_ARG_EQUAL,
    HTML_PROCESS_TAG,
    HTML_CHAR_REF_DECODE,
    HTML_SKIP_LENGTH,
    HTML_JSDECODE,
    HTML_JSDECODE_LENGTH,
    HTML_JSDECODE_DECRYPT,
    HTML_SPECIAL_CHAR,
    HTML_RFC2397_TYPE,
    HTML_RFC2397_INIT,
    HTML_RFC2397_DATA,
    HTML_RFC2397_FINISH,
    HTML_RFC2397_ESC,
    HTML_ESCAPE_CHAR
} html_state;
 
78
 
 
79
/* Quoting style of the tag-argument value currently being parsed. */
typedef enum {
    SINGLE_QUOTED = 0,
    DOUBLE_QUOTED,
    NOT_QUOTED
} quoted_state;
 
84
 
 
85
/*
 * In-memory input region consumed by cli_readline().
 */
typedef struct m_area_tag {
        unsigned char *buffer;  /* start of the data */
        off_t length;           /* total number of bytes in buffer */
        off_t offset;           /* read position; advanced by cli_readline() */
} m_area_t;
 
90
 
 
91
/* Capacity, in bytes, of each buffered output file. */
#define HTML_FILE_BUFF_LEN 8192

/*
 * Buffered output file: bytes accumulate in 'buffer' and are written to
 * 'fd' by html_output_flush().
 */
typedef struct file_buff_tag {
        int fd;                                         /* destination descriptor */
        unsigned char buffer[HTML_FILE_BUFF_LEN];       /* pending output */
        int length;                                     /* bytes currently held in buffer */
} file_buff_t;
 
98
 
 
99
/*
 * Base64 alphabet lookup, indexed by byte value: 'A'-'Z' -> 0-25,
 * 'a'-'z' -> 26-51, '0'-'9' -> 52-61, '+' -> 62, '/' -> 63.
 * Every other byte maps to -1.
 */
static const int base64_chars[256] = {
    /* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    /* 0x30 */ 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
    /* 0x40 */ -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    /* 0x50 */ 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    /* 0x60 */ -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    /* 0x70 */ 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,
    /* 0x80 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x90 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xA0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xB0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xC0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xD0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xE0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xF0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
 
117
 
 
118
/*
 * 64-entry sequence of values in the range 0..2.
 * NOTE(review): presumably selects which of the three decrypt_tables rows
 * applies at each position of the ScrEnc stream - the use site is not
 * visible in this part of the file; confirm.
 */
int table_order[] = {
       0, 2, 1, 0, 2, 1, 2, 1, 1, 2, 1, 2, 0, 1, 2, 1,
       0, 1, 2, 1, 0, 0, 2, 1, 1, 2, 0, 1, 2, 1, 1, 2,
       0, 0, 1, 2, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 1, 2,
       0, 1, 2, 1, 0, 0, 2, 1, 1, 0, 0, 2, 1, 0, 1, 2
};
 
124
 
 
125
/*
 * Three 128-entry substitution tables indexed by a 7-bit character code.
 * NOTE(review): presumably the MS Script Encoder decoding tables mentioned
 * in the file header; the code that reads them is outside this view.
 */
int decrypt_tables[3][128] = {
      /* table 0 */
      {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x57, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
       0x2E, 0x47, 0x7A, 0x56, 0x42, 0x6A, 0x2F, 0x26, 0x49, 0x41, 0x34, 0x32, 0x5B, 0x76, 0x72, 0x43,
       0x38, 0x39, 0x70, 0x45, 0x68, 0x71, 0x4F, 0x09, 0x62, 0x44, 0x23, 0x75, 0x3C, 0x7E, 0x3E, 0x5E,
       0xFF, 0x77, 0x4A, 0x61, 0x5D, 0x22, 0x4B, 0x6F, 0x4E, 0x3B, 0x4C, 0x50, 0x67, 0x2A, 0x7D, 0x74,
       0x54, 0x2B, 0x2D, 0x2C, 0x30, 0x6E, 0x6B, 0x66, 0x35, 0x25, 0x21, 0x64, 0x4D, 0x52, 0x63, 0x3F,
       0x7B, 0x78, 0x29, 0x28, 0x73, 0x59, 0x33, 0x7F, 0x6D, 0x55, 0x53, 0x7C, 0x3A, 0x5F, 0x65, 0x46,
       0x58, 0x31, 0x69, 0x6C, 0x5A, 0x48, 0x27, 0x5C, 0x3D, 0x24, 0x79, 0x37, 0x60, 0x51, 0x20, 0x36},

      /* table 1 */
      {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x7B, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
       0x32, 0x30, 0x21, 0x29, 0x5B, 0x38, 0x33, 0x3D, 0x58, 0x3A, 0x35, 0x65, 0x39, 0x5C, 0x56, 0x73,
       0x66, 0x4E, 0x45, 0x6B, 0x62, 0x59, 0x78, 0x5E, 0x7D, 0x4A, 0x6D, 0x71, 0x3C, 0x60, 0x3E, 0x53,
       0xFF, 0x42, 0x27, 0x48, 0x72, 0x75, 0x31, 0x37, 0x4D, 0x52, 0x22, 0x54, 0x6A, 0x47, 0x64, 0x2D,
       0x20, 0x7F, 0x2E, 0x4C, 0x5D, 0x7E, 0x6C, 0x6F, 0x79, 0x74, 0x43, 0x26, 0x76, 0x25, 0x24, 0x2B,
       0x28, 0x23, 0x41, 0x34, 0x09, 0x2A, 0x44, 0x3F, 0x77, 0x3B, 0x55, 0x69, 0x61, 0x63, 0x50, 0x67,
       0x51, 0x49, 0x4F, 0x46, 0x68, 0x7C, 0x36, 0x70, 0x6E, 0x7A, 0x2F, 0x5F, 0x4B, 0x5A, 0x2C, 0x57},

      /* table 2 */
      {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x6E, 0x0A, 0x0B, 0x0C, 0x06, 0x0E, 0x0F,
       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
       0x2D, 0x75, 0x52, 0x60, 0x71, 0x5E, 0x49, 0x5C, 0x62, 0x7D, 0x29, 0x36, 0x20, 0x7C, 0x7A, 0x7F,
       0x6B, 0x63, 0x33, 0x2B, 0x68, 0x51, 0x66, 0x76, 0x31, 0x64, 0x54, 0x43, 0x3C, 0x3A, 0x3E, 0x7E,
       0xFF, 0x45, 0x2C, 0x2A, 0x74, 0x27, 0x37, 0x44, 0x79, 0x59, 0x2F, 0x6F, 0x26, 0x72, 0x6A, 0x39,
       0x7B, 0x3F, 0x38, 0x77, 0x67, 0x53, 0x47, 0x34, 0x78, 0x5D, 0x30, 0x23, 0x5A, 0x5B, 0x6C, 0x48,
       0x55, 0x70, 0x69, 0x2E, 0x4C, 0x21, 0x24, 0x4E, 0x50, 0x09, 0x56, 0x73, 0x35, 0x61, 0x4B, 0x58,
       0x3B, 0x57, 0x22, 0x6D, 0x4D, 0x25, 0x28, 0x46, 0x4A, 0x32, 0x41, 0x3D, 0x5F, 0x4F, 0x42, 0x65}
};
 
153
 
 
154
/*
 * Read the next chunk of input, at most max_len-1 bytes plus a terminating
 * NUL, into a freshly allocated buffer.
 *
 * If m_area is non-NULL the data comes from the memory area and
 * m_area->offset is advanced past what was consumed; otherwise it is read
 * from 'stream' with fgets(). A chunk ends at a newline (which is kept).
 * When no newline is found within max_len-1 bytes, the chunk is cut back
 * to the last whitespace character so a token is not split, unless the
 * chunk contains no whitespace at all, in which case it is returned whole.
 *
 * Returns the buffer (allocated with cli_malloc; the failure paths here
 * release it with free(), so the caller is expected to do the same), or
 * NULL on end of input, allocation failure, a missing stream, or a
 * max_len too small to hold one byte plus the terminator.
 */
static unsigned char *cli_readline(FILE *stream, m_area_t *m_area, unsigned int max_len)
{
        unsigned char *line, *ptr, *start, *end;
        unsigned int line_len, count;
        long rewind_by;

        /* max_len-1 below must not wrap, and the buffer must have room
         * for at least one data byte plus the NUL. */
        if (max_len < 2) {
                return NULL;
        }

        line = (unsigned char *) cli_malloc(max_len);
        if (!line) {
                return NULL;
        }

        /* Try and use the memory buffer first */
        if (m_area) {
                start = ptr = m_area->buffer + m_area->offset;
                end = m_area->buffer + m_area->length;
                if (start >= end) {
                        free(line);
                        return NULL;
                }
                /* line_len counts the bytes from start up to and including *ptr */
                line_len = 1;
                while ((ptr < end) && (*ptr != '\n') && (line_len < (max_len-1))) {
                        ptr++;
                        line_len++;
                }
                if (ptr == end) {
                        /* Ran off the end of the area: ptr is one past the data */
                        line_len--;
                        memcpy(line, start, line_len);
                        line[line_len] = '\0';
                } else if (*ptr == '\n') {
                        /* Complete line; the newline is copied too */
                        memcpy(line, start, line_len);
                        line[line_len] = '\0';
                } else {
                        /* Hit max_len */
                        /* Store the current line end and length */
                        count = line_len;
                        while (!isspace(*ptr) && (line_len > 1)) {
                                ptr--;
                                line_len--;
                        }
                        if (line_len == 1) {
                                /* No whitespace to break at: return the full chunk */
                                line_len = count;
                        }
                        memcpy(line, start, line_len);
                        line[line_len] = '\0';
                }
                m_area->offset += line_len;
        } else {
                if (!stream) {
                        cli_dbgmsg("No HTML stream\n");
                        free(line);
                        return NULL;
                }
                if (fgets((char *) line, (int) max_len, stream) == NULL) {
                        free(line);
                        return NULL;
                }

                line_len = strlen((const char *) line);
                if (line_len == 0) {
                        free(line);
                        return NULL;
                }
                if (line_len == max_len-1) {
                        /* didn't find a whole line - rewind to a space */
                        /* The distance is kept in a signed long: an unsigned
                         * counter would wrap and, where long is wider than
                         * int, turn into a huge forward seek. */
                        rewind_by = 0;
                        while (!isspace(line[--line_len])) {
                                rewind_by--;
                                if (line_len == 0) {
                                        /* No whitespace at all: hand back the whole chunk */
                                        return line;
                                }
                        }
                        /* Only drop the tail if the stream really was moved
                         * back; otherwise those bytes would be lost. */
                        if (fseek(stream, rewind_by, SEEK_CUR) == 0) {
                                line[line_len+1] = '\0';
                        }
                }
        }
        return line;
}
 
230
 
 
231
/*
 * Write any bytes pending in fbuff to its file descriptor and mark the
 * buffer empty. A NULL fbuff or an empty buffer is a no-op.
 */
static void html_output_flush(file_buff_t *fbuff)
{
        if (!fbuff || fbuff->length <= 0) {
                return;
        }
        cli_writen(fbuff->fd, fbuff->buffer, fbuff->length);
        fbuff->length = 0;
}
 
238
 
 
239
/*
 * Append the single byte c to up to two output buffers. Either buffer may
 * be NULL, in which case it is skipped. A buffer that is already full is
 * flushed before the byte is stored.
 */
static void html_output_c(file_buff_t *fbuff1, file_buff_t *fbuff2, unsigned char c)
{
        file_buff_t *targets[2];
        int which;

        targets[0] = fbuff1;
        targets[1] = fbuff2;

        for (which = 0; which < 2; which++) {
                file_buff_t *out = targets[which];

                if (!out) {
                        continue;
                }
                if (out->length == HTML_FILE_BUFF_LEN) {
                        html_output_flush(out);
                }
                out->buffer[out->length++] = c;
        }
}
 
254
 
 
255
/*
 * Append len bytes from str to the output buffer fbuff (no-op when fbuff
 * is NULL). The buffer is flushed first if the new data would fill it.
 * Data at least as large as the whole buffer bypasses it and is written
 * straight to the file descriptor.
 */
static void html_output_str(file_buff_t *fbuff, unsigned char *str, int len)
{
        if (!fbuff) {
                return;
        }

        /* Make room if the pending bytes plus the new ones would not fit */
        if ((fbuff->length + len) >= HTML_FILE_BUFF_LEN) {
                html_output_flush(fbuff);
        }

        if (len < HTML_FILE_BUFF_LEN) {
                memcpy(fbuff->buffer + fbuff->length, str, len);
                fbuff->length += len;
                return;
        }

        /* Too big to buffer: write it out directly */
        html_output_flush(fbuff);
        cli_writen(fbuff->fd, str, len);
}
 
270
 
 
271
/*
 * Look up the argument named 'tag' in tags and return its stored value
 * (which may itself be NULL), or NULL if there is no such argument.
 * The first match wins.
 */
static char *html_tag_arg_value(tag_arguments_t *tags, char *tag)
{
        int idx = 0;

        while (idx < tags->count) {
                if (!strcmp(tags->tag[idx], tag)) {
                        return tags->value[idx];
                }
                idx++;
        }
        return NULL;
}
 
282
 
 
283
/*
 * Replace the value of the first argument named 'tag' with a copy of
 * 'value', releasing the previous value. Does nothing if no argument
 * has that name.
 */
static void html_tag_arg_set(tag_arguments_t *tags, char *tag, char *value)
{
        int idx;

        for (idx = 0; idx < tags->count; idx++) {
                if (strcmp(tags->tag[idx], tag) != 0) {
                        continue;
                }
                free(tags->value[idx]);
                tags->value[idx] = strdup(value);
                break;
        }
}
 
296
/*
 * Append a (tag, value) pair to tags. Both strings are copied. 'value'
 * may be NULL, in which case a NULL value is stored. A value starting
 * with a double quote is stored without that first character and without
 * its final character.
 *
 * On any allocation failure the whole argument list is released and
 * reset to empty (count 0, NULL arrays).
 */
static void html_tag_arg_add(tag_arguments_t *tags,
                unsigned char *tag, unsigned char *value)
{
        int len, i;
        unsigned char **grown;
        unsigned char *copy;

        tags->count++;

        /* Grow through a temporary so the existing array stays reachable
         * (and can be freed below) if the reallocation fails.
         * NOTE(review): assumes cli_realloc, like realloc, leaves the
         * original allocation intact on failure - confirm in others.h. */
        grown = (unsigned char **) cli_realloc(tags->tag,
                                tags->count * sizeof(char *));
        if (!grown) {
                goto abort;
        }
        tags->tag = grown;

        grown = (unsigned char **) cli_realloc(tags->value,
                                tags->count * sizeof(char *));
        if (!grown) {
                goto abort;
        }
        tags->value = grown;

        copy = (unsigned char *) strdup((const char *) tag);
        if (!copy) {
                goto abort;
        }
        tags->tag[tags->count-1] = copy;

        if (value) {
                if (*value == '"') {
                        /* Skip the opening quote and drop the last character */
                        copy = (unsigned char *) strdup((const char *) value + 1);
                        if (copy) {
                                len = strlen((const char *) copy);
                                if (len > 0) {
                                        copy[len-1] = '\0';
                                }
                        }
                } else {
                        copy = (unsigned char *) strdup((const char *) value);
                }
                if (!copy) {
                        /* The new tag name is not covered by the loop in the
                         * abort path, so release it here. */
                        free(tags->tag[tags->count-1]);
                        goto abort;
                }
                tags->value[tags->count-1] = copy;
        } else {
                tags->value[tags->count-1] = NULL;
        }
        return;

abort:
        /* Bad error - can't do 100% recovery */
        tags->count--;
        for (i=0; i < tags->count; i++) {
                if (tags->tag) {
                        free(tags->tag[i]);
                }
                if (tags->value) {
                        free(tags->value[i]);
                }
        }
        free(tags->tag);
        free(tags->value);
        tags->tag = tags->value = NULL;
        tags->count = 0;
        return;
}
 
348
 
 
349
/*
 * Emit a tag to fbuff in the form <tag name="value" name ...>.
 * Argument names are written as stored; argument values are lower-cased
 * and wrapped in double quotes. Arguments with a NULL value are written
 * as a bare name.
 */
static void html_output_tag(file_buff_t *fbuff, char *tag, tag_arguments_t *tags)
{
        int arg, pos;

        html_output_c(fbuff, NULL, '<');
        html_output_str(fbuff, tag, strlen(tag));

        for (arg = 0; arg < tags->count; arg++) {
                html_output_c(fbuff, NULL, ' ');
                html_output_str(fbuff, tags->tag[arg], strlen(tags->tag[arg]));

                if (!tags->value[arg]) {
                        continue;
                }
                html_output_str(fbuff, "=\"", 2);
                for (pos = 0; tags->value[arg][pos] != '\0'; pos++) {
                        html_output_c(fbuff, NULL, tolower(tags->value[arg][pos]));
                }
                html_output_c(fbuff, NULL, '"');
        }

        html_output_c(fbuff, NULL, '>');
}
 
369
 
 
370
/*
 * Release every name and value held in tags, then the two arrays
 * themselves, leaving tags empty (count 0, NULL arrays).
 */
void html_tag_arg_free(tag_arguments_t *tags)
{
        int idx = 0;

        while (idx < tags->count) {
                free(tags->tag[idx]);
                /* free(NULL) is a no-op, so valueless arguments are fine */
                free(tags->value[idx]);
                idx++;
        }

        free(tags->tag);
        free(tags->value);

        tags->tag = tags->value = NULL;
        tags->count = 0;
}
 
389
 
 
390
static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag_arguments_t *hrefs)
 
391
{
 
392
        int fd_tmp, tag_length, tag_arg_length, binary;
 
393
        int retval=FALSE, escape, value, hex, tag_val_length, table_pos, in_script=FALSE;
 
394
        FILE *stream_in;
 
395
        html_state state=HTML_NORM, next_state=HTML_BAD_STATE;
 
396
        char filename[1024], tag[HTML_STR_LENGTH+1], tag_arg[HTML_STR_LENGTH+1];
 
397
        char tag_val[HTML_STR_LENGTH+1], *tmp_file;
 
398
        unsigned char *line, *ptr, *arg_value;
 
399
        tag_arguments_t tag_args;
 
400
        quoted_state quoted;
 
401
        unsigned long length;
 
402
        file_buff_t *file_buff_o1, *file_buff_o2, *file_buff_script;
 
403
        file_buff_t *file_tmp_o1;
 
404
 
 
405
        if (!m_area) {
 
406
                if (fd < 0) {
 
407
                        cli_dbgmsg("Invalid HTML fd\n");
 
408
                        return FALSE;
 
409
                }
 
410
                lseek(fd, 0, SEEK_SET); 
 
411
                fd_tmp = dup(fd);
 
412
                if (fd_tmp < 0) {
 
413
                        return FALSE;
 
414
                }
 
415
                stream_in = fdopen(fd_tmp, "r");
 
416
                if (!stream_in) {
 
417
                        close(fd_tmp);
 
418
                        return FALSE;
 
419
                }
 
420
        }
 
421
 
 
422
        tag_args.count = 0;
 
423
        tag_args.tag = NULL;
 
424
        tag_args.value = NULL;
 
425
        
 
426
        if (dirname) {
 
427
                snprintf(filename, 1024, "%s/rfc2397", dirname);
 
428
                if (mkdir(filename, 0700)) {
 
429
                        file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
 
430
                        goto abort;
 
431
                }
 
432
                file_buff_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
 
433
                if (!file_buff_o1) {
 
434
                        file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
 
435
                        goto abort;
 
436
                }
 
437
                
 
438
                file_buff_o2 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
 
439
                if (!file_buff_o2) {
 
440
                        free(file_buff_o1);
 
441
                        file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
 
442
                        goto abort;
 
443
                }
 
444
                
 
445
                file_buff_script = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
 
446
                if (!file_buff_script) {
 
447
                        free(file_buff_o1);
 
448
                        free(file_buff_o2);
 
449
                        file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
 
450
                        goto abort;
 
451
                }
 
452
                
 
453
                snprintf(filename, 1024, "%s/comment.html", dirname);
 
454
                file_buff_o1->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
 
455
                if (!file_buff_o1->fd) {
 
456
                        cli_dbgmsg("open failed: %s\n", filename);
 
457
                        free(file_buff_o1);
 
458
                        free(file_buff_o2);
 
459
                        free(file_buff_script);
 
460
                        file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
 
461
                        goto abort;
 
462
                }
 
463
 
 
464
                snprintf(filename, 1024, "%s/nocomment.html", dirname);
 
465
                file_buff_o2->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
 
466
                if (!file_buff_o2->fd) {
 
467
                        cli_dbgmsg("open failed: %s\n", filename);
 
468
                        close(file_buff_o1->fd);
 
469
                        free(file_buff_o1);
 
470
                        free(file_buff_o2);
 
471
                        free(file_buff_script);
 
472
                        file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
 
473
                        goto abort;
 
474
                }
 
475
 
 
476
                snprintf(filename, 1024, "%s/script.html", dirname);
 
477
                file_buff_script->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
 
478
                if (!file_buff_script->fd) {
 
479
                        cli_dbgmsg("open failed: %s\n", filename);
 
480
                        close(file_buff_o1->fd);
 
481
                        close(file_buff_o2->fd);
 
482
                        free(file_buff_o1);
 
483
                        free(file_buff_o2);
 
484
                        free(file_buff_script);
 
485
                        file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
 
486
                        goto abort;
 
487
                }
 
488
 
 
489
                file_buff_o1->length = 0;
 
490
                file_buff_o2->length = 0;
 
491
                file_buff_script->length = 0;
 
492
        } else {
 
493
                file_buff_o1 = NULL;
 
494
                file_buff_o2 = NULL;
 
495
                file_buff_script = NULL;
 
496
        }
 
497
        
 
498
        binary = FALSE;
 
499
                
 
500
        ptr = line = cli_readline(stream_in, m_area, 8192);
 
501
        while (line) {
 
502
                while (*ptr && isspace(*ptr)) {
 
503
                        ptr++;
 
504
                }
 
505
                while (*ptr) {
 
506
                        if (!binary && *ptr == '\n') {
 
507
                                /* Convert it to a space and re-process */
 
508
                                *ptr = ' ';
 
509
                                continue;
 
510
                        }
 
511
                        if (!binary && *ptr == '\r') {
 
512
                                ptr++;
 
513
                                continue;
 
514
                        }
 
515
                        switch (state) {
 
516
                        case HTML_BAD_STATE:
 
517
                                /* An engine error has occurred */
 
518
                                cli_dbgmsg("HTML Engine Error\n");
 
519
                                goto abort;
 
520
                        case HTML_SKIP_LENGTH:
 
521
                                length--;
 
522
                                ptr++;
 
523
                                if (!length) {
 
524
                                        state = next_state;
 
525
                                }
 
526
                                break;
 
527
                        case HTML_SKIP_WS:
 
528
                                if (isspace(*ptr)) {
 
529
                                        ptr++;
 
530
                                } else {
 
531
                                        state = next_state;
 
532
                                        next_state = HTML_BAD_STATE;
 
533
                                }
 
534
                                break;
 
535
                        case HTML_TRIM_WS:
 
536
                                if (isspace(*ptr)) {
 
537
                                        ptr++;
 
538
                                } else {
 
539
                                        html_output_c(file_buff_o1, file_buff_o2, ' ');
 
540
                                        state = next_state;
 
541
                                        next_state = HTML_BAD_STATE;
 
542
                                }
 
543
                                break;
 
544
                        case HTML_NORM:
 
545
                                if (*ptr == '<') {
 
546
                                        html_output_c(file_buff_o1, file_buff_o2, '<');
 
547
                                        if (in_script) {
 
548
                                                html_output_c(file_buff_script, NULL, '<');
 
549
                                        }
 
550
                                        ptr++;
 
551
                                        state = HTML_SKIP_WS;
 
552
                                        tag_length=0;
 
553
                                        next_state = HTML_TAG;
 
554
                                } else if (isspace(*ptr)) {
 
555
                                        state = HTML_TRIM_WS;
 
556
                                        next_state = HTML_NORM;
 
557
                                } else if (*ptr == '&') {
 
558
                                        state = HTML_CHAR_REF;
 
559
                                        next_state = HTML_NORM;
 
560
                                        ptr++;
 
561
                                } else {
 
562
                                        html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
 
563
                                        if (in_script) {
 
564
                                                html_output_c(file_buff_script, NULL, tolower(*ptr));
 
565
                                        }
 
566
                                        ptr++;
 
567
                                }
 
568
                                break;
 
569
                        case HTML_TAG:
 
570
                                if ((tag_length == 0) && (*ptr == '!')) {
 
571
                                        /* Comment */
 
572
                                        html_output_c(file_buff_o1, NULL, '!');
 
573
                                        if (in_script) {
 
574
                                                html_output_c(file_buff_script, NULL, '!');
 
575
                                        }
 
576
                                        /* Need to rewind in the no-comment output stream */
 
577
                                        if (file_buff_o2 && (file_buff_o2->length > 0)) {
 
578
                                                file_buff_o2->length--;
 
579
                                        }
 
580
                                        state = HTML_COMMENT;
 
581
                                        next_state = HTML_BAD_STATE;
 
582
                                        ptr++;
 
583
                                } else if (*ptr == '>') {
 
584
                                        html_output_c(file_buff_o1, file_buff_o2, '>');
 
585
                                        if (in_script) {
 
586
                                                html_output_c(file_buff_script, NULL, '>');
 
587
                                        }
 
588
                                        ptr++;
 
589
                                        tag[tag_length] = '\0';
 
590
                                        state = HTML_SKIP_WS;
 
591
                                        next_state = HTML_PROCESS_TAG;
 
592
                                } else if (!isspace(*ptr)) {
 
593
                                        html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
 
594
                                        if (in_script) {
 
595
                                                html_output_c(file_buff_script, NULL, tolower(*ptr));
 
596
                                        }
 
597
                                        if (tag_length < HTML_STR_LENGTH) {
 
598
                                                tag[tag_length++] = tolower(*ptr);
 
599
                                        }
 
600
                                        ptr++;
 
601
                                }  else {
 
602
                                        tag[tag_length] = '\0';
 
603
                                        state = HTML_SKIP_WS;
 
604
                                        tag_arg_length = 0;
 
605
                                        next_state = HTML_TAG_ARG;
 
606
                                }
 
607
                                break;
 
608
                        case HTML_TAG_ARG:
 
609
                                if (*ptr == '=') {
 
610
                                        html_output_c(file_buff_o1, file_buff_o2, '=');
 
611
                                        tag_arg[tag_arg_length] = '\0';
 
612
                                        ptr++;
 
613
                                        state = HTML_SKIP_WS;
 
614
                                        escape = FALSE;
 
615
                                        quoted = NOT_QUOTED;
 
616
                                        tag_val_length = 0;
 
617
                                        next_state = HTML_TAG_ARG_VAL;
 
618
                                } else if (isspace(*ptr)) {
 
619
                                        ptr++;
 
620
                                        tag_arg[tag_arg_length] = '\0';
 
621
                                        state = HTML_SKIP_WS;
 
622
                                        next_state = HTML_TAG_ARG_EQUAL;
 
623
                                } else if (*ptr == '>') {
 
624
                                        html_output_c(file_buff_o1, file_buff_o2, '>');
 
625
                                        if (tag_arg_length > 0) {
 
626
                                                tag_arg[tag_arg_length] = '\0';
 
627
                                                html_tag_arg_add(&tag_args, tag_arg, NULL);
 
628
                                        }
 
629
                                        ptr++;
 
630
                                        state = HTML_PROCESS_TAG;
 
631
                                        next_state = HTML_BAD_STATE;
 
632
                                } else {
 
633
                                        if (tag_arg_length == 0) {
 
634
                                                /* Start of new tag - add space */
 
635
                                                html_output_c(file_buff_o1, file_buff_o2,' ');
 
636
                                        }
 
637
                                        html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
 
638
                                        if (tag_arg_length < HTML_STR_LENGTH) {
 
639
                                                tag_arg[tag_arg_length++] = tolower(*ptr);
 
640
                                        }
 
641
                                        ptr++;
 
642
                                }
 
643
                                break;
 
644
                        case HTML_TAG_ARG_EQUAL:
 
645
                                if (*ptr == '=') {
 
646
                                        html_output_c(file_buff_o1, file_buff_o2, '=');
 
647
                                        ptr++;
 
648
                                        state = HTML_SKIP_WS;
 
649
                                        escape = FALSE;
 
650
                                        quoted = NOT_QUOTED;
 
651
                                        tag_val_length = 0;
 
652
                                        next_state = HTML_TAG_ARG_VAL;
 
653
                                } else {
 
654
                                        if (tag_arg_length > 0) {
 
655
                                                tag_arg[tag_arg_length] = '\0';
 
656
                                                html_tag_arg_add(&tag_args, tag_arg, NULL);
 
657
                                        }
 
658
                                        tag_arg_length=0;
 
659
                                        state = HTML_TAG_ARG;
 
660
                                        next_state = HTML_BAD_STATE;
 
661
                                }
 
662
                                break;
 
663
                        case HTML_TAG_ARG_VAL:
 
664
                                if ((tag_val_length == 5) && (strncmp(tag_val, "data:", 5) == 0)) {
 
665
                                        /* RFC2397 inline data */
 
666
 
 
667
                                        /* Rewind one byte so we don't recurse */
 
668
                                        if (file_buff_o1 && (file_buff_o1->length > 0)) {
 
669
                                                file_buff_o1->length--;
 
670
                                        }
 
671
                                        if (file_buff_o2 && (file_buff_o2->length > 0)) {
 
672
                                                file_buff_o2->length--;
 
673
                                        }
 
674
                                        
 
675
                                        if (quoted != NOT_QUOTED) {
 
676
                                                html_output_c(file_buff_o1, file_buff_o2, '"');
 
677
                                        }
 
678
                                        tag_val_length = 0;
 
679
                                        state = HTML_RFC2397_TYPE;
 
680
                                        next_state = HTML_TAG_ARG;
 
681
                                } else if ((tag_val_length == 6) && (strncmp(tag_val, "\"data:", 6) == 0)) {
 
682
                                        /* RFC2397 inline data */
 
683
 
 
684
                                        /* Rewind one byte so we don't recurse */
 
685
                                        if (file_buff_o1 && (file_buff_o1->length > 0)) {
 
686
                                                file_buff_o1->length--;
 
687
                                        }
 
688
                                        if (file_buff_o2 && (file_buff_o2->length > 0)) {
 
689
                                                file_buff_o2->length--;
 
690
                                        }
 
691
                                        
 
692
                                        if (quoted != NOT_QUOTED) {
 
693
                                                html_output_c(file_buff_o1, file_buff_o2, '"');
 
694
                                        }
 
695
 
 
696
                                        tag_val_length = 0;
 
697
                                        state = HTML_RFC2397_TYPE;
 
698
                                        next_state = HTML_TAG_ARG;
 
699
                                } else if (*ptr == '&') {
 
700
                                        state = HTML_CHAR_REF;
 
701
                                        next_state = HTML_TAG_ARG_VAL;
 
702
                                        ptr++;
 
703
                                } else if (*ptr == '\'') {
 
704
                                        if (tag_val_length == 0) {
 
705
                                                quoted = SINGLE_QUOTED;
 
706
                                                html_output_c(file_buff_o1, file_buff_o2, '"');
 
707
                                                if (tag_val_length < HTML_STR_LENGTH) {
 
708
                                                        tag_val[tag_val_length++] = '"';
 
709
                                                }
 
710
                                                ptr++;
 
711
                                        } else {
 
712
                                                if (!escape && (quoted==SINGLE_QUOTED)) {
 
713
                                                        html_output_c(file_buff_o1, file_buff_o2, '"');
 
714
                                                        if (tag_val_length < HTML_STR_LENGTH) {
 
715
                                                                tag_val[tag_val_length++] = '"';
 
716
                                                        }
 
717
                                                        tag_val[tag_val_length] = '\0';
 
718
                                                        html_tag_arg_add(&tag_args, tag_arg, tag_val);
 
719
                                                        ptr++;
 
720
                                                        state = HTML_SKIP_WS;
 
721
                                                        tag_arg_length=0;
 
722
                                                        next_state = HTML_TAG_ARG;
 
723
                                                } else {
 
724
                                                        html_output_c(file_buff_o1, file_buff_o2, '"');
 
725
                                                        if (tag_val_length < HTML_STR_LENGTH) {
 
726
                                                                tag_val[tag_val_length++] = '"';
 
727
                                                        }
 
728
                                                        ptr++;
 
729
                                                }
 
730
                                        }
 
731
                                } else if (*ptr == '"') {
 
732
                                        if (tag_val_length == 0) {
 
733
                                                quoted = DOUBLE_QUOTED;
 
734
                                                html_output_c(file_buff_o1, file_buff_o2, '"');
 
735
                                                if (tag_val_length < HTML_STR_LENGTH) {
 
736
                                                        tag_val[tag_val_length++] = '"';
 
737
                                                }
 
738
                                                ptr++;
 
739
                                        } else {
 
740
                                                if (!escape && (quoted==DOUBLE_QUOTED)) {                                       
 
741
                                                        html_output_c(file_buff_o1, file_buff_o2, '"');
 
742
                                                        if (tag_val_length < HTML_STR_LENGTH) {
 
743
                                                                tag_val[tag_val_length++] = '"';
 
744
                                                        }
 
745
                                                        tag_val[tag_val_length] = '\0';
 
746
                                                        html_tag_arg_add(&tag_args, tag_arg, tag_val);
 
747
                                                        ptr++;
 
748
                                                        state = HTML_SKIP_WS;
 
749
                                                        tag_arg_length=0;
 
750
                                                        next_state = HTML_TAG_ARG;
 
751
                                                } else {
 
752
                                                        html_output_c(file_buff_o1, file_buff_o2, '"');
 
753
                                                        if (tag_val_length < HTML_STR_LENGTH) {
 
754
                                                                tag_val[tag_val_length++] = '"';
 
755
                                                        }
 
756
                                                        ptr++;
 
757
                                                }
 
758
                                        }
 
759
                                } else if (isspace(*ptr) || (*ptr == '>')) {
 
760
                                        if (quoted == NOT_QUOTED) {
 
761
                                                tag_val[tag_val_length] = '\0';
 
762
                                                html_tag_arg_add(&tag_args, tag_arg, tag_val);
 
763
                                                state = HTML_SKIP_WS;
 
764
                                                tag_arg_length=0;
 
765
                                                next_state = HTML_TAG_ARG;
 
766
                                        } else {
 
767
                                                html_output_c(file_buff_o1, file_buff_o2, *ptr);
 
768
                                                if (tag_val_length < HTML_STR_LENGTH) {
 
769
                                                        if (isspace(*ptr)) {
 
770
                                                                tag_val[tag_val_length++] = ' ';
 
771
                                                        } else {
 
772
                                                                tag_val[tag_val_length++] = '>';
 
773
                                                        }
 
774
                                                }
 
775
                                                state = HTML_SKIP_WS;
 
776
                                                escape = FALSE;
 
777
                                                quoted = NOT_QUOTED;
 
778
                                                next_state = HTML_TAG_ARG_VAL;
 
779
                                                ptr++;
 
780
                                        }
 
781
                                } else {
 
782
                                        html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
 
783
                                        if (tag_val_length < HTML_STR_LENGTH) {
 
784
                                                tag_val[tag_val_length++] = *ptr;
 
785
                                        }
 
786
                                        ptr++;
 
787
                                }
 
788
                                
 
789
                                if (*ptr == '\\') {
 
790
                                        escape = TRUE;
 
791
                                } else {
 
792
                                        escape = FALSE;
 
793
                                }
 
794
                                break;
 
795
                        case HTML_COMMENT:
 
796
                                html_output_c(file_buff_o1, NULL, tolower(*ptr));
 
797
                                if (in_script) {
 
798
                                        html_output_c(file_buff_script, NULL, tolower(*ptr));
 
799
                                }
 
800
                                if (*ptr == '>') {
 
801
                                        state = HTML_SKIP_WS;
 
802
                                        next_state = HTML_NORM; 
 
803
                                }
 
804
                                ptr++;
 
805
                                break;
 
806
                        case HTML_PROCESS_TAG:
 
807
                                
 
808
                                /* Default to no action for this tag */
 
809
                                state = HTML_SKIP_WS;
 
810
                                next_state = HTML_NORM;
 
811
                                if (tag[0] == '/') {
 
812
                                        /* End tag */
 
813
                                        state = HTML_SKIP_WS;
 
814
                                        next_state = HTML_NORM;
 
815
                                        if (strcmp(tag, "/script") == 0) {
 
816
                                                in_script=FALSE;
 
817
                                                html_output_c(file_buff_script, NULL, '\n');
 
818
                                        }
 
819
                                } else if (strcmp(tag, "script") == 0) {
 
820
                                        arg_value = html_tag_arg_value(&tag_args, "language");
 
821
                                        if (arg_value && (strcasecmp(arg_value, "jscript.encode") == 0)) {
 
822
                                                html_tag_arg_set(&tag_args, "language", "javascript");
 
823
                                                state = HTML_SKIP_WS;
 
824
                                                next_state = HTML_JSDECODE;
 
825
                                        } else if (arg_value && (strcasecmp(arg_value, "vbscript.encode") == 0)) {
 
826
                                                html_tag_arg_set(&tag_args, "language", "vbscript");
 
827
                                                state = HTML_SKIP_WS;
 
828
                                                next_state = HTML_JSDECODE;
 
829
                                        } else {
 
830
                                                in_script = TRUE;
 
831
                                        }
 
832
                                        html_output_tag(file_buff_script, tag, &tag_args);
 
833
                                } else if (hrefs) {
 
834
                                        if (strcmp(tag, "a") == 0) {
 
835
                                                arg_value = html_tag_arg_value(&tag_args, "href");
 
836
                                                if (arg_value && strlen(arg_value) > 0) {
 
837
                                                        html_tag_arg_add(hrefs, "href", arg_value);
 
838
                                                }
 
839
                                        } else if (strcmp(tag, "img") == 0) {
 
840
                                                arg_value = html_tag_arg_value(&tag_args, "src");
 
841
                                                if (arg_value && strlen(arg_value) > 0) {
 
842
                                                        html_tag_arg_add(hrefs, "src", arg_value);
 
843
                                                }
 
844
                                                arg_value = html_tag_arg_value(&tag_args, "dynsrc");
 
845
                                                if (arg_value && strlen(arg_value) > 0) {
 
846
                                                        html_tag_arg_add(hrefs, "dynsrc", arg_value);
 
847
                                                }
 
848
                                        } else if (strcmp(tag, "iframe") == 0) {
 
849
                                                arg_value = html_tag_arg_value(&tag_args, "src");
 
850
                                                if (arg_value && strlen(arg_value) > 0) {
 
851
                                                        html_tag_arg_add(hrefs, "iframe", arg_value);
 
852
                                                }
 
853
                                        }                                               
 
854
                                }
 
855
                                html_tag_arg_free(&tag_args);
 
856
                                break;
 
857
                        case HTML_CHAR_REF:
 
858
                                if (*ptr == '#') {
 
859
                                        value = 0;
 
860
                                        hex = FALSE;
 
861
                                        state = HTML_CHAR_REF_DECODE;
 
862
                                        ptr++;
 
863
                                } else {
 
864
                                        html_output_c(file_buff_o1, file_buff_o2, '&');
 
865
                                        state = next_state;
 
866
                                        next_state = HTML_BAD_STATE;
 
867
                                }
 
868
                                break;
 
869
                        case HTML_CHAR_REF_DECODE:
 
870
                                if ((value==0) && ((*ptr == 'x') || (*ptr == 'X'))) {
 
871
                                        hex=TRUE;
 
872
                                        ptr++;
 
873
                                } else if (*ptr == ';') {
 
874
                                        html_output_c(file_buff_o1, file_buff_o2, value);
 
875
                                        state = next_state;
 
876
                                        next_state = HTML_BAD_STATE;
 
877
                                        ptr++;
 
878
                                } else if (isdigit(*ptr) || (hex && isxdigit(*ptr))) {
 
879
                                        if (hex) {
 
880
                                                value *= 16;
 
881
                                        } else {
 
882
                                                value *= 10;
 
883
                                        }
 
884
                                        if (isdigit(*ptr)) {
 
885
                                                value += (*ptr - '0');
 
886
                                        } else {
 
887
                                                value += (tolower(*ptr) - 'a' + 10);
 
888
                                        }
 
889
                                        ptr++;
 
890
                                } else {
 
891
                                        html_output_c(file_buff_o1, file_buff_o2, value);
 
892
                                        state = next_state;
 
893
                                        next_state = HTML_BAD_STATE;
 
894
                                }
 
895
                                break;
 
896
                        case HTML_JSDECODE:
 
897
                                /* Check for start marker */
 
898
                                if (strncmp(ptr, "#@~^", 4) == 0) {
 
899
                                        ptr += 4;
 
900
                                        state = HTML_JSDECODE_LENGTH;
 
901
                                        next_state = HTML_BAD_STATE;
 
902
                                } else {
 
903
                                        html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
 
904
                                        html_output_c(file_buff_script, NULL, tolower(*ptr));
 
905
                                        ptr++;
 
906
                                }
 
907
                                break;
 
908
                        case HTML_JSDECODE_LENGTH:
 
909
                                if (strlen(ptr) < 8) {
 
910
                                        state = HTML_NORM;
 
911
                                        next_state = HTML_BAD_STATE;
 
912
                                        break;
 
913
                                }
 
914
                                length = base64_chars[ptr[0]] << 2;
 
915
                                length += base64_chars[ptr[1]] >> 4;
 
916
                                length += (base64_chars[ptr[1]] & 0x0f) << 12;
 
917
                                length += (base64_chars[ptr[2]] >> 2) << 8;
 
918
                                length += (base64_chars[ptr[2]] & 0x03) << 22;
 
919
                                length += base64_chars[ptr[3]] << 16;
 
920
                                length += (base64_chars[ptr[4]] << 2) << 24;
 
921
                                length += (base64_chars[ptr[5]] >> 4) << 24;
 
922
                                table_pos = 0;
 
923
                                state = HTML_JSDECODE_DECRYPT;
 
924
                                next_state = HTML_BAD_STATE;
 
925
                                ptr += 8;
 
926
                                break;
 
927
                        case HTML_JSDECODE_DECRYPT:
 
928
                                if (length == 0) {
 
929
                                        html_output_str(file_buff_script, "</script>\n", 10);
 
930
                                        length = 12;
 
931
                                        state = HTML_SKIP_LENGTH;
 
932
                                        next_state = HTML_NORM;
 
933
                                        break;
 
934
                                }
 
935
                                if (*ptr < 0x80) {
 
936
                                        value = decrypt_tables[table_order[table_pos]][*ptr];
 
937
                                        if (value == 0xFF) { /* special character */
 
938
                                                ptr++;
 
939
                                                length--;
 
940
                                                switch (*ptr) {
 
941
                                                case '\0':
 
942
                                                        /* Fixup for end of line */
 
943
                                                        ptr--;
 
944
                                                        break;
 
945
                                                case 0x21:
 
946
                                                        html_output_c(file_buff_o1, file_buff_o2, 0x3c);
 
947
                                                        html_output_c(file_buff_script, NULL, 0x3c);
 
948
                                                        break;
 
949
                                                case 0x23:
 
950
                                                        html_output_c(file_buff_o1, file_buff_o2, 0x0d);
 
951
                                                        html_output_c(file_buff_script, NULL, 0x0d);
 
952
                                                        break;
 
953
                                                case 0x24:
 
954
                                                        html_output_c(file_buff_o1, file_buff_o2, 0x40);
 
955
                                                        html_output_c(file_buff_script, NULL, 0x40);
 
956
                                                        break;                          
 
957
                                                case 0x26:
 
958
                                                        html_output_c(file_buff_o1, file_buff_o2, 0x0a);
 
959
                                                        html_output_c(file_buff_script, NULL, 0x0a);
 
960
                                                        break;
 
961
                                                case 0x2a:
 
962
                                                        html_output_c(file_buff_o1, file_buff_o2, 0x3e);
 
963
                                                        html_output_c(file_buff_script, NULL, 0x3e);
 
964
                                                        break;
 
965
                                                }
 
966
                                        } else {
 
967
                                                html_output_c(file_buff_o1, file_buff_o2, value);
 
968
                                                html_output_c(file_buff_script, NULL, tolower(value));
 
969
                                        }
 
970
                                }
 
971
                                table_pos = (table_pos + 1) % 64;
 
972
                                ptr++;
 
973
                                length--;
 
974
                                break;
 
975
                                
 
976
                        case HTML_RFC2397_TYPE:
 
977
                                if (*ptr == '\'') {
 
978
                                        if (!escape && (quoted==SINGLE_QUOTED)) {
 
979
                                                /* Early end of data detected. Error */
 
980
                                                ptr++;
 
981
                                                state = HTML_SKIP_WS;
 
982
                                                tag_arg_length=0;
 
983
                                                next_state = HTML_TAG_ARG;
 
984
                                        } else {
 
985
                                                if (tag_val_length < HTML_STR_LENGTH) {
 
986
                                                        tag_val[tag_val_length++] = '"';
 
987
                                                }
 
988
                                                ptr++;
 
989
                                        }
 
990
                                } else if (*ptr == '"') {
 
991
                                        if (!escape && (quoted==DOUBLE_QUOTED)) {
 
992
                                                /* Early end of data detected. Error */
 
993
                                                ptr++;
 
994
                                                state = HTML_SKIP_WS;
 
995
                                                tag_arg_length=0;
 
996
                                                next_state = HTML_TAG_ARG;
 
997
                                        } else {
 
998
                                                if (tag_val_length < HTML_STR_LENGTH) {
 
999
                                                        tag_val[tag_val_length++] = '"';
 
1000
                                                }
 
1001
                                                ptr++;
 
1002
                                        }
 
1003
                                } else if (isspace(*ptr) || (*ptr == '>')) {
 
1004
                                        if (quoted == NOT_QUOTED) {
 
1005
                                                /* Early end of data detected. Error */
 
1006
                                                state = HTML_SKIP_WS;
 
1007
                                                tag_arg_length=0;
 
1008
                                                next_state = HTML_TAG_ARG;
 
1009
                                        } else {
 
1010
                                                if (tag_val_length < HTML_STR_LENGTH) {
 
1011
                                                        if (isspace(*ptr)) {
 
1012
                                                                tag_val[tag_val_length++] = ' ';
 
1013
                                                        } else {
 
1014
                                                                tag_val[tag_val_length++] = '>';
 
1015
                                                        }
 
1016
                                                }
 
1017
                                                state = HTML_SKIP_WS;
 
1018
                                                escape = FALSE;
 
1019
                                                quoted = NOT_QUOTED;
 
1020
                                                next_state = HTML_RFC2397_TYPE;
 
1021
                                                ptr++;
 
1022
                                        }
 
1023
                                } else if (*ptr == ',') {
 
1024
                                        /* Beginning of data */
 
1025
                                        tag_val[tag_val_length] = '\0';
 
1026
                                        state = HTML_RFC2397_INIT;
 
1027
                                        escape = FALSE;
 
1028
                                        next_state = HTML_BAD_STATE;
 
1029
                                        ptr++;
 
1030
                                
 
1031
                                } else {
 
1032
                                        if (tag_val_length < HTML_STR_LENGTH) {
 
1033
                                                tag_val[tag_val_length++] = tolower(*ptr);
 
1034
                                        }
 
1035
                                        ptr++;
 
1036
                                }
 
1037
                                if (*ptr == '\\') {
 
1038
                                        escape = TRUE;
 
1039
                                } else {
 
1040
                                        escape = FALSE;
 
1041
                                }
 
1042
                                break;
 
1043
                        case HTML_RFC2397_INIT:
 
1044
                                file_tmp_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
 
1045
                                if (!file_tmp_o1) {
 
1046
                                        goto abort;
 
1047
                                }
 
1048
                                snprintf(filename, 1024, "%s/rfc2397", dirname);
 
1049
                                tmp_file = cli_gentemp(filename);
 
1050
                                cli_dbgmsg("RFC2397 data file: %s\n", tmp_file);
 
1051
                                file_tmp_o1->fd = open(tmp_file, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
 
1052
                                free(tmp_file);
 
1053
                                if (!file_tmp_o1->fd) {
 
1054
                                        cli_dbgmsg("open failed: %s\n", filename);
 
1055
                                        free(file_tmp_o1);
 
1056
                                        goto abort;
 
1057
                                }
 
1058
                                file_tmp_o1->length = 0;
 
1059
                                
 
1060
                                html_output_str(file_tmp_o1, "From html-normalise\n", 20);
 
1061
                                html_output_str(file_tmp_o1, "Content-type: ", 14);
 
1062
                                if ((tag_val_length == 0) && (*tag_val == ';')) {
 
1063
                                                html_output_str(file_tmp_o1, "text/plain\n", 11);
 
1064
                                }
 
1065
                                html_output_str(file_tmp_o1, tag_val, tag_val_length);
 
1066
                                html_output_c(file_tmp_o1, NULL, '\n');
 
1067
                                if (strstr(tag_val, ";base64") != NULL) {
 
1068
                                        html_output_str(file_tmp_o1, "Content-transfer-encoding: base64\n", 34);
 
1069
                                }
 
1070
                                html_output_c(file_tmp_o1, NULL, '\n');
 
1071
                                state = HTML_RFC2397_DATA;
 
1072
                                binary = TRUE;
 
1073
                                break;
 
1074
                        case HTML_RFC2397_DATA:
 
1075
                                if (*ptr == '&') {
 
1076
                                        state = HTML_CHAR_REF;
 
1077
                                        next_state = HTML_RFC2397_DATA;
 
1078
                                        ptr++;
 
1079
                                } else if (*ptr == '%') {
 
1080
                                        length = 0;
 
1081
                                        value = 0;
 
1082
                                        state = HTML_ESCAPE_CHAR;
 
1083
                                        next_state = HTML_RFC2397_ESC;
 
1084
                                        ptr++;
 
1085
                                } else if (*ptr == '\'') {
 
1086
                                        if (!escape && (quoted==SINGLE_QUOTED)) {
 
1087
                                                state = HTML_RFC2397_FINISH;
 
1088
                                                ptr++;
 
1089
                                        } else {
 
1090
                                                html_output_c(file_tmp_o1, NULL, *ptr);
 
1091
                                                ptr++;
 
1092
                                        }
 
1093
                                } else if (*ptr == '\"') {
 
1094
                                        if (!escape && (quoted=DOUBLE_QUOTED)) {
 
1095
                                                state = HTML_RFC2397_FINISH;
 
1096
                                                ptr++;
 
1097
                                        } else {
 
1098
                                                html_output_c(file_tmp_o1, NULL, *ptr);
 
1099
                                                ptr++;
 
1100
                                        }
 
1101
                                } else if (isspace(*ptr) || (*ptr == '>')) {
 
1102
                                        if (quoted == NOT_QUOTED) {
 
1103
                                                state = HTML_RFC2397_FINISH;
 
1104
                                                ptr++;
 
1105
                                        } else {
 
1106
                                                html_output_c(file_tmp_o1, NULL, *ptr);
 
1107
                                                ptr++;
 
1108
                                        }
 
1109
                                } else {
 
1110
                                        html_output_c(file_tmp_o1, NULL, *ptr);
 
1111
                                        ptr++;
 
1112
                                }
 
1113
                                if (*ptr == '\\') {
 
1114
                                        escape = TRUE;
 
1115
                                } else {
 
1116
                                        escape = FALSE;
 
1117
                                }
 
1118
                                break;
 
1119
                        case HTML_RFC2397_FINISH:
 
1120
                                html_output_flush(file_tmp_o1);
 
1121
                                close(file_tmp_o1->fd);
 
1122
                                free(file_tmp_o1);
 
1123
                                state = HTML_SKIP_WS;
 
1124
                                escape = FALSE;
 
1125
                                quoted = NOT_QUOTED;
 
1126
                                next_state = HTML_TAG_ARG;
 
1127
                                binary = FALSE;
 
1128
                                break;
 
1129
                        case HTML_RFC2397_ESC:
 
1130
                                if (length == 2) {
 
1131
                                        html_output_c(file_tmp_o1, NULL, value);
 
1132
                                } else if (length == 1) {
 
1133
                                        html_output_c(file_tmp_o1, NULL, '%');
 
1134
                                        html_output_c(file_tmp_o1, NULL, value+'0');
 
1135
                                } else {
 
1136
                                        html_output_c(file_tmp_o1, NULL, '%');
 
1137
                                }
 
1138
                                state = HTML_RFC2397_DATA;
 
1139
                                break;          
 
1140
                        case HTML_ESCAPE_CHAR:
 
1141
                                value *= 16;
 
1142
                                length++;
 
1143
                                if (isxdigit(*ptr)) {
 
1144
                                        if (isdigit(*ptr)) {
 
1145
                                                value += (*ptr - '0');
 
1146
                                        } else {
 
1147
                                                value += (tolower(*ptr) - 'a' + 10);
 
1148
                                        }
 
1149
                                } else {
 
1150
                                        state = next_state;
 
1151
                                }
 
1152
                                if (length == 2) {
 
1153
                                        state = next_state;
 
1154
                                }
 
1155
                                ptr++;
 
1156
                                break;  
 
1157
                        }
 
1158
                }
 
1159
                free(line);
 
1160
                ptr = line = cli_readline(stream_in, m_area, 8192);
 
1161
        }
 
1162
        
 
1163
        retval = TRUE;
 
1164
abort:
 
1165
        html_tag_arg_free(&tag_args);
 
1166
        if (!m_area) {
 
1167
                fclose(stream_in);
 
1168
        }
 
1169
        if (file_buff_o1) {
 
1170
                html_output_flush(file_buff_o1);
 
1171
                close(file_buff_o1->fd);
 
1172
                free(file_buff_o1);
 
1173
        }
 
1174
        if (file_buff_o2) {
 
1175
                html_output_flush(file_buff_o2);
 
1176
                close(file_buff_o2->fd);
 
1177
                free(file_buff_o2);
 
1178
        }
 
1179
        if (file_buff_script) {
 
1180
                html_output_flush(file_buff_script);
 
1181
                close(file_buff_script->fd);
 
1182
                free(file_buff_script);
 
1183
        }
 
1184
        return retval;
 
1185
}
 
1186
 
 
1187
/*
 * Normalise an HTML document that is already held in memory.
 *
 * The buffer is described by an m_area_t starting at offset 0 and handed
 * to cli_html_normalise() with -1 in place of a file descriptor.
 *
 *  in_buff  start of the HTML data
 *  in_size  number of bytes available at in_buff
 *  dirname  passed through unchanged to cli_html_normalise()
 *  hrefs    passed through unchanged to cli_html_normalise()
 *
 * Returns whatever cli_html_normalise() returns.
 */
int html_normalise_mem(unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs)
{
        m_area_t mem_view;

        mem_view.offset = 0;
        mem_view.length = in_size;
        mem_view.buffer = in_buff;

        return cli_html_normalise(-1, &mem_view, dirname, hrefs);
}
 
1197
 
 
1198
/*
 * Normalise the HTML document behind a file descriptor.
 *
 * When built with HAVE_MMAP the file is mapped read-only and normalised
 * from memory; if fstat() or mmap() fails the descriptor itself is passed
 * to cli_html_normalise() instead. Without HAVE_MMAP the descriptor is
 * always used directly.
 *
 *  fd       descriptor of the HTML input
 *  dirname  passed through unchanged to cli_html_normalise()
 *  hrefs    passed through unchanged to cli_html_normalise()
 *
 * Returns whatever cli_html_normalise() returns.
 */
int html_normalise_fd(int fd, const char *dirname, tag_arguments_t *hrefs)
{
#if HAVE_MMAP
        struct stat sb;
        m_area_t mapped;
        int rc;

        if (fstat(fd, &sb) != 0) {
                /* Size unknown: fall back to stream based reading. */
                cli_dbgmsg("fstat HTML failed\n");
                return cli_html_normalise(fd, NULL, dirname, hrefs);
        }

        mapped.length = sb.st_size;
        mapped.buffer = (unsigned char *) mmap(NULL, mapped.length, PROT_READ, MAP_PRIVATE, fd, 0);
        mapped.offset = 0;

        if (mapped.buffer == MAP_FAILED) {
                /* Mapping refused: fall back to stream based reading. */
                cli_dbgmsg("mmap HTML failed\n");
                return cli_html_normalise(fd, NULL, dirname, hrefs);
        }

        cli_dbgmsg("mmap'ed file\n");
        rc = cli_html_normalise(-1, &mapped, dirname, hrefs);
        munmap(mapped.buffer, mapped.length);
        return rc;
#else
        return cli_html_normalise(fd, NULL, dirname, hrefs);
#endif
}
 
1226
 
 
1227
int html_screnc_decode(int fd, const char *dirname)
 
1228
{
 
1229
        int fd_tmp, table_pos=0, result, count, state, retval=FALSE;
 
1230
        unsigned char *line, tmpstr[6];
 
1231
        unsigned long length;
 
1232
        unsigned char *ptr, filename[1024];
 
1233
        FILE *stream_in;
 
1234
        file_buff_t file_buff;
 
1235
        
 
1236
        lseek(fd, 0, SEEK_SET); 
 
1237
        fd_tmp = dup(fd);
 
1238
        if (fd_tmp < 0) {
 
1239
                return FALSE;
 
1240
        }
 
1241
        stream_in = fdopen(fd_tmp, "r");
 
1242
        if (!stream_in) {
 
1243
                close(fd_tmp);
 
1244
                return FALSE;
 
1245
        }
 
1246
        
 
1247
        snprintf(filename, 1024, "%s/screnc.html", dirname);
 
1248
        file_buff.fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
 
1249
        file_buff.length = 0;
 
1250
        
 
1251
        if (!file_buff.fd) {
 
1252
                cli_dbgmsg("open failed: %s\n", filename);
 
1253
                fclose(stream_in);
 
1254
                return FALSE;
 
1255
        }
 
1256
        
 
1257
        while ((line = cli_readline(stream_in, NULL, 8192)) != NULL) {
 
1258
                ptr = strstr(line, "#@~^");
 
1259
                if (ptr) {
 
1260
                        break;
 
1261
                }
 
1262
                free(line);
 
1263
        }
 
1264
        if (!line) {
 
1265
                goto abort;
 
1266
        }
 
1267
        
 
1268
        /* Calculate the length of the encoded string */
 
1269
        ptr += 4;
 
1270
        count = 0;
 
1271
        do {
 
1272
                if (! *ptr) {
 
1273
                        free(line);
 
1274
                        ptr = line = cli_readline(stream_in, NULL, 8192);
 
1275
                        if (!line) {
 
1276
                                goto abort;
 
1277
                        }
 
1278
                }
 
1279
                tmpstr[count++] = *ptr;
 
1280
                ptr++;
 
1281
        } while (count < 6);
 
1282
        
 
1283
        length = base64_chars[tmpstr[0]] << 2;
 
1284
        length += base64_chars[tmpstr[1]] >> 4;
 
1285
        length += (base64_chars[tmpstr[1]] & 0x0f) << 12;
 
1286
        length += (base64_chars[tmpstr[2]] >> 2) << 8;
 
1287
        length += (base64_chars[tmpstr[2]] & 0x03) << 22;
 
1288
        length += base64_chars[tmpstr[3]] << 16;
 
1289
        length += (base64_chars[tmpstr[4]] << 2) << 24;
 
1290
        length += (base64_chars[tmpstr[5]] >> 4) << 24;
 
1291
 
 
1292
        /* Move forward 2 bytes */
 
1293
        count = 2;
 
1294
        state = HTML_SKIP_LENGTH;
 
1295
 
 
1296
        while (length && line) {
 
1297
                while (length && *ptr) {
 
1298
                        if ((*ptr == '\n') || (*ptr == '\r')) {
 
1299
                                ptr++;
 
1300
                                continue;
 
1301
                        }
 
1302
                        switch (state) {
 
1303
                        case HTML_SKIP_LENGTH:
 
1304
                                ptr++;
 
1305
                                count--;
 
1306
                                if (count == 0) {
 
1307
                                        state = HTML_NORM;
 
1308
                                }
 
1309
                                break;
 
1310
                        case HTML_SPECIAL_CHAR:
 
1311
                                switch (*ptr) {
 
1312
                                case 0x21:
 
1313
                                        html_output_c(&file_buff, NULL, 0x3c);
 
1314
                                        break;
 
1315
                                case 0x23:
 
1316
                                        html_output_c(&file_buff, NULL, 0x0d);
 
1317
                                        break;
 
1318
                                case 0x24:
 
1319
                                        html_output_c(&file_buff, NULL, 0x40);
 
1320
                                        break;                          
 
1321
                                case 0x26:
 
1322
                                        html_output_c(&file_buff, NULL, 0x0a);
 
1323
                                        break;
 
1324
                                case 0x2a:
 
1325
                                        html_output_c(&file_buff, NULL, 0x3e);
 
1326
                                        break;
 
1327
                                }
 
1328
                                ptr++;
 
1329
                                length--;
 
1330
                                state = HTML_NORM;
 
1331
                                break;
 
1332
                        case HTML_NORM: 
 
1333
                                if (*ptr < 0x80) {
 
1334
                                        result = decrypt_tables[table_order[table_pos]][*ptr];
 
1335
                                        if (result == 0xFF) { /* special character */
 
1336
                                                state = HTML_SPECIAL_CHAR;
 
1337
                                        } else {
 
1338
                                                html_output_c(&file_buff, NULL, (char)result);
 
1339
                                        }
 
1340
                                }
 
1341
                                ptr++;
 
1342
                                length--;
 
1343
                                table_pos = (table_pos + 1) % 64;
 
1344
                                break;
 
1345
                        }
 
1346
                }
 
1347
                free(line);
 
1348
                if (length) {
 
1349
                        ptr = line = cli_readline(stream_in, NULL, 8192);
 
1350
                }
 
1351
        }
 
1352
        retval = TRUE;
 
1353
                                                
 
1354
abort:
 
1355
        fclose(stream_in);
 
1356
        html_output_flush(&file_buff);
 
1357
        close(file_buff.fd);
 
1358
        return retval;
 
1359
}