~ubuntu-branches/ubuntu/saucy/clamav/saucy-backports

« back to all changes in this revision

Viewing changes to libclamav/filetypes.c

  • Committer: Package Import Robot
  • Author(s): Scott Kitterman
  • Date: 2014-07-15 01:08:10 UTC
  • mfrom: (0.35.47 sid)
  • Revision ID: package-import@ubuntu.com-20140715010810-ru66ek4fun2iseba
Tags: 0.98.4+dfsg-2~ubuntu13.10.1
No-change backport to saucy (LP: #1341962)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*
2
 
 *  Copyright (C) 2007-2008 Sourcefire, Inc.
 
2
 *  Copyright (C) 2007-2013 Sourcefire, Inc.
3
3
 *
4
4
 *  Authors: Tomasz Kojm
5
5
 *
30
30
#include <unistd.h>
31
31
#endif
32
32
 
 
33
#include <openssl/ssl.h>
 
34
#include <openssl/err.h>
 
35
#include "libclamav/crypto.h"
 
36
 
33
37
#include "clamav.h"
34
38
#include "filetypes.h"
35
39
#include "others.h"
39
43
#include "textdet.h"
40
44
#include "default.h"
41
45
#include "iowrap.h"
 
46
#include "mbr.h"
 
47
#include "gpt.h"
42
48
 
43
49
#include "htmlnorm.h"
44
50
#include "entconv.h"
45
51
#include "mpool.h"
 
52
#define UNZIP_PRIVATE
 
53
#include "unzip.h"
46
54
 
47
55
static const struct ftmap_s {
48
56
    const char *name;
102
110
    { "CL_TYPE_ISO9660",        CL_TYPE_ISO9660         },
103
111
    { "CL_TYPE_JAVA",           CL_TYPE_JAVA            },
104
112
    { "CL_TYPE_DMG",            CL_TYPE_DMG             },
 
113
    { "CL_TYPE_MBR",        CL_TYPE_MBR     },
 
114
    { "CL_TYPE_GPT",        CL_TYPE_GPT     },
 
115
    { "CL_TYPE_APM",        CL_TYPE_APM     },
105
116
    { "CL_TYPE_XAR",            CL_TYPE_XAR             },
106
117
    { "CL_TYPE_PART_ANY",       CL_TYPE_PART_ANY        },
107
118
    { "CL_TYPE_PART_HFSPLUS",   CL_TYPE_PART_HFSPLUS    },
108
119
    { "CL_TYPE_XZ",             CL_TYPE_XZ              },
 
120
    { "CL_TYPE_OOXML_WORD",     CL_TYPE_OOXML_WORD      },
 
121
    { "CL_TYPE_OOXML_PPT",      CL_TYPE_OOXML_PPT       },
 
122
    { "CL_TYPE_OOXML_XL",       CL_TYPE_OOXML_XL        },
109
123
    { NULL,                     CL_TYPE_IGNORED         }
110
124
};
111
125
 
244
258
                    cli_dbgmsg("Recognized POSIX tar file\n");
245
259
                    return CL_TYPE_POSIX_TAR;
246
260
            }
247
 
        }
 
261
        } else if (ret == CL_TYPE_ZIP && bread > 2*(SIZEOF_LH+5)) {
 
262
            const char lhdr_magic[4] = {0x50,0x4b,0x03,0x04};
 
263
            const unsigned char *zbuff = buff;
 
264
            uint32_t zread = bread;
 
265
            uint64_t zoff = bread;
 
266
            const unsigned char * znamep = buff;
 
267
            int32_t zlen = bread;
 
268
            int lhc = 0;
 
269
            int zi;
 
270
            
 
271
            for (zi=0; zi<32; zi++) {
 
272
                znamep = cli_memstr(znamep, zlen, lhdr_magic, 4);
 
273
                if (NULL != znamep) {
 
274
                    znamep += SIZEOF_LH;
 
275
                    zlen = zread - (znamep - zbuff);
 
276
                    if (zlen > 4) { /* Ensure we've mapped for OOXML filename compare */
 
277
                        if (0 == memcmp(znamep, "xl/", 3)) {
 
278
                            cli_dbgmsg("Recognized OOXML XL file\n");
 
279
                            return CL_TYPE_OOXML_XL;
 
280
                        } else if (0 == memcmp(znamep, "ppt/", 4)) {
 
281
                            cli_dbgmsg("Recognized OOXML PPT file\n");
 
282
                            return CL_TYPE_OOXML_PPT;                        
 
283
                        } else if (0 == memcmp(znamep, "word/", 5)) {
 
284
                            cli_dbgmsg("Recognized OOXML Word file\n");
 
285
                            return CL_TYPE_OOXML_WORD;
 
286
                        }
 
287
                        if (++lhc > 2)
 
288
                            break; /* only check first three zip headers */
 
289
                    }
 
290
                    else {
 
291
                        znamep = NULL; /* force to map more */
 
292
                    }
 
293
                }
 
294
 
 
295
                if (znamep == NULL) {
 
296
                    if (map->len-zoff > SIZEOF_LH) {
 
297
                        zoff -= SIZEOF_LH+5; /* remap for SIZEOF_LH+filelen for header overlap map boundary */ 
 
298
                        zread = MIN(MAGIC_BUFFER_SIZE, map->len-zoff);
 
299
                        zbuff = fmap_need_off_once(map, zoff, zread);
 
300
                        if (zbuff == NULL) {
 
301
                            cli_dbgmsg("cli_filetype2: error mapping data for OOXML check\n");
 
302
                            return CL_TYPE_ERROR;
 
303
                        }
 
304
                        zoff += zread;
 
305
                        znamep = zbuff;
 
306
                        zlen = zread;
 
307
                    }
 
308
                    else {
 
309
                        break; /* end of data */
 
310
                    }
 
311
                }
 
312
            }
 
313
        } else if (ret == CL_TYPE_MBR) {
 
314
            /* given filetype sig type 0 */
 
315
            int iret = cli_mbr_check(buff, bread, map->len);
 
316
            if (iret == CL_TYPE_GPT) {
 
317
                cli_dbgmsg("Recognized GUID Partition Table file\n");
 
318
                return CL_TYPE_GPT;
 
319
            }
 
320
            else if (iret == CL_CLEAN) {
 
321
                return CL_TYPE_MBR;
 
322
            }
 
323
 
 
324
            /* re-detect type */
 
325
            cli_dbgmsg("Recognized binary data\n");
 
326
            ret = CL_TYPE_BINARY_DATA;
 
327
        }
248
328
    }
249
329
 
250
330
    if(ret >= CL_TYPE_TEXT_ASCII && ret <= CL_TYPE_BINARY_DATA) {
296
376
                            out_area.length = sizeof(decodedbuff);
297
377
                            out_area.offset = 0;
298
378
 
299
 
                            /* in htmlnorm we simply skip over \0 chars, and that allows to parse HTML in any unicode 
 
379
                            /* in htmlnorm we simply skip over \0 chars, allowing HTML parsing in any unicode 
300
380
                             * (multibyte characters will not be exactly handled, but that is not a problem).
301
381
                             * However when detecting whether a file is HTML or not, we need exact conversion.
302
382
                             * (just eliminating zeros and matching would introduce false positives) */