102
110
{ "CL_TYPE_ISO9660", CL_TYPE_ISO9660 },
103
111
{ "CL_TYPE_JAVA", CL_TYPE_JAVA },
104
112
{ "CL_TYPE_DMG", CL_TYPE_DMG },
113
{ "CL_TYPE_MBR", CL_TYPE_MBR },
114
{ "CL_TYPE_GPT", CL_TYPE_GPT },
115
{ "CL_TYPE_APM", CL_TYPE_APM },
105
116
{ "CL_TYPE_XAR", CL_TYPE_XAR },
106
117
{ "CL_TYPE_PART_ANY", CL_TYPE_PART_ANY },
107
118
{ "CL_TYPE_PART_HFSPLUS", CL_TYPE_PART_HFSPLUS },
108
119
{ "CL_TYPE_XZ", CL_TYPE_XZ },
120
{ "CL_TYPE_OOXML_WORD", CL_TYPE_OOXML_WORD },
121
{ "CL_TYPE_OOXML_PPT", CL_TYPE_OOXML_PPT },
122
{ "CL_TYPE_OOXML_XL", CL_TYPE_OOXML_XL },
109
123
{ NULL, CL_TYPE_IGNORED }
244
258
cli_dbgmsg("Recognized POSIX tar file\n");
245
259
return CL_TYPE_POSIX_TAR;
261
} else if (ret == CL_TYPE_ZIP && bread > 2*(SIZEOF_LH+5)) {
262
const char lhdr_magic[4] = {0x50,0x4b,0x03,0x04};
263
const unsigned char *zbuff = buff;
264
uint32_t zread = bread;
265
uint64_t zoff = bread;
266
const unsigned char * znamep = buff;
267
int32_t zlen = bread;
271
for (zi=0; zi<32; zi++) {
272
znamep = cli_memstr(znamep, zlen, lhdr_magic, 4);
273
if (NULL != znamep) {
275
zlen = zread - (znamep - zbuff);
276
if (zlen > 4) { /* Ensure we've mapped for OOXML filename compare */
277
if (0 == memcmp(znamep, "xl/", 3)) {
278
cli_dbgmsg("Recognized OOXML XL file\n");
279
return CL_TYPE_OOXML_XL;
280
} else if (0 == memcmp(znamep, "ppt/", 4)) {
281
cli_dbgmsg("Recognized OOXML PPT file\n");
282
return CL_TYPE_OOXML_PPT;
283
} else if (0 == memcmp(znamep, "word/", 5)) {
284
cli_dbgmsg("Recognized OOXML Word file\n");
285
return CL_TYPE_OOXML_WORD;
288
break; /* only check first three zip headers */
291
znamep = NULL; /* force to map more */
295
if (znamep == NULL) {
296
if (map->len-zoff > SIZEOF_LH) {
297
zoff -= SIZEOF_LH+5; /* remap for SIZEOF_LH+filelen for header overlap map boundary */
298
zread = MIN(MAGIC_BUFFER_SIZE, map->len-zoff);
299
zbuff = fmap_need_off_once(map, zoff, zread);
301
cli_dbgmsg("cli_filetype2: error mapping data for OOXML check\n");
302
return CL_TYPE_ERROR;
309
break; /* end of data */
313
} else if (ret == CL_TYPE_MBR) {
314
/* given filetype sig type 0 */
315
int iret = cli_mbr_check(buff, bread, map->len);
316
if (iret == CL_TYPE_GPT) {
317
cli_dbgmsg("Recognized GUID Partition Table file\n");
320
else if (iret == CL_CLEAN) {
325
cli_dbgmsg("Recognized binary data\n");
326
ret = CL_TYPE_BINARY_DATA;
250
330
if(ret >= CL_TYPE_TEXT_ASCII && ret <= CL_TYPE_BINARY_DATA) {
296
376
out_area.length = sizeof(decodedbuff);
297
377
out_area.offset = 0;
299
/* in htmlnorm we simply skip over \0 chars, and that allows to parse HTML in any unicode
379
/* in htmlnorm we simply skip over \0 chars, allowing HTML parsing in any unicode
300
380
* (multibyte characters will not be exactly handled, but that is not a problem).
301
381
* However when detecting whether a file is HTML or not, we need exact conversion.
302
382
* (just eliminating zeros and matching would introduce false positives) */