53
53
{ "CL_TYPE_TEXT_UTF16BE", CL_TYPE_TEXT_UTF16BE },
54
54
{ "CL_TYPE_BINARY_DATA", CL_TYPE_BINARY_DATA },
55
55
{ "CL_TYPE_IGNORED", CL_TYPE_IGNORED },
56
{ "CL_TYPE_ANY", 0 }, /* for ft-sigs */
56
{ "CL_TYPE_ANY", CL_TYPE_ANY },
57
57
{ "CL_TYPE_MSEXE", CL_TYPE_MSEXE },
58
58
{ "CL_TYPE_ELF", CL_TYPE_ELF },
59
{ "CL_TYPE_MACHO", CL_TYPE_MACHO },
60
{ "CL_TYPE_MACHO_UNIBIN", CL_TYPE_MACHO_UNIBIN },
59
61
{ "CL_TYPE_POSIX_TAR", CL_TYPE_POSIX_TAR },
60
62
{ "CL_TYPE_OLD_TAR", CL_TYPE_OLD_TAR },
63
{ "CL_TYPE_CPIO_OLD", CL_TYPE_CPIO_OLD },
64
{ "CL_TYPE_CPIO_ODC", CL_TYPE_CPIO_ODC },
65
{ "CL_TYPE_CPIO_NEWC", CL_TYPE_CPIO_NEWC },
66
{ "CL_TYPE_CPIO_CRC", CL_TYPE_CPIO_CRC },
61
67
{ "CL_TYPE_GZ", CL_TYPE_GZ },
62
68
{ "CL_TYPE_ZIP", CL_TYPE_ZIP },
63
69
{ "CL_TYPE_BZ", CL_TYPE_BZ },
88
94
{ "CL_TYPE_ARJSFX", CL_TYPE_ARJSFX },
89
95
{ "CL_TYPE_NULSFT", CL_TYPE_NULSFT },
90
96
{ "CL_TYPE_AUTOIT", CL_TYPE_AUTOIT },
92
/* for compatibility with daily.cvd < 9954 */
93
{ "CL_TYPE_CPIO_NEWC", CL_TYPE_CPIO_NEWC },
94
{ "CL_TYPE_CPIO_CRC", CL_TYPE_CPIO_CRC },
95
{ "CL_TYPE_CPIO_ODC", CL_TYPE_CPIO_ODC },
96
{ "CL_TYPE_CPIO_OLD", CL_TYPE_CPIO_OLD },
97
{ "CL_TYPE_ISHIELD_MSI", CL_TYPE_ISHIELD_MSI },
98
{ "CL_TYPE_7Z", CL_TYPE_7Z },
98
99
{ NULL, CL_TYPE_IGNORED }
143
144
int is_tar(unsigned char *buf, unsigned int nbytes);
145
cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
146
cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine)
147
unsigned char buff[MAGIC_BUFFER_SIZE + 1], *decoded;
148
unsigned char *buff, *decoded;
149
int bread = MIN(map->len, MAGIC_BUFFER_SIZE), sret;
149
150
cli_file_t ret = CL_TYPE_BINARY_DATA;
150
151
struct cli_matcher *root;
151
152
struct cli_ac_data mdata;
156
157
return CL_TYPE_ERROR;
159
memset(buff, 0, sizeof(buff));
160
bread = cli_readn(desc, buff, MAGIC_BUFFER_SIZE);
160
buff = fmap_need_off_once(map, 0, bread);
162
162
return CL_TYPE_ERROR;
165
164
ret = cli_filetype(buff, bread, engine);
175
if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))
174
if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))
178
sret = cli_ac_scanbuff(buff, bread, NULL, NULL, NULL, engine->root[0], &mdata, 0, ret, desc, NULL, AC_SCAN_FT, NULL);
177
sret = cli_ac_scanbuff(buff, bread, NULL, NULL, NULL, engine->root[0], &mdata, 0, ret, NULL, AC_SCAN_FT, NULL);
180
179
cli_ac_freedata(&mdata);
182
181
if(sret >= CL_TYPENO) {
185
if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))
184
if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))
188
187
decoded = (unsigned char *) cli_utf16toascii((char *) buff, bread);
190
sret = cli_ac_scanbuff(decoded, strlen((char *) decoded), NULL, NULL, NULL, engine->root[0], &mdata, 0, CL_TYPE_TEXT_ASCII, desc, NULL, AC_SCAN_FT, NULL);
189
sret = cli_ac_scanbuff(decoded, bread / 2, NULL, NULL, NULL, engine->root[0], &mdata, 0, CL_TYPE_TEXT_ASCII, NULL, AC_SCAN_FT, NULL);
192
191
if(sret == CL_TYPE_HTML)
193
192
ret = CL_TYPE_HTML_UTF16;
201
200
* If we can't, don't try to detect HTML sig, since
202
201
* we just tried that above, and failed */
203
202
if((encoding = encoding_detect_bom(buff, bread))) {
204
unsigned char decodedbuff[sizeof(buff)*2];
203
unsigned char decodedbuff[(MAGIC_BUFFER_SIZE+1)*2];
205
204
m_area_t in_area, out_area;
207
206
in_area.buffer = (unsigned char *) buff;
216
215
* However when detecting whether a file is HTML or not, we need exact conversion.
217
216
* (just eliminating zeros and matching would introduce false positives) */
218
217
if(encoding_normalize_toascii(&in_area, encoding, &out_area) >= 0 && out_area.length > 0) {
219
if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))
218
if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))
222
221
if(out_area.length > 0) {
223
sret = cli_ac_scanbuff(decodedbuff, out_area.length, NULL, NULL, NULL, engine->root[0], &mdata, 0, 0, desc, NULL, AC_SCAN_FT, NULL); /* FIXME: can we use CL_TYPE_TEXT_ASCII instead of 0? */
222
sret = cli_ac_scanbuff(decodedbuff, out_area.length, NULL, NULL, NULL, engine->root[0], &mdata, 0, 0, NULL, AC_SCAN_FT, NULL); /* FIXME: can we use CL_TYPE_TEXT_ASCII instead of 0? */
224
223
if(sret == CL_TYPE_HTML) {
225
224
cli_dbgmsg("cli_filetype2: detected HTML signature in Unicode file\n");
226
225
/* htmlnorm is able to handle any unicode now, since it skips null chars */