14
14
* You should have received a copy of the GNU General Public License
15
15
* along with this program; if not, write to the Free Software
16
16
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
* Revision 1.75 2004/06/14 09:07:10 nigelhorne
21
* Handle spam using broken e-mail generators for multipart/alternative
23
* Revision 1.74 2004/06/09 18:18:59 nigelhorne
24
* Find uuencoded viruses in multipart/mixed that have no start of message boundaries
26
* Revision 1.73 2004/05/14 08:15:55 nigelhorne
27
* Use mkstemp on cygwin
29
* Revision 1.72 2004/05/12 11:20:37 nigelhorne
30
* More bounce message false positives handled
32
* Revision 1.71 2004/05/10 11:35:11 nigelhorne
33
* No need to update mbox.c for cli_filetype problem
35
* Revision 1.69 2004/05/06 11:26:49 nigelhorne
36
* Force attachments marked as RFC822 messages to be scanned
38
* Revision 1.68 2004/04/29 08:59:24 nigelhorne
39
* Tidied up SetDispositionType
41
* Revision 1.67 2004/04/23 10:47:41 nigelhorne
42
* If an inline text portion has a filename treat it as an attachment
44
* Revision 1.66 2004/04/14 08:32:21 nigelhorne
45
* When debugging print the email number in mailboxes
47
* Revision 1.65 2004/04/07 18:18:07 nigelhorne
48
* Some occurrences of W97M.Lexar were let through
50
* Revision 1.64 2004/04/05 09:32:20 nigelhorne
51
* Added SCAN_TO_DISC define
53
* Revision 1.63 2004/04/01 15:32:34 nigelhorne
54
* Graceful exit if messageAddLine fails in strdup
56
* Revision 1.62 2004/03/31 17:00:20 nigelhorne
57
* Code tidy up free memory earlier
59
* Revision 1.61 2004/03/30 22:45:13 nigelhorne
60
* Better handling of multipart/multipart messages
62
* Revision 1.60 2004/03/29 09:22:03 nigelhorne
63
* Tidy up code and reduce shuffling of data
65
* Revision 1.59 2004/03/26 11:08:36 nigelhorne
68
* Revision 1.58 2004/03/25 22:40:46 nigelhorne
69
* Removed even more calls to realloc and some duplicated code
71
* Revision 1.57 2004/03/21 17:19:49 nigelhorne
72
* Handle bounce messages with no headers
74
* Revision 1.56 2004/03/21 09:41:26 nigelhorne
75
* Faster scanning for non MIME messages
77
* Revision 1.55 2004/03/20 17:39:23 nigelhorne
78
* First attempt to handle all bounces
80
* Revision 1.54 2004/03/19 15:40:45 nigelhorne
81
* Handle empty content-disposition types
83
* Revision 1.53 2004/03/19 08:08:02 nigelhorne
84
* If a message part of a multipart contains an RFC822 message that has no encoding don't scan it
86
* Revision 1.52 2004/03/18 21:51:41 nigelhorne
87
* If a message only contains a single RFC822 message that has no encoding don't save for scanning
89
* Revision 1.51 2004/03/17 19:48:12 nigelhorne
90
* Improved embedded RFC822 message handling
92
* Revision 1.50 2004/03/10 22:05:39 nigelhorne
93
* Fix seg fault when a message in a multimessage mailbox fails to scan
95
* Revision 1.49 2004/03/04 13:01:58 nigelhorne
96
* Ensure all bounces are rescanned by cl_mbox
98
* Revision 1.48 2004/02/27 12:16:26 nigelhorne
99
* Catch lines just containing ':'
101
* Revision 1.47 2004/02/23 10:13:08 nigelhorne
102
* Handle spaces before : in headers
104
* Revision 1.46 2004/02/18 13:29:19 nigelhorne
105
* Stop buffer overflows for files with very long suffixes
107
* Revision 1.45 2004/02/18 10:07:40 nigelhorne
110
* Revision 1.44 2004/02/15 08:45:54 nigelhorne
111
* Avoid scanning the same file twice
113
* Revision 1.43 2004/02/14 19:04:05 nigelhorne
114
* Handle spaces in boundaries
116
* Revision 1.42 2004/02/14 17:23:45 nigelhorne
117
* Had deleted O_BINARY by mistake
119
* Revision 1.41 2004/02/12 18:43:58 nigelhorne
120
* Use mkstemp on Solaris
122
* Revision 1.40 2004/02/11 08:15:59 nigelhorne
123
* Use O_BINARY for cygwin
125
* Revision 1.39 2004/02/06 13:46:08 kojm
126
* Support for clamav-config.h
128
* Revision 1.38 2004/02/04 13:29:48 nigelhorne
129
* Handle partial writes - and print when write fails
131
* Revision 1.37 2004/02/03 22:54:59 nigelhorne
132
* Catch another example of Worm.Dumaru.Y
134
* Revision 1.36 2004/02/02 09:52:57 nigelhorne
135
* Some instances of Worm.Dumaru.Y got through the net
137
* Revision 1.35 2004/01/28 10:15:24 nigelhorne
138
* Added support to scan some bounce messages
140
* Revision 1.34 2004/01/24 17:43:37 nigelhorne
141
* Removed (incorrect) warning about uninitialised variable
143
* Revision 1.33 2004/01/23 10:38:22 nigelhorne
144
* Fixed memory leak in handling some multipart messages
146
* Revision 1.32 2004/01/23 08:51:19 nigelhorne
147
* Add detection of uuencoded viruses in single part multipart/mixed files
149
* Revision 1.31 2004/01/22 22:13:06 nigelhorne
150
* Prevent infinite recursion on broken uuencoded files
152
* Revision 1.30 2004/01/13 10:12:05 nigelhorne
153
* Remove duplicate code when handling multipart messages
155
* Revision 1.29 2004/01/09 18:27:11 nigelhorne
156
* ParseMimeHeader could corrupt arg
158
* Revision 1.28 2004/01/09 15:07:42 nigelhorne
159
* Re-engineered update 1.11 lost in recent changes
161
* Revision 1.27 2004/01/09 14:45:59 nigelhorne
162
* Removed duplicated code in multipart handler
164
* Revision 1.26 2004/01/09 10:20:54 nigelhorne
165
* Locate uuencoded viruses hidden in text portions of multipart/mixed mime messages
167
* Revision 1.25 2004/01/06 14:41:18 nigelhorne
168
* Handle headers which do not have a space after the ':'
170
* Revision 1.24 2003/12/20 13:55:36 nigelhorne
171
* Ensure multipart just save the bodies of attachments
173
* Revision 1.23 2003/12/14 18:07:01 nigelhorne
174
* Some viruses in embedded messages were not being found
176
* Revision 1.22 2003/12/13 16:42:23 nigelhorne
179
* Revision 1.21 2003/12/11 14:35:48 nigelhorne
180
* Better handling of encapsulated messages
182
* Revision 1.20 2003/12/06 04:03:26 nigelhorne
183
* Handle hand crafted emails that incorrectly set multipart headers
185
* Revision 1.19 2003/11/21 07:26:31 nigelhorne
186
* Scan multipart alternatives that have no boundaries, finds some uuencoded happy99
188
* Revision 1.18 2003/11/17 08:13:21 nigelhorne
189
* Handle spaces at the end of lines of MIME headers
191
* Revision 1.17 2003/11/06 05:06:42 nigelhorne
192
* Some applications weren't being scanned
194
* Revision 1.16 2003/11/04 08:24:00 nigelhorne
195
* Handle multipart messages that have no text portion
197
* Revision 1.15 2003/10/12 20:13:49 nigelhorne
198
* Use NO_STRTOK_R consistent with message.c
200
* Revision 1.14 2003/10/12 12:37:11 nigelhorne
201
* Appledouble encoded EICAR now found
203
* Revision 1.13 2003/10/01 09:27:42 nigelhorne
204
* Handle content-type header going over to a new line
206
* Revision 1.12 2003/09/29 17:10:19 nigelhorne
207
* Moved stub from heap to stack since its maximum size is known
209
* Revision 1.11 2003/09/29 12:58:32 nigelhorne
210
* Handle Content-Type: /; name="eicar.com"
212
* Revision 1.10 2003/09/28 10:06:34 nigelhorne
213
* Compilable under SCO; removed duplicate code with message.c
216
static char const rcsid[] = "$Id: mbox.c,v 1.75 2004/06/14 09:07:10 nigelhorne Exp $";
18
static char const rcsid[] = "$Id: mbox.c,v 1.238+fixes 2005/04/19 09:20:55 nigelhorne Exp $";
219
21
#include "clamav-config.h"
223
/*#define NDEBUG /* map CLAMAV debug onto standard */
25
#define NDEBUG /* map CLAMAV debug onto standard */
226
28
#ifdef CL_THREAD_SAFE
331
279
{ "related", RELATED },
332
280
{ "report", REPORT },
333
281
{ "appledouble", APPLEDOUBLE },
282
{ "fax-message", FAX },
283
{ "encrypted", ENCRYPTED },
284
{ "x-bfile", X_BFILE }, /* BeOS */
285
{ "knowbot", KNOWBOT }, /* ??? */
286
{ "knowbot-metadata", KNOWBOT }, /* ??? */
287
{ "knowbot-code", KNOWBOT }, /* ??? */
288
{ "knowbot-state", KNOWBOT }, /* ??? */
336
static table_t *rfc821Table, *subtypeTable;
338
/* Maximum filenames under various systems */
339
#ifndef NAME_MAX /* e.g. Linux */
341
#ifdef MAXNAMELEN /* e.g. Solaris */
342
#define NAME_MAX MAXNAMELEN
345
#ifdef FILENAME_MAX /* e.g. SCO */
346
#define NAME_MAX FILENAME_MAX
292
#ifdef CL_THREAD_SAFE
293
static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER;
354
297
#define O_BINARY 0
357
#define SAVE_TO_DISC /* multipart/message are saved in a temporary file */
304
#include <sys/mman.h>
305
#else /* HAVE_SYS_MMAN_H */
313
encoding_type decoder; /* only BASE64 and QUOTEDPRINTABLE for now */
314
struct scanlist *next;
318
* This could be the future. Instead of parsing and decoding it just decodes.
320
* USE IT AT YOUR PERIL, a large number of viruses are not detected with this
321
* method, possibly because the decoded files must be exact and not have
322
* extra data at the start or end, which this code will produce.
324
* Currently only supports base64 and quoted-printable
326
* You may also see a lot of warnings. For the moment it falls back to old
327
* world mode if it doesn't know what to do - that'll be removed.
328
* The code is untidy...
330
* FIXME: Some mailbox scans are slower with this method. I suspect that it's
331
* because the scan can proceed to the end of the file rather than the end
332
* of the attachment which can mean that later emails are scanned many times
334
* TODO: Also all those pmemstr()s are slow, so we need to reduce the number
335
* and size of data scanned each time, and we fall through to
336
* cli_parse_mbox() too often
339
cli_mbox(const char *dir, int desc, unsigned int options)
341
char *start, *ptr, *line, *p, *q;
349
struct scanlist *scanlist, *scanelem;
352
cli_warnmsg("cli_mbox called with NULL dir\n");
355
if(fstat(desc, &statb) < 0)
358
size = statb.st_size;
363
if(size > 10*1024*1024)
364
return cli_parse_mbox(dir, desc, options); /* should be StreamMaxLength, I guess */
366
cli_warnmsg("NEW_WORLD is new code - use at your own risk.\n");
368
start = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
369
if(start == MAP_FAILED)
372
cli_dbgmsg("mmap'ed mbox\n");
374
/* last points to the last *valid* address in the array */
375
last = &start[size - 1];
377
ptr = cli_malloc(size);
380
memcpy(ptr, start, size);
383
last = &start[size - 1];
388
* Would be nice to have a case insensitive cli_memstr()
390
scanelem = scanlist = NULL;
393
while((p = (char *)cli_pmemstr(q, s, "base64", 6)) != NULL) {
394
cli_dbgmsg("Found base64\n");
396
scanelem->next = cli_malloc(sizeof(struct scanlist));
397
scanelem = scanelem->next;
399
scanlist = scanelem = cli_malloc(sizeof(struct scanlist));
400
scanelem->next = NULL;
401
scanelem->decoder = BASE64;
403
q = scanelem->start = &p[6];
404
if(((p = (char *)cli_pmemstr(q, s, "\nFrom ", 6)) != NULL) ||
405
((p = (char *)cli_pmemstr(q, s, "base64", 6)) != NULL) ||
406
((p = (char *)cli_pmemstr(q, s, "quoted-printable", 16)) != NULL)) {
407
scanelem->size = (size_t)(p - q);
411
scanelem->size = (size_t)(last - scanelem->start) + 1;
414
cli_dbgmsg("base64: last %u q %u s %u\n", (unsigned int)last, (unsigned int)q, s);
415
assert(scanelem->size <= size);
416
assert(&q[s - 1] <= last);
420
while((p = (char *)cli_pmemstr(q, s, "quoted-printable", 16)) != NULL) {
425
case '=': /* wrong but allow it */
430
cli_dbgmsg("Ignore quoted-printable false positive\n");
431
cli_dbgmsg("s = %u\n", s);
432
continue; /* false positive */
435
cli_dbgmsg("Found quoted-printable\n");
437
scanelem->next = cli_malloc(sizeof(struct scanlist));
438
scanelem = scanelem->next;
440
scanlist = scanelem = cli_malloc(sizeof(struct scanlist));
441
scanelem->next = NULL;
442
scanelem->decoder = QUOTEDPRINTABLE;
444
q = scanelem->start = &p[16];
445
cli_dbgmsg("qp: last %u q %u s %u\n", (unsigned int)last, (unsigned int)q, s);
446
if(((p = (char *)cli_pmemstr(q, s, "\nFrom ", 6)) != NULL) ||
447
((p = (char *)cli_pmemstr(q, s, "quoted-printable", 16)) != NULL) ||
448
((p = (char *)cli_pmemstr(q, s, "base64", 6)) != NULL)) {
449
scanelem->size = (size_t)(p - q);
452
cli_dbgmsg("qp: scanelem->size = %u\n", scanelem->size);
454
scanelem->size = (size_t)(last - scanelem->start) + 1;
457
assert(scanelem->size <= size);
458
assert(&q[s - 1] <= last);
461
if(scanlist == NULL) {
462
const struct tableinit *tableinit;
463
bool anyHeadersFound = FALSE;
464
bool hasuuencode = FALSE;
466
/* FIXME: message: There could of course be no decoder needed... */
467
for(tableinit = rfc821headers; tableinit->key; tableinit++)
468
if(cli_pmemstr(start, size, tableinit->key, strlen(tableinit->key))) {
469
anyHeadersFound = TRUE;
473
if((!anyHeadersFound) && cli_pmemstr(start, size, "\nbegin ", 7))
482
if(anyHeadersFound || hasuuencode) {
483
/* TODO: reduce the number of falls through here */
484
cli_warnmsg("cli_mbox: uuencode or unknown encoder\n");
485
return cli_parse_mbox(dir, desc, options);
488
cli_warnmsg("cli_mbox: I believe it's plain text which must be clean\n");
492
for(scanelem = scanlist; scanelem; scanelem = scanelem->next) {
493
if(scanelem->decoder == BASE64) {
494
char *b64start = scanelem->start;
495
long b64size = scanelem->size;
497
cli_dbgmsg("b64size = %lu\n", b64size);
498
while(*b64start != '\n') {
503
* Look for the end of the headers
505
while(b64start < last) {
506
if(*b64start == ';') {
509
} else if(*b64start == '\n') {
512
if((*b64start == '\n') || (*b64start == '\r')) {
523
while((!isalnum(*b64start)) && (*b64start != '/')) {
530
cli_dbgmsg("cli_mbox: decoding %ld base64 bytes\n", b64size);
537
messageSetEncoding(m, "base64");
542
/*printf("%ld: ", b64size); fflush(stdout);*/
544
for(ptr = b64start; b64size && (*ptr != '\n') && (*ptr != '\r'); ptr++) {
549
/*printf("%d: ", length); fflush(stdout);*/
551
line = cli_realloc(line, length + 1);
553
memcpy(line, b64start, length);
558
if(messageAddStr(m, line) < 0)
561
if((b64size > 0) && (*ptr == '\r')) {
567
if(strchr(line, '='))
569
} while(b64size > 0L);
572
fb = messageToFileblob(m, dir);
580
} else if(scanelem->decoder == QUOTEDPRINTABLE) {
581
char *quotedstart = scanelem->start;
582
long quotedsize = scanelem->size;
584
cli_dbgmsg("quotedsize = %lu\n", quotedsize);
585
while(*quotedstart != '\n') {
590
* Look for the end of the headers
592
while(quotedstart < last) {
593
if(*quotedstart == ';') {
596
} else if(*quotedstart == '\n') {
599
if((*quotedstart == '\n') || (*quotedstart == '\r')) {
609
while(!isalnum(*quotedstart)) {
614
if(quotedsize > 0L) {
615
cli_dbgmsg("cli_mbox: decoding %ld quoted-printable bytes\n", quotedsize);
620
messageSetEncoding(m, "quoted-printable");
627
/*printf("%ld: ", quotedsize); fflush(stdout);*/
629
for(ptr = quotedstart; quotedsize && (*ptr != '\n') && (*ptr != '\r'); ptr++) {
634
/*printf("%d: ", length); fflush(stdout);*/
636
line = cli_realloc(line, length + 1);
638
memcpy(line, quotedstart, length);
643
if(messageAddStr(m, line) < 0)
646
if((quotedsize > 0) && (*ptr == '\r')) {
652
} while(quotedsize > 0L);
655
fb = messageToFileblob(m, dir);
668
struct scanlist *n = scanelem->next;
680
* FIXME: Need to run cl_scandir() here and return that value
683
return CL_CLEAN; /* a lie - but it gets things going */
685
/* Fall back for now */
686
lseek(desc, 0L, SEEK_SET);
687
return cli_parse_mbox(dir, desc, options);
691
cli_mbox(const char *dir, int desc, unsigned int options)
694
cli_warnmsg("cli_mbox called with NULL dir\n");
697
return cli_parse_mbox(dir, desc, options);
360
702
* TODO: when signal handling is added, need to remove temp files when a
362
704
* TODO: add option to scan in memory not via temp files, perhaps with a
363
705
* named pipe or memory mapped file, though this won't work on big e-mails
364
706
* containing many levels of encapsulated messages - it'd just take too much
366
* TODO: if debug is enabled, catch a segfault and dump the current e-mail
367
* in its entirety, then call abort()
368
708
* TODO: parse .msg format files
369
709
* TODO: fully handle AppleDouble format, see
370
* http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
710
* http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
371
711
* TODO: ensure parseEmailHeaders is always called before parseEmailBody
372
712
* TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
713
* TODO: Look into TNEF. Is there anything that needs to be done here?
375
cl_mbox(const char *dir, int desc)
716
cli_parse_mbox(const char *dir, int desc, unsigned int options)
380
char buffer[LINE_LENGTH];
721
char buffer[LINE_LENGTH + 1];
722
#ifdef HAVE_BACKTRACE
725
static table_t *rfc821, *subtype;
727
char tmpfilename[16];
382
731
cli_dbgmsg("in mbox()\n");
385
734
if((fd = fdopen(i, "rb")) == NULL) {
386
735
cli_errmsg("Can't open descriptor %d\n", desc);
390
if(fgets(buffer, sizeof(buffer), fd) == NULL) {
741
* Copy the incoming mail for debugging, so that if it falls over
742
* we have a copy of the offending email. This is debugging code
743
* that you shouldn't of course install in a live environment. I am
744
* not interested in hearing about security issues with this section
747
strcpy(tmpfilename, "/tmp/mboxXXXXXX");
748
tmpfd = mkstemp(tmpfilename);
751
cli_errmsg("Can't make debugging file\n");
753
FILE *tmpfp = fdopen(tmpfd, "w");
756
while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL)
757
fputs(buffer, tmpfp);
761
cli_errmsg("Can't fdopen debugging file\n");
764
if(fgets(buffer, sizeof(buffer) - 1, fd) == NULL) {
391
765
/* empty message */
401
if(rfc821Table == NULL) {
402
assert(subtypeTable == NULL);
404
if(initialiseTables(&rfc821Table, &subtypeTable) < 0) {
772
#ifdef CL_THREAD_SAFE
773
pthread_mutex_lock(&tables_mutex);
776
assert(subtype == NULL);
778
if(initialiseTables(&rfc821, &subtype) < 0) {
781
#ifdef CL_THREAD_SAFE
782
pthread_mutex_unlock(&tables_mutex);
791
#ifdef CL_THREAD_SAFE
792
pthread_mutex_unlock(&tables_mutex);
795
#ifdef HAVE_BACKTRACE
796
segv = signal(SIGSEGV, sigsegv);
412
* is it a UNIX style mbox with more than one
800
* Is it a UNIX style mbox with more than one
413
801
* mail message, or just a single mail message?
803
* TODO: It would be better if we called cli_scandir here rather than
804
* in cli_scanmail. Then we could improve the way mailboxes with more
805
* than one message are handled, e.g. stopping parsing when an infected
806
* message is found, and giving a better indication of which message
807
* within the mailbox is infected
415
809
if(strncmp(buffer, "From ", 5) == 0) {
417
811
* Have been asked to check a UNIX style mbox file, which
418
812
* may contain more than one e-mail message to decode
814
* It would be far better for scanners.c to do this splitting
816
* FOR EACH mail in the mailbox
818
* pass this mail to cli_mbox --
820
* IF this file has a virus quit
825
* This would remove a problem with this code that it can
826
* fill up the tmp directory before it starts scanning
420
bool lastLineWasEmpty = FALSE;
421
int messagenumber = 1;
828
bool lastLineWasEmpty;
830
message *m = messageCreate();
834
#ifdef HAVE_BACKTRACE
835
signal(SIGSEGV, segv);
843
lastLineWasEmpty = FALSE;
424
/*cli_dbgmsg("read: %s", buffer);*/
426
847
cli_chomp(buffer);
427
848
if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
428
849
cli_dbgmsg("Deal with email number %d\n", messagenumber++);
430
851
* End of a message in the mail box
432
body = parseEmailHeaders(m, rfc821Table);
853
body = parseEmailHeaders(m, rfc821);
433
858
messageDestroy(m);
434
859
if(messageGetBody(body))
435
if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable)) {
860
if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options)) {
436
861
messageReset(body);
441
866
* Starting a new message, throw away all the
442
* information about the old one
867
* information about the old one. It would
868
* be best to be able to scan this message
869
* now, but cli_scanfile needs arguments
870
* that haven't been passed here so it can't be
445
874
messageReset(body);
447
876
cli_dbgmsg("Finished processing message\n");
449
878
lastLineWasEmpty = (bool)(buffer[0] == '\0');
450
if(messageAddLine(m, buffer, 1) < 0)
452
} while(fgets(buffer, sizeof(buffer), fd) != NULL);
454
cli_dbgmsg("Deal with email number %d\n", messagenumber);
880
if(isuuencodebegin(buffer)) {
882
* Fast track visa to uudecode.
885
if(uufasttrack(m, buffer, dir, fd) < 0)
886
if(messageAddStr(m, buffer) < 0)
889
if(messageAddStr(m, buffer) < 0)
891
} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
895
cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
896
body = parseEmailHeaders(m, rfc821);
457
900
* It's a single message, parse the headers then the body
902
if(strncmp(buffer, "P I ", 4) == 0)
461
* No need to preprocess such as cli_chomp() since
462
* that'll be done by parseEmailHeaders()
464
* TODO: this needlessly creates a message object,
465
* it'd be better if parseEmailHeaders could also
466
* read in from a file. I do not want to lump the
467
* parseEmailHeaders code here, that'd be a duplication
468
* of code I want to avoid
904
* CommuniGate Pro format: ignore headers until
470
if(messageAddLine(m, buffer, 1) < 0)
472
while(fgets(buffer, sizeof(buffer), fd) != NULL);
478
body = parseEmailHeaders(m, rfc821Table);
481
* Write out the last entry in the mailbox
483
if(messageGetBody(body))
484
if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable))
490
messageDestroy(body);
907
while((fgets(buffer, sizeof(buffer) - 1, fd) != NULL) &&
908
(strchr("\r\n", buffer[0]) == NULL))
911
* Ignore any blank lines at the top of the message
913
while(strchr("\r\n", buffer[0]) &&
914
(getline_from_mbox(buffer, sizeof(buffer) - 1, fd) != NULL))
917
buffer[sizeof(buffer) - 1] = '\0';
919
body = parseEmailFile(fd, rfc821, buffer, dir);
924
* This is not necessarily true, but since the only options are
925
* CL_CLEAN and CL_VIRUS this is the better choice. It would be
926
* nice to have CL_CONTINUESCANNING or something like that
932
* Write out the last entry in the mailbox
934
if(messageGetBody(body))
935
if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options))
936
retcode = CL_EFORMAT;
941
messageDestroy(body);
492
944
cli_dbgmsg("cli_mbox returning %d\n", retcode);
946
#ifdef HAVE_BACKTRACE
947
signal(SIGSEGV, segv);
957
* Read in an email message from fin, parse it, and return the message
959
* FIXME: files full of new lines and nothing else are
960
* handled ungracefully...
963
parseEmailFile(FILE *fin, const table_t *rfc821, const char *firstLine, const char *dir)
965
bool inHeader = TRUE;
966
bool contMarker = FALSE;
967
bool lastWasBlank = FALSE;
969
bool anyHeadersFound = FALSE;
970
int commandNumber = -1;
971
char *fullline = NULL, *boundary = NULL;
972
size_t fulllinelength = 0;
973
char buffer[LINE_LENGTH+1];
975
cli_dbgmsg("parseEmailFile\n");
977
ret = messageCreate();
981
strcpy(buffer, firstLine);
985
(void)cli_chomp(buffer);
993
* Don't blank lines which are only spaces from headers,
994
* otherwise they'll be treated as the end of header marker
997
lastWasBlank = FALSE;
998
if(boundaryStart(buffer, boundary)) {
999
cli_dbgmsg("Found a header line with space that should be blank\n");
1008
cli_dbgmsg("parseEmailFile: check '%s' contMarker %d fullline 0x%p\n",
1009
buffer ? buffer : "", (int)contMarker, fullline);
1010
if(line && isspace(line[0])) {
1011
char copy[sizeof(buffer)];
1013
strcpy(copy, buffer);
1015
if(copy[0] == '\0') {
1017
* The header line contains only white
1018
* space. This is not the end of the
1019
* headers according to RFC2822, but
1020
* some MUAs will handle it as though
1021
* it were, and virus writers exploit
1022
* this bug. We can't just break from
1023
* the loop here since that would allow
1024
* other exploits such as inserting a
1025
* white space line before the
1026
* content-type line. So we just have
1027
* to make a best guess. Sigh.
1030
if(parseEmailHeader(ret, fullline, rfc821) < 0)
1036
if((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL) {
1037
lastWasBlank = TRUE;
1042
lastWasBlank = FALSE;
1043
if((line == NULL) && (fullline == NULL)) { /* empty line */
1046
* A blank line signifies the end of
1047
* the header and the start of the text
1049
if(!anyHeadersFound)
1050
/* Ignore the junk at the top */
1053
cli_dbgmsg("End of header information\n");
1062
if(fullline == NULL) {
1063
char cmd[LINE_LENGTH + 1], out[LINE_LENGTH + 1];
1066
* Continuation of line we're ignoring?
1068
if((line[0] == '\t') || (line[0] == ' ') || contMarker) {
1069
contMarker = continuationMarker(line);
1074
* Is this a header we're interested in?
1076
if((strchr(line, ':') == NULL) ||
1077
(cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
1078
if(strncmp(line, "From ", 5) == 0)
1079
anyHeadersFound = TRUE;
1083
ptr = rfc822comments(cmd, out);
1084
commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
1086
switch(commandNumber) {
1087
case CONTENT_TRANSFER_ENCODING:
1088
case CONTENT_DISPOSITION:
1090
anyHeadersFound = TRUE;
1093
if(!anyHeadersFound)
1094
anyHeadersFound = usefulHeader(commandNumber, cmd);
1097
fullline = strdup(line);
1098
fulllinelength = strlen(line) + 1;
1099
} else if(line != NULL) {
1100
fulllinelength += strlen(line);
1101
fullline = cli_realloc(fullline, fulllinelength);
1102
strcat(fullline, line);
1106
contMarker = continuationMarker(line);
1113
assert(fullline != NULL);
1115
lookahead = getc(fin);
1116
if(lookahead != EOF) {
1117
ungetc(lookahead, fin);
1120
* Section B.2 of RFC822 says TAB or
1121
* SPACE means a continuation of the
1124
* Add all the arguments on the line
1126
if((lookahead == '\t') || (lookahead == ' '))
1132
for(qptr = fullline; *qptr; qptr++)
1140
ptr = rfc822comments(fullline, NULL);
1146
if(parseEmailHeader(ret, fullline, rfc821) < 0)
1152
} else if(line && isuuencodebegin(line)) {
1154
* Fast track visa to uudecode.
1155
* TODO: binhex, yenc
1157
if(uufasttrack(ret, line, dir, fin) < 0)
1158
if(messageAddStr(ret, line) < 0)
1161
if(messageAddStr(ret, line) < 0)
1163
} while(getline_from_mbox(buffer, sizeof(buffer) - 1, fin) != NULL);
1166
if(*fullline) switch(commandNumber) {
1167
case CONTENT_TRANSFER_ENCODING:
1168
case CONTENT_DISPOSITION:
1170
cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline);
1175
if(!anyHeadersFound) {
1177
* False positive in believing we have an e-mail when we don't
1179
messageDestroy(ret);
1180
cli_dbgmsg("parseEmailFile: no headers found, assuming it isn't an email\n");
1186
cli_dbgmsg("parseEmailFile: return\n");
498
1192
* The given message contains a raw e-mail.
500
* This function parses the headers of m and sets the message's arguments
502
1194
* Returns the message's body with the correct arguments set
1196
* The downside of this approach is that for a short time we have two copies
1197
* of the message in memory, the upside is that it makes for easier parsing
1198
* of encapsulated messages, and in the long run uses less memory in those
1201
* TODO: remove the duplication with parseEmailFile
504
1203
static message *
505
parseEmailHeaders(const message *m, const table_t *rfc821Table)
1204
parseEmailHeaders(const message *m, const table_t *rfc821)
507
bool inContinuationHeader = FALSE; /* state machine: ugh */
508
1206
bool inHeader = TRUE;
1209
bool anyHeadersFound = FALSE;
1210
int commandNumber = -1;
1211
char *fullline = NULL;
1212
size_t fulllinelength = 0;
1214
cli_dbgmsg("parseEmailHeaders\n");
1163
2190
cli_dbgmsg("Found multipart inside multipart\n");
1165
body = parseEmailHeaders(aMessage, rfc821Table);
1167
assert(aMessage == messages[i]);
1168
messageDestroy(messages[i]);
1171
if(mainMessage && (mainMessage != messageIn))
1172
messageDestroy(mainMessage);
1174
/*t = messageToText(body);
1175
rc = parseEmailBody(body, blobs, nBlobs, t, dir, rfc821Table, subtypeTable);*/
1176
rc = parseEmailBody(body, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
1179
cli_dbgmsg("Finished recursion\n");
2193
* The headers were parsed when reading in the
2194
* whole multipart section
2196
rc = parseEmailBody(aMessage, aText, dir, rfc821Table, subtypeTable, options);
2197
cli_dbgmsg("Finished recursion\n");
2198
assert(aMessage == messages[i]);
2199
messageDestroy(messages[i]);
1184
rc = parseEmailBody(NULL, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable);
2202
rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options);
1185
2203
if(mainMessage && (mainMessage != messageIn))
1186
2204
messageDestroy(mainMessage);
1187
2205
mainMessage = NULL;
1194
* TODO: it may be nice to
1195
* have an option to throw
1196
* away all images and sound
1197
* files for ultra-secure sites
1199
addAttachment = TRUE;
1202
2209
cli_warnmsg("Only text and application attachments are supported, type = %d\n",
1203
2210
messageGetMimeType(aMessage));
1208
* It must be either text or
1209
* an attachment. It can't be both
1211
assert(addToText || addAttachment);
1212
assert(!(addToText && addAttachment));
1214
2214
if(addToText) {
2215
cli_dbgmsg("Adding to non mime-part\n");
1215
2216
aText = textAdd(aText, messageGetBody(aMessage));
1216
} else if(addAttachment) {
1217
blob *aBlob = messageToBlob(aMessage);
2218
fb = messageToFileblob(aMessage, dir);
1220
assert(blobGetFilename(aBlob) != NULL);
1222
blobList[numberOfAttachments++] = aBlob;
2221
fileblobDestroy(fb);
1225
2223
assert(aMessage == messages[i]);
1226
2224
messageDestroy(messages[i]);
1227
2225
messages[i] = NULL;
1230
if(numberOfAttachments == 0) {
1231
/* No usable attachment was found */
1232
rc = parseEmailBody(NULL, NULL, 0, aText, dir, rfc821Table, subtypeTable);
1237
* Store any existing attachments at the end of
1238
* the list we've just built up
1240
numberOfNewAttachments = 0;
1241
for(i = 0; i < nBlobs; i++) {
1244
assert(blobs[i]->magic == BLOB);
1246
for(j = 0; j < numberOfAttachments; j++)
1247
if(blobcmp(blobs[i], blobList[j]) == 0)
1249
if(j >= numberOfAttachments) {
1250
assert(numberOfAttachments < MAX_ATTACHMENTS);
1251
cli_dbgmsg("Attaching %s to list of blobs\n",
1252
blobGetFilename(blobs[i]));
1253
blobClose(blobs[i]);
1254
blobList[numberOfAttachments++] = blobs[i];
1255
numberOfNewAttachments++;
1257
cli_warnmsg("Don't scan the same file twice as '%s' and '%s'\n",
1258
blobGetFilename(blobs[i]),
1259
blobGetFilename(blobList[j]));
1260
blobDestroy(blobs[i]);
1265
* If we've found nothing new save what we have
1266
* and quit - that's this part all done.
1268
if(numberOfNewAttachments == 0) {
1269
rc = parseEmailBody(NULL, blobList, numberOfAttachments, NULL, dir, rfc821Table, subtypeTable);
1273
* If there's only one part of the MULTIPART
1274
* we already have the body to decode so
1275
* there's no more work to do.
1277
* This is mostly for the situation where
1278
* broken messages claim to be multipart
1279
* but aren't, which was causing us to go into
1280
* infinite recursion
1283
rc = parseEmailBody(mainMessage, blobList, numberOfAttachments, aText, dir, rfc821Table, subtypeTable);
1284
else if(numberOfAttachments == 1) {
1285
(void)saveFile(blobList[0], dir);
1286
blobDestroy(blobList[0]);
2228
/* rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options); */
1292
* According to section 5.1.5 RFC2046, the
1293
* default mime type of multipart/digest parts
1429
cli_dbgmsg("%d attachments found\n", nBlobs);
1435
* No attachments - scan the text portions, often files
1436
* are hidden in HTML code
1438
cli_dbgmsg("%d multiparts found\n", multiparts);
1439
for(i = 0; i < multiparts; i++) {
1440
b = messageToBlob(messages[i]);
1444
cli_dbgmsg("Saving multipart %d, encoded with scheme %d\n",
1445
i, messageGetEncoding(messages[i]));
1447
(void)saveFile(b, dir);
1454
* Look for uu-encoded main file
1458
if((t_line = uuencodeBegin(mainMessage)) != NULL) {
1459
cli_dbgmsg("Found uuencoded file\n");
1462
* Main part contains uuencoded section
1464
messageSetEncoding(mainMessage, "x-uuencode");
1466
if((b = messageToBlob(mainMessage)) != NULL) {
1467
if((cptr = blobGetFilename(b)) != NULL) {
1468
cli_dbgmsg("Found uuencoded message %s\n", cptr);
1470
(void)saveFile(b, dir);
1474
} else if((encodingLine(mainMessage) != NULL) &&
1475
((t_line = bounceBegin(mainMessage)) != NULL)) {
1477
static const char encoding[] = "Content-Transfer-Encoding";
1479
* Attempt to save the original (unbounced)
1480
* message - clamscan will find that in the
1481
* directory and call us again (with any luck)
1482
* having found an e-mail message to handle
1484
* This finds a lot of false positives, the
1485
* search that an encoding line is in the
1486
* bounce (i.e. it's after the bounce header)
1487
* helps a bit, but at the expense of scanning
1488
* the entire message. messageAddLine
1489
* optimisation could help here, but needs
1490
* careful thought, do it with line numbers
1491
* would be best, since the current method in
1492
* messageAddLine of checking encoding first
1493
* must remain otherwise non bounce messages
1496
for(t = t_line; t; t = t->t_next)
1497
if((strncasecmp(t->t_text, encoding, sizeof(encoding) - 1) == 0) &&
1498
(strstr(t->t_text, "7bit") == NULL))
1500
if(t && ((b = textToBlob(t_line, NULL)) != NULL)) {
1501
cli_dbgmsg("Found a bounce message\n");
1510
cli_dbgmsg("Not found uuencoded file\n");
1512
if(messageGetMimeType(mainMessage) == MESSAGE)
1514
* Quick peek, if the encapsulated
1516
* content encoding statement don't
1517
* bother saving to scan, it's safe
1519
saveIt = (encodingLine(mainMessage) != NULL);
1520
else if((t_line = encodingLine(mainMessage)) != NULL) {
1522
* Some bounces include the message
1523
* body without the headers.
1524
* Unfortunately this generates a
1525
* lot of false positives that a bounce
1526
* has been found when it hasn't.
1528
if((b = blobCreate()) != NULL) {
1529
cli_dbgmsg("Found a bounce message with no header\n");
1530
blobAddData(b, "Received: by clamd\n", 19);
1532
b = textToBlob(t_line, b);
1541
* Save the entire text portion,
1542
* since it may be an HTML file with
1543
* a JavaScript virus
1548
cli_dbgmsg("Saving text part to scan\n");
1549
saveTextPart(mainMessage, dir);
1553
rc = (multiparts) ? 1 : 2; /* anything saved? */
1555
short attachmentNumber;
1557
for(attachmentNumber = 0; attachmentNumber < nBlobs; attachmentNumber++) {
1558
blob *b = blobs[attachmentNumber];
1561
if(!saveFile(b, dir))
1564
blobs[attachmentNumber] = NULL;
1569
if(aText && (textIn == NULL))
2378
if(aText && (textIn == NULL)) {
2379
/* Look for a bounce in the text (non mime encoded) portion */
2382
for(t = aText; t; t = t->t_next) {
2383
const line_t *l = t->t_line;
2384
const text *lookahead, *topofbounce;
2393
if(cli_filetype(s, strlen(s)) != CL_TYPE_MAIL)
2397
* We've found what looks like the start of a bounce
2398
* message. Only bother saving if it really is a bounce
2399
* message, this helps to speed up scanning of ping-pong
2400
* messages that have lots of bounces within bounces in
2403
for(lookahead = t->t_next; lookahead; lookahead = lookahead->t_next) {
2404
l = lookahead->t_line;
2409
if(strncasecmp(s, "Content-Type:", 13) == 0)
2411
* Don't bother with plain/text or
2414
if(strstr(s, "text/") == NULL)
2416
* Don't bother to save the unuseful
2422
if(lookahead && (lookahead->t_line == NULL)) {
2423
cli_dbgmsg("Non mime part bounce message is not mime encoded, so it will not be scanned\n");
2425
/* look for next bounce message */
2430
* Prescan the bounce message to see if there's likely
2431
* to be anything nasty.
2432
* This algorithm is hand crafted and may be breakable
2433
* so all submissions are welcome. It's best NOT to
2434
* remove this however you may be tempted, because it
2435
* significantly speeds up the scanning of multiple
2436
* bounces (i.e. bounces within many bounces)
2438
for(; lookahead; lookahead = lookahead->t_next) {
2439
l = lookahead->t_line;
2443
if((strncasecmp(s, "Content-Type:", 13) == 0) &&
2444
(strstr(s, "multipart/") == NULL) &&
2445
(strstr(s, "message/rfc822") == NULL) &&
2446
(strstr(s, "text/plain") == NULL))
2450
if(lookahead == NULL) {
2451
cli_dbgmsg("cli_mbox: I believe it's plain text which must be clean\n");
2452
/* nothing here, move along please */
2455
if((fb = fileblobCreate()) == NULL)
2457
cli_dbgmsg("Save non mime part bounce message\n");
2458
fileblobSetFilename(fb, dir, "bounce");
2459
fileblobAddData(fb, (unsigned char *)"Received: by clamd (bounce)\n", 28);
2473
fileblobAddData(fb, (unsigned char *)s, strlen(s));
2475
fileblobAddData(fb, (unsigned char *)"\n", 1);
2476
lookahead = t->t_next;
2477
if(lookahead == NULL)
2481
if((!inheader) && l) {
2483
if(cli_filetype(s, strlen(s)) == CL_TYPE_MAIL) {
2484
cli_dbgmsg("Found the start of another bounce candidate\n");
2490
fileblobDestroy(fb);
2494
* Don't do this - it slows bugs.txt
2497
mainMessage->bounce = NULL;*/
1570
2499
textDestroy(aText);
1573
if(blobs && (blobsIn == NULL))
1574
blobArrayDestroy(blobs, nBlobs);
2504
* No attachments - scan the text portions, often files
2505
* are hidden in HTML code
2507
cli_dbgmsg("%d multiparts found\n", multiparts);
2508
for(i = 0; i < multiparts; i++) {
2509
fb = messageToFileblob(messages[i], dir);
2512
cli_dbgmsg("Saving multipart %d\n", i);
2514
fileblobDestroy(fb);
2520
* Look for uu-encoded main file
2524
if((t_line = uuencodeBegin(mainMessage)) != NULL) {
2525
cli_dbgmsg("Found uuencoded file\n");
2528
* Main part contains uuencoded section
2530
messageSetEncoding(mainMessage, "x-uuencode");
2532
if((fb = messageToFileblob(mainMessage, dir)) != NULL) {
2533
if((cptr = fileblobGetFilename(fb)) != NULL)
2534
cli_dbgmsg("Saving uuencoded message %s\n", cptr);
2535
fileblobDestroy(fb);
2538
} else if((encodingLine(mainMessage) != NULL) &&
2539
((t_line = bounceBegin(mainMessage)) != NULL)) {
2540
const text *t, *start;
2542
* Attempt to save the original (unbounced)
2543
* message - clamscan will find that in the
2544
* directory and call us again (with any luck)
2545
* having found an e-mail message to handle.
2547
* This finds a lot of false positives, the
2548
* search that a content type is in the
2549
* bounce (i.e. it's after the bounce header)
2553
* optimisation could help here, but needs
2554
* careful thought, do it with line numbers
2555
* would be best, since the current method in
2556
* messageAddLine of checking encoding first
2557
* must remain otherwise non bounce messages
2560
for(t = start = t_line; t; t = t->t_next) {
2561
char cmd[LINE_LENGTH + 1];
2562
const char *txt = lineGetData(t->t_line);
2566
if(cli_strtokbuf(txt, 0, ":", cmd) == NULL)
2569
switch(tableFind(rfc821Table, cmd)) {
2570
case CONTENT_TRANSFER_ENCODING:
2571
if((strstr(txt, "7bit") == NULL) &&
2572
(strstr(txt, "8bit") == NULL))
2575
case CONTENT_DISPOSITION:
2578
if(strstr(txt, "text/plain") != NULL)
2582
if(strcasecmp(cmd, "From") == 0)
2584
else if(strcasecmp(cmd, "Received") == 0)
2590
if(t && ((fb = fileblobCreate()) != NULL)) {
2591
cli_dbgmsg("Found a bounce message\n");
2592
fileblobSetFilename(fb, dir, "bounce");
2593
if(textToFileblob(start, fb) == NULL)
2594
cli_dbgmsg("Nothing new to save in the bounce message");
2597
fileblobDestroy(fb);
2599
cli_dbgmsg("Not found a bounce message\n");
2603
cli_dbgmsg("Not found uuencoded file\n");
2605
if(messageGetMimeType(mainMessage) == MESSAGE)
2607
* Quick peek, if the encapsulated
2609
* content encoding statement don't
2610
* bother saving to scan, it's safe
2612
saveIt = (encodingLine(mainMessage) != NULL);
2613
else if((t_line = encodingLine(mainMessage)) != NULL) {
2615
* Some bounces include the message
2616
* body without the headers.
2617
* FIXME: Unfortunately this generates a
2618
* lot of false positives that a bounce
2619
* has been found when it hasn't.
2621
if((fb = fileblobCreate()) != NULL) {
2622
cli_dbgmsg("Found a bounce message with no header at '%s'\n",
2623
lineGetData(t_line->t_line));
2624
fileblobSetFilename(fb, dir, "bounce");
2626
(const unsigned char *)"Received: by clamd (bounce)\n",
2629
fb = textToFileblob(t_line, fb);
2631
fileblobDestroy(fb);
2634
} else if(multiparts == 0)
2636
* Save the entire text portion,
2637
* since it may be an HTML file with
2638
* a JavaScript virus
2645
cli_dbgmsg("Saving text part to scan\n");
2647
* TODO: May be better to save aText
2649
saveTextPart(mainMessage, dir);
2650
if(mainMessage != messageIn) {
2651
messageDestroy(mainMessage);
2654
messageReset(mainMessage);
2659
rc = (multiparts) ? 1 : 2; /* anything saved? */
1576
2661
if(mainMessage && (mainMessage != messageIn))
1577
2662
messageDestroy(mainMessage);
1579
2667
cli_dbgmsg("parseEmailBody() returning %d\n", rc);
1869
3113
saveTextPart(message *m, const char *dir)
1873
3117
messageAddArgument(m, "filename=textportion");
1874
if((b = messageToBlob(m)) != NULL) {
3118
if((fb = messageToFileblob(m, dir)) != NULL) {
1876
3120
* Save main part to scan that
1878
cli_dbgmsg("Saving main message, encoded with scheme %d\n",
1879
messageGetEncoding(m));
1881
(void)saveFile(b, dir);
3122
cli_dbgmsg("Saving main message\n");
3124
fileblobDestroy(fb);
3129
* Handle RFC822 comments in headers.
3130
* If out == NULL, return a buffer without the comments, the caller must free
3131
* the returned buffer
3132
* Return NULL on error or if the input has no comments.
3133
* See section 3.4.3 of RFC822
3134
* TODO: handle comments that go on to more than one line
3137
rfc822comments(const char *in, char *out)
3141
int backslash, inquote, commentlevel;
3146
if(strchr(in, '(') == NULL)
3152
out = cli_malloc(strlen(in) + 1);
3157
backslash = commentlevel = inquote = 0;
3160
cli_dbgmsg("rfc822comments: contains a comment\n");
3162
for(iptr = in; *iptr; iptr++)
3164
if(commentlevel == 0)
3167
} else switch(*iptr) {
3184
else if(commentlevel > 0)
3188
if(commentlevel == 0)
3192
if(backslash) /* last character was a single backslash */
3198
cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);
3204
* Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must
3205
* free, or NULL on error
3208
rfc2047(const char *in)
3213
if((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL))
3216
cli_dbgmsg("rfc2047 '%s'\n", in);
3217
out = cli_malloc(strlen(in) + 1);
3224
/* For each RFC2047 string */
3226
char encoding, *ptr, *enctext;
3230
/* Find next RFC2047 string */
3232
if((*in == '=') && (in[1] == '?')) {
3238
/* Skip over charset, find encoding */
3239
while((*in != '?') && *in)
3244
encoding = tolower(encoding);
3246
if((encoding != 'q') && (encoding != 'b')) {
3247
cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus that was missed, report it to bugs@clamav.net\n", encoding);
3252
/* Skip to encoded text */
3258
enctext = strdup(in);
3259
if(enctext == NULL) {
3264
in = strstr(in, "?=");
3270
ptr = strstr(enctext, "?=");
3271
assert(ptr != NULL);
3273
/*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
3275
m = messageCreate();
3278
messageAddStr(m, enctext);
3282
messageSetEncoding(m, "quoted-printable");
3285
messageSetEncoding(m, "base64");
3288
b = messageToBlob(m);
3289
len = blobGetDataSize(b);
3290
cli_dbgmsg("Decoded as '%*.*s'\n", len, len, blobGetData(b));
3291
memcpy(pout, blobGetData(b), len);
1883
3292
blobDestroy(b);
3294
if(pout[len - 1] == '\n')
3305
cli_dbgmsg("rfc2047 returns '%s'\n", out);
1888
* Save some data as a unique file in the given directory.
1890
* TODO: don't save archive files if archive scanning is disabled, or
1891
* OLE2 files if that is disabled or pattern match --exclude, but
1892
* we need access to the command line options/clamav.conf here to
1893
* be able to do that
3311
* Handle partial messages
1896
saveFile(const blob *b, const char *dir)
3314
rfc1341(message *m, const char *dir)
1898
const unsigned long nbytes = blobGetDataSize(b);
1899
size_t suffixLen = 0;
1901
const char *cptr, *suffix;
1902
char filename[NAME_MAX + 1];
1904
assert(dir != NULL);
1909
cptr = blobGetFilename(b);
3317
char *arg, *id, *number, *total, *oldfilename;
3319
char pdir[NAME_MAX + 1];
3321
id = (char *)messageFindArgument(m, "id");
3326
if((tmpdir = getenv("TEMP")) == (char *)NULL)
3327
if((tmpdir = getenv("TMP")) == (char *)NULL)
3328
if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
3331
if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
3332
if((tmpdir = getenv("TMP")) == (char *)NULL)
3333
if((tmpdir = getenv("TEMP")) == (char *)NULL)
3341
snprintf(pdir, sizeof(pdir) - 1, "%s/clamav-partial", tmpdir);
3343
if((mkdir(pdir, 0700) < 0) && (errno != EEXIST)) {
3344
cli_errmsg("Can't create the directory '%s'\n", pdir);
3349
if(stat(pdir, &statb) < 0) {
3350
cli_errmsg("Can't stat the directory '%s'\n", pdir);
3353
if(statb.st_mode & 077)
3354
cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
3355
pdir, statb.st_mode & 0777);
3358
number = (char *)messageFindArgument(m, "number");
3359
if(number == NULL) {
3364
oldfilename = (char *)messageFindArgument(m, "filename");
3365
if(oldfilename == NULL)
3366
oldfilename = (char *)messageFindArgument(m, "name");
3368
arg = cli_malloc(10 + strlen(id) + strlen(number));
3369
sprintf(arg, "filename=%s%s", id, number);
3370
messageAddArgument(m, arg);
3374
cli_warnmsg("Must reset to %s\n", oldfilename);
3378
if((fb = messageToFileblob(m, pdir)) == NULL) {
3384
fileblobDestroy(fb);
3386
total = (char *)messageFindArgument(m, "total");
3387
cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
3389
int n = atoi(number);
3390
int t = atoi(total);
1916
* Some programs are broken and use an idea of a ".suffix"
1917
* to determine the file type rather than looking up the
1918
* magic number. CPM has a lot to answer for...
1919
* FIXME: the suffix now appears twice in the filename...
3395
* If it's the last one - reassemble it
3396
* FIXME: this assumes that we receive the parts in order
1921
suffix = strrchr(cptr, '.');
1925
suffixLen = strlen(suffix);
1927
/* Found a full stop which isn't a suffix */
1933
cli_dbgmsg("Saving attachment in %s/%s\n", dir, cptr);
1936
* Allow for very long filenames. We have to truncate them to fit
1938
snprintf(filename, sizeof(filename) - 1 - suffixLen, "%s/%.*sXXXXXX", dir,
1939
(int)(sizeof(filename) - 9 - suffixLen - strlen(dir)), cptr);
1942
* TODO: add a HAVE_MKSTEMP property
1944
#if defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN)
1945
fd = mkstemp(filename);
1947
(void)mktemp(filename);
1948
fd = open(filename, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
1952
cli_errmsg("Can't create temporary file %s: %s\n", filename, strerror(errno));
1953
cli_dbgmsg("%lu %d %d\n", suffixLen, sizeof(filename), strlen(filename));
1958
* Add the suffix back to the end of the filename. Tut-tut, filenames
1959
* should be independent of their usage on UNIX type systems.
1962
char stub[NAME_MAX + 1];
1964
snprintf(stub, sizeof(stub), "%s%s", filename, suffix);
1966
rename(stub, filename);
1968
link(stub, filename);
1973
cli_dbgmsg("Saving attachment as %s (%lu bytes long)\n",
1976
if(cli_writen(fd, blobGetData(b), (size_t)nbytes) != nbytes) {
1982
return (close(fd) >= 0);
3398
if((n == t) && ((dd = opendir(pdir)) != NULL)) {
3400
char outname[NAME_MAX + 1];
3402
snprintf(outname, sizeof(outname) - 1, "%s/%s", dir, id);
3404
cli_dbgmsg("outname: %s\n", outname);
3406
fout = fopen(outname, "wb");
3408
cli_errmsg("Can't open '%s' for writing", outname);
3415
for(n = 1; n <= t; n++) {
3416
char filename[NAME_MAX + 1];
3417
const struct dirent *dent;
3418
#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
3421
char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
3425
snprintf(filename, sizeof(filename), "%s%d", id, n);
3427
#ifdef HAVE_READDIR_R_3
3428
while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
3429
#elif defined(HAVE_READDIR_R_2)
3430
while((dent = (struct dirent *)readdir_r(dd, &result.d))) {
3431
#else /*!HAVE_READDIR_R*/
3432
while((dent = readdir(dd))) {
3435
char buffer[BUFSIZ];
3437
extern short cli_leavetemps_flag;
3439
if(dent->d_ino == 0)
3442
if(strncmp(filename, dent->d_name, strlen(filename)) != 0)
3445
sprintf(filename, "%s/%s", pdir, dent->d_name);
3446
fin = fopen(filename, "rb");
3448
cli_errmsg("Can't open '%s' for reading", filename);
3457
while(fgets(buffer, sizeof(buffer), fin) != NULL)
3459
* Ensure that trailing newlines
3462
if(buffer[0] == '\n')
3468
while(--nblanks > 0);
3469
fputs(buffer, fout);
3473
/* don't unlink if leave temps */
3474
if(!cli_leavetemps_flag)
3491
#if defined(FOLLOWURLS) && (FOLLOWURLS > 0)
3493
checkURLs(message *m, const char *dir)
3495
blob *b = messageToBlob(m);
3499
#if defined(WITH_CURL) && defined(CL_THREAD_SAFE)
3500
pthread_t tid[FOLLOWURLS];
3501
struct arg args[FOLLOWURLS];
3503
tag_arguments_t hrefs;
3508
len = blobGetDataSize(b);
3515
/* TODO: make this size customisable */
3516
if(len > 100*1024) {
3517
cli_warnmsg("Viruses pointed to by URL not scanned in large message\n");
3530
hrefs.tag = hrefs.value = NULL;
3532
cli_dbgmsg("checkURLs: calling html_normalise_mem\n");
3533
if(!html_normalise_mem(blobGetData(b), len, NULL, &hrefs)) {
3538
cli_dbgmsg("checkURLs: html_normalise_mem returned\n");
3540
/* TODO: Do we need to call remove_html_comments? */
3544
for(i = 0; i < hrefs.count; i++) {
3545
const char *url = (const char *)hrefs.value[i];
3547
if(strncasecmp("http://", url, 7) == 0) {
3550
#ifndef CL_THREAD_SAFE
3554
#else /*!WITH_CURL*/
3555
#ifdef CL_THREAD_SAFE
3556
static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER;
3560
#endif /*WITH_CURL*/
3561
char name[NAME_MAX + 1];
3563
if(tableFind(t, url) == 1) {
3564
cli_dbgmsg("URL %s already downloaded\n", url);
3567
if(n == FOLLOWURLS) {
3568
cli_warnmsg("Not all URLs will be scanned\n");
3571
(void)tableInsert(t, url, 1);
3572
cli_dbgmsg("Downloading URL %s to be scanned\n", url);
3573
strncpy(name, url, sizeof(name) - 1);
3574
name[sizeof(name) - 1] = '\0';
3575
for(ptr = name; *ptr; ptr++)
3580
#ifdef CL_THREAD_SAFE
3583
args[n].filename = strdup(name);
3584
pthread_create(&tid[n], NULL, getURL, &args[n]);
3588
arg.filename = name;
3594
* TODO: maximum size and timeouts
3596
len = sizeof(cmd) - 26 - strlen(dir) - strlen(name);
3598
snprintf(cmd, sizeof(cmd) - 1, "GET -t10 %.*s >%s/%s", len, url, dir, name);
3600
snprintf(cmd, sizeof(cmd) - 1, "GET -t10 %.*s >%s/%s 2>/dev/null", len, url, dir, name);
3602
cmd[sizeof(cmd) - 1] = '\0';
3605
for(ptr = cmd; *ptr; ptr++)
3606
if(strchr(";&", *ptr))
3610
cli_dbgmsg("%s\n", cmd);
3611
#ifdef CL_THREAD_SAFE
3612
pthread_mutex_lock(&system_mutex);
3615
#ifdef CL_THREAD_SAFE
3616
pthread_mutex_unlock(&system_mutex);
3618
snprintf(cmd, sizeof(cmd), "%s/%s", dir, name);
3619
if(stat(cmd, &statb) >= 0)
3620
if(statb.st_size == 0) {
3621
cli_warnmsg("URL %s failed to download\n", url);
3623
* Don't bother scanning an empty file
3634
#if defined(WITH_CURL) && defined(CL_THREAD_SAFE)
3635
assert(n <= FOLLOWURLS);
3636
cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n);
3638
pthread_join(tid[n], NULL);
3639
free(args[n].filename);
3642
html_tag_arg_free(&hrefs);
3647
#ifdef CL_THREAD_SAFE
3650
getURL(struct arg *arg)
3655
struct curl_slist *headers;
3656
static int initialised = 0;
3657
#ifdef CL_THREAD_SAFE
3658
static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
3659
struct arg *arg = (struct arg *)a;
3661
const char *url = arg->url;
3662
const char *dir = arg->dir;
3663
const char *filename = arg->filename;
3664
char fout[NAME_MAX + 1];
3665
#ifdef CURLOPT_ERRORBUFFER
3666
char errorbuffer[128];
3669
#ifdef CL_THREAD_SAFE
3670
pthread_mutex_lock(&init_mutex);
3673
if(curl_global_init(CURL_GLOBAL_NOTHING) != 0) {
3674
#ifdef CL_THREAD_SAFE
3675
pthread_mutex_unlock(&init_mutex);
3681
#ifdef CL_THREAD_SAFE
3682
pthread_mutex_unlock(&init_mutex);
3685
/* easy isn't the word I'd use... */
3686
curl = curl_easy_init();
3690
(void)curl_easy_setopt(curl, CURLOPT_USERAGENT, "www.clamav.net");
3692
if(curl_easy_setopt(curl, CURLOPT_URL, url) != 0)
3695
snprintf(fout, NAME_MAX, "%s/%s", dir, filename);
3697
fp = fopen(fout, "w");
3700
cli_errmsg("Can't open '%s' for writing", fout);
3701
curl_easy_cleanup(curl);
3704
#ifdef CURLOPT_WRITEDATA
3705
if(curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp) != 0) {
3707
curl_easy_cleanup(curl);
3711
if(curl_easy_setopt(curl, CURLOPT_FILE, fp) != 0) {
3713
curl_easy_cleanup(curl);
3719
* If an item is in squid's cache get it from there (TCP_HIT/200)
3720
* by default curl doesn't (TCP_CLIENT_REFRESH_MISS/200)
3722
headers = curl_slist_append(NULL, "Pragma:");
3723
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
3725
/* These should be customisable */
3726
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30);
3727
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10);
3728
#ifdef CURLOPT_MAXFILESIZE
3729
curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, 50*1024);
3732
#ifdef CL_THREAD_SAFE
3733
#ifdef CURLOPT_DNS_USE_GLOBAL_CACHE
3734
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
3739
* Prevent password: prompting with older versions
3740
* FIXME: a better username?
3742
curl_easy_setopt(curl, CURLOPT_USERPWD, "username:password");
3744
#ifdef CURLOPT_ERRORBUFFER
3745
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errorbuffer);
3749
* FIXME: valgrind reports "pthread_mutex_unlock: mutex is not locked"
3750
* from gethostbyaddr_r within this. It may be a bug in libcurl
3751
* rather than this code, but I need to check, see Curl_resolv()
3752
* If pushed really hard it will sometimes say
3753
* Conditional jump or move depends on uninitialised value(s) and
3754
* quit. But the program seems to work OK without valgrind...
3755
* Perhaps Curl_resolv() isn't thread safe?
3758
* On some C libraries (notably with FC3, glibc-2.3.3-74) you get a
3759
* memory leak here in getaddrinfo(), see
3760
* https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=139559
3763
if(curl_easy_perform(curl) != CURLE_OK) {
3764
#ifdef CURLOPT_ERRORBUFFER
3765
cli_warnmsg("URL %s failed to download: %s\n", url, errorbuffer);
3767
cli_warnmsg("URL %s failed to download\n", url);
3772
curl_slist_free_all(headers);
3773
curl_easy_cleanup(curl);
3781
checkURLs(message *m, const char *dir)
3786
#ifdef HAVE_BACKTRACE
3790
signal(SIGSEGV, SIG_DFL);
3796
print_trace(int use_syslog)
3802
pid_t pid = getpid();
3804
size = backtrace(array, 10);
3805
strings = backtrace_symbols(array, size);
3808
cli_dbgmsg("Backtrace of pid %d:\n", pid);
3810
syslog(LOG_ERR, "Backtrace of pid %d:", pid);
3812
for(i = 0; i < size; i++)
3814
syslog(LOG_ERR, "bt[%u]: %s", i, strings[i]);
3816
cli_dbgmsg("%s\n", strings[i]);
3818
/* TODO: dump the current email */
3825
usefulHeader(int commandNumber, const char *cmd)
3827
switch(commandNumber) {
3828
case CONTENT_TRANSFER_ENCODING:
3829
case CONTENT_DISPOSITION:
3833
if(strcasecmp(cmd, "From") == 0)
3835
else if(strcasecmp(cmd, "Received") == 0)
3837
else if(strcasecmp(cmd, "De") == 0)
3845
* Save the uuencoded part of the file as it is read in since there's no need
3846
* to include it in the parse tree. Saves memory and parse time.
3847
* Return < 0 for failure
3850
uufasttrack(message *m, const char *firstline, const char *dir, FILE *fin)
3852
fileblob *fb = fileblobCreate();
3853
char buffer[LINE_LENGTH + 1];
3854
char *filename = cli_strtok(firstline, 2, " ");
3856
if(filename == NULL)
3859
fileblobSetFilename(fb, dir, filename);
3860
cli_dbgmsg("Fast track uudecode %s\n", filename);
3863
while(fgets(buffer, sizeof(buffer) - 1, fin) != NULL) {
3864
unsigned char data[1024];
3865
const unsigned char *uptr;
3869
if(strcasecmp(buffer, "end") == 0)
3871
if(buffer[0] == '\0')
3874
uptr = decodeLine(m, UUENCODE, buffer, data, sizeof(data));
3878
len = (size_t)(uptr - data);
3879
if((len > 62) || (len == 0))
3882
if(fileblobAddData(fb, data, len) < 0)
3886
fileblobDestroy(fb);
3891
* Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r"
3894
getline_from_mbox(char *buffer, size_t len, FILE *fin)
3901
if((len == 0) || (buffer == NULL)) {
3902
cli_errmsg("Invalid call to getline_from_mbox(). Report to bugs@clamav.net\n");
3918
if((c != '\r') && !feof(fin))
3929
if((c != '\n') && !feof(fin))
3937
/* probably, the email breaks RFC821 */
3938
cli_dbgmsg("getline_from_mbox: buffer overflow stopped\n");
3948
* like cli_memstr - but returns the location of the match
3949
* FIXME: need a case insensitive version
3952
cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
3954
const char *pt, *hay;
3957
if(haystack == needle)
3963
if(memcmp(haystack, needle, ns) == 0)
3966
pt = hay = haystack;
3969
while((pt = memchr(hay, needle[0], n)) != NULL) {
3970
n -= (int) pt - (int) hay;
3974
if(memcmp(pt, needle, ns) == 0)