~ubuntu-branches/ubuntu/breezy/clamav/breezy-backports

« back to all changes in this revision

Viewing changes to libclamav/mbox.c

  • Committer: Bazaar Package Importer
  • Author(s): Stephen Gran
  • Date: 2005-09-19 09:05:59 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20050919090559-hikpqduq8yx5qxo2
Tags: 0.87-1
* New upstream version
  - Fixes CAN-2005-2920 and CAN-2005-2919 (closes: #328660)
* New logcheck line for clamav-daemon (closes: #323132)
* relibtoolize and apply kfreebsd patch (closes: #327707)
* Make sure init.d script starts freshclam up again after upgrade when run
  from if-up.d (closes: #328912)

Show diffs side-by-side

added added

removed removed

Lines of Context:
14
14
 *  You should have received a copy of the GNU General Public License
15
15
 *  along with this program; if not, write to the Free Software
16
16
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
 
 *
18
 
 * Change History:
19
 
 * $Log: mbox.c,v $
20
 
 * Revision 1.75  2004/06/14 09:07:10  nigelhorne
21
 
 * Handle spam using broken e-mail generators for multipart/alternative
22
 
 *
23
 
 * Revision 1.74  2004/06/09 18:18:59  nigelhorne
24
 
 * Find uuencoded viruses in multipart/mixed that have no start of message boundaries
25
 
 *
26
 
 * Revision 1.73  2004/05/14 08:15:55  nigelhorne
27
 
 * Use mkstemp on cygwin
28
 
 *
29
 
 * Revision 1.72  2004/05/12 11:20:37  nigelhorne
30
 
 * More bounce message false positives handled
31
 
 *
32
 
 * Revision 1.71  2004/05/10 11:35:11  nigelhorne
33
 
 * No need to update mbox.c for cli_filetype problem
34
 
 *
35
 
 * Revision 1.69  2004/05/06 11:26:49  nigelhorne
36
 
 * Force attachments marked as RFC822 messages to be scanned
37
 
 *
38
 
 * Revision 1.68  2004/04/29 08:59:24  nigelhorne
39
 
 * Tidied up SetDispositionType
40
 
 *
41
 
 * Revision 1.67  2004/04/23 10:47:41  nigelhorne
42
 
 * If an inline text portion has a filename treat it as an attachment
43
 
 *
44
 
 * Revision 1.66  2004/04/14 08:32:21  nigelhorne
45
 
 * When debugging print the email number in mailboxes
46
 
 *
47
 
 * Revision 1.65  2004/04/07 18:18:07  nigelhorne
48
 
 * Some occurrences of W97M.Lexar were let through
49
 
 *
50
 
 * Revision 1.64  2004/04/05 09:32:20  nigelhorne
51
 
 * Added SCAN_TO_DISC define
52
 
 *
53
 
 * Revision 1.63  2004/04/01 15:32:34  nigelhorne
54
 
 * Graceful exit if messageAddLine fails in strdup
55
 
 *
56
 
 * Revision 1.62  2004/03/31 17:00:20  nigelhorne
57
 
 * Code tidy up free memory earlier
58
 
 *
59
 
 * Revision 1.61  2004/03/30 22:45:13  nigelhorne
60
 
 * Better handling of multipart/multipart messages
61
 
 *
62
 
 * Revision 1.60  2004/03/29 09:22:03  nigelhorne
63
 
 * Tidy up code and reduce shuffling of data
64
 
 *
65
 
 * Revision 1.59  2004/03/26 11:08:36  nigelhorne
66
 
 * Use cli_writen
67
 
 *
68
 
 * Revision 1.58  2004/03/25 22:40:46  nigelhorne
69
 
 * Removed even more calls to realloc and some duplicated code
70
 
 *
71
 
 * Revision 1.57  2004/03/21 17:19:49  nigelhorne
72
 
 * Handle bounce messages with no headers
73
 
 *
74
 
 * Revision 1.56  2004/03/21 09:41:26  nigelhorne
75
 
 * Faster scanning for non MIME messages
76
 
 *
77
 
 * Revision 1.55  2004/03/20 17:39:23  nigelhorne
78
 
 * First attempt to handle all bounces
79
 
 *
80
 
 * Revision 1.54  2004/03/19 15:40:45  nigelhorne
81
 
 * Handle empty content-disposition types
82
 
 *
83
 
 * Revision 1.53  2004/03/19 08:08:02  nigelhorne
84
 
 * If a message part of a multipart contains an RFC822 message that has no encoding don't scan it
85
 
 *
86
 
 * Revision 1.52  2004/03/18 21:51:41  nigelhorne
87
 
 * If a message only contains a single RFC822 message that has no encoding don't save for scanning
88
 
 *
89
 
 * Revision 1.51  2004/03/17 19:48:12  nigelhorne
90
 
 * Improved embedded RFC822 message handling
91
 
 *
92
 
 * Revision 1.50  2004/03/10 22:05:39  nigelhorne
93
 
 * Fix seg fault when a message in a multimessage mailbox fails to scan
94
 
 *
95
 
 * Revision 1.49  2004/03/04 13:01:58  nigelhorne
96
 
 * Ensure all bounces are rescanned by cl_mbox
97
 
 *
98
 
 * Revision 1.48  2004/02/27 12:16:26  nigelhorne
99
 
 * Catch lines just containing ':'
100
 
 *
101
 
 * Revision 1.47  2004/02/23 10:13:08  nigelhorne
102
 
 * Handle spaces before : in headers
103
 
 *
104
 
 * Revision 1.46  2004/02/18 13:29:19  nigelhorne
105
 
 * Stop buffer overflows for files with very long suffixes
106
 
 *
107
 
 * Revision 1.45  2004/02/18 10:07:40  nigelhorne
108
 
 * Find some Yaha
109
 
 *
110
 
 * Revision 1.44  2004/02/15 08:45:54  nigelhorne
111
 
 * Avoid scanning the same file twice
112
 
 *
113
 
 * Revision 1.43  2004/02/14 19:04:05  nigelhorne
114
 
 * Handle spaces in boundaries
115
 
 *
116
 
 * Revision 1.42  2004/02/14 17:23:45  nigelhorne
117
 
 * Had deleted O_BINARY by mistake
118
 
 *
119
 
 * Revision 1.41  2004/02/12 18:43:58  nigelhorne
120
 
 * Use mkstemp on Solaris
121
 
 *
122
 
 * Revision 1.40  2004/02/11 08:15:59  nigelhorne
123
 
 * Use O_BINARY for cygwin
124
 
 *
125
 
 * Revision 1.39  2004/02/06 13:46:08  kojm
126
 
 * Support for clamav-config.h
127
 
 *
128
 
 * Revision 1.38  2004/02/04 13:29:48  nigelhorne
129
 
 * Handle partial writes - and print when write fails
130
 
 *
131
 
 * Revision 1.37  2004/02/03 22:54:59  nigelhorne
132
 
 * Catch another example of Worm.Dumaru.Y
133
 
 *
134
 
 * Revision 1.36  2004/02/02 09:52:57  nigelhorne
135
 
 * Some instances of Worm.Dumaru.Y got through the net
136
 
 *
137
 
 * Revision 1.35  2004/01/28 10:15:24  nigelhorne
138
 
 * Added support to scan some bounce messages
139
 
 *
140
 
 * Revision 1.34  2004/01/24 17:43:37  nigelhorne
141
 
 * Removed (incorrect) warning about uninitialised variable
142
 
 *
143
 
 * Revision 1.33  2004/01/23 10:38:22  nigelhorne
144
 
 * Fixed memory leak in handling some multipart messages
145
 
 *
146
 
 * Revision 1.32  2004/01/23 08:51:19  nigelhorne
147
 
 * Add detection of uuencoded viruses in single part multipart/mixed files
148
 
 *
149
 
 * Revision 1.31  2004/01/22 22:13:06  nigelhorne
150
 
 * Prevent infinite recursion on broken uuencoded files
151
 
 *
152
 
 * Revision 1.30  2004/01/13 10:12:05  nigelhorne
153
 
 * Remove duplicate code when handling multipart messages
154
 
 *
155
 
 * Revision 1.29  2004/01/09 18:27:11  nigelhorne
156
 
 * ParseMimeHeader could corrupt arg
157
 
 *
158
 
 * Revision 1.28  2004/01/09 15:07:42  nigelhorne
159
 
 * Re-engineered update 1.11 lost in recent changes
160
 
 *
161
 
 * Revision 1.27  2004/01/09 14:45:59  nigelhorne
162
 
 * Removed duplicated code in multipart handler
163
 
 *
164
 
 * Revision 1.26  2004/01/09 10:20:54  nigelhorne
165
 
 * Locate uuencoded viruses hidden in text portions of multipart/mixed mime messages
166
 
 *
167
 
 * Revision 1.25  2004/01/06 14:41:18  nigelhorne
168
 
 * Handle headers which do not have a space after the ':'
169
 
 *
170
 
 * Revision 1.24  2003/12/20 13:55:36  nigelhorne
171
 
 * Ensure multipart just save the bodies of attachments
172
 
 *
173
 
 * Revision 1.23  2003/12/14 18:07:01  nigelhorne
174
 
 * Some viruses in embedded messages were not being found
175
 
 *
176
 
 * Revision 1.22  2003/12/13 16:42:23  nigelhorne
177
 
 * call new cli_chomp
178
 
 *
179
 
 * Revision 1.21  2003/12/11 14:35:48  nigelhorne
180
 
 * Better handling of encapsulated messages
181
 
 *
182
 
 * Revision 1.20  2003/12/06 04:03:26  nigelhorne
183
 
 * Handle hand crafted emails that incorrectly set multipart headers
184
 
 *
185
 
 * Revision 1.19  2003/11/21 07:26:31  nigelhorne
186
 
 * Scan multipart alternatives that have no boundaries, finds some uuencoded happy99
187
 
 *
188
 
 * Revision 1.18  2003/11/17 08:13:21  nigelhorne
189
 
 * Handle spaces at the end of lines of MIME headers
190
 
 *
191
 
 * Revision 1.17  2003/11/06 05:06:42  nigelhorne
192
 
 * Some applications weren't being scanned
193
 
 *
194
 
 * Revision 1.16  2003/11/04 08:24:00  nigelhorne
195
 
 * Handle multipart messages that have no text portion
196
 
 *
197
 
 * Revision 1.15  2003/10/12 20:13:49  nigelhorne
198
 
 * Use NO_STRTOK_R consistent with message.c
199
 
 *
200
 
 * Revision 1.14  2003/10/12 12:37:11  nigelhorne
201
 
 * Appledouble encoded EICAR now found
202
 
 *
203
 
 * Revision 1.13  2003/10/01 09:27:42  nigelhorne
204
 
 * Handle content-type header going over to a new line
205
 
 *
206
 
 * Revision 1.12  2003/09/29 17:10:19  nigelhorne
207
 
 * Moved stub from heap to stack since its maximum size is known
208
 
 *
209
 
 * Revision 1.11  2003/09/29 12:58:32  nigelhorne
210
 
 * Handle Content-Type: /; name="eicar.com"
211
 
 *
212
 
 * Revision 1.10  2003/09/28 10:06:34  nigelhorne
213
 
 * Compilable under SCO; removed duplicate code with message.c
214
 
 *
215
17
 */
216
 
static  char    const   rcsid[] = "$Id: mbox.c,v 1.75 2004/06/14 09:07:10 nigelhorne Exp $";
 
18
static  char    const   rcsid[] = "$Id: mbox.c,v 1.238+fixes 2005/04/19 09:20:55 nigelhorne Exp $";
217
19
 
218
20
#if HAVE_CONFIG_H
219
21
#include "clamav-config.h"
220
22
#endif
221
23
 
222
24
#ifndef CL_DEBUG
223
 
/*#define       NDEBUG  /* map CLAMAV debug onto standard */
 
25
#define NDEBUG  /* map CLAMAV debug onto standard */
224
26
#endif
225
27
 
226
28
#ifdef CL_THREAD_SAFE
243
45
#include <sys/types.h>
244
46
#include <sys/param.h>
245
47
#include <clamav.h>
 
48
#include <dirent.h>
 
49
#include <limits.h>
 
50
 
 
51
#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
 
52
#include <stddef.h>
 
53
#endif
 
54
 
 
55
#ifdef  CL_THREAD_SAFE
 
56
#include <pthread.h>
 
57
#endif
246
58
 
247
59
#include "table.h"
248
60
#include "mbox.h"
249
61
#include "blob.h"
 
62
#include "line.h"
250
63
#include "text.h"
251
64
#include "message.h"
252
65
#include "others.h"
253
66
#include "defaults.h"
254
67
#include "str.h"
 
68
#include "filetypes.h"
 
69
 
 
70
#ifdef  CL_DEBUG
 
71
#if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
 
72
#define HAVE_BACKTRACE
 
73
#endif
 
74
#endif
 
75
 
 
76
#ifdef HAVE_BACKTRACE
 
77
#include <execinfo.h>
 
78
#include <signal.h>
 
79
#include <syslog.h>
 
80
 
 
81
static  void    sigsegv(int sig);
 
82
static  void    print_trace(int use_syslog);
 
83
#endif
255
84
 
256
85
#if     defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
257
86
#undef strtok_r
267
96
#undef FALSE
268
97
#endif
269
98
 
270
 
typedef enum    { FALSE = 0, TRUE = 1 } bool;
271
 
 
 
99
typedef enum    { FALSE = 0, TRUE = 1 } bool;
 
100
 
 
101
#define SAVE_TO_DISC    /* multipart/message are saved in a temporary file */
 
102
 
 
103
/*
 
104
 * Code does exist to run FOLLOWURLS on systems without libcurl, however that
 
105
 * is not recommended so it is not compiled by default
 
106
 *
 
107
 * On Solaris, when using the GNU C compiler, the clamAV build system uses the
 
108
 * Sun supplied ld instead of the GNU ld causing an error. Therefore you cannot
 
109
 * use WITH_CURL on Solaris with gcc, you must configure with
 
110
 * "--without-libcurl". I don't know if it works with Sun's own compiler
 
111
 *
 
112
 * Fails to link on Solaris 10 with this error:
 
113
 *      Undefined                       first referenced
 
114
 *  symbol                             in file
 
115
 *  __floatdidf                         /opt/sfw/lib/libcurl.s
 
116
 */
 
117
#if     C_SOLARIS && __GNUC__
 
118
#undef  WITH_CURL
 
119
#endif
 
120
 
 
121
#ifdef  WITH_CURL
 
122
#define FOLLOWURLS      5       /*
 
123
                                 * Maximum number of URLs scanned in a message
 
124
                                 * part. Helps to find Dialier.gen-45. If
 
125
                                 * not defined, don't check any URLs
 
126
                                 */
 
127
#endif
 
128
 
 
129
#ifdef  FOLLOWURLS
 
130
 
 
131
#include "htmlnorm.h"
 
132
 
 
133
#ifdef  WITH_CURL       /* Set in configure */
 
134
/*
 
135
 * To build with WITH_CURL:
 
136
 * LDFLAGS=`curl-config --libs` ./configure ...
 
137
 */
 
138
#include <curl/curl.h>
 
139
 
 
140
/*
 
141
 * Needs curl >= 7.11 (I've heard that 7.9 can cause crashes and 7.10 is
 
142
 * untested)
 
143
 */
 
144
#if     (LIBCURL_VERSION_MAJOR < 7)
 
145
#undef  WITH_CURL       /* also undef FOLLOWURLS? */
 
146
#endif
 
147
 
 
148
#if     (LIBCURL_VERSION_MAJOR == 7) && (LIBCURL_VERSION_MINOR < 10)
 
149
#undef  WITH_CURL       /* also undef FOLLOWURLS? */
 
150
#endif
 
151
 
 
152
#endif  /*WITH_CURL*/
 
153
 
 
154
#else   /*!FOLLOWURLS*/
 
155
#undef  WITH_CURL
 
156
#endif  /*FOLLOWURLS*/
 
157
 
 
158
/*
 
159
 * Define this to handle messages covered by section 7.3.2 of RFC1341.
 
160
 *      This is experimental code so it is up to YOU to (1) ensure it's secure
 
161
 * (2) periodically trim the directory of old files
 
162
 *
 
163
 * If you use the load balancing feature of clamav-milter to run clamd on
 
164
 * more than one machine you must make sure that .../partial is on a shared
 
165
 * network filesystem
 
166
 */
 
167
#define PARTIAL_DIR
 
168
 
 
169
/*#define       NEW_WORLD*/
 
170
 
 
171
static  int     cli_parse_mbox(const char *dir, int desc, unsigned int options);
 
172
static  message *parseEmailFile(FILE *fin, const table_t *rfc821Table, const char *firstLine, const char *dir);
272
173
static  message *parseEmailHeaders(const message *m, const table_t *rfc821Table);
273
174
static  int     parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
274
 
static  int     parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable);
 
175
static  int     parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t *rfc821Table, const table_t *subtypeTable, unsigned int options);
275
176
static  int     boundaryStart(const char *line, const char *boundary);
276
177
static  int     endOfMessage(const char *line, const char *boundary);
277
178
static  int     initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
280
181
static  bool    continuationMarker(const char *line);
281
182
static  int     parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
282
183
static  void    saveTextPart(message *m, const char *dir);
283
 
static  bool    saveFile(const blob *b, const char *dir);
 
184
static  char    *rfc2047(const char *in);
 
185
static  char    *rfc822comments(const char *in, char *out);
 
186
#ifdef  PARTIAL_DIR
 
187
static  int     rfc1341(message *m, const char *dir);
 
188
#endif
 
189
static  bool    usefulHeader(int commandNumber, const char *cmd);
 
190
static        int     uufasttrack(message *m, const char *firstline, const char *dir, FILE *fin);
 
191
static  char    *getline_from_mbox(char *buffer, size_t len, FILE *fin);
 
192
#ifdef  NEW_WORLD
 
193
static  const   char    *cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns);
 
194
#endif
284
195
 
285
 
/* Maximum number of attachments that we accept */
286
 
#define MAX_ATTACHMENTS 10
 
196
static  void    checkURLs(message *m, const char *dir);
 
197
#ifdef  WITH_CURL
 
198
struct arg {
 
199
        const char *url;
 
200
        const char *dir;
 
201
        char *filename;
 
202
};
 
203
#ifdef  CL_THREAD_SAFE
 
204
static  void    *getURL(void *a);
 
205
#else
 
206
static  void    *getURL(struct arg *arg);
 
207
#endif
 
208
#endif
287
209
 
288
210
/* Maximum line length according to RFC821 */
289
211
#define LINE_LENGTH     1000
306
228
#define RELATED         10      /* RFC2387 */
307
229
#define REPORT          11      /* RFC1892 */
308
230
#define APPLEDOUBLE     12      /* Handling of this in only noddy for now */
 
231
#define FAX             MIXED   /*
 
232
                                 * RFC3458
 
233
                                 * Drafts stated to treat it as mixed if it is
 
234
                                 * not known.  This disappeared in the final
 
235
                                 * version (except when talking about
 
236
                                 * voice-message), but it is good enough for us
 
237
                                 * since we do no validation of coversheet
 
238
                                 * presence etc. (which also has disappeared
 
239
                                 * in the final version)
 
240
                                 */
 
241
#define ENCRYPTED       13      /*
 
242
                                 * e.g. RFC2015
 
243
                                 * Content-Type: multipart/encrypted;
 
244
                                 * boundary="nextPart1383049.XCRrrar2yq";
 
245
                                 * protocol="application/pgp-encrypted"
 
246
                                 */
 
247
#define X_BFILE         RELATED /*
 
248
                                 * BeOS, expect two parts: the file and its
 
249
                                 * attributes. The attributes part comes as
 
250
                                 *      Content-Type: application/x-be_attribute
 
251
                                 *              name="foo"
 
252
                                 * I can't find where it is defined, any
 
253
                                 * pointers would be appreciated. For now
 
254
                                 * we treat it as multipart/related
 
255
                                 */
 
256
#define KNOWBOT         14      /* Unknown and undocumented format? */
309
257
 
310
258
static  const   struct tableinit {
311
259
        const   char    *key;
316
264
        {       "Content-Transfer-Encoding",    CONTENT_TRANSFER_ENCODING       },
317
265
        {       "Content-Disposition",          CONTENT_DISPOSITION     },
318
266
        {       NULL,                           0                       }
319
 
}, mimeSubtypes[] = {
 
267
}, mimeSubtypes[] = {   /* see RFC2045 */
320
268
                /* subtypes of Text */
321
269
        {       "plain",        PLAIN           },
322
270
        {       "enriched",     ENRICHED        },
331
279
        {       "related",      RELATED         },
332
280
        {       "report",       REPORT          },
333
281
        {       "appledouble",  APPLEDOUBLE     },
 
282
        {       "fax-message",  FAX             },
 
283
        {       "encrypted",    ENCRYPTED       },
 
284
        {       "x-bfile",      X_BFILE         },      /* BeOS */
 
285
        {       "knowbot",              KNOWBOT         },      /* ??? */
 
286
        {       "knowbot-metadata",     KNOWBOT         },      /* ??? */
 
287
        {       "knowbot-code",         KNOWBOT         },      /* ??? */
 
288
        {       "knowbot-state",        KNOWBOT         },      /* ??? */
334
289
        {       NULL,           0               }
335
290
};
336
 
static  table_t *rfc821Table, *subtypeTable;
337
 
 
338
 
/* Maximum filenames under various systems */
339
 
#ifndef NAME_MAX        /* e.g. Linux */
340
 
 
341
 
#ifdef  MAXNAMELEN      /* e.g. Solaris */
342
 
#define NAME_MAX        MAXNAMELEN
343
 
#else
344
 
 
345
 
#ifdef  FILENAME_MAX    /* e.g. SCO */
346
 
#define NAME_MAX        FILENAME_MAX
347
 
#endif
348
 
 
349
 
#endif
350
 
 
 
291
 
 
292
#ifdef  CL_THREAD_SAFE
 
293
static  pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER;
351
294
#endif
352
295
 
353
296
#ifndef O_BINARY
354
297
#define O_BINARY        0
355
298
#endif
356
299
 
357
 
#define SAVE_TO_DISC    /* multipart/message are saved in a temporary file */
 
300
#ifdef  NEW_WORLD
 
301
 
 
302
#if HAVE_MMAP
 
303
#if HAVE_SYS_MMAN_H
 
304
#include <sys/mman.h>
 
305
#else /* HAVE_SYS_MMAN_H */
 
306
#undef HAVE_MMAP
 
307
#endif
 
308
#endif
 
309
 
 
310
struct scanlist {
 
311
        char *start;
 
312
        size_t size;
 
313
        encoding_type decoder;  /* only BASE64 and QUOTEDPRINTABLE for now */
 
314
        struct scanlist *next;
 
315
};
 
316
 
 
317
/*
 
318
 * This could be the future. Instead of parsing and decoding it just decodes.
 
319
 *
 
320
 * USE IT AT YOUR PERIL, a large number of viruses are not detected with this
 
321
 * method, possibly because the decoded files must be exact and not have
 
322
 * extra data at the start or end, which this code will produce.
 
323
 *
 
324
 * Currently only supports base64 and quoted-printable
 
325
 *
 
326
 * You may also see a lot of warnings. For the moment it falls back to old
 
327
 *      world mode if it doesn't know what to do - that'll be removed.
 
328
 * The code is untidy...
 
329
 *
 
330
 * FIXME: Some mailbox scans are slower with this method. I suspect that it's
 
331
 * because the scan can proceed to the end of the file rather than the end
 
332
 * of the attachment which can mean that later emails are scanned many times
 
333
 *
 
334
 * TODO: Also all those pmemstr()s are slow, so we need to reduce the number
 
335
 *      and size of data scanned each time, and we fall through to
 
336
 *      cli_parse_mbox() too often
 
337
 */
 
338
int
 
339
cli_mbox(const char *dir, int desc, unsigned int options)
 
340
{
 
341
        char *start, *ptr, *line, *p, *q;
 
342
        const char *last;
 
343
        size_t size, s;
 
344
        struct stat statb;
 
345
        message *m;
 
346
        fileblob *fb;
 
347
        int ret = 0;
 
348
        int wasAlloced;
 
349
        struct scanlist *scanlist, *scanelem;
 
350
 
 
351
        if(dir == NULL) {
 
352
                cli_warnmsg("cli_mbox called with NULL dir\n");
 
353
                return CL_ENULLARG;
 
354
        }
 
355
        if(fstat(desc, &statb) < 0)
 
356
                return CL_EOPEN;
 
357
 
 
358
        size = statb.st_size;
 
359
 
 
360
        if(size == 0)
 
361
                return CL_CLEAN;
 
362
 
 
363
        if(size > 10*1024*1024)
 
364
                return cli_parse_mbox(dir, desc, options);      /* should be StreamMaxLength, I guess */
 
365
 
 
366
        cli_warnmsg("NEW_WORLD is new code - use at your own risk.\n");
 
367
 
 
368
        start = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
 
369
        if(start == MAP_FAILED)
 
370
                return CL_EMEM;
 
371
 
 
372
        cli_dbgmsg("mmap'ed mbox\n");
 
373
 
 
374
        /* last points to the last *valid* address in the array */
 
375
        last = &start[size - 1];
 
376
 
 
377
        ptr = cli_malloc(size);
 
378
        if(ptr) {
 
379
                wasAlloced = 1;
 
380
                memcpy(ptr, start, size);
 
381
                munmap(start, size);
 
382
                start = ptr;
 
383
                last = &start[size - 1];
 
384
        } else
 
385
                wasAlloced = 0;
 
386
 
 
387
        /*
 
388
         * Would be nice to have a case insensitive cli_memstr()
 
389
         */
 
390
        scanelem = scanlist = NULL;
 
391
        q = start;
 
392
        s = size;
 
393
        while((p = (char *)cli_pmemstr(q, s, "base64", 6)) != NULL) {
 
394
                cli_dbgmsg("Found base64\n");
 
395
                if(scanelem) {
 
396
                        scanelem->next = cli_malloc(sizeof(struct scanlist));
 
397
                        scanelem = scanelem->next;
 
398
                } else
 
399
                        scanlist = scanelem = cli_malloc(sizeof(struct scanlist));
 
400
                scanelem->next = NULL;
 
401
                scanelem->decoder = BASE64;
 
402
                s -= (p - q) + 6;
 
403
                q = scanelem->start = &p[6];
 
404
                if(((p = (char *)cli_pmemstr(q, s, "\nFrom ", 6)) != NULL) ||
 
405
                   ((p = (char *)cli_pmemstr(q, s, "base64", 6)) != NULL) ||
 
406
                   ((p = (char *)cli_pmemstr(q, s, "quoted-printable", 16)) != NULL)) {
 
407
                        scanelem->size = (size_t)(p - q);
 
408
                        q = p;
 
409
                        s -= scanelem->size;
 
410
                } else {
 
411
                        scanelem->size = (size_t)(last - scanelem->start) + 1;
 
412
                        break;
 
413
                }
 
414
                cli_dbgmsg("base64: last %u q %u s %u\n", (unsigned int)last, (unsigned int)q, s);
 
415
                assert(scanelem->size <= size);
 
416
                assert(&q[s - 1] <= last);
 
417
        }
 
418
        q = start;
 
419
        s = size;
 
420
        while((p = (char *)cli_pmemstr(q, s, "quoted-printable", 16)) != NULL) {
 
421
                if(p != q)
 
422
                        switch(p[-1]) {
 
423
                                case ' ':
 
424
                                case ':':
 
425
                                case '=':       /* wrong but allow it */
 
426
                                        break;
 
427
                                default:
 
428
                                        s -= (p - q) + 16;
 
429
                                        q = &p[16];
 
430
                                        cli_dbgmsg("Ignore quoted-printable false positive\n");
 
431
                                        cli_dbgmsg("s = %u\n", s);
 
432
                                        continue;       /* false positive */
 
433
                        }
 
434
 
 
435
                cli_dbgmsg("Found quoted-printable\n");
 
436
                if(scanelem) {
 
437
                        scanelem->next = cli_malloc(sizeof(struct scanlist));
 
438
                        scanelem = scanelem->next;
 
439
                } else
 
440
                        scanlist = scanelem = cli_malloc(sizeof(struct scanlist));
 
441
                scanelem->next = NULL;
 
442
                scanelem->decoder = QUOTEDPRINTABLE;
 
443
                s -= (p - q) + 16;
 
444
                q = scanelem->start = &p[16];
 
445
                cli_dbgmsg("qp: last %u q %u s %u\n", (unsigned int)last, (unsigned int)q, s);
 
446
                if(((p = (char *)cli_pmemstr(q, s, "\nFrom ", 6)) != NULL) ||
 
447
                   ((p = (char *)cli_pmemstr(q, s, "quoted-printable", 16)) != NULL) ||
 
448
                   ((p = (char *)cli_pmemstr(q, s, "base64", 6)) != NULL)) {
 
449
                        scanelem->size = (size_t)(p - q);
 
450
                        q = p;
 
451
                        s -= scanelem->size;
 
452
                        cli_dbgmsg("qp: scanelem->size = %u\n", scanelem->size);
 
453
                } else {
 
454
                        scanelem->size = (size_t)(last - scanelem->start) + 1;
 
455
                        break;
 
456
                }
 
457
                assert(scanelem->size <= size);
 
458
                assert(&q[s - 1] <= last);
 
459
        }
 
460
 
 
461
        if(scanlist == NULL) {
 
462
                const struct tableinit *tableinit;
 
463
                bool anyHeadersFound = FALSE;
 
464
                bool hasuuencode = FALSE;
 
465
 
 
466
                /* FIXME: message: There could of course be no decoder needed... */
 
467
                for(tableinit = rfc821headers; tableinit->key; tableinit++)
 
468
                        if(cli_pmemstr(start, size, tableinit->key, strlen(tableinit->key))) {
 
469
                                anyHeadersFound = TRUE;
 
470
                                break;
 
471
                        }
 
472
 
 
473
                if((!anyHeadersFound) && cli_pmemstr(start, size, "\nbegin ", 7))
 
474
                        /* uuencoded part */
 
475
                        hasuuencode = TRUE;
 
476
 
 
477
                if(wasAlloced)
 
478
                        free(start);
 
479
                else
 
480
                        munmap(start, size);
 
481
 
 
482
                if(anyHeadersFound || hasuuencode) {
 
483
                        /* TODO: reduce the number of falls through here */
 
484
                        cli_warnmsg("cli_mbox: uuencode or unknown encoder\n");
 
485
                        return cli_parse_mbox(dir, desc, options);
 
486
                }
 
487
 
 
488
                cli_warnmsg("cli_mbox: I believe it's plain text which must be clean\n");
 
489
                return CL_CLEAN;
 
490
        }
 
491
 
 
492
        for(scanelem = scanlist; scanelem; scanelem = scanelem->next) {
 
493
                if(scanelem->decoder == BASE64) {
 
494
                        char *b64start = scanelem->start;
 
495
                        long b64size = scanelem->size;
 
496
 
 
497
                        cli_dbgmsg("b64size = %lu\n", b64size);
 
498
                        while(*b64start != '\n') {
 
499
                                b64start++;
 
500
                                b64size--;
 
501
                        }
 
502
                        /*
 
503
                         * Look for the end of the headers
 
504
                         */
 
505
                        while(b64start < last) {
 
506
                                if(*b64start == ';') {
 
507
                                        b64start++;
 
508
                                        b64size--;
 
509
                                } else if(*b64start == '\n') {
 
510
                                        b64start++;
 
511
                                        b64size--;
 
512
                                        if((*b64start == '\n') || (*b64start == '\r')) {
 
513
                                                b64start++;
 
514
                                                b64size--;
 
515
                                                break;
 
516
                                        }
 
517
                                }
 
518
                                b64start++;
 
519
                                b64size--;
 
520
                        }
 
521
 
 
522
                        if(b64size > 0L)
 
523
                                while((!isalnum(*b64start)) && (*b64start != '/')) {
 
524
                                        if(b64size-- == 0L)
 
525
                                                break;
 
526
                                        b64start++;
 
527
                                }
 
528
 
 
529
                        if(b64size > 0L) {
 
530
                                cli_dbgmsg("cli_mbox: decoding %ld base64 bytes\n", b64size);
 
531
 
 
532
                                line = NULL;
 
533
 
 
534
                                m = messageCreate();
 
535
                                if(m == NULL)
 
536
                                        return CL_EMEM;
 
537
                                messageSetEncoding(m, "base64");
 
538
 
 
539
                                do {
 
540
                                        int length = 0;
 
541
 
 
542
                                        /*printf("%ld: ", b64size); fflush(stdout);*/
 
543
 
 
544
                                        for(ptr = b64start; b64size && (*ptr != '\n') && (*ptr != '\r'); ptr++) {
 
545
                                                length++;
 
546
                                                --b64size;
 
547
                                        }
 
548
 
 
549
                                        /*printf("%d: ", length); fflush(stdout);*/
 
550
 
 
551
                                        line = cli_realloc(line, length + 1);
 
552
 
 
553
                                        memcpy(line, b64start, length);
 
554
                                        line[length] = '\0';
 
555
 
 
556
                                        /*puts(line);*/
 
557
 
 
558
                                        if(messageAddStr(m, line) < 0)
 
559
                                                break;
 
560
 
 
561
                                        if((b64size > 0) && (*ptr == '\r')) {
 
562
                                                ptr++;
 
563
                                                --b64size;
 
564
                                        }
 
565
                                        b64start = ++ptr;
 
566
                                        --b64size;
 
567
                                        if(strchr(line, '='))
 
568
                                                break;
 
569
                                } while(b64size > 0L);
 
570
 
 
571
                                free(line);
 
572
                                fb = messageToFileblob(m, dir);
 
573
                                messageDestroy(m);
 
574
 
 
575
                                if(fb)
 
576
                                        fileblobDestroy(fb);
 
577
                                else
 
578
                                        ret = -1;
 
579
                        }
 
580
                } else if(scanelem->decoder == QUOTEDPRINTABLE) {
 
581
                        char *quotedstart = scanelem->start;
 
582
                        long quotedsize = scanelem->size;
 
583
 
 
584
                        cli_dbgmsg("quotedsize = %lu\n", quotedsize);
 
585
                        while(*quotedstart != '\n') {
 
586
                                quotedstart++;
 
587
                                quotedsize--;
 
588
                        }
 
589
                        /*
 
590
                         * Look for the end of the headers
 
591
                         */
 
592
                        while(quotedstart < last) {
 
593
                                if(*quotedstart == ';') {
 
594
                                        quotedstart++;
 
595
                                        quotedsize--;
 
596
                                } else if(*quotedstart == '\n') {
 
597
                                        quotedstart++;
 
598
                                        quotedsize--;
 
599
                                        if((*quotedstart == '\n') || (*quotedstart == '\r')) {
 
600
                                                quotedstart++;
 
601
                                                quotedsize--;
 
602
                                                break;
 
603
                                        }
 
604
                                }
 
605
                                quotedstart++;
 
606
                                quotedsize--;
 
607
                        }
 
608
 
 
609
                        while(!isalnum(*quotedstart)) {
 
610
                                quotedstart++;
 
611
                                quotedsize--;
 
612
                        }
 
613
 
 
614
                        if(quotedsize > 0L) {
 
615
                                cli_dbgmsg("cli_mbox: decoding %ld quoted-printable bytes\n", quotedsize);
 
616
 
 
617
                                m = messageCreate();
 
618
                                if(m == NULL)
 
619
                                        return CL_EMEM;
 
620
                                messageSetEncoding(m, "quoted-printable");
 
621
 
 
622
                                line = NULL;
 
623
 
 
624
                                do {
 
625
                                        int length = 0;
 
626
 
 
627
                                        /*printf("%ld: ", quotedsize); fflush(stdout);*/
 
628
 
 
629
                                        for(ptr = quotedstart; quotedsize && (*ptr != '\n') && (*ptr != '\r'); ptr++) {
 
630
                                                length++;
 
631
                                                --quotedsize;
 
632
                                        }
 
633
 
 
634
                                        /*printf("%d: ", length); fflush(stdout);*/
 
635
 
 
636
                                        line = cli_realloc(line, length + 1);
 
637
 
 
638
                                        memcpy(line, quotedstart, length);
 
639
                                        line[length] = '\0';
 
640
 
 
641
                                        /*puts(line);*/
 
642
 
 
643
                                        if(messageAddStr(m, line) < 0)
 
644
                                                break;
 
645
 
 
646
                                        if((quotedsize > 0) && (*ptr == '\r')) {
 
647
                                                ptr++;
 
648
                                                --quotedsize;
 
649
                                        }
 
650
                                        quotedstart = ++ptr;
 
651
                                        --quotedsize;
 
652
                                } while(quotedsize > 0L);
 
653
 
 
654
                                free(line);
 
655
                                fb = messageToFileblob(m, dir);
 
656
                                messageDestroy(m);
 
657
 
 
658
                                if(fb)
 
659
                                        fileblobDestroy(fb);
 
660
                                else
 
661
                                        ret = -1;
 
662
                        }
 
663
                }
 
664
        }
 
665
        scanelem = scanlist;
 
666
 
 
667
        while(scanelem) {
 
668
                struct scanlist *n = scanelem->next;
 
669
 
 
670
                free(scanelem);
 
671
                scanelem = n;
 
672
        }
 
673
 
 
674
        if(wasAlloced)
 
675
                free(start);
 
676
        else
 
677
                munmap(start, size);
 
678
 
 
679
        /*
 
680
         * FIXME: Need to run cl_scandir() here and return that value
 
681
         */
 
682
        if(ret == 0)
 
683
                return CL_CLEAN;        /* a lie - but it gets things going */
 
684
 
 
685
        /* Fall back for now */
 
686
        lseek(desc, 0L, SEEK_SET);
 
687
        return cli_parse_mbox(dir, desc, options);
 
688
}
 
689
#else
 
690
int
 
691
cli_mbox(const char *dir, int desc, unsigned int options)
 
692
{
 
693
        if(dir == NULL) {
 
694
                cli_warnmsg("cli_mbox called with NULL dir\n");
 
695
                return CL_ENULLARG;
 
696
        }
 
697
        return cli_parse_mbox(dir, desc, options);
 
698
}
 
699
#endif
358
700
 
359
701
/*
360
702
 * TODO: when signal handling is added, need to remove temp files when a
361
 
 * signal is received
 
703
 *      signal is received
362
704
 * TODO: add option to scan in memory not via temp files, perhaps with a
363
705
 * named pipe or memory mapped file, though this won't work on big e-mails
364
706
 * containing many levels of encapsulated messages - it'd just take too much
365
707
 * RAM
366
 
 * TODO: if debug is enabled, catch a segfault and dump the current e-mail
367
 
 * in its entirety, then call abort()
368
708
 * TODO: parse .msg format files
369
709
 * TODO: fully handle AppleDouble format, see
370
 
 * http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
 
710
 *      http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
371
711
 * TODO: ensure parseEmailHeaders is always called before parseEmailBody
372
712
 * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
 
713
 * TODO: Look into TNEF. Is there anything that needs to be done here?
373
714
 */
374
 
int
375
 
cl_mbox(const char *dir, int desc)
 
715
static int
 
716
cli_parse_mbox(const char *dir, int desc, unsigned int options)
376
717
{
377
718
        int retcode, i;
378
 
        message *m, *body;
 
719
        message *body;
379
720
        FILE *fd;
380
 
        char buffer[LINE_LENGTH];
 
721
        char buffer[LINE_LENGTH + 1];
 
722
#ifdef HAVE_BACKTRACE
 
723
        void (*segv)(int);
 
724
#endif
 
725
        static table_t *rfc821, *subtype;
 
726
#ifdef  CL_DEBUG
 
727
        char tmpfilename[16];
 
728
        int tmpfd;
 
729
#endif
381
730
 
382
731
        cli_dbgmsg("in mbox()\n");
383
732
 
385
734
        if((fd = fdopen(i, "rb")) == NULL) {
386
735
                cli_errmsg("Can't open descriptor %d\n", desc);
387
736
                close(i);
388
 
                return -1;
389
 
        }
390
 
        if(fgets(buffer, sizeof(buffer), fd) == NULL) {
 
737
                return CL_EOPEN;
 
738
        }
 
739
#ifdef  CL_DEBUG
 
740
        /*
 
741
         * Copy the incoming mail for debugging, so that if it falls over
 
742
         * we have a copy of the offending email. This is debugging code
 
743
         * that you shouldn't of course install in a live environment. I am
 
744
         * not interested in hearing about security issues with this section
 
745
         * of the parser.
 
746
         */
 
747
        strcpy(tmpfilename, "/tmp/mboxXXXXXX");
 
748
        tmpfd = mkstemp(tmpfilename);
 
749
        if(tmpfd < 0) {
 
750
                perror(tmpfilename);
 
751
                cli_errmsg("Can't make debugging file\n");
 
752
        } else {
 
753
                FILE *tmpfp = fdopen(tmpfd, "w");
 
754
 
 
755
                if(tmpfp) {
 
756
                        while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL)
 
757
                                fputs(buffer, tmpfp);
 
758
                        fclose(tmpfp);
 
759
                        rewind(fd);
 
760
                } else
 
761
                        cli_errmsg("Can't fdopen debugging file\n");
 
762
        }
 
763
#endif
 
764
        if(fgets(buffer, sizeof(buffer) - 1, fd) == NULL) {
391
765
                /* empty message */
392
766
                fclose(fd);
393
 
                return 0;
394
 
        }
395
 
        m = messageCreate();
396
 
        if(m == NULL) {
397
 
                fclose(fd);
398
 
                return 0;
399
 
        }
400
 
 
401
 
        if(rfc821Table == NULL) {
402
 
                assert(subtypeTable == NULL);
403
 
 
404
 
                if(initialiseTables(&rfc821Table, &subtypeTable) < 0) {
405
 
                        messageDestroy(m);
 
767
#ifdef  CL_DEBUG
 
768
                unlink(tmpfilename);
 
769
#endif
 
770
                return CL_CLEAN;
 
771
        }
 
772
#ifdef  CL_THREAD_SAFE
 
773
        pthread_mutex_lock(&tables_mutex);
 
774
#endif
 
775
        if(rfc821 == NULL) {
 
776
                assert(subtype == NULL);
 
777
 
 
778
                if(initialiseTables(&rfc821, &subtype) < 0) {
 
779
                        rfc821 = NULL;
 
780
                        subtype = NULL;
 
781
#ifdef  CL_THREAD_SAFE
 
782
                        pthread_mutex_unlock(&tables_mutex);
 
783
#endif
406
784
                        fclose(fd);
407
 
                        return -1;
 
785
#ifdef  CL_DEBUG
 
786
                        unlink(tmpfilename);
 
787
#endif
 
788
                        return CL_EMEM;
408
789
                }
409
790
        }
 
791
#ifdef  CL_THREAD_SAFE
 
792
        pthread_mutex_unlock(&tables_mutex);
 
793
#endif
 
794
 
 
795
#ifdef HAVE_BACKTRACE
 
796
        segv = signal(SIGSEGV, sigsegv);
 
797
#endif
410
798
 
411
799
        /*
412
 
         * is it a UNIX style mbox with more than one
 
800
         * Is it a UNIX style mbox with more than one
413
801
         * mail message, or just a single mail message?
 
802
         *
 
803
         * TODO: It would be better if we called cli_scandir here rather than
 
804
         * in cli_scanmail. Then we could improve the way mailboxes with more
 
805
         * than one message are handled, e.g. stopping parsing when an infected
 
806
         * message is found, and giving a better indication of which message
 
807
         * within the mailbox is infected
414
808
         */
415
809
        if(strncmp(buffer, "From ", 5) == 0) {
416
810
                /*
417
811
                 * Have been asked to check a UNIX style mbox file, which
418
812
                 * may contain more than one e-mail message to decode
 
813
                 *
 
814
                 * It would be far better for scanners.c to do this splitting
 
815
                 * and do this
 
816
                 *      FOR EACH mail in the mailbox
 
817
                 *      DO
 
818
                 *              pass this mail to cli_mbox --
 
819
                 *              scan this file
 
820
                 *              IF this file has a virus quit
 
821
                 *              THEN
 
822
                 *                      return CL_VIRUS
 
823
                 *              FI
 
824
                 *      END
 
825
                 * This would remove a problem with this code that it can
 
826
                 * fill up the tmp directory before it starts scanning
419
827
                 */
420
 
                bool lastLineWasEmpty = FALSE;
421
 
                int messagenumber = 1;
 
828
                bool lastLineWasEmpty;
 
829
                int messagenumber;
 
830
                message *m = messageCreate();
 
831
 
 
832
                if(m == NULL) {
 
833
                        fclose(fd);
 
834
#ifdef HAVE_BACKTRACE
 
835
                        signal(SIGSEGV, segv);
 
836
#endif
 
837
#ifdef  CL_DEBUG
 
838
                        unlink(tmpfilename);
 
839
#endif
 
840
                        return CL_EMEM;
 
841
                }
 
842
 
 
843
                lastLineWasEmpty = FALSE;
 
844
                messagenumber = 1;
422
845
 
423
846
                do {
424
 
                        /*cli_dbgmsg("read: %s", buffer);*/
425
 
 
426
847
                        cli_chomp(buffer);
427
848
                        if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
428
849
                                cli_dbgmsg("Deal with email number %d\n", messagenumber++);
429
850
                                /*
430
851
                                 * End of a message in the mail box
431
852
                                 */
432
 
                                body = parseEmailHeaders(m, rfc821Table);
 
853
                                body = parseEmailHeaders(m, rfc821);
 
854
                                if(body == NULL) {
 
855
                                        messageReset(m);
 
856
                                        continue;
 
857
                                }
433
858
                                messageDestroy(m);
434
859
                                if(messageGetBody(body))
435
 
                                        if(!parseEmailBody(body,  NULL, 0, NULL, dir, rfc821Table, subtypeTable)) {
 
860
                                        if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options)) {
436
861
                                                messageReset(body);
437
862
                                                m = body;
438
863
                                                continue;
439
864
                                        }
440
865
                                /*
441
866
                                 * Starting a new message, throw away all the
442
 
                                 * information about the old one
 
867
                                 * information about the old one. It would
 
868
                                 * be best to be able to scan this message
 
869
                                 * now, but cli_scanfile needs arguments
 
870
                                 * that haven't been passed here so it can't be
 
871
                                 * called
443
872
                                 */
444
873
                                m = body;
445
874
                                messageReset(body);
447
876
                                cli_dbgmsg("Finished processing message\n");
448
877
                        } else
449
878
                                lastLineWasEmpty = (bool)(buffer[0] == '\0');
450
 
                        if(messageAddLine(m, buffer, 1) < 0)
451
 
                                break;
452
 
                } while(fgets(buffer, sizeof(buffer), fd) != NULL);
453
 
 
454
 
                cli_dbgmsg("Deal with email number %d\n", messagenumber);
455
 
        } else
 
879
 
 
880
                        if(isuuencodebegin(buffer)) {
 
881
                                /*
 
882
                                 * Fast track visa to uudecode.
 
883
                                 * TODO: binhex, yenc
 
884
                                 */
 
885
                                if(uufasttrack(m, buffer, dir, fd) < 0)
 
886
                                        if(messageAddStr(m, buffer) < 0)
 
887
                                            break;
 
888
                        } else
 
889
                                if(messageAddStr(m, buffer) < 0)
 
890
                                        break;
 
891
                } while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
 
892
 
 
893
                fclose(fd);
 
894
 
 
895
                cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
 
896
                body = parseEmailHeaders(m, rfc821);
 
897
                messageDestroy(m);
 
898
        } else {
456
899
                /*
457
900
                 * It's a single message, parse the headers then the body
458
901
                 */
459
 
                do
 
902
                if(strncmp(buffer, "P I ", 4) == 0)
460
903
                        /*
461
 
                         * No need to preprocess such as cli_chomp() since
462
 
                         * that'll be done by parseEmailHeaders()
463
 
                         *
464
 
                         * TODO: this needlessly creates a message object,
465
 
                         * it'd be better if parseEmailHeaders could also
466
 
                         * read in from a file. I do not want to lump the
467
 
                         * parseEmailHeaders code here, that'd be a duplication
468
 
                         * of code I want to avoid
 
904
                         * CommuniGate Pro format: ignore headers until
 
905
                         * blank line
469
906
                         */
470
 
                        if(messageAddLine(m, buffer, 1) < 0)
471
 
                                break;
472
 
                while(fgets(buffer, sizeof(buffer), fd) != NULL);
473
 
 
474
 
        fclose(fd);
475
 
 
476
 
        retcode = 0;
477
 
 
478
 
        body = parseEmailHeaders(m, rfc821Table);
479
 
        messageDestroy(m);
480
 
        /*
481
 
         * Write out the last entry in the mailbox
482
 
         */
483
 
        if(messageGetBody(body))
484
 
                if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable))
485
 
                        retcode = -1;
486
 
 
487
 
        /*
488
 
         * Tidy up and quit
489
 
         */
490
 
        messageDestroy(body);
 
907
                        while((fgets(buffer, sizeof(buffer) - 1, fd) != NULL) &&
 
908
                                (strchr("\r\n", buffer[0]) == NULL))
 
909
                                        ;
 
910
                /*
 
911
                 * Ignore any blank lines at the top of the message
 
912
                 */
 
913
                while(strchr("\r\n", buffer[0]) &&
 
914
                     (getline_from_mbox(buffer, sizeof(buffer) - 1, fd) != NULL))
 
915
                        ;
 
916
 
 
917
                buffer[sizeof(buffer) - 1] = '\0';
 
918
 
 
919
                body = parseEmailFile(fd, rfc821, buffer, dir);
 
920
                fclose(fd);
 
921
        }
 
922
 
 
923
        /*
 
924
         * This is not necessarily true, but since the only options are
 
925
         * CL_CLEAN and CL_VIRUS this is the better choice. It would be
 
926
         * nice to have CL_CONTINUESCANNING or something like that
 
927
         */
 
928
        retcode = CL_CLEAN;
 
929
 
 
930
        if(body) {
 
931
                /*
 
932
                 * Write out the last entry in the mailbox
 
933
                 */
 
934
                if(messageGetBody(body))
 
935
                        if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options))
 
936
                                retcode = CL_EFORMAT;
 
937
 
 
938
                /*
 
939
                 * Tidy up and quit
 
940
                 */
 
941
                messageDestroy(body);
 
942
        }
491
943
 
492
944
        cli_dbgmsg("cli_mbox returning %d\n", retcode);
493
945
 
 
946
#ifdef HAVE_BACKTRACE
 
947
        signal(SIGSEGV, segv);
 
948
#endif
 
949
 
 
950
#ifdef  CL_DEBUG
 
951
        unlink(tmpfilename);
 
952
#endif
494
953
        return retcode;
495
954
}
496
955
 
497
956
/*
 
957
 * Read in an email message from fin, parse it, and return the message
 
958
 *
 
959
 * FIXME: files full of new lines and nothing else are
 
960
 * handled ungracefully...
 
961
 */
 
962
static message *
 
963
parseEmailFile(FILE *fin, const table_t *rfc821, const char *firstLine, const char *dir)
 
964
{
 
965
        bool inHeader = TRUE;
 
966
        bool contMarker = FALSE;
 
967
        bool lastWasBlank = FALSE;
 
968
        message *ret;
 
969
        bool anyHeadersFound = FALSE;
 
970
        int commandNumber = -1;
 
971
        char *fullline = NULL, *boundary = NULL;
 
972
        size_t fulllinelength = 0;
 
973
        char buffer[LINE_LENGTH+1];
 
974
 
 
975
        cli_dbgmsg("parseEmailFile\n");
 
976
 
 
977
        ret = messageCreate();
 
978
        if(ret == NULL)
 
979
                return NULL;
 
980
 
 
981
        strcpy(buffer, firstLine);
 
982
        do {
 
983
                char *line;
 
984
 
 
985
                (void)cli_chomp(buffer);
 
986
 
 
987
                line = buffer;
 
988
 
 
989
                if(line[0] == '\0')
 
990
                        line = NULL;
 
991
 
 
992
                /*
 
993
                 * Don't blank lines which are only spaces from headers,
 
994
                 * otherwise they'll be treated as the end of header marker
 
995
                 */
 
996
                if(lastWasBlank) {
 
997
                        lastWasBlank = FALSE;
 
998
                        if(boundaryStart(buffer, boundary)) {
 
999
                                cli_dbgmsg("Found a header line with space that should be blank\n");
 
1000
                                inHeader = FALSE;
 
1001
                        }
 
1002
                }
 
1003
                if(boundary) {
 
1004
                        free(boundary);
 
1005
                        boundary = NULL;
 
1006
                }
 
1007
                if(inHeader) {
 
1008
                        cli_dbgmsg("parseEmailFile: check '%s' contMarker %d fullline 0x%p\n",
 
1009
                                buffer ? buffer : "", (int)contMarker, fullline);
 
1010
                        if(line && isspace(line[0])) {
 
1011
                                char copy[sizeof(buffer)];
 
1012
 
 
1013
                                strcpy(copy, buffer);
 
1014
                                strstrip(copy);
 
1015
                                if(copy[0] == '\0') {
 
1016
                                        /*
 
1017
                                         * The header line contains only white
 
1018
                                         * space. This is not the end of the
 
1019
                                         * headers according to RFC2822, but
 
1020
                                         * some MUAs will handle it as though
 
1021
                                         * it were, and virus writers exploit
 
1022
                                         * this bug. We can't just break from
 
1023
                                         * the loop here since that would allow
 
1024
                                         * other exploits such as inserting a
 
1025
                                         * white space line before the
 
1026
                                         * content-type line. So we just have
 
1027
                                         * to make a best guess. Sigh.
 
1028
                                         */
 
1029
                                        if(fullline) {
 
1030
                                                if(parseEmailHeader(ret, fullline, rfc821) < 0)
 
1031
                                                        continue;
 
1032
 
 
1033
                                                free(fullline);
 
1034
                                                fullline = NULL;
 
1035
                                        }
 
1036
                                        if((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL) {
 
1037
                                                lastWasBlank = TRUE;
 
1038
                                                continue;
 
1039
                                        }
 
1040
                                }
 
1041
                        }
 
1042
                        lastWasBlank = FALSE;
 
1043
                        if((line == NULL) && (fullline == NULL)) {      /* empty line */
 
1044
                                if(!contMarker) {
 
1045
                                        /*
 
1046
                                         * A blank line signifies the end of
 
1047
                                         * the header and the start of the text
 
1048
                                         */
 
1049
                                        if(!anyHeadersFound)
 
1050
                                                /* Ignore the junk at the top */
 
1051
                                                continue;
 
1052
 
 
1053
                                        cli_dbgmsg("End of header information\n");
 
1054
                                        inHeader = FALSE;
 
1055
                                } else
 
1056
                                        contMarker = FALSE;
 
1057
                        } else {
 
1058
                                char *ptr;
 
1059
                                const char *qptr;
 
1060
                                int lookahead;
 
1061
 
 
1062
                                if(fullline == NULL) {
 
1063
                                        char cmd[LINE_LENGTH + 1], out[LINE_LENGTH + 1];
 
1064
 
 
1065
                                        /*
 
1066
                                         * Continuation of line we're ignoring?
 
1067
                                         */
 
1068
                                        if((line[0] == '\t') || (line[0] == ' ') || contMarker) {
 
1069
                                                contMarker = continuationMarker(line);
 
1070
                                                continue;
 
1071
                                        }
 
1072
 
 
1073
                                        /*
 
1074
                                         * Is this a header we're interested in?
 
1075
                                         */
 
1076
                                        if((strchr(line, ':') == NULL) ||
 
1077
                                           (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
 
1078
                                                if(strncmp(line, "From ", 5) == 0)
 
1079
                                                        anyHeadersFound = TRUE;
 
1080
                                                continue;
 
1081
                                        }
 
1082
 
 
1083
                                        ptr = rfc822comments(cmd, out);
 
1084
                                        commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
 
1085
 
 
1086
                                        switch(commandNumber) {
 
1087
                                                case CONTENT_TRANSFER_ENCODING:
 
1088
                                                case CONTENT_DISPOSITION:
 
1089
                                                case CONTENT_TYPE:
 
1090
                                                        anyHeadersFound = TRUE;
 
1091
                                                        break;
 
1092
                                                default:
 
1093
                                                        if(!anyHeadersFound)
 
1094
                                                                anyHeadersFound = usefulHeader(commandNumber, cmd);
 
1095
                                                        continue;
 
1096
                                        }
 
1097
                                        fullline = strdup(line);
 
1098
                                        fulllinelength = strlen(line) + 1;
 
1099
                                } else if(line != NULL) {
 
1100
                                        fulllinelength += strlen(line);
 
1101
                                        fullline = cli_realloc(fullline, fulllinelength);
 
1102
                                        strcat(fullline, line);
 
1103
                                }
 
1104
 
 
1105
                                if(line) {
 
1106
                                        contMarker = continuationMarker(line);
 
1107
 
 
1108
                                        if(contMarker)
 
1109
                                                continue;
 
1110
                                } else
 
1111
                                        contMarker = FALSE;
 
1112
 
 
1113
                                assert(fullline != NULL);
 
1114
 
 
1115
                                lookahead = getc(fin);
 
1116
                                if(lookahead != EOF) {
 
1117
                                        ungetc(lookahead, fin);
 
1118
 
 
1119
                                        /*
 
1120
                                         * Section B.2 of RFC822 says TAB or
 
1121
                                         * SPACE means a continuation of the
 
1122
                                         * previous entry.
 
1123
                                         *
 
1124
                                         * Add all the arguments on the line
 
1125
                                         */
 
1126
                                        if((lookahead == '\t') || (lookahead == ' '))
 
1127
                                                continue;
 
1128
                                }
 
1129
 
 
1130
                                if(line) {
 
1131
                                        int quotes = 0;
 
1132
                                        for(qptr = fullline; *qptr; qptr++)
 
1133
                                                if(*qptr == '\"')
 
1134
                                                        quotes++;
 
1135
 
 
1136
                                        if(quotes & 1)
 
1137
                                                continue;
 
1138
                                }
 
1139
 
 
1140
                                ptr = rfc822comments(fullline, NULL);
 
1141
                                if(ptr) {
 
1142
                                        free(fullline);
 
1143
                                        fullline = ptr;
 
1144
                                }
 
1145
 
 
1146
                                if(parseEmailHeader(ret, fullline, rfc821) < 0)
 
1147
                                        continue;
 
1148
 
 
1149
                                free(fullline);
 
1150
                                fullline = NULL;
 
1151
                        }
 
1152
                } else if(line && isuuencodebegin(line)) {
 
1153
                        /*
 
1154
                         * Fast track visa to uudecode.
 
1155
                         * TODO: binhex, yenc
 
1156
                         */
 
1157
                        if(uufasttrack(ret, line, dir, fin) < 0)
 
1158
                                if(messageAddStr(ret, line) < 0)
 
1159
                                        break;
 
1160
                } else
 
1161
                        if(messageAddStr(ret, line) < 0)
 
1162
                                break;
 
1163
        } while(getline_from_mbox(buffer, sizeof(buffer) - 1, fin) != NULL);
 
1164
 
 
1165
        if(fullline) {
 
1166
                if(*fullline) switch(commandNumber) {
 
1167
                        case CONTENT_TRANSFER_ENCODING:
 
1168
                        case CONTENT_DISPOSITION:
 
1169
                        case CONTENT_TYPE:
 
1170
                                cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline);
 
1171
                }
 
1172
                free(fullline);
 
1173
        }
 
1174
 
 
1175
        if(!anyHeadersFound) {
 
1176
                /*
 
1177
                 * False positive in believing we have an e-mail when we don't
 
1178
                 */
 
1179
                messageDestroy(ret);
 
1180
                cli_dbgmsg("parseEmailFile: no headers found, assuming it isn't an email\n");
 
1181
                return NULL;
 
1182
        }
 
1183
 
 
1184
        messageClean(ret);
 
1185
 
 
1186
        cli_dbgmsg("parseEmailFile: return\n");
 
1187
 
 
1188
        return ret;
 
1189
}
 
1190
 
 
1191
/*
498
1192
 * The given message contains a raw e-mail.
499
1193
 *
500
 
 * This function parses the headers of m and sets the message's arguments
501
 
 *
502
1194
 * Returns the message's body with the correct arguments set
 
1195
 *
 
1196
 * The downside of this approach is that for a short time we have two copies
 
1197
 * of the message in memory, the upside is that it makes for easier parsing
 
1198
 * of encapsulated messages, and in the long run uses less memory in those
 
1199
 * scenarios
 
1200
 *
 
1201
 * TODO: remove the duplication with parseEmailFile
503
1202
 */
504
1203
static message *
505
 
parseEmailHeaders(const message *m, const table_t *rfc821Table)
 
1204
parseEmailHeaders(const message *m, const table_t *rfc821)
506
1205
{
507
 
        bool inContinuationHeader = FALSE;      /* state machine: ugh */
508
1206
        bool inHeader = TRUE;
509
1207
        const text *t;
510
1208
        message *ret;
 
1209
        bool anyHeadersFound = FALSE;
 
1210
        int commandNumber = -1;
 
1211
        char *fullline = NULL;
 
1212
        size_t fulllinelength = 0;
 
1213
 
 
1214
        cli_dbgmsg("parseEmailHeaders\n");
511
1215
 
512
1216
        if(m == NULL)
513
1217
                return NULL;
515
1219
        ret = messageCreate();
516
1220
 
517
1221
        for(t = messageGetBody(m); t; t = t->t_next) {
518
 
                char *buffer = strdup(t->t_text);
519
 
#ifdef CL_THREAD_SAFE
520
 
                char *strptr;
521
 
#endif
522
 
 
523
 
                if(buffer == NULL)
524
 
                        break;
525
 
 
526
 
                cli_chomp(buffer);
527
 
 
528
 
                /*
529
 
                 * Section B.2 of RFC822 says TAB or SPACE means
530
 
                 * a continuation of the previous entry.
531
 
                 */
532
 
                if(inHeader && ((buffer[0] == '\t') || (buffer[0] == ' ')))
533
 
                        inContinuationHeader = TRUE;
534
 
 
535
 
                if(inContinuationHeader) {
536
 
                        const char *ptr;
537
 
 
538
 
                        if(!continuationMarker(buffer))
539
 
                                inContinuationHeader = FALSE;    /* no more args */
540
 
 
541
 
                        /*
542
 
                         * Add all the arguments on the line
543
 
                         */
544
 
                        for(ptr = strtok_r(buffer, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
545
 
                                messageAddArgument(ret, ptr);
546
 
                        free(buffer);
547
 
                } else if(inHeader) {
548
 
                        cli_dbgmsg("Deal with header %s\n", buffer);
549
 
 
550
 
                        /*
551
 
                         * A blank line signifies the end of the header and
552
 
                         * the start of the text
553
 
                         */
554
 
                        if(strlen(buffer) == 0) {
 
1222
                const char *buffer;
 
1223
 
 
1224
                if(t->t_line)
 
1225
                        buffer = lineGetData(t->t_line);
 
1226
                else
 
1227
                        buffer = NULL;
 
1228
 
 
1229
                if(inHeader) {
 
1230
                        cli_dbgmsg("parseEmailHeaders: check '%s'\n",
 
1231
                                buffer ? buffer : "");
 
1232
                        if(buffer == NULL) {
 
1233
                                /*
 
1234
                                 * A blank line signifies the end of
 
1235
                                 * the header and the start of the text
 
1236
                                 */
555
1237
                                cli_dbgmsg("End of header information\n");
556
 
                                inContinuationHeader = inHeader = FALSE;
557
 
                        } else if(parseEmailHeader(ret, buffer, rfc821Table) == CONTENT_TYPE)
558
 
                                inContinuationHeader = continuationMarker(buffer);
559
 
                        free(buffer);
560
 
                } else {
 
1238
                                inHeader = FALSE;
 
1239
                                if(!anyHeadersFound) {
 
1240
                                        cli_dbgmsg("Nothing interesting in the header\n");
 
1241
                                        break;
 
1242
                                }
 
1243
                        } else {
 
1244
                                char *ptr;
 
1245
                                const char *qptr;
 
1246
                                int quotes;
 
1247
 
 
1248
                                if(fullline == NULL) {
 
1249
                                        char cmd[LINE_LENGTH + 1];
 
1250
 
 
1251
                                        /*
 
1252
                                         * Continuation of line we're ignoring?
 
1253
                                         */
 
1254
                                        if((buffer[0] == '\t') || (buffer[0] == ' '))
 
1255
                                                continue;
 
1256
 
 
1257
                                        /*
 
1258
                                         * Is this a header we're interested in?
 
1259
                                         */
 
1260
                                        if((strchr(buffer, ':') == NULL) ||
 
1261
                                           (cli_strtokbuf(buffer, 0, ":", cmd) == NULL)) {
 
1262
                                                if(strncmp(buffer, "From ", 5) == 0)
 
1263
                                                        anyHeadersFound = TRUE;
 
1264
                                                continue;
 
1265
                                        }
 
1266
 
 
1267
                                        ptr = rfc822comments(cmd, NULL);
 
1268
                                        commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
 
1269
                                        if(ptr)
 
1270
                                                free(ptr);
 
1271
 
 
1272
                                        switch(commandNumber) {
 
1273
                                                case CONTENT_TRANSFER_ENCODING:
 
1274
                                                case CONTENT_DISPOSITION:
 
1275
                                                case CONTENT_TYPE:
 
1276
                                                        anyHeadersFound = TRUE;
 
1277
                                                        break;
 
1278
                                                default:
 
1279
                                                        if(!anyHeadersFound)
 
1280
                                                                anyHeadersFound = usefulHeader(commandNumber, cmd);
 
1281
                                                        continue;
 
1282
                                        }
 
1283
                                        fullline = strdup(buffer);
 
1284
                                        fulllinelength = strlen(buffer) + 1;
 
1285
                                } else if(buffer) {
 
1286
                                        fulllinelength += strlen(buffer);
 
1287
                                        fullline = cli_realloc(fullline, fulllinelength);
 
1288
                                        strcat(fullline, buffer);
 
1289
                                }
 
1290
 
 
1291
                                assert(fullline != NULL);
 
1292
 
 
1293
                                if(t->t_next && (t->t_next->t_line != NULL))
 
1294
                                        /*
 
1295
                                         * Section B.2 of RFC822 says TAB or
 
1296
                                         * SPACE means a continuation of the
 
1297
                                         * previous entry.
 
1298
                                         *
 
1299
                                         * Add all the arguments on the line
 
1300
                                         */
 
1301
                                        switch(lineGetData(t->t_next->t_line)[0]) {
 
1302
                                                case ' ':
 
1303
                                                case '\t':
 
1304
                                                        continue;
 
1305
                                        }
 
1306
 
 
1307
                                quotes = 0;
 
1308
                                for(qptr = fullline; *qptr; qptr++)
 
1309
                                        if(*qptr == '\"')
 
1310
                                                quotes++;
 
1311
 
 
1312
                                if(quotes & 1)
 
1313
                                        continue;
 
1314
 
 
1315
                                ptr = rfc822comments(fullline, NULL);
 
1316
                                if(ptr) {
 
1317
                                        free(fullline);
 
1318
                                        fullline = ptr;
 
1319
                                }
 
1320
 
 
1321
                                if(parseEmailHeader(ret, fullline, rfc821) < 0)
 
1322
                                        continue;
 
1323
 
 
1324
                                free(fullline);
 
1325
                                fullline = NULL;
 
1326
                        }
 
1327
                } else
561
1328
                        /*cli_dbgmsg("Add line to body '%s'\n", buffer);*/
562
 
                        messageAddLine(ret, buffer, 0);
 
1329
                        if(messageAddLine(ret, t->t_line) < 0)
 
1330
                                break;
 
1331
        }
 
1332
 
 
1333
        if(fullline) {
 
1334
                if(*fullline) switch(commandNumber) {
 
1335
                        case CONTENT_TRANSFER_ENCODING:
 
1336
                        case CONTENT_DISPOSITION:
 
1337
                        case CONTENT_TYPE:
 
1338
                                cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline);
563
1339
                }
 
1340
                free(fullline);
 
1341
        }
 
1342
 
 
1343
        if(!anyHeadersFound) {
 
1344
                /*
 
1345
                 * False positive in believing we have an e-mail when we don't
 
1346
                 */
 
1347
                messageDestroy(ret);
 
1348
                cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
 
1349
                return NULL;
564
1350
        }
565
1351
 
566
1352
        messageClean(ret);
574
1360
 * Handle a header line of an email message
575
1361
 */
576
1362
static int
577
 
parseEmailHeader(message *m, const char *line, const table_t *rfc821Table)
 
1363
parseEmailHeader(message *m, const char *line, const table_t *rfc821)
578
1364
{
579
 
        char *copy, *cmd;
 
1365
        char *cmd;
580
1366
        int ret = -1;
581
1367
#ifdef CL_THREAD_SAFE
582
1368
        char *strptr;
583
1369
#endif
 
1370
        const char *separater;
 
1371
        char *copy, tokenseparater[2];
584
1372
 
585
1373
        cli_dbgmsg("parseEmailHeader '%s'\n", line);
586
1374
 
587
 
        if(strchr(line, ':') == NULL)
 
1375
        /*
 
1376
         * In RFC822 the separator between the key and the value is a colon,
 
1377
         * e.g. Content-Transfer-Encoding: base64
 
1378
         * However some MUAs are lax about this and virus writers exploit
 
1379
         * this hole, so we need to check all known possibilities
 
1380
         */
 
1381
        for(separater = ":= "; *separater; separater++)
 
1382
                if(strchr(line, *separater) != NULL)
 
1383
                        break;
 
1384
 
 
1385
        if(*separater == '\0')
588
1386
                return -1;
589
1387
 
590
 
        copy = strdup(line);
591
 
 
592
 
        cmd = strtok_r(copy, ":", &strptr);
593
 
 
594
 
        if(cmd && *cmd) {
 
1388
        copy = rfc2047(line);
 
1389
        if(copy == NULL)
 
1390
                /* an RFC checker would return -1 here */
 
1391
                copy = strdup(line);
 
1392
 
 
1393
        tokenseparater[0] = *separater;
 
1394
        tokenseparater[1] = '\0';
 
1395
 
 
1396
#ifdef  CL_THREAD_SAFE
 
1397
        cmd = strtok_r(copy, tokenseparater, &strptr);
 
1398
#else
 
1399
        cmd = strtok(copy, tokenseparater);
 
1400
#endif
 
1401
 
 
1402
        if(cmd && (strstrip(cmd) > 0)) {
 
1403
#ifdef  CL_THREAD_SAFE
595
1404
                char *arg = strtok_r(NULL, "", &strptr);
 
1405
#else
 
1406
                char *arg = strtok(NULL, "");
 
1407
#endif
596
1408
 
597
1409
                if(arg)
598
1410
                        /*
602
1414
                         * "multipart/mixed" and cmd to
603
1415
                         * be "Content-Type"
604
1416
                         */
605
 
                        ret = parseMimeHeader(m, cmd, rfc821Table, arg);
 
1417
                        ret = parseMimeHeader(m, cmd, rfc821, arg);
606
1418
        }
607
1419
        free(copy);
608
 
 
609
1420
        return ret;
610
1421
}
611
1422
 
612
1423
/*
613
1424
 * This is a recursive routine.
 
1425
 * FIXME: We are not passed &mrec so we can't check against MAX_MAIL_RECURSION
614
1426
 *
615
1427
 * This function parses the body of mainMessage and saves its attachments in dir
616
1428
 *
617
1429
 * mainMessage is the buffer to be parsed, it contains an e-mail's body, without
618
 
 * any headers. First
619
 
 * time of calling it'll be
620
 
 *      the whole message. Later it'll be parts of a multipart message
 
1430
 * any headers. First time of calling it'll be
 
1431
 * the whole message. Later it'll be parts of a multipart message
621
1432
 * textIn is the plain text message being built up so far
622
 
 * blobsIn contains the array of attachments found so far
623
1433
 *
624
1434
 * Returns:
625
1435
 *      0 for fail
627
1437
 *      2 for success, attachments not saved
628
1438
 */
629
1439
static int      /* success or fail */
630
 
parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable)
 
1440
parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t *rfc821Table, const table_t *subtypeTable, unsigned int options)
631
1441
{
632
 
        message *messages[MAXALTERNATIVE];
633
 
        int inhead, inMimeHead, i, rc = 1, htmltextPart, multiparts = 0;
 
1442
        message **messages;     /* parts of a multipart message */
 
1443
        int inMimeHead, i, rc = 1, htmltextPart, multiparts = 0;
634
1444
        text *aText;
635
 
        blob *blobList[MAX_ATTACHMENTS], **blobs;
636
1445
        const char *cptr;
637
1446
        message *mainMessage;
638
 
 
639
 
        cli_dbgmsg("in parseEmailBody(nBlobs = %d)\n", nBlobs);
640
 
 
641
 
        /* Pre-assertions */
642
 
        if(nBlobs >= MAX_ATTACHMENTS) {
643
 
                cli_warnmsg("Not all attachments will be scanned\n");
644
 
                return 2;
645
 
        }
 
1447
        fileblob *fb;
 
1448
 
 
1449
        cli_dbgmsg("in parseEmailBody\n");
646
1450
 
647
1451
        aText = textIn;
648
 
        blobs = blobsIn;
 
1452
        messages = NULL;
649
1453
        mainMessage = messageIn;
650
1454
 
651
1455
        /* Anything left to be parsed? */
652
1456
        if(mainMessage && (messageGetBody(mainMessage) != NULL)) {
653
 
                int numberOfAttachments = 0, numberOfNewAttachments;
654
1457
                mime_type mimeType;
655
 
                const char *mimeSubtype;
 
1458
                int subtype, inhead;
 
1459
                const char *mimeSubtype, *boundary;
 
1460
                char *protocol;
656
1461
                const text *t_line;
657
1462
                /*bool isAlternative;*/
658
 
                const char *boundary;
659
1463
                message *aMessage;
660
1464
 
661
1465
                cli_dbgmsg("Parsing mail file\n");
663
1467
                mimeType = messageGetMimeType(mainMessage);
664
1468
                mimeSubtype = messageGetMimeSubtype(mainMessage);
665
1469
 
666
 
                if((mimeType == TEXT) && (tableFind(subtypeTable, mimeSubtype) == PLAIN)) {
 
1470
                /* pre-process */
 
1471
                subtype = tableFind(subtypeTable, mimeSubtype);
 
1472
                if((mimeType == TEXT) && (subtype == PLAIN)) {
667
1473
                        /*
668
1474
                         * This is effectively no encoding, notice that we
669
1475
                         * don't check that charset is us-ascii
670
1476
                         */
671
1477
                        cli_dbgmsg("assume no encoding\n");
672
1478
                        mimeType = NOMIME;
 
1479
                        messageSetMimeSubtype(mainMessage, "");
 
1480
                } else if((mimeType == MESSAGE) &&
 
1481
                          (strcasecmp(mimeSubtype, "rfc822-headers") == 0)) {
 
1482
                        /*
 
1483
                         * RFC1892/RFC3462: section 2 text/rfc822-headers
 
1484
                         * incorrectly sent as message/rfc822-headers
 
1485
                         *
 
1486
                         * Parse as text/plain, i.e. no mime
 
1487
                         */
 
1488
                        cli_dbgmsg("Changing message/rfc822-headers to text/rfc822-headers\n");
 
1489
                        mimeType = NOMIME;
 
1490
                        messageSetMimeSubtype(mainMessage, "");
673
1491
                }
674
1492
 
675
1493
                cli_dbgmsg("mimeType = %d\n", mimeType);
676
1494
 
677
1495
                switch(mimeType) {
678
1496
                case NOMIME:
 
1497
                        cli_dbgmsg("Not a mime encoded message\n");
679
1498
                        aText = textAddMessage(aText, mainMessage);
680
1499
                        break;
681
1500
                case TEXT:
682
 
                        if(tableFind(subtypeTable, mimeSubtype) == PLAIN)
683
 
                                aText = textCopy(messageGetBody(mainMessage));
 
1501
                        /* text/plain has been preprocessed as no encoding */
 
1502
                        if((options&CL_SCAN_MAILURL) && (subtype == HTML))
 
1503
                                checkURLs(mainMessage, dir);
684
1504
                        break;
685
1505
                case MULTIPART:
 
1506
                        cli_dbgmsg("Content-type 'multipart' handler\n");
686
1507
                        boundary = messageFindArgument(mainMessage, "boundary");
687
1508
 
688
1509
                        if(boundary == NULL) {
696
1517
                                break;
697
1518
                        }
698
1519
 
 
1520
                        /* Perhaps it should assume mixed? */
699
1521
                        if(mimeSubtype[0] == '\0') {
700
1522
                                cli_warnmsg("Multipart has no subtype assuming alternative\n");
701
1523
                                mimeSubtype = "alternative";
715
1537
                        }
716
1538
 
717
1539
                        do
718
 
                                if(boundaryStart(t_line->t_text, boundary))
719
 
                                        break;
 
1540
                                if(t_line->t_line) {
 
1541
                                        if(boundaryStart(lineGetData(t_line->t_line), boundary))
 
1542
                                                break;
 
1543
                                        /*
 
1544
                                         * Found a uuencoded/binhex file before
 
1545
                                         *      the first multipart
 
1546
                                         * TODO: check yEnc
 
1547
                                         */
 
1548
                                        if(uuencodeBegin(mainMessage) == t_line) {
 
1549
                                                if(messageGetEncoding(mainMessage) == NOENCODING) {
 
1550
                                                        messageSetEncoding(mainMessage, "x-uuencode");
 
1551
                                                        fb = messageToFileblob(mainMessage, dir);
 
1552
 
 
1553
                                                        if(fb)
 
1554
                                                                fileblobDestroy(fb);
 
1555
                                                }
 
1556
                                        } else if(binhexBegin(mainMessage) == t_line) {
 
1557
                                                if(messageGetEncoding(mainMessage) == NOENCODING) {
 
1558
                                                        messageSetEncoding(mainMessage, "x-binhex");
 
1559
                                                        fb = messageToFileblob(mainMessage, dir);
 
1560
 
 
1561
                                                        if(fb)
 
1562
                                                                fileblobDestroy(fb);
 
1563
                                                }
 
1564
                                        } else if(encodingLine(mainMessage) == t_line->t_next) {
 
1565
                                                /*
 
1566
                                                 * We look for the next line
 
1567
                                                 * since later on we'll skip
 
1568
                                                 * over the important line when
 
1569
                                                 * we think it's a blank line
 
1570
                                                 * at the top of the message -
 
1571
                                                 * which it would have been in
 
1572
                                                 * an RFC compliant world
 
1573
                                                 */
 
1574
                                                cli_dbgmsg("Found MIME attachment before the first MIME section\n");
 
1575
                                                if(messageGetEncoding(mainMessage) == NOENCODING)
 
1576
                                                        break;
 
1577
                                        }
 
1578
                                }
720
1579
                        while((t_line = t_line->t_next) != NULL);
721
1580
 
722
1581
                        if(t_line == NULL) {
723
 
                                cli_warnmsg("Multipart MIME message contains no boundary lines\n");
 
1582
                                cli_dbgmsg("Multipart MIME message contains no boundary lines\n");
724
1583
                                /*
725
1584
                                 * Free added by Thomas Lamy
726
1585
                                 * <Thomas.Lamy@in-online.net>
742
1601
                        inMimeHead = 0;
743
1602
 
744
1603
                        /*
 
1604
                         * Parse the mainMessage object and create an array
 
1605
                         * of objects called messages, one for each of the
 
1606
                         * multiparts that mainMessage contains
 
1607
                         *
745
1608
                         * This looks like parseEmailHeaders() - maybe there's
746
1609
                         * some duplication of code to be cleaned up
747
1610
                         */
748
 
                        for(multiparts = 0; t_line && (multiparts < MAXALTERNATIVE); multiparts++) {
 
1611
                        for(multiparts = 0; t_line; multiparts++) {
749
1612
                                int lines = 0;
 
1613
                                message **m;
 
1614
 
 
1615
                                m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
 
1616
                                if(m == NULL)
 
1617
                                        break;
 
1618
                                messages = m;
750
1619
 
751
1620
                                aMessage = messages[multiparts] = messageCreate();
 
1621
                                if(aMessage == NULL) {
 
1622
                                        multiparts--;
 
1623
                                        continue;
 
1624
                                }
752
1625
 
753
1626
                                cli_dbgmsg("Now read in part %d\n", multiparts);
754
1627
 
756
1629
                                 * Ignore blank lines. There shouldn't be ANY
757
1630
                                 * but some viruses insert them
758
1631
                                 */
759
 
                                while((t_line = t_line->t_next) != NULL) {
760
 
                                        cli_chomp(t_line->t_text);
761
 
                                        if(strlen(t_line->t_text) != 0)
 
1632
                                while((t_line = t_line->t_next) != NULL)
 
1633
                                        if(t_line->t_line &&
 
1634
                                           /*(cli_chomp(t_line->t_text) > 0))*/
 
1635
                                           (strlen(lineGetData(t_line->t_line)) > 0))
762
1636
                                                break;
763
 
                                }
764
1637
 
765
1638
                                if(t_line == NULL) {
766
1639
                                        cli_dbgmsg("Empty part\n");
 
1640
                                        /*
 
1641
                                         * Remove this part unless there's
 
1642
                                         * a uuencoded portion somewhere in
 
1643
                                         * the complete message that we may
 
1644
                                         * throw away by mistake if the MIME
 
1645
                                         * encoding information is incorrect
 
1646
                                         */
 
1647
                                        if(uuencodeBegin(mainMessage) == NULL) {
 
1648
                                                messageDestroy(aMessage);
 
1649
                                                --multiparts;
 
1650
                                        }
767
1651
                                        continue;
768
1652
                                }
769
1653
 
770
1654
                                do {
771
 
                                        const char *line = t_line->t_text;
772
 
 
773
 
                                        /*cli_dbgmsg("inMimeHead %d inhead %d boundary %s line '%s' next '%s'\n",
774
 
                                                inMimeHead, inhead, boundary, line, t_line->t_next ? t_line->t_next->t_text : "(null)");*/
775
 
 
776
 
                                        if(inMimeHead) {
 
1655
                                        const char *line = lineGetData(t_line->t_line);
 
1656
 
 
1657
                                        /*cli_dbgmsg("inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
 
1658
                                                inMimeHead, inhead, boundary, line,
 
1659
                                                t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");*/
 
1660
 
 
1661
                                        if(inMimeHead) {        /* continuation line */
 
1662
                                                if(line == NULL) {
 
1663
                                                        /*inhead =*/ inMimeHead = 0;
 
1664
                                                        continue;
 
1665
                                                }
777
1666
                                                /*
778
1667
                                                 * Handle continuation lines
779
1668
                                                 * because the previous line
780
 
                                                 * ended with a ;
 
1669
                                                 * ended with a ; or this line
 
1670
                                                 * starts with a white space
781
1671
                                                 */
782
 
                                                cli_dbgmsg("About to add mime Argument '%s'\n",
783
 
                                                        line);
 
1672
                                                cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n",
 
1673
                                                        multiparts, line);
784
1674
                                                /*
785
1675
                                                 * Handle the case when it
786
1676
                                                 * isn't really a continuation
804
1694
                                                 */
805
1695
                                                inMimeHead = continuationMarker(line);
806
1696
                                                messageAddArgument(aMessage, line);
807
 
                                        } else if(inhead) {
808
 
                                                if(strlen(line) == 0) {
 
1697
                                        } else if(inhead) {     /* handling normal headers */
 
1698
                                                int quotes;
 
1699
                                                char *fullline, *ptr;
 
1700
                                                const char *qptr;
 
1701
                                                const text *next;
 
1702
 
 
1703
                                                if(line == NULL) {
 
1704
                                                        /*
 
1705
                                                         * empty line, should be the end of the headers,
 
1706
                                                         * but some base64 decoders, e.g. uudeview, are broken
 
1707
                                                         * and will handle this type of entry, decoding the
 
1708
                                                         * base64 content...
 
1709
                                                         * Content-Type: application/octet-stream; name=text.zip
 
1710
                                                         * Content-Transfer-Encoding: base64
 
1711
                                                         * Content-Disposition: attachment; filename="text.zip"
 
1712
                                                         *
 
1713
                                                         * Content-Disposition: attachment;
 
1714
                                                         *      filename=text.zip
 
1715
                                                         * Content-Type: application/octet-stream;
 
1716
                                                         *      name=text.zip
 
1717
                                                         * Content-Transfer-Encoding: base64
 
1718
                                                         *
 
1719
                                                         * UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
 
1720
                                                         */
 
1721
                                                        next = t_line->t_next;
 
1722
                                                        if(next && next->t_line) {
 
1723
                                                                const char *data = lineGetData(next->t_line);
 
1724
 
 
1725
                                                                if((messageGetEncoding(aMessage) == NOENCODING) &&
 
1726
                                                                   (messageGetMimeType(aMessage) == APPLICATION))
 
1727
                                                                        /*
 
1728
                                                                         * Handle this nightmare (note the blank
 
1729
                                                                         * line in the header and the incorrect
 
1730
                                                                         * content-transfer-encoding header)
 
1731
                                                                         *
 
1732
                                                                         * Content-Type: application/octet-stream; name="zipped_files.EXEX-Spanska: Yes
 
1733
                                                                         *
 
1734
                                                                         * r-Encoding: base64
 
1735
                                                                         * Content-Disposition: attachment; filename="zipped_files.EXE"
 
1736
                                                                         */
 
1737
                                                                        if(strstr(data, "base64")) {
 
1738
                                                                                messageSetEncoding(aMessage, "base64");
 
1739
                                                                                cli_dbgmsg("Ignoring fake end of headers\n");
 
1740
                                                                                continue;
 
1741
                                                                        }
 
1742
                                                                if((strncmp(data, "Content", 7) == 0) ||
 
1743
                                                                   (strncmp(data, "filename=", 9) == 0)) {
 
1744
                                                                        cli_dbgmsg("Ignoring fake end of headers\n");
 
1745
                                                                        continue;
 
1746
                                                                }
 
1747
                                                        }
 
1748
                                                        cli_dbgmsg("Multipart %d: End of header information\n",
 
1749
                                                                multiparts);
809
1750
                                                        inhead = 0;
810
1751
                                                        continue;
811
1752
                                                }
835
1776
                                                        continue;
836
1777
                                                }
837
1778
 
 
1779
                                                inMimeHead = FALSE;
 
1780
 
 
1781
                                                assert(strlen(line) <= LINE_LENGTH);
 
1782
 
 
1783
                                                fullline = rfc822comments(line, NULL);
 
1784
                                                if(fullline == NULL)
 
1785
                                                        fullline = strdup(line);
 
1786
 
 
1787
                                                quotes = 0;
 
1788
                                                for(qptr = fullline; *qptr; qptr++)
 
1789
                                                        if(*qptr == '\"')
 
1790
                                                                quotes++;
 
1791
 
838
1792
                                                /*
839
 
                                                 * Some clients are broken and
840
 
                                                 * put white space after the ;
 
1793
                                                 * Fold next lines to the end of this
 
1794
                                                 * if they start with a white space
 
1795
                                                 * or if this line has an odd number of quotes:
 
1796
                                                 * Content-Type: application/octet-stream; name="foo
 
1797
                                                 * "
841
1798
                                                 */
842
 
                                                inMimeHead = continuationMarker(line);
843
 
                                                if(!inMimeHead)
844
 
                                                        if(t_line->t_next && ((t_line->t_next->t_text[0] == '\t') || (t_line->t_next->t_text[0] == ' ')))
845
 
                                                                inMimeHead = TRUE;
846
 
 
847
 
                                                parseEmailHeader(aMessage, line, rfc821Table);
848
 
                                        } else if(boundaryStart(line, boundary)) {
849
 
                                                inhead = 1;
850
 
                                                break;
 
1799
                                                next = t_line->t_next;
 
1800
                                                while(next && next->t_line) {
 
1801
                                                        const char *data = lineGetData(next->t_line);
 
1802
 
 
1803
                                                        /*if((!isspace(data[0])) &&
 
1804
                                                           ((quotes & 1) == 0))
 
1805
                                                                break;*/
 
1806
                                                        if(!isspace(data[0]))
 
1807
                                                                break;
 
1808
 
 
1809
                                                        ptr = cli_realloc(fullline,
 
1810
                                                                strlen(fullline) + strlen(data) + 1);
 
1811
 
 
1812
                                                        if(ptr == NULL)
 
1813
                                                                break;
 
1814
 
 
1815
                                                        fullline = ptr;
 
1816
                                                        strcat(fullline, data);
 
1817
 
 
1818
                                                        /*for(qptr = data; *qptr; qptr++)
 
1819
                                                                if(*qptr == '\"')
 
1820
                                                                        quotes++;*/
 
1821
 
 
1822
                                                        t_line = next;
 
1823
                                                        next = next->t_next;
 
1824
                                                }
 
1825
                                                cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
 
1826
                                                        multiparts, fullline);
 
1827
 
 
1828
                                                parseEmailHeader(aMessage, fullline, rfc821Table);
 
1829
                                                free(fullline);
851
1830
                                        } else if(endOfMessage(line, boundary)) {
852
1831
                                                /*
853
1832
                                                 * Some viruses put information
859
1838
                                                 */
860
1839
                                                /* t_line = NULL;*/
861
1840
                                                break;
 
1841
                                        } else if(boundaryStart(line, boundary)) {
 
1842
                                                inhead = 1;
 
1843
                                                break;
862
1844
                                        } else {
863
 
                                                messageAddLine(aMessage, line, 1);
 
1845
                                                if(messageAddLine(aMessage, t_line->t_line) < 0)
 
1846
                                                        break;
864
1847
                                                lines++;
865
1848
                                        }
866
1849
                                } while((t_line = t_line->t_next) != NULL);
874
1857
                        free((char *)boundary);
875
1858
 
876
1859
                        /*
 
1860
                         * Preprocess. Anything special to be done before
 
1861
                         * we handle the multiparts?
 
1862
                         */
 
1863
                        switch(tableFind(subtypeTable, mimeSubtype)) {
 
1864
                                case KNOWBOT:
 
1865
                                        /* TODO */
 
1866
                                        cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n");
 
1867
                                        mimeSubtype = "mixed";
 
1868
                                        break;
 
1869
                                case -1:
 
1870
                                        /*
 
1871
                                         * According to section 7.2.6 of
 
1872
                                         * RFC1521, unrecognised multiparts
 
1873
                                         * should be treated as multipart/mixed.
 
1874
                                         */
 
1875
                                        cli_dbgmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype);
 
1876
                                        mimeSubtype = "mixed";
 
1877
                                        break;
 
1878
                        }
 
1879
 
 
1880
                        /*
877
1881
                         * We've finished message we're parsing
878
1882
                         */
879
1883
                        if(mainMessage && (mainMessage != messageIn)) {
881
1885
                                mainMessage = NULL;
882
1886
                        }
883
1887
 
884
 
                        if(multiparts == 0)
 
1888
                        if(multiparts == 0) {
 
1889
                                if(messages)
 
1890
                                        free(messages);
885
1891
                                return 2;       /* Nothing to do */
 
1892
                        }
886
1893
 
887
1894
                        cli_dbgmsg("The message has %d parts\n", multiparts);
888
 
                        cli_dbgmsg("Find out the multipart type(%s)\n", mimeSubtype);
 
1895
                        cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
889
1896
 
 
1897
                        /*
 
1898
                         * We now have all the parts of the multipart message
 
1899
                         * in the messages array:
 
1900
                         *      message *messages[multiparts]
 
1901
                         * Let's decide what to do with them all
 
1902
                         */
890
1903
                        switch(tableFind(subtypeTable, mimeSubtype)) {
891
1904
                        case RELATED:
892
1905
                                cli_dbgmsg("Multipart related handler\n");
914
1927
                                                        break;
915
1928
                                                }
916
1929
 
917
 
                                if(htmltextPart == -1) {
 
1930
                                if(htmltextPart == -1)
918
1931
                                        cli_dbgmsg("No HTML code found to be scanned");
919
 
                                        rc = 0;
920
 
                                } else
921
 
                                        rc = parseEmailBody(aMessage, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
922
 
                                blobArrayDestroy(blobs, nBlobs);
923
 
                                blobs = NULL;
924
 
                                nBlobs = 0;
 
1932
                                else {
 
1933
                                        rc = parseEmailBody(aMessage, aText, dir, rfc821Table, subtypeTable, options);
 
1934
                                        if(rc == 1) {
 
1935
                                                assert(aMessage == messages[htmltextPart]);
 
1936
                                                messageDestroy(aMessage);
 
1937
                                                messages[htmltextPart] = NULL;
 
1938
                                        }
 
1939
                                }
925
1940
 
926
1941
                                /*
927
1942
                                 * Fixed based on an idea from Stephen White <stephen@earth.li>
947
1962
                                free((char *)cptr);
948
1963
                                if(!isAlternative)
949
1964
                                        break;*/
 
1965
                        case DIGEST:
 
1966
                                /*
 
1967
                                 * According to section 5.1.5 RFC2046, the
 
1968
                                 * default mime type of multipart/digest parts
 
1969
                                 * is message/rfc822
 
1970
                                 *
 
1971
                                 * We consider them as alternative, wrong in
 
1972
                                 * the strictest sense since they aren't
 
1973
                                 * alternatives - all parts are valid - but it's
 
1974
                                 * OK for our needs since it means each part
 
1975
                                 * will be scanned
 
1976
                                 */
950
1977
                        case ALTERNATIVE:
951
1978
                                cli_dbgmsg("Multipart alternative handler\n");
952
1979
 
953
 
                                htmltextPart = getTextPart(messages, multiparts);
954
 
 
955
 
                                if(htmltextPart == -1)
956
 
                                        htmltextPart = 0;
957
 
 
958
 
                                aMessage = messages[htmltextPart];
959
 
                                aText = textAddMessage(aText, aMessage);
960
 
 
961
 
                                rc = parseEmailBody(NULL, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
962
 
 
963
 
                                if(rc == 1) {
964
 
                                        /*
965
 
                                         * Alternative message has saved its
966
 
                                         * attachments, ensure we don't do
967
 
                                         * the same thing
968
 
                                         */
969
 
                                        blobArrayDestroy(blobs, nBlobs);
970
 
                                        blobs = NULL;
971
 
                                        nBlobs = 0;
972
 
                                        rc = 2;
973
 
                                }
974
1980
                                /*
975
1981
                                 * Fall through - some clients are broken and
976
1982
                                 * say alternative instead of mixed. The Klez
977
 
                                 * virus is broken that way
 
1983
                                 * virus is broken that way, and anyway we
 
1984
                                 * wish to scan all of the alternatives
978
1985
                                 */
979
1986
                        case REPORT:
980
1987
                                /*
1002
2009
 
1003
2010
                                cli_dbgmsg("Mixed message with %d parts\n", multiparts);
1004
2011
                                for(i = 0; i < multiparts; i++) {
1005
 
                                        bool addAttachment = FALSE;
1006
2012
                                        bool addToText = FALSE;
1007
2013
                                        const char *dtype;
 
2014
#ifndef SAVE_TO_DISC
1008
2015
                                        message *body;
 
2016
#endif
1009
2017
 
1010
2018
                                        aMessage = messages[i];
1011
2019
 
1012
 
                                        assert(aMessage != NULL);
1013
 
 
1014
 
                                        dtype = messageGetDispositionType(aMessage);
1015
 
                                        cptr = messageGetMimeSubtype(aMessage);
 
2020
                                        if(aMessage == NULL)
 
2021
                                                continue;
1016
2022
 
1017
2023
                                        cli_dbgmsg("Mixed message part %d is of type %d\n",
1018
2024
                                                i, messageGetMimeType(aMessage));
1019
2025
 
1020
2026
                                        switch(messageGetMimeType(aMessage)) {
1021
2027
                                        case APPLICATION:
1022
 
#if     0
1023
 
                                                /* strict checking... */
1024
 
                                                if((strcasecmp(dtype, "attachment") == 0) ||
1025
 
                                                   (strcasecmp(cptr, "x-msdownload") == 0) ||
1026
 
                                                   (strcasecmp(cptr, "octet-stream") == 0) ||
1027
 
                                                   (strcasecmp(dtype, "octet-stream") == 0))
1028
 
                                                        addAttachment = TRUE;
1029
 
                                                else {
1030
 
                                                        cli_dbgmsg("Discarded mixed/application not sent as attachment\n");
1031
 
                                                        continue;
1032
 
                                                }
1033
 
#endif
1034
 
                                                addAttachment = TRUE;
1035
 
 
 
2028
                                        case AUDIO:
 
2029
                                        case IMAGE:
 
2030
                                        case VIDEO:
1036
2031
                                                break;
1037
2032
                                        case NOMIME:
 
2033
                                                cli_dbgmsg("No mime headers found in multipart part %d\n", i);
1038
2034
                                                if(mainMessage) {
1039
 
                                                        const text *t_line = uuencodeBegin(mainMessage);
1040
 
                                                        if(t_line) {
1041
 
                                                                blob *aBlob;
1042
 
 
 
2035
                                                        if(uuencodeBegin(aMessage)) {
1043
2036
                                                                cli_dbgmsg("Found uuencoded message in multipart/mixed mainMessage\n");
1044
2037
                                                                messageSetEncoding(mainMessage, "x-uuencode");
1045
 
                                                                aBlob = messageToBlob(mainMessage);
 
2038
                                                                fb = messageToFileblob(mainMessage, dir);
1046
2039
 
1047
 
                                                                if(aBlob) {
1048
 
                                                                        assert(blobGetFilename(aBlob) != NULL);
1049
 
                                                                        blobClose(aBlob);
1050
 
                                                                        blobList[numberOfAttachments++] = aBlob;
1051
 
                                                                }
 
2040
                                                                if(fb)
 
2041
                                                                        fileblobDestroy(fb);
1052
2042
                                                        }
1053
2043
                                                        if(mainMessage != messageIn)
1054
2044
                                                                messageDestroy(mainMessage);
1055
2045
                                                        mainMessage = NULL;
 
2046
                                                } else if(aMessage) {
 
2047
                                                        if(uuencodeBegin(aMessage)) {
 
2048
                                                                cli_dbgmsg("Found uuencoded message in multipart/mixed non mime part\n");
 
2049
                                                                messageSetEncoding(aMessage, "x-uuencode");
 
2050
                                                                fb = messageToFileblob(aMessage, dir);
 
2051
 
 
2052
                                                                if(fb)
 
2053
                                                                        fileblobDestroy(fb);
 
2054
                                                                assert(aMessage == messages[i]);
 
2055
                                                                messageReset(messages[i]);
 
2056
                                                        } else if(binhexBegin(aMessage)) {
 
2057
                                                                cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n");
 
2058
                                                                messageSetEncoding(aMessage, "x-binhex");
 
2059
                                                                fb = messageToFileblob(aMessage, dir);
 
2060
 
 
2061
                                                                if(fb)
 
2062
                                                                        fileblobDestroy(fb);
 
2063
                                                                assert(aMessage == messages[i]);
 
2064
                                                                messageReset(messages[i]);
 
2065
                                                        }
1056
2066
                                                }
1057
2067
                                                addToText = TRUE;
1058
2068
                                                if(messageGetBody(aMessage) == NULL)
1059
2069
                                                        /*
1060
2070
                                                         * No plain text version
1061
2071
                                                         */
1062
 
                                                        messageAddLine(aMessage, "No plain text alternative", 1);
 
2072
                                                        messageAddStr(aMessage, "No plain text alternative");
1063
2073
                                                assert(messageGetBody(aMessage) != NULL);
1064
2074
                                                break;
1065
2075
                                        case TEXT:
 
2076
                                                dtype = messageGetDispositionType(aMessage);
1066
2077
                                                cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
1067
2078
                                                        dtype);
1068
2079
                                                if(strcasecmp(dtype, "attachment") == 0)
1069
 
                                                        addAttachment = TRUE;
1070
 
                                                else if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) {
1071
 
                                                        const text *t_line = uuencodeBegin(aMessage);
1072
 
 
 
2080
                                                        break;
 
2081
                                                if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) {
1073
2082
                                                        if(mainMessage && (mainMessage != messageIn))
1074
2083
                                                                messageDestroy(mainMessage);
1075
2084
                                                        mainMessage = NULL;
1076
 
                                                        if(t_line) {
 
2085
                                                        cptr = messageGetMimeSubtype(aMessage);
 
2086
                                                        cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
 
2087
                                                        if(uuencodeBegin(aMessage)) {
1077
2088
                                                                cli_dbgmsg("Found uuencoded message in multipart/mixed text portion\n");
1078
2089
                                                                messageSetEncoding(aMessage, "x-uuencode");
1079
 
                                                                addAttachment = TRUE;
1080
 
                                                        } else if(strcasecmp(messageGetMimeSubtype(aMessage), "plain") == 0) {
 
2090
                                                        } else if((tableFind(subtypeTable, cptr) == PLAIN) &&
 
2091
                                                                  (messageGetEncoding(aMessage) == NOENCODING)) {
1081
2092
                                                                char *filename;
1082
2093
                                                                /*
1083
2094
                                                                 * Strictly speaking
1084
 
                                                                 * a text/html part is
 
2095
                                                                 * a text/plain part is
1085
2096
                                                                 * not an attachment. We
1086
2097
                                                                 * pretend it is so that
1087
2098
                                                                 * we can decode and
1098
2109
                                                                        cli_dbgmsg("Treating %s as attachment\n",
1099
2110
                                                                                filename);
1100
2111
                                                                        free(filename);
1101
 
                                                                        addAttachment = TRUE;
1102
2112
                                                                }
1103
2113
                                                        } else {
1104
 
                                                                messageAddArgument(aMessage, "filename=textportion");
1105
 
                                                                addAttachment = TRUE;
 
2114
                                                                if(options&CL_SCAN_MAILURL)
 
2115
                                                                        if(tableFind(subtypeTable, cptr) == HTML)
 
2116
                                                                                checkURLs(aMessage, dir);
 
2117
                                                                messageAddArgument(aMessage, "filename=mixedtextportion");
1106
2118
                                                        }
1107
2119
                                                } else {
1108
 
                                                        cli_warnmsg("Text type %s is not supported\n", dtype);
 
2120
                                                        cli_dbgmsg("Text type %s is not supported\n", dtype);
1109
2121
                                                        continue;
1110
2122
                                                }
1111
2123
                                                break;
1112
2124
                                        case MESSAGE:
1113
2125
                                                /* Content-Type: message/rfc822 */
1114
 
                                                cli_dbgmsg("Found message inside multipart\n");
1115
 
                                                if(encodingLine(aMessage) == NULL) {
1116
 
                                                        assert(aMessage == messages[i]);
1117
 
                                                        messageDestroy(messages[i]);
1118
 
                                                        messages[i] = NULL;
1119
 
                                                        continue;
 
2126
                                                cli_dbgmsg("Found message inside multipart (encoding type %d)\n",
 
2127
                                                        messageGetEncoding(aMessage));
 
2128
                                                switch(messageGetEncoding(aMessage)) {
 
2129
                                                        case NOENCODING:
 
2130
                                                        case EIGHTBIT:
 
2131
                                                        case BINARY:
 
2132
                                                                if(encodingLine(aMessage) == NULL) {
 
2133
                                                                        /*
 
2134
                                                                         * This means that the message has no attachments
 
2135
                                                                         * The test for messageGetEncoding is needed since
 
2136
                                                                         * encodingLine won't have been set if the message
 
2137
                                                                         * itself has been encoded
 
2138
                                                                         */
 
2139
                                                                        cli_dbgmsg("No encoding line found in the multipart/message\n");
 
2140
                                                                        assert(aMessage == messages[i]);
 
2141
                                                                        messageDestroy(messages[i]);
 
2142
                                                                        messages[i] = NULL;
 
2143
                                                                        continue;
 
2144
                                                                }
1120
2145
                                                }
1121
 
                                                messageAddLineAtTop(aMessage,
1122
 
                                                        "Received: by clamd");
 
2146
#if     0
 
2147
                                                messageAddStrAtTop(aMessage,
 
2148
                                                        "Received: by clamd (message/rfc822)");
 
2149
#endif
1123
2150
#ifdef  SAVE_TO_DISC
1124
2151
                                                /*
1125
2152
                                                 * Save this embedded message
1136
2163
                                                 * many nested levels are
1137
2164
                                                 * involved.
1138
2165
                                                 */
1139
 
                                                body = parseEmailHeaders(aMessage, rfc821Table);
 
2166
                                                body = parseEmailHeaders(aMessage, rfc821Table, TRUE);
1140
2167
                                                /*
1141
2168
                                                 * We've finished with the
1142
2169
                                                 * original copy of the message,
1149
2176
                                                messageDestroy(messages[i]);
1150
2177
                                                messages[i] = NULL;
1151
2178
                                                if(body) {
1152
 
                                                        rc = parseEmailBody(body, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable);
 
2179
                                                        rc = parseEmailBody(body, NULL, dir, rfc821Table, subtypeTable, options);
1153
2180
                                                        messageDestroy(body);
1154
2181
                                                }
1155
2182
#endif
1162
2189
                                                 */
1163
2190
                                                cli_dbgmsg("Found multipart inside multipart\n");
1164
2191
                                                if(aMessage) {
1165
 
                                                        body = parseEmailHeaders(aMessage, rfc821Table);
1166
 
                                                        if(body) {
1167
 
                                                                assert(aMessage == messages[i]);
1168
 
                                                                messageDestroy(messages[i]);
1169
 
                                                                messages[i] = NULL;
1170
 
 
1171
 
                                                                if(mainMessage && (mainMessage != messageIn))
1172
 
                                                                        messageDestroy(mainMessage);
1173
 
 
1174
 
                                                                /*t = messageToText(body);
1175
 
                                                                rc = parseEmailBody(body, blobs, nBlobs, t, dir, rfc821Table, subtypeTable);*/
1176
 
                                                                rc = parseEmailBody(body, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
1177
 
                                                                /*textDestroy(t);*/
1178
 
 
1179
 
                                                                cli_dbgmsg("Finished recursion\n");
1180
 
 
1181
 
                                                                mainMessage = body;
1182
 
                                                        }
 
2192
                                                        /*
 
2193
                                                         * The headers were parsed when reading in the
 
2194
                                                         * whole multipart section
 
2195
                                                         */
 
2196
                                                        rc = parseEmailBody(aMessage, aText, dir, rfc821Table, subtypeTable, options);
 
2197
                                                        cli_dbgmsg("Finished recursion\n");
 
2198
                                                        assert(aMessage == messages[i]);
 
2199
                                                        messageDestroy(messages[i]);
 
2200
                                                        messages[i] = NULL;
1183
2201
                                                } else {
1184
 
                                                        rc = parseEmailBody(NULL, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable);
 
2202
                                                        rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options);
1185
2203
                                                        if(mainMessage && (mainMessage != messageIn))
1186
2204
                                                                messageDestroy(mainMessage);
1187
2205
                                                        mainMessage = NULL;
1188
2206
                                                }
1189
2207
                                                continue;
1190
 
                                        case AUDIO:
1191
 
                                        case IMAGE:
1192
 
                                        case VIDEO:
1193
 
                                                /*
1194
 
                                                 * TODO: it may be nice to
1195
 
                                                 * have an option to throw
1196
 
                                                 * away all images and sound
1197
 
                                                 * files for ultra-secure sites
1198
 
                                                 */
1199
 
                                                addAttachment = TRUE;
1200
 
                                                break;
1201
2208
                                        default:
1202
2209
                                                cli_warnmsg("Only text and application attachments are supported, type = %d\n",
1203
2210
                                                        messageGetMimeType(aMessage));
1204
2211
                                                continue;
1205
2212
                                        }
1206
2213
 
1207
 
                                        /*
1208
 
                                         * It must be either text or
1209
 
                                         * an attachment. It can't be both
1210
 
                                         */
1211
 
                                        assert(addToText || addAttachment);
1212
 
                                        assert(!(addToText && addAttachment));
1213
 
 
1214
2214
                                        if(addToText) {
 
2215
                                                cli_dbgmsg("Adding to non mime-part\n");
1215
2216
                                                aText = textAdd(aText, messageGetBody(aMessage));
1216
 
                                        } else if(addAttachment) {
1217
 
                                                blob *aBlob = messageToBlob(aMessage);
 
2217
                                        } else {
 
2218
                                                fb = messageToFileblob(aMessage, dir);
1218
2219
 
1219
 
                                                if(aBlob) {
1220
 
                                                        assert(blobGetFilename(aBlob) != NULL);
1221
 
                                                        blobClose(aBlob);
1222
 
                                                        blobList[numberOfAttachments++] = aBlob;
1223
 
                                                }
 
2220
                                                if(fb)
 
2221
                                                        fileblobDestroy(fb);
1224
2222
                                        }
1225
2223
                                        assert(aMessage == messages[i]);
1226
2224
                                        messageDestroy(messages[i]);
1227
2225
                                        messages[i] = NULL;
1228
2226
                                }
1229
2227
 
1230
 
                                if(numberOfAttachments == 0) {
1231
 
                                        /* No usable attachment was found */
1232
 
                                        rc = parseEmailBody(NULL, NULL, 0, aText, dir, rfc821Table, subtypeTable);
1233
 
                                        break;
1234
 
                                }
1235
 
 
1236
 
                                /*
1237
 
                                 * Store any existing attachments at the end of
1238
 
                                 * the list we've just built up
1239
 
                                 */
1240
 
                                numberOfNewAttachments = 0;
1241
 
                                for(i = 0; i < nBlobs; i++) {
1242
 
                                        int j;
1243
 
#ifdef  CL_DEBUG
1244
 
                                        assert(blobs[i]->magic == BLOB);
1245
 
#endif
1246
 
                                        for(j = 0; j < numberOfAttachments; j++)
1247
 
                                                if(blobcmp(blobs[i], blobList[j]) == 0)
1248
 
                                                        break;
1249
 
                                        if(j >= numberOfAttachments) {
1250
 
                                                assert(numberOfAttachments < MAX_ATTACHMENTS);
1251
 
                                                cli_dbgmsg("Attaching %s to list of blobs\n",
1252
 
                                                        blobGetFilename(blobs[i]));
1253
 
                                                blobClose(blobs[i]);
1254
 
                                                blobList[numberOfAttachments++] = blobs[i];
1255
 
                                                numberOfNewAttachments++;
1256
 
                                        } else {
1257
 
                                                cli_warnmsg("Don't scan the same file twice as '%s' and '%s'\n",
1258
 
                                                        blobGetFilename(blobs[i]),
1259
 
                                                        blobGetFilename(blobList[j]));
1260
 
                                                blobDestroy(blobs[i]);
1261
 
                                        }
1262
 
                                }
1263
 
 
1264
 
                                /*
1265
 
                                 * If we've found nothing new save what we have
1266
 
                                 * and quit - that's this part all done.
1267
 
                                 */
1268
 
                                if(numberOfNewAttachments == 0) {
1269
 
                                        rc = parseEmailBody(NULL, blobList, numberOfAttachments, NULL, dir, rfc821Table, subtypeTable);
1270
 
                                        break;
1271
 
                                }
1272
 
                                /*
1273
 
                                 * If there's only one part of the MULTIPART
1274
 
                                 * we already have the body to decode so
1275
 
                                 * there's no more work to do.
1276
 
                                 *
1277
 
                                 * This is mostly for the situation where
1278
 
                                 * broken messages claim to be multipart
1279
 
                                 * but aren't was causing us to go into
1280
 
                                 * infinite recursion
1281
 
                                 */
1282
 
                                if(multiparts > 1)
1283
 
                                        rc = parseEmailBody(mainMessage, blobList, numberOfAttachments, aText, dir, rfc821Table, subtypeTable);
1284
 
                                else if(numberOfAttachments == 1) {
1285
 
                                        (void)saveFile(blobList[0], dir);
1286
 
                                        blobDestroy(blobList[0]);
1287
 
                                }
 
2228
                                /* rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options); */
1288
2229
                                break;
1289
 
                        case DIGEST:
1290
 
                                /*
1291
 
                                 * TODO:
1292
 
                                 * According to section 5.1.5 RFC2046, the
1293
 
                                 * default mime type of multipart/digest parts
1294
 
                                 * is message/rfc822
1295
 
                                 */
1296
2230
                        case SIGNED:
1297
2231
                        case PARALLEL:
1298
2232
                                /*
1307
2241
                                if(htmltextPart == -1)
1308
2242
                                        htmltextPart = 0;
1309
2243
 
1310
 
                                rc = parseEmailBody(messages[htmltextPart], blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
1311
 
                                blobArrayDestroy(blobs, nBlobs);
1312
 
                                blobs = NULL;
1313
 
                                nBlobs = 0;
 
2244
                                rc = parseEmailBody(messages[htmltextPart], aText, dir, rfc821Table, subtypeTable, options);
 
2245
                                break;
 
2246
                        case ENCRYPTED:
 
2247
                                rc = 0;
 
2248
                                protocol = (char *)messageFindArgument(mainMessage, "protocol");
 
2249
                                if(protocol) {
 
2250
                                        if(strcasecmp(protocol, "application/pgp-encrypted") == 0) {
 
2251
                                                /* RFC2015 */
 
2252
                                                cli_warnmsg("PGP encoded attachment not scanned\n");
 
2253
                                                rc = 2;
 
2254
                                        } else
 
2255
                                                cli_warnmsg("Unknown encryption protocol '%s' - if you believe this file contains a virus, report it to bugs@clamav.net\n");
 
2256
                                        free(protocol);
 
2257
                                } else
 
2258
                                        cli_dbgmsg("Encryption method missing protocol name\n");
 
2259
 
1314
2260
                                break;
1315
2261
                        default:
1316
 
                                /*
1317
 
                                 * According to section 7.2.6 of RFC1521,
1318
 
                                 * unrecognised multiparts should be treated as
1319
 
                                 * multipart/mixed. I don't do this yet so
1320
 
                                 * that I can see what comes along...
1321
 
                                 */
1322
 
                                cli_warnmsg("Unsupported multipart format `%s'\n", mimeSubtype);
1323
 
                                rc = 0;
 
2262
                                assert(0);
 
2263
                        }
 
2264
 
 
2265
                        if(mainMessage && (mainMessage != messageIn))
 
2266
                                messageDestroy(mainMessage);
 
2267
 
 
2268
                        if(aText && (textIn == NULL)) {
 
2269
                                if((fb = fileblobCreate()) != NULL) {
 
2270
                                        cli_dbgmsg("Save non mime and/or text/plain part\n");
 
2271
                                        fileblobSetFilename(fb, dir, "textpart");
 
2272
                                        /*fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);*/
 
2273
                                        (void)textToFileblob(aText, fb);
 
2274
 
 
2275
                                        fileblobDestroy(fb);
 
2276
                                }
 
2277
                                textDestroy(aText);
1324
2278
                        }
1325
2279
 
1326
2280
                        for(i = 0; i < multiparts; i++)
1327
2281
                                if(messages[i])
1328
2282
                                        messageDestroy(messages[i]);
1329
2283
 
1330
 
                        if(blobs && (blobsIn == NULL))
1331
 
                                puts("arraydestroy");
1332
 
 
1333
 
                        if(mainMessage && (mainMessage != messageIn))
1334
 
                                messageDestroy(mainMessage);
1335
 
 
1336
 
                        if(aText && (textIn == NULL))
1337
 
                                textDestroy(aText);
 
2284
                        if(messages)
 
2285
                                free(messages);
1338
2286
 
1339
2287
                        return rc;
1340
2288
 
1351
2299
                                        cli_warnmsg("MIME type 'message' cannot be decoded\n");
1352
2300
                                        break;
1353
2301
                        }
 
2302
                        rc = 0;
1354
2303
                        if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
1355
2304
                           (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
1356
2305
                                message *m = parseEmailHeaders(mainMessage, rfc821Table);
1360
2309
                                        if(mainMessage && (mainMessage != messageIn)) {
1361
2310
                                                messageDestroy(mainMessage);
1362
2311
                                                mainMessage = NULL;
1363
 
                                        }
 
2312
                                        } else
 
2313
                                                messageReset(mainMessage);
1364
2314
                                        if(messageGetBody(m))
1365
 
                                                rc = parseEmailBody(m, NULL, 0, NULL, dir, rfc821Table, subtypeTable);
 
2315
                                                rc = parseEmailBody(m, NULL, dir, rfc821Table, subtypeTable, options);
1366
2316
 
1367
2317
                                        messageDestroy(m);
1368
2318
                                }
1369
2319
                                break;
1370
 
                        } else if(strcasecmp(mimeSubtype, "partial") == 0)
 
2320
                        } else if(strcasecmp(mimeSubtype, "disposition-notification") == 0) {
 
2321
                                /* RFC 2298 - handle like a normal email */
 
2322
                                rc = 1;
 
2323
                                break;
 
2324
                        } else if(strcasecmp(mimeSubtype, "partial") == 0) {
 
2325
#ifdef  PARTIAL_DIR
 
2326
                                /* RFC1341 message split over many emails */
 
2327
                                if(rfc1341(mainMessage, dir) >= 0)
 
2328
                                        rc = 1;
 
2329
#else
 
2330
                                cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n");
 
2331
                                rc = 0;
 
2332
#endif
 
2333
                        } else if(strcasecmp(mimeSubtype, "external-body") == 0)
1371
2334
                                /* TODO */
1372
 
                                cli_warnmsg("Content-type message/partial not yet supported");
1373
 
                        else if(strcasecmp(mimeSubtype, "external-body") == 0)
1374
 
                                /*
1375
 
                                 * I don't believe that we should be going
1376
 
                                 * around the Internet looking for referenced
1377
 
                                 * files...
1378
 
                                 */
1379
2335
                                cli_warnmsg("Attempt to send Content-type message/external-body trapped");
1380
2336
                        else
1381
 
                                cli_warnmsg("Unsupported message format `%s'\n", mimeSubtype);
 
2337
                                cli_warnmsg("Unsupported message format `%s' - if you believe this file contains a virus, report it to bugs@clamav.net\n", mimeSubtype);
 
2338
 
1382
2339
 
1383
2340
                        if(mainMessage && (mainMessage != messageIn))
1384
2341
                                messageDestroy(mainMessage);
1385
 
                        return 0;
 
2342
                        if(messages)
 
2343
                                free(messages);
 
2344
                        return rc;
1386
2345
 
1387
2346
                case APPLICATION:
1388
 
                        cptr = messageGetMimeSubtype(mainMessage);
 
2347
                        /*cptr = messageGetMimeSubtype(mainMessage);
1389
2348
 
1390
 
                        /*if((strcasecmp(cptr, "octet-stream") == 0) ||
 
2349
                        if((strcasecmp(cptr, "octet-stream") == 0) ||
1391
2350
                           (strcasecmp(cptr, "x-msdownload") == 0)) {*/
1392
2351
                        {
1393
 
                                blob *aBlob = messageToBlob(mainMessage);
 
2352
                                fb = messageToFileblob(mainMessage, dir);
1394
2353
 
1395
 
                                if(aBlob) {
1396
 
                                        cli_dbgmsg("Saving main message as attachment %d\n", nBlobs);
1397
 
                                        assert(blobGetFilename(aBlob) != NULL);
1398
 
                                        /*
1399
 
                                         * It's likely that we won't have built
1400
 
                                         * a set of attachments
1401
 
                                         */
1402
 
                                        if(blobs == NULL)
1403
 
                                                blobs = blobList;
1404
 
                                        for(i = 0; i < nBlobs; i++)
1405
 
                                                if(blobs[i] == NULL)
1406
 
                                                        break;
1407
 
                                        blobClose(aBlob);
1408
 
                                        blobs[i] = aBlob;
1409
 
                                        if(i == nBlobs) {
1410
 
                                                nBlobs++;
1411
 
                                                assert(nBlobs < MAX_ATTACHMENTS);
1412
 
                                        }
 
2354
                                if(fb) {
 
2355
                                        cli_dbgmsg("Saving main message as attachment\n");
 
2356
                                        fileblobDestroy(fb);
 
2357
                                        if(mainMessage != messageIn) {
 
2358
                                                messageDestroy(mainMessage);
 
2359
                                                mainMessage = NULL;
 
2360
                                        } else
 
2361
                                                messageReset(mainMessage);
1413
2362
                                }
1414
2363
                        } /*else
1415
2364
                                cli_warnmsg("Discarded application not sent as attachment\n");*/
1426
2375
                }
1427
2376
        }
1428
2377
 
1429
 
        cli_dbgmsg("%d attachments found\n", nBlobs);
1430
 
 
1431
 
        if(nBlobs == 0) {
1432
 
                blob *b;
1433
 
 
1434
 
                /*
1435
 
                 * No attachments - scan the text portions, often files
1436
 
                 * are hidden in HTML code
1437
 
                 */
1438
 
                cli_dbgmsg("%d multiparts found\n", multiparts);
1439
 
                for(i = 0; i < multiparts; i++) {
1440
 
                        b = messageToBlob(messages[i]);
1441
 
 
1442
 
                        assert(b != NULL);
1443
 
 
1444
 
                        cli_dbgmsg("Saving multipart %d, encoded with scheme %d\n",
1445
 
                                i, messageGetEncoding(messages[i]));
1446
 
 
1447
 
                        (void)saveFile(b, dir);
1448
 
 
1449
 
                        blobDestroy(b);
1450
 
                }
1451
 
 
1452
 
                if(mainMessage) {
1453
 
                        /*
1454
 
                         * Look for uu-encoded main file
1455
 
                         */
1456
 
                        const text *t_line;
1457
 
 
1458
 
                        if((t_line = uuencodeBegin(mainMessage)) != NULL) {
1459
 
                                cli_dbgmsg("Found uuencoded file\n");
1460
 
 
1461
 
                                /*
1462
 
                                 * Main part contains uuencoded section
1463
 
                                 */
1464
 
                                messageSetEncoding(mainMessage, "x-uuencode");
1465
 
 
1466
 
                                if((b = messageToBlob(mainMessage)) != NULL) {
1467
 
                                        if((cptr = blobGetFilename(b)) != NULL) {
1468
 
                                                cli_dbgmsg("Found uuencoded message %s\n", cptr);
1469
 
 
1470
 
                                                (void)saveFile(b, dir);
1471
 
                                        }
1472
 
                                        blobDestroy(b);
1473
 
                                }
1474
 
                        } else if((encodingLine(mainMessage) != NULL) &&
1475
 
                                  ((t_line = bounceBegin(mainMessage)) != NULL))  {
1476
 
                                const text *t;
1477
 
                                static const char encoding[] = "Content-Transfer-Encoding";
1478
 
                                /*
1479
 
                                 * Attempt to save the original (unbounced)
1480
 
                                 * message - clamscan will find that in the
1481
 
                                 * directory and call us again (with any luck)
1482
 
                                 * having found an e-mail message to handle
1483
 
                                 *
1484
 
                                 * This finds a lot of false positives, the
1485
 
                                 * search that an encoding line is in the
1486
 
                                 * bounce (i.e. it's after the bounce header)
1487
 
                                 * helps a bit, but at the expense of scanning
1488
 
                                 * the entire message. messageAddLine
1489
 
                                 * optimisation could help here, but needs
1490
 
                                 * careful thought, do it with line numbers
1491
 
                                 * would be best, since the current method in
1492
 
                                 * messageAddLine of checking encoding first
1493
 
                                 * must remain otherwise non bounce messages
1494
 
                                 * won't be scanned
1495
 
                                 */
1496
 
                                for(t = t_line; t; t = t->t_next)
1497
 
                                        if((strncasecmp(t->t_text, encoding, sizeof(encoding) - 1) == 0) &&
1498
 
                                           (strstr(t->t_text, "7bit") == NULL))
1499
 
                                                break;
1500
 
                                if(t && ((b = textToBlob(t_line, NULL)) != NULL)) {
1501
 
                                        cli_dbgmsg("Found a bounce message\n");
1502
 
 
1503
 
                                        saveFile(b, dir);
1504
 
 
1505
 
                                        blobDestroy(b);
1506
 
                                }
1507
 
                        } else {
1508
 
                                bool saveIt;
1509
 
 
1510
 
                                cli_dbgmsg("Not found uuencoded file\n");
1511
 
 
1512
 
                                if(messageGetMimeType(mainMessage) == MESSAGE)
1513
 
                                        /*
1514
 
                                         * Quick peek, if the encapsulated
1515
 
                                         * message has no
1516
 
                                         * content encoding statement don't
1517
 
                                         * bother saving to scan, it's safe
1518
 
                                         */
1519
 
                                        saveIt = (encodingLine(mainMessage) != NULL);
1520
 
                                else if((t_line = encodingLine(mainMessage)) != NULL) {
1521
 
                                        /*
1522
 
                                         * Some bounces include the message
1523
 
                                         * body without the headers.
1524
 
                                         * Unfortunately this generates a
1525
 
                                         * lot of false positives that a bounce
1526
 
                                         * has been found when it hasn't.
1527
 
                                         */
1528
 
                                        if((b = blobCreate()) != NULL) {
1529
 
                                                cli_dbgmsg("Found a bounce message with no header\n");
1530
 
                                                blobAddData(b, "Received: by clamd\n", 19);
1531
 
 
1532
 
                                                b = textToBlob(t_line, b);
1533
 
 
1534
 
                                                saveFile(b, dir);
1535
 
 
1536
 
                                                blobDestroy(b);
1537
 
                                        }
1538
 
                                        saveIt = FALSE;
1539
 
                                } else
1540
 
                                        /*
1541
 
                                         * Save the entire text portion,
1542
 
                                         * since it may be an HTML file with
1543
 
                                         * a JavaScript virus
1544
 
                                         */
1545
 
                                        saveIt = TRUE;
1546
 
 
1547
 
                                if(saveIt) {
1548
 
                                        cli_dbgmsg("Saving text part to scan\n");
1549
 
                                        saveTextPart(mainMessage, dir);
1550
 
                                }
1551
 
                        }
1552
 
                } else
1553
 
                        rc = (multiparts) ? 1 : 2;      /* anything saved? */
1554
 
        } else {
1555
 
                short attachmentNumber;
1556
 
 
1557
 
                for(attachmentNumber = 0; attachmentNumber < nBlobs; attachmentNumber++) {
1558
 
                        blob *b = blobs[attachmentNumber];
1559
 
 
1560
 
                        if(b) {
1561
 
                                if(!saveFile(b, dir))
1562
 
                                        break;
1563
 
                                blobDestroy(b);
1564
 
                                blobs[attachmentNumber] = NULL;
1565
 
                        }
1566
 
                }
1567
 
        }
1568
 
 
1569
 
        if(aText && (textIn == NULL))
 
2378
        if(aText && (textIn == NULL)) {
 
2379
                /* Look for a bounce in the text (non mime encoded) portion */
 
2380
                const text *t;
 
2381
 
 
2382
                for(t = aText; t; t = t->t_next) {
 
2383
                        const line_t *l = t->t_line;
 
2384
                        const text *lookahead, *topofbounce;
 
2385
                        const char *s;
 
2386
                        bool inheader;
 
2387
 
 
2388
                        if(l == NULL)
 
2389
                                continue;
 
2390
 
 
2391
                        s = lineGetData(l);
 
2392
 
 
2393
                        if(cli_filetype(s, strlen(s)) != CL_TYPE_MAIL)
 
2394
                                continue;
 
2395
 
 
2396
                        /*
 
2397
                         * We've found what looks like the start of a bounce
 
2398
                         * message. Only bother saving if it really is a bounce
 
2399
                         * message, this helps to speed up scanning of ping-pong
 
2400
                         * messages that have lots of bounces within bounces in
 
2401
                         * them
 
2402
                         */
 
2403
                        for(lookahead = t->t_next; lookahead; lookahead = lookahead->t_next) {
 
2404
                                l = lookahead->t_line;
 
2405
 
 
2406
                                if(l == NULL)
 
2407
                                        break;
 
2408
                                s = lineGetData(l);
 
2409
                                if(strncasecmp(s, "Content-Type:", 13) == 0)
 
2410
                                        /*
 
2411
                                         * Don't bother with text/plain or
 
2412
                                         * text/html
 
2413
                                         */
 
2414
                                        if(strstr(s, "text/") == NULL)
 
2415
                                                /*
 
2416
                                                 * Don't bother to save the unuseful
 
2417
                                                 * part
 
2418
                                                 */
 
2419
                                                break;
 
2420
                        }
 
2421
 
 
2422
                        if(lookahead && (lookahead->t_line == NULL)) {
 
2423
                                cli_dbgmsg("Non mime part bounce message is not mime encoded, so it will not be scanned\n");
 
2424
                                t = lookahead;
 
2425
                                /* look for next bounce message */
 
2426
                                continue;
 
2427
                        }
 
2428
 
 
2429
                        /*
 
2430
                         * Prescan the bounce message to see if there's likely
 
2431
                         * to be anything nasty.
 
2432
                         * This algorithm is hand crafted and may be breakable
 
2433
                         * so all submissions are welcome. It's best NOT to
 
2434
                         * remove this however you may be tempted, because it
 
2435
                         * significantly speeds up the scanning of multiple
 
2436
                         * bounces (i.e. bounces within many bounces)
 
2437
                         */
 
2438
                        for(; lookahead; lookahead = lookahead->t_next) {
 
2439
                                l = lookahead->t_line;
 
2440
 
 
2441
                                if(l) {
 
2442
                                        s = lineGetData(l);
 
2443
                                        if((strncasecmp(s, "Content-Type:", 13) == 0) &&
 
2444
                                           (strstr(s, "multipart/") == NULL) &&
 
2445
                                           (strstr(s, "message/rfc822") == NULL) &&
 
2446
                                           (strstr(s, "text/plain") == NULL))
 
2447
                                                break;
 
2448
                                }
 
2449
                        }
 
2450
                        if(lookahead == NULL) {
 
2451
                                cli_dbgmsg("cli_mbox: I believe it's plain text which must be clean\n");
 
2452
                                /* nothing here, move along please */
 
2453
                                break;
 
2454
                        }
 
2455
                        if((fb = fileblobCreate()) == NULL)
 
2456
                                break;
 
2457
                        cli_dbgmsg("Save non mime part bounce message\n");
 
2458
                        fileblobSetFilename(fb, dir, "bounce");
 
2459
                        fileblobAddData(fb, (unsigned char *)"Received: by clamd (bounce)\n", 28);
 
2460
 
 
2461
                        inheader = TRUE;
 
2462
                        topofbounce = NULL;
 
2463
                        for(;;) {
 
2464
                                l = t->t_line;
 
2465
 
 
2466
                                if(l == NULL) {
 
2467
                                        if(inheader) {
 
2468
                                                inheader = FALSE;
 
2469
                                                topofbounce = t;
 
2470
                                        }
 
2471
                                } else {
 
2472
                                        s = lineGetData(l);
 
2473
                                        fileblobAddData(fb, (unsigned char *)s, strlen(s));
 
2474
                                }
 
2475
                                fileblobAddData(fb, (unsigned char *)"\n", 1);
 
2476
                                lookahead = t->t_next;
 
2477
                                if(lookahead == NULL)
 
2478
                                        break;
 
2479
                                t = lookahead;
 
2480
                                l = t->t_line;
 
2481
                                if((!inheader) && l) {
 
2482
                                        s = lineGetData(l);
 
2483
                                        if(cli_filetype(s, strlen(s)) == CL_TYPE_MAIL) {
 
2484
                                                cli_dbgmsg("Found the start of another bounce candidate\n");
 
2485
                                                break;
 
2486
                                        }
 
2487
                                }
 
2488
                        }
 
2489
 
 
2490
                        fileblobDestroy(fb);
 
2491
                        if(topofbounce)
 
2492
                                t = topofbounce;
 
2493
                        /*
 
2494
                         * Don't do this - it slows bugs.txt
 
2495
                         */
 
2496
                        /*if(mainMessage)
 
2497
                                mainMessage->bounce = NULL;*/
 
2498
                }
1570
2499
                textDestroy(aText);
1571
 
 
1572
 
        /* Already done */
1573
 
        if(blobs && (blobsIn == NULL))
1574
 
                blobArrayDestroy(blobs, nBlobs);
 
2500
                aText = NULL;
 
2501
        }
 
2502
 
 
2503
        /*
 
2504
         * No attachments - scan the text portions, often files
 
2505
         * are hidden in HTML code
 
2506
         */
 
2507
        cli_dbgmsg("%d multiparts found\n", multiparts);
 
2508
        for(i = 0; i < multiparts; i++) {
 
2509
                fb = messageToFileblob(messages[i], dir);
 
2510
 
 
2511
                if(fb) {
 
2512
                        cli_dbgmsg("Saving multipart %d\n", i);
 
2513
 
 
2514
                        fileblobDestroy(fb);
 
2515
                }
 
2516
        }
 
2517
 
 
2518
        if(mainMessage) {
 
2519
                /*
 
2520
                 * Look for uu-encoded main file
 
2521
                 */
 
2522
                const text *t_line;
 
2523
 
 
2524
                if((t_line = uuencodeBegin(mainMessage)) != NULL) {
 
2525
                        cli_dbgmsg("Found uuencoded file\n");
 
2526
 
 
2527
                        /*
 
2528
                         * Main part contains uuencoded section
 
2529
                         */
 
2530
                        messageSetEncoding(mainMessage, "x-uuencode");
 
2531
 
 
2532
                        if((fb = messageToFileblob(mainMessage, dir)) != NULL) {
 
2533
                                if((cptr = fileblobGetFilename(fb)) != NULL)
 
2534
                                        cli_dbgmsg("Saving uuencoded message %s\n", cptr);
 
2535
                                fileblobDestroy(fb);
 
2536
                        }
 
2537
                        rc = 1;
 
2538
                } else if((encodingLine(mainMessage) != NULL) &&
 
2539
                          ((t_line = bounceBegin(mainMessage)) != NULL)) {
 
2540
                        const text *t, *start;
 
2541
                        /*
 
2542
                         * Attempt to save the original (unbounced)
 
2543
                         * message - clamscan will find that in the
 
2544
                         * directory and call us again (with any luck)
 
2545
                         * having found an e-mail message to handle.
 
2546
                         *
 
2547
                         * This finds a lot of false positives, the
 
2548
                         * search that a content type is in the
 
2549
                         * bounce (i.e. it's after the bounce header)
 
2550
                         * helps a bit.
 
2551
                         *
 
2552
                         * messageAddLine
 
2553
                         * optimisation could help here, but needs
 
2554
                         * careful thought, do it with line numbers
 
2555
                         * would be best, since the current method in
 
2556
                         * messageAddLine of checking encoding first
 
2557
                         * must remain otherwise non bounce messages
 
2558
                         * won't be scanned
 
2559
                         */
 
2560
                        for(t = start = t_line; t; t = t->t_next) {
 
2561
                                char cmd[LINE_LENGTH + 1];
 
2562
                                const char *txt = lineGetData(t->t_line);
 
2563
 
 
2564
                                if(txt == NULL)
 
2565
                                        continue;
 
2566
                                if(cli_strtokbuf(txt, 0, ":", cmd) == NULL)
 
2567
                                        continue;
 
2568
 
 
2569
                                switch(tableFind(rfc821Table, cmd)) {
 
2570
                                        case CONTENT_TRANSFER_ENCODING:
 
2571
                                                if((strstr(txt, "7bit") == NULL) &&
 
2572
                                                   (strstr(txt, "8bit") == NULL))
 
2573
                                                        break;
 
2574
                                                continue;
 
2575
                                        case CONTENT_DISPOSITION:
 
2576
                                                break;
 
2577
                                        case CONTENT_TYPE:
 
2578
                                                if(strstr(txt, "text/plain") != NULL)
 
2579
                                                        t = NULL;
 
2580
                                                break;
 
2581
                                        default:
 
2582
                                                if(strcasecmp(cmd, "From") == 0)
 
2583
                                                        start = t_line;
 
2584
                                                else if(strcasecmp(cmd, "Received") == 0)
 
2585
                                                        start = t_line;
 
2586
                                                continue;
 
2587
                                }
 
2588
                                break;
 
2589
                        }
 
2590
                        if(t && ((fb = fileblobCreate()) != NULL)) {
 
2591
                                cli_dbgmsg("Found a bounce message\n");
 
2592
                                fileblobSetFilename(fb, dir, "bounce");
 
2593
                                if(textToFileblob(start, fb) == NULL)
 
2594
                                        cli_dbgmsg("Nothing new to save in the bounce message");
 
2595
                                else
 
2596
                                        rc = 1;
 
2597
                                fileblobDestroy(fb);
 
2598
                        } else
 
2599
                                cli_dbgmsg("Not found a bounce message\n");
 
2600
                } else {
 
2601
                        bool saveIt;
 
2602
 
 
2603
                        cli_dbgmsg("Not found uuencoded file\n");
 
2604
 
 
2605
                        if(messageGetMimeType(mainMessage) == MESSAGE)
 
2606
                                /*
 
2607
                                 * Quick peek, if the encapsulated
 
2608
                                 * message has no
 
2609
                                 * content encoding statement don't
 
2610
                                 * bother saving to scan, it's safe
 
2611
                                 */
 
2612
                                saveIt = (encodingLine(mainMessage) != NULL);
 
2613
                        else if((t_line = encodingLine(mainMessage)) != NULL) {
 
2614
                                /*
 
2615
                                 * Some bounces include the message
 
2616
                                 * body without the headers.
 
2617
                                 * FIXME: Unfortunately this generates a
 
2618
                                 * lot of false positives that a bounce
 
2619
                                 * has been found when it hasn't.
 
2620
                                 */
 
2621
                                if((fb = fileblobCreate()) != NULL) {
 
2622
                                        cli_dbgmsg("Found a bounce message with no header at '%s'\n",
 
2623
                                                lineGetData(t_line->t_line));
 
2624
                                        fileblobSetFilename(fb, dir, "bounce");
 
2625
                                        fileblobAddData(fb,
 
2626
                                                (const unsigned char *)"Received: by clamd (bounce)\n",
 
2627
                                                28);
 
2628
 
 
2629
                                        fb = textToFileblob(t_line, fb);
 
2630
 
 
2631
                                        fileblobDestroy(fb);
 
2632
                                }
 
2633
                                saveIt = FALSE;
 
2634
                        } else if(multiparts == 0)
 
2635
                                /*
 
2636
                                 * Save the entire text portion,
 
2637
                                 * since it may be an HTML file with
 
2638
                                 * a JavaScript virus
 
2639
                                 */
 
2640
                                saveIt = TRUE;
 
2641
                        else
 
2642
                                saveIt = FALSE;
 
2643
 
 
2644
                        if(saveIt) {
 
2645
                                cli_dbgmsg("Saving text part to scan\n");
 
2646
                                /*
 
2647
                                 * TODO: May be better to save aText
 
2648
                                 */
 
2649
                                saveTextPart(mainMessage, dir);
 
2650
                                if(mainMessage != messageIn) {
 
2651
                                        messageDestroy(mainMessage);
 
2652
                                        mainMessage = NULL;
 
2653
                                } else
 
2654
                                        messageReset(mainMessage);
 
2655
                                rc = 1;
 
2656
                        }
 
2657
                }
 
2658
        } else
 
2659
                rc = (multiparts) ? 1 : 2;      /* anything saved? */
1575
2660
 
1576
2661
        if(mainMessage && (mainMessage != messageIn))
1577
2662
                messageDestroy(mainMessage);
1578
2663
 
 
2664
        if(messages)
 
2665
                free(messages);
 
2666
 
1579
2667
        cli_dbgmsg("parseEmailBody() returning %d\n", rc);
1580
2668
 
1581
2669
        return rc;
1589
2677
static int
1590
2678
boundaryStart(const char *line, const char *boundary)
1591
2679
{
 
2680
        char *ptr, *out;
 
2681
        int rc;
 
2682
        char buf[LINE_LENGTH + 1];
 
2683
 
 
2684
        if(line == NULL)
 
2685
                return 0;       /* empty line */
 
2686
 
 
2687
        /*cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);*/
 
2688
 
 
2689
        if((*line != '-') && (*line != '('))
 
2690
                return 0;
 
2691
 
 
2692
        if(strchr(line, '-') == NULL)
 
2693
                return 0;
 
2694
 
 
2695
        if(strlen(line) <= sizeof(buf)) {
 
2696
                out = NULL;
 
2697
                ptr = rfc822comments(line, buf);
 
2698
        } else
 
2699
                out = ptr = rfc822comments(line, NULL);
 
2700
 
 
2701
        if(ptr == NULL)
 
2702
                ptr = (char *)line;
 
2703
 
 
2704
        if(*ptr++ != '-') {
 
2705
                if(out)
 
2706
                        free(out);
 
2707
                return 0;
 
2708
        }
 
2709
 
1592
2710
        /*
1593
 
         * Gibe.B3 is broken it has:
 
2711
         * Gibe.B3 is broken, it has:
1594
2712
         *      boundary="---- =_NextPart_000_01C31177.9DC7C000"
1595
2713
         * but its boundaries look like
1596
2714
         *      ------ =_NextPart_000_01C31177.9DC7C000
1597
 
         * notice the extra '-'
 
2715
         * notice the one too few '-'.
 
2716
         * Presumably this is a deliberate exploitation of a bug in some mail
 
2717
         * clients.
 
2718
         *
 
2719
         * The trouble is that this creates a lot of false positives for
 
2720
         * boundary conditions, if we're too lax about matches. We do our level
 
2721
         * best to avoid these false positives. For example if we have
 
2722
         * boundary="1" we want to ensure that we don't break out of every line
 
2723
         * that has -1 in it instead of starting --1. This needs some more work.
 
2724
         *
 
2725
         * Look with and without RFC822 comments stripped, I've seen some
 
2726
         * samples where () are taken as comments in boundaries and some where
 
2727
         * they're not. Irrespective of whatever RFC2822 says we need to find
 
2728
         * viruses in both types of mails
1598
2729
         */
1599
 
        /*cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);*/
1600
 
        if(strstr(line, boundary) != NULL) {
1601
 
                cli_dbgmsg("found %s in %s\n", boundary, line);
1602
 
                return 1;
1603
 
        }
1604
 
        if(*line++ != '-')
1605
 
                return 0;
1606
 
        if(*line++ != '-')
1607
 
                return 0;
1608
 
        return strcasecmp(line, boundary) == 0;
 
2730
        if((strstr(ptr, boundary) != NULL) || (strstr(line, boundary) != NULL))
 
2731
                rc = 1;
 
2732
        else if(*ptr++ != '-')
 
2733
                rc = 0;
 
2734
        else
 
2735
                rc = (strcasecmp(ptr, boundary) == 0);
 
2736
 
 
2737
        if(out)
 
2738
                free(out);
 
2739
 
 
2740
        if(rc == 1)
 
2741
                cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
 
2742
 
 
2743
        return rc;
1609
2744
}
1610
2745
 
1611
2746
/*
1618
2753
{
1619
2754
        size_t len;
1620
2755
 
 
2756
        if(line == NULL)
 
2757
                return 0;
 
2758
        /*cli_dbgmsg("endOfMessage: line = '%s' boundary = '%s'\n", line, boundary);*/
1621
2759
        if(*line++ != '-')
1622
2760
                return 0;
1623
2761
        if(*line++ != '-')
1625
2763
        len = strlen(boundary);
1626
2764
        if(strncasecmp(line, boundary, len) != 0)
1627
2765
                return 0;
1628
 
        if(strlen(line) != (len + 2))
 
2766
        /*
 
2767
         * Use < rather than == because some broken mails have white
 
2768
         * space after the boundary
 
2769
         */
 
2770
        if(strlen(line) < (len + 2))
1629
2771
                return 0;
1630
2772
        line = &line[len];
1631
2773
        if(*line++ != '-')
1650
2792
        for(tableinit = rfc821headers; tableinit->key; tableinit++)
1651
2793
                if(tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0) {
1652
2794
                        tableDestroy(*rfc821Table);
 
2795
                        *rfc821Table = NULL;
1653
2796
                        return -1;
1654
2797
                }
1655
2798
 
1660
2803
                if(tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0) {
1661
2804
                        tableDestroy(*rfc821Table);
1662
2805
                        tableDestroy(*subtypeTable);
 
2806
                        *rfc821Table = NULL;
 
2807
                        *subtypeTable = NULL;
1663
2808
                        return -1;
1664
2809
                }
1665
2810
 
1678
2823
getTextPart(message *const messages[], size_t size)
1679
2824
{
1680
2825
        size_t i;
 
2826
        int textpart = -1;
1681
2827
 
1682
2828
        for(i = 0; i < size; i++) {
1683
2829
                assert(messages[i] != NULL);
1684
 
                if((messageGetMimeType(messages[i]) == TEXT) &&
1685
 
                   (strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0))
1686
 
                        return (int)i;
 
2830
                if(messageGetMimeType(messages[i]) == TEXT) {
 
2831
                        if(strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0)
 
2832
                                return (int)i;
 
2833
                        textpart = (int)i;
 
2834
                }
1687
2835
        }
1688
 
        for(i = 0; i < size; i++)
1689
 
                if(messageGetMimeType(messages[i]) == TEXT)
1690
 
                        return (int)i;
1691
 
 
1692
 
        return -1;
 
2836
        return textpart;
1693
2837
}
1694
2838
 
1695
2839
/*
1696
2840
 * strip -
1697
 
 *      Remove the trailing spaces from a buffer
 
2841
 *      Remove the trailing spaces from a buffer. Don't call this directly,
 
2842
 * always call strstrip() which is a wrapper to this routine to be used with
 
2843
 * NUL terminated strings. This code looks a bit strange because of its
 
2844
 * heritage from code that worked on strings that weren't necessarily NUL
 
2845
 * terminated.
 
2846
 * TODO: rewrite for clamAV
 
2847
 *
1698
2848
 * Returns its new length (a la strlen)
1699
2849
 *
1700
2850
 * len must be int not size_t because of the >= 0 test, it is sizeof(buf)
1707
2857
        register size_t i;
1708
2858
 
1709
2859
        if((buf == NULL) || (len <= 0))
1710
 
                return(0);
 
2860
                return 0;
1711
2861
 
1712
2862
        i = strlen(buf);
1713
2863
        if(len > (int)(i + 1))
1714
 
                return(i);
1715
 
 
 
2864
                return i;
1716
2865
        ptr = &buf[--len];
1717
2866
 
1718
 
#if     defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN)  /* watch - it may be in shared text area */
 
2867
#if     defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN) || defined(C_KFREEBSD_GNU)/* watch - it may be in shared text area */
1719
2868
        do
1720
2869
                if(*ptr)
1721
2870
                        *ptr = '\0';
1722
 
        while((--len >= 0) && !isgraph(*--ptr) && (*ptr != '\n') && (*ptr != '\r'));
 
2871
        while((--len >= 0) && (!isgraph(*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
1723
2872
#else   /* more characters can be displayed on DOS */
1724
2873
        do
1725
2874
#ifndef REAL_MODE_DOS
1740
2889
{
1741
2890
        if(s == (char *)NULL)
1742
2891
                return(0);
 
2892
 
1743
2893
        return(strip(s, strlen(s) + 1));
1744
2894
}
1745
2895
 
1746
2896
/*
1747
 
 * When parsing a MIME header see if this spans more than one line. A
1748
 
 * semi-colon at the end of the line indicates that the MIME information
1749
 
 * is continued on the next line.
1750
 
 *
1751
 
 * Some clients are broken and put white space after the ;
 
2897
 * Some broken email headers use ';' at the end of a line to continue
 
2898
 * to the next line and don't add a leading white space on the next line
1752
2899
 */
1753
2900
static bool
1754
2901
continuationMarker(const char *line)
1755
2902
{
1756
2903
        const char *ptr;
1757
2904
 
1758
 
        assert(line != NULL);
 
2905
        if(line == NULL)
 
2906
                return FALSE;
1759
2907
 
1760
2908
#ifdef  CL_DEBUG
1761
2909
        cli_dbgmsg("continuationMarker(%s)\n", line);
1787
2935
static int
1788
2936
parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
1789
2937
{
1790
 
        int type = tableFind(rfc821Table, cmd);
1791
 
#ifdef CL_THREAD_SAFE
1792
 
        char *strptr;
1793
 
#endif
1794
 
        char *copy = strdup(arg);
1795
 
        char *ptr = copy;
 
2938
        char *copy, *p;
 
2939
        const char *ptr;
 
2940
        int commandNumber;
1796
2941
 
1797
2942
        cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
1798
 
        strstrip(copy);
1799
 
 
1800
 
        switch(type) {
 
2943
 
 
2944
        copy = rfc822comments(cmd, NULL);
 
2945
        if(copy) {
 
2946
                commandNumber = tableFind(rfc821Table, copy);
 
2947
                free(copy);
 
2948
        } else
 
2949
                commandNumber = tableFind(rfc821Table, cmd);
 
2950
 
 
2951
        copy = rfc822comments(arg, NULL);
 
2952
 
 
2953
        if(copy)
 
2954
                ptr = copy;
 
2955
        else
 
2956
                ptr = arg;
 
2957
 
 
2958
        switch(commandNumber) {
1801
2959
                case CONTENT_TYPE:
1802
2960
                        /*
1803
2961
                         * Fix for non RFC1521 compliant mailers
1806
2964
                         * just simply "Content-Type:"
1807
2965
                         */
1808
2966
                        if(arg == NULL)
1809
 
                                  cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
1810
 
                        else if(strchr(copy, '/') == NULL)
1811
 
                                  cli_warnmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy);
 
2967
                                /*
 
2968
                                 * According to section 4 of RFC1521:
 
2969
                                 * "Note also that a subtype specification is
 
2970
                                 * MANDATORY. There are no default subtypes"
 
2971
                                 *
 
2972
                                 * We have to break this and make an assumption
 
2973
                                 * for the subtype because virus writers and
 
2974
                                 * email client writers don't get it right
 
2975
                                 */
 
2976
                                 cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
 
2977
                        else if(strchr(ptr, '/') == NULL)
 
2978
                                /*
 
2979
                                 * Empty field, such as
 
2980
                                 *      Content-Type:
 
2981
                                 * which I believe is illegal according to
 
2982
                                 * RFC1521
 
2983
                                 */
 
2984
                                cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
1812
2985
                        else {
 
2986
                                int i;
 
2987
                                char *mimeArgs; /* RHS of the ; */
 
2988
 
1813
2989
                                /*
1814
2990
                                 * Some clients are broken and
1815
2991
                                 * put white space after the ;
1816
2992
                                 */
1817
 
                                /*strstrip(copy);*/
1818
2993
                                if(*arg == '/') {
1819
2994
                                        cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n");
1820
2995
                                        messageSetMimeType(m, "application");
1821
2996
                                        messageSetMimeSubtype(m, "octet-stream");
1822
 
                                        strtok_r(copy, ";", &strptr);
1823
2997
                                } else {
1824
 
                                        char *s;
1825
 
 
1826
 
                                        messageSetMimeType(m, strtok_r(copy, "/", &strptr));
1827
 
 
1828
2998
                                        /*
1829
 
                                         * Stephen White <stephen@earth.li>
1830
 
                                         * Some clients put space after
1831
 
                                         * the mime type but before
1832
 
                                         * the ;
 
2999
                                         * The content type could be in quotes:
 
3000
                                         *      Content-Type: "multipart/mixed"
 
3001
                                         * FIXME: this is a hack in that ignores
 
3002
                                         *      the quotes, it doesn't handle
 
3003
                                         *      them properly
1833
3004
                                         */
1834
 
                                        s = strtok_r(NULL, ";", &strptr);
1835
 
                                        strstrip(s);
1836
 
                                        messageSetMimeSubtype(m, s);
 
3005
                                        while(isspace(*ptr))
 
3006
                                                ptr++;
 
3007
                                        if(ptr[0] == '\"')
 
3008
                                                ptr++;
 
3009
 
 
3010
                                        if(ptr[0] != '/') {
 
3011
                                                char *s;
 
3012
                                                char *mimeType; /* LHS of the ; */
 
3013
#ifdef CL_THREAD_SAFE
 
3014
                                                char *strptr;
 
3015
#endif
 
3016
 
 
3017
                                                s = mimeType = cli_strtok(ptr, 0, ";");
 
3018
                                                /*
 
3019
                                                 * Handle
 
3020
                                                 * Content-Type: foo/bar multipart/mixed
 
3021
                                                 * and
 
3022
                                                 * Content-Type: multipart/mixed foo/bar
 
3023
                                                 */
 
3024
                                                if(s && *s) for(;;) {
 
3025
#ifdef  CL_THREAD_SAFE
 
3026
                                                        int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
 
3027
#else
 
3028
                                                        int set = messageSetMimeType(m, strtok(s, "/"));
 
3029
#endif
 
3030
 
 
3031
                                                        /*
 
3032
                                                         * Stephen White <stephen@earth.li>
 
3033
                                                         * Some clients put space after
 
3034
                                                         * the mime type but before
 
3035
                                                         * the ;
 
3036
                                                         */
 
3037
#ifdef  CL_THREAD_SAFE
 
3038
                                                        s = strtok_r(NULL, ";", &strptr);
 
3039
#else
 
3040
                                                        s = strtok(NULL, ";");
 
3041
#endif
 
3042
                                                        if(s == NULL)
 
3043
                                                                break;
 
3044
                                                        if(set) {
 
3045
                                                                size_t len = strstrip(s) - 1;
 
3046
                                                                if(s[len] == '\"') {
 
3047
                                                                        s[len] = '\0';
 
3048
                                                                        len = strstrip(s);
 
3049
                                                                }
 
3050
                                                                if(len) {
 
3051
                                                                        if(strchr(s, ' ')) {
 
3052
                                                                                char *t = cli_strtok(s, 0, " ");
 
3053
 
 
3054
                                                                                messageSetMimeSubtype(m, t);
 
3055
                                                                                free(t);
 
3056
                                                                        } else
 
3057
                                                                                messageSetMimeSubtype(m, s);
 
3058
                                                                }
 
3059
                                                        }
 
3060
 
 
3061
                                                        while(*s && !isspace(*s))
 
3062
                                                                s++;
 
3063
                                                        if(*s++ == '\0')
 
3064
                                                                break;
 
3065
                                                        if(*s == '\0')
 
3066
                                                                break;
 
3067
                                                }
 
3068
                                                if(mimeType)
 
3069
                                                        free(mimeType);
 
3070
                                        }
1837
3071
                                }
1838
3072
 
1839
3073
                                /*
1842
3076
                                 * Content-Type:', arg='multipart/mixed; boundary=foo
1843
3077
                                 * we find the boundary argument set it
1844
3078
                                 */
1845
 
                                copy = strtok_r(NULL, "", &strptr);
1846
 
                                if(copy)
1847
 
                                        messageAddArguments(m, copy);
 
3079
                                i = 1;
 
3080
                                while((mimeArgs = cli_strtok(ptr, i++, ";")) != NULL) {
 
3081
                                        cli_dbgmsg("mimeArgs = '%s'\n", mimeArgs);
 
3082
 
 
3083
                                        messageAddArguments(m, mimeArgs);
 
3084
                                        free(mimeArgs);
 
3085
                                }
1848
3086
                        }
1849
3087
                        break;
1850
3088
                case CONTENT_TRANSFER_ENCODING:
1851
 
                        messageSetEncoding(m, copy);
 
3089
                        messageSetEncoding(m, ptr);
1852
3090
                        break;
1853
3091
                case CONTENT_DISPOSITION:
1854
 
                        arg = strtok_r(copy, ";", &strptr);
1855
 
                        if(arg && *arg) {
1856
 
                                messageSetDispositionType(m, arg);
1857
 
                                messageAddArgument(m, strtok_r(NULL, "\r\n", &strptr));
 
3092
                        p = cli_strtok(ptr, 0, ";");
 
3093
                        if(p) {
 
3094
                                if(*p) {
 
3095
                                        messageSetDispositionType(m, p);
 
3096
                                        free(p);
 
3097
                                        p = cli_strtok(ptr, 1, ";");
 
3098
                                        messageAddArgument(m, p);
 
3099
                                }
 
3100
                                free(p);
1858
3101
                        }
1859
3102
        }
1860
 
        free(ptr);
 
3103
        if(copy)
 
3104
                free(copy);
1861
3105
 
1862
 
        return type;
 
3106
        return 0;
1863
3107
}
1864
3108
 
1865
3109
/*
1868
3112
static void
1869
3113
saveTextPart(message *m, const char *dir)
1870
3114
{
1871
 
        blob *b;
 
3115
        fileblob *fb;
1872
3116
 
1873
3117
        messageAddArgument(m, "filename=textportion");
1874
 
        if((b = messageToBlob(m)) != NULL) {
 
3118
        if((fb = messageToFileblob(m, dir)) != NULL) {
1875
3119
                /*
1876
3120
                 * Save main part to scan that
1877
3121
                 */
1878
 
                cli_dbgmsg("Saving main message, encoded with scheme %d\n",
1879
 
                                messageGetEncoding(m));
1880
 
 
1881
 
                (void)saveFile(b, dir);
1882
 
 
 
3122
                cli_dbgmsg("Saving main message\n");
 
3123
 
 
3124
                fileblobDestroy(fb);
 
3125
        }
 
3126
}
 
3127
 
 
3128
/*
 * Handle RFC822 comments in headers.
 *
 * in:	the NUL terminated header text to scan
 * out:	where to store the text with the comments removed. It must not be
 *	the same buffer as in, and needs room for strlen(in) + 1 bytes
 *	(the output is never longer than the input).
 *	If out == NULL, a buffer is allocated with cli_malloc() and the
 *	caller must free the returned buffer
 *
 * Returns the buffer holding the stripped text.
 * Returns NULL on error (in is NULL, out of memory) or if the input
 * contains no '(' at all, i.e. it has no comments.
 *
 * Comments nest. Parentheses inside a quoted string do not start or end a
 * comment, and quotes inside a comment are plain comment text. A backslash
 * quotes the next character: the backslash is dropped and the character
 * is copied as is when it is outside of a comment.
 *
 * See section 3.4.3 of RFC822
 * TODO: handle comments that go on to more than one line
 */
static char *
rfc822comments(const char *in, char *out)
{
	const char *iptr;
	char *optr;
	int backslash, inquote, commentlevel;

	if(in == NULL)
		return NULL;

	if(strchr(in, '(') == NULL)
		return NULL;

	assert(out != in);

	if(out == NULL) {
		out = cli_malloc(strlen(in) + 1);
		if(out == NULL)
			return NULL;
	}

	backslash = commentlevel = inquote = 0;
	optr = out;

	cli_dbgmsg("rfc822comments: contains a comment\n");

	for(iptr = in; *iptr; iptr++)
		if(backslash) {
			/* quoted character: never interpreted */
			if(commentlevel == 0)
				*optr++ = *iptr;
			backslash = 0;
		} else switch(*iptr) {
			case '\\':
				backslash = 1;
				break;
			case '\"':
				/*
				 * A quote within a comment is part of the
				 * comment, it must neither be copied nor
				 * change the quoting state, otherwise the
				 * closing ')' would be missed
				 */
				if(commentlevel == 0) {
					*optr++ = '\"';
					inquote = !inquote;
				}
				break;
			case '(':
				if(inquote)
					*optr++ = '(';
				else
					commentlevel++;
				break;
			case ')':
				if(inquote)
					*optr++ = ')';
				else if(commentlevel > 0)
					commentlevel--;
				break;
			default:
				if(commentlevel == 0)
					*optr++ = *iptr;
		}

	if(backslash)	/* last character was a single backslash */
		*optr++ = '\\';
	*optr = '\0';

	/*strstrip(out);*/

	cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

	return out;
}
 
3202
 
 
3203
/*
 
3204
 * Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must
 
3205
 * free, or NULL on error
 
3206
 */
 
3207
static char *
 
3208
rfc2047(const char *in)
 
3209
{
 
3210
        char *out, *pout;
 
3211
        size_t len;
 
3212
 
 
3213
        if((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL))
 
3214
                return strdup(in);
 
3215
 
 
3216
        cli_dbgmsg("rfc2047 '%s'\n", in);
 
3217
        out = cli_malloc(strlen(in) + 1);
 
3218
 
 
3219
        if(out == NULL)
 
3220
                return NULL;
 
3221
 
 
3222
        pout = out;
 
3223
 
 
3224
        /* For each RFC2047 string */
 
3225
        while(*in) {
 
3226
                char encoding, *ptr, *enctext;
 
3227
                message *m;
 
3228
                blob *b;
 
3229
 
 
3230
                /* Find next RFC2047 string */
 
3231
                while(*in) {
 
3232
                        if((*in == '=') && (in[1] == '?')) {
 
3233
                                in += 2;
 
3234
                                break;
 
3235
                        }
 
3236
                        *pout++ = *in++;
 
3237
                }
 
3238
                /* Skip over charset, find encoding */
 
3239
                while((*in != '?') && *in)
 
3240
                        in++;
 
3241
                if(*in == '\0')
 
3242
                        break;
 
3243
                encoding = *++in;
 
3244
                encoding = tolower(encoding);
 
3245
 
 
3246
                if((encoding != 'q') && (encoding != 'b')) {
 
3247
                        cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus that was missed, report it to bugs@clamav.net\n", encoding);
 
3248
                        free(out);
 
3249
                        out = NULL;
 
3250
                        break;
 
3251
                }
 
3252
                /* Skip to encoded text */
 
3253
                if(*++in != '?')
 
3254
                        break;
 
3255
                if(*++in == '\0')
 
3256
                        break;
 
3257
 
 
3258
                enctext = strdup(in);
 
3259
                if(enctext == NULL) {
 
3260
                        free(out);
 
3261
                        out = NULL;
 
3262
                        break;
 
3263
                }
 
3264
                in = strstr(in, "?=");
 
3265
                if(in == NULL) {
 
3266
                        free(enctext);
 
3267
                        break;
 
3268
                }
 
3269
                in += 2;
 
3270
                ptr = strstr(enctext, "?=");
 
3271
                assert(ptr != NULL);
 
3272
                *ptr = '\0';
 
3273
                /*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
 
3274
 
 
3275
                m = messageCreate();
 
3276
                if(m == NULL)
 
3277
                        break;
 
3278
                messageAddStr(m, enctext);
 
3279
                free(enctext);
 
3280
                switch(encoding) {
 
3281
                        case 'q':
 
3282
                                messageSetEncoding(m, "quoted-printable");
 
3283
                                break;
 
3284
                        case 'b':
 
3285
                                messageSetEncoding(m, "base64");
 
3286
                                break;
 
3287
                }
 
3288
                b = messageToBlob(m);
 
3289
                len = blobGetDataSize(b);
 
3290
                cli_dbgmsg("Decoded as '%*.*s'\n", len, len, blobGetData(b));
 
3291
                memcpy(pout, blobGetData(b), len);
1883
3292
                blobDestroy(b);
 
3293
                messageDestroy(m);
 
3294
                if(pout[len - 1] == '\n')
 
3295
                        pout += len - 1;
 
3296
                else
 
3297
                        pout += len;
 
3298
 
1884
3299
        }
 
3300
        if(out == NULL)
 
3301
                return NULL;
 
3302
 
 
3303
        *pout = '\0';
 
3304
 
 
3305
        cli_dbgmsg("rfc2047 returns '%s'\n", out);
 
3306
        return out;
1885
3307
}
1886
3308
 
 
3309
#ifdef  PARTIAL_DIR
1887
3310
/*
1888
 
 * Save some data as a unique file in the given directory.
1889
 
 *
1890
 
 * TODO: don't save archive files if archive scanning is disabled, or
1891
 
 *      OLE2 files if that is disabled or pattern match --exclude, but
1892
 
 *      we need access to the command line options/clamav.conf here to
1893
 
 *      be able to do that
 
3311
 * Handle partial messages
1894
3312
 */
1895
 
static bool
1896
 
saveFile(const blob *b, const char *dir)
 
3313
static int
 
3314
rfc1341(message *m, const char *dir)
1897
3315
{
1898
 
        const unsigned long nbytes = blobGetDataSize(b);
1899
 
        size_t suffixLen = 0;
1900
 
        int fd;
1901
 
        const char *cptr, *suffix;
1902
 
        char filename[NAME_MAX + 1];
1903
 
 
1904
 
        assert(dir != NULL);
1905
 
 
1906
 
        if(nbytes == 0)
1907
 
                return TRUE;
1908
 
 
1909
 
        cptr = blobGetFilename(b);
1910
 
 
1911
 
        if(cptr == NULL) {
1912
 
                cptr = "unknown";
1913
 
                suffix = "";
 
3316
        fileblob *fb;
 
3317
        char *arg, *id, *number, *total, *oldfilename;
 
3318
        const char *tmpdir;
 
3319
        char pdir[NAME_MAX + 1];
 
3320
 
 
3321
        id = (char *)messageFindArgument(m, "id");
 
3322
        if(id == NULL)
 
3323
                return -1;
 
3324
 
 
3325
#ifdef  CYGWIN
 
3326
        if((tmpdir = getenv("TEMP")) == (char *)NULL)
 
3327
                if((tmpdir = getenv("TMP")) == (char *)NULL)
 
3328
                        if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
 
3329
                                tmpdir = "C:\\";
 
3330
#else
 
3331
        if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
 
3332
                if((tmpdir = getenv("TMP")) == (char *)NULL)
 
3333
                        if((tmpdir = getenv("TEMP")) == (char *)NULL)
 
3334
#ifdef  P_tmpdir
 
3335
                                tmpdir = P_tmpdir;
 
3336
#else
 
3337
                                tmpdir = "/tmp";
 
3338
#endif
 
3339
#endif
 
3340
 
 
3341
        snprintf(pdir, sizeof(pdir) - 1, "%s/clamav-partial", tmpdir);
 
3342
 
 
3343
        if((mkdir(pdir, 0700) < 0) && (errno != EEXIST)) {
 
3344
                cli_errmsg("Can't create the directory '%s'\n", pdir);
 
3345
                return -1;
1914
3346
        } else {
 
3347
                struct stat statb;
 
3348
 
 
3349
                if(stat(pdir, &statb) < 0) {
 
3350
                        cli_errmsg("Can't stat the directory '%s'\n", pdir);
 
3351
                        return -1;
 
3352
                }
 
3353
                if(statb.st_mode & 077)
 
3354
                        cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
 
3355
                                pdir, statb.st_mode & 0777);
 
3356
        }
 
3357
 
 
3358
        number = (char *)messageFindArgument(m, "number");
 
3359
        if(number == NULL) {
 
3360
                free(id);
 
3361
                return -1;
 
3362
        }
 
3363
 
 
3364
        oldfilename = (char *)messageFindArgument(m, "filename");
 
3365
        if(oldfilename == NULL)
 
3366
                oldfilename = (char *)messageFindArgument(m, "name");
 
3367
 
 
3368
        arg = cli_malloc(10 + strlen(id) + strlen(number));
 
3369
        sprintf(arg, "filename=%s%s", id, number);
 
3370
        messageAddArgument(m, arg);
 
3371
        free(arg);
 
3372
 
 
3373
        if(oldfilename) {
 
3374
                cli_warnmsg("Must reset to %s\n", oldfilename);
 
3375
                free(oldfilename);
 
3376
        }
 
3377
 
 
3378
        if((fb = messageToFileblob(m, pdir)) == NULL) {
 
3379
                free(id);
 
3380
                free(number);
 
3381
                return -1;
 
3382
        }
 
3383
 
 
3384
        fileblobDestroy(fb);
 
3385
 
 
3386
        total = (char *)messageFindArgument(m, "total");
 
3387
        cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
 
3388
        if(total) {
 
3389
                int n = atoi(number);
 
3390
                int t = atoi(total);
 
3391
                DIR *dd = NULL;
 
3392
 
 
3393
                free(total);
1915
3394
                /*
1916
 
                 * Some programs are broken and use an idea of a ".suffix"
1917
 
                 * to determine the file type rather than looking up the
1918
 
                 * magic number. CPM has a lot to answer for...
1919
 
                 * FIXME: the suffix now appears twice in the filename...
 
3395
                 * If it's the last one - reassemble it
 
3396
                 * FIXME: this assumes that we receive the parts in order
1920
3397
                 */
1921
 
                suffix = strrchr(cptr, '.');
1922
 
                if(suffix == NULL)
1923
 
                        suffix = "";
1924
 
                else {
1925
 
                        suffixLen = strlen(suffix);
1926
 
                        if(suffixLen > 4) {
1927
 
                                /* Found a full stop which isn't a suffix */
1928
 
                                suffix = "";
1929
 
                                suffixLen = 0;
1930
 
                        }
1931
 
                }
1932
 
        }
1933
 
        cli_dbgmsg("Saving attachment in %s/%s\n", dir, cptr);
1934
 
 
1935
 
        /*
1936
 
         * Allow for very long filenames. We have to truncate them to fit
1937
 
         */
1938
 
        snprintf(filename, sizeof(filename) - 1 - suffixLen, "%s/%.*sXXXXXX", dir,
1939
 
                (int)(sizeof(filename) - 9 - suffixLen - strlen(dir)), cptr);
1940
 
 
1941
 
        /*
1942
 
         * TODO: add a HAVE_MKSTEMP property
1943
 
         */
1944
 
#if     defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN)
1945
 
        fd = mkstemp(filename);
1946
 
#else
1947
 
        (void)mktemp(filename);
1948
 
        fd = open(filename, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
1949
 
#endif
1950
 
 
1951
 
        if(fd < 0) {
1952
 
                cli_errmsg("Can't create temporary file %s: %s\n", filename, strerror(errno));
1953
 
                cli_dbgmsg("%lu %d %d\n", suffixLen, sizeof(filename), strlen(filename));
1954
 
                return FALSE;
1955
 
        }
1956
 
 
1957
 
        /*
1958
 
         * Add the suffix back to the end of the filename. Tut-tut, filenames
1959
 
         * should be independant of their usage on UNIX type systems.
1960
 
         */
1961
 
        if(suffixLen > 1) {
1962
 
                char stub[NAME_MAX + 1];
1963
 
 
1964
 
                snprintf(stub, sizeof(stub), "%s%s", filename, suffix);
1965
 
#ifdef  C_LINUX
1966
 
                rename(stub, filename);
1967
 
#else
1968
 
                link(stub, filename);
1969
 
                unlink(stub);
1970
 
#endif
1971
 
        }
1972
 
 
1973
 
        cli_dbgmsg("Saving attachment as %s (%lu bytes long)\n",
1974
 
                filename, nbytes);
1975
 
 
1976
 
        if(cli_writen(fd, blobGetData(b), (size_t)nbytes) != nbytes) {
1977
 
                perror(filename);
1978
 
                close(fd);
1979
 
                return FALSE;
1980
 
        }
1981
 
 
1982
 
        return (close(fd) >= 0);
1983
 
}
 
3398
                if((n == t) && ((dd = opendir(pdir)) != NULL)) {
 
3399
                        FILE *fout;
 
3400
                        char outname[NAME_MAX + 1];
 
3401
 
 
3402
                        snprintf(outname, sizeof(outname) - 1, "%s/%s", dir, id);
 
3403
 
 
3404
                        cli_dbgmsg("outname: %s\n", outname);
 
3405
 
 
3406
                        fout = fopen(outname, "wb");
 
3407
                        if(fout == NULL) {
 
3408
                                cli_errmsg("Can't open '%s' for writing", outname);
 
3409
                                free(id);
 
3410
                                free(number);
 
3411
                                closedir(dd);
 
3412
                                return -1;
 
3413
                        }
 
3414
 
 
3415
                        for(n = 1; n <= t; n++) {
 
3416
                                char filename[NAME_MAX + 1];
 
3417
                                const struct dirent *dent;
 
3418
#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
 
3419
                                union {
 
3420
                                        struct dirent d;
 
3421
                                        char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
 
3422
                                } result;
 
3423
#endif
 
3424
 
 
3425
                                snprintf(filename, sizeof(filename), "%s%d", id, n);
 
3426
 
 
3427
#ifdef HAVE_READDIR_R_3
 
3428
                                while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
 
3429
#elif defined(HAVE_READDIR_R_2)
 
3430
                                while((dent = (struct dirent *)readdir_r(dd, &result.d))) {
 
3431
#else   /*!HAVE_READDIR_R*/
 
3432
                                while((dent = readdir(dd))) {
 
3433
#endif
 
3434
                                        FILE *fin;
 
3435
                                        char buffer[BUFSIZ];
 
3436
                                        int nblanks;
 
3437
                                        extern short cli_leavetemps_flag;
 
3438
 
 
3439
                                        if(dent->d_ino == 0)
 
3440
                                                continue;
 
3441
 
 
3442
                                        if(strncmp(filename, dent->d_name, strlen(filename)) != 0)
 
3443
                                                continue;
 
3444
 
 
3445
                                        sprintf(filename, "%s/%s", pdir, dent->d_name);
 
3446
                                        fin = fopen(filename, "rb");
 
3447
                                        if(fin == NULL) {
 
3448
                                                cli_errmsg("Can't open '%s' for reading", filename);
 
3449
                                                fclose(fout);
 
3450
                                                unlink(outname);
 
3451
                                                free(id);
 
3452
                                                free(number);
 
3453
                                                closedir(dd);
 
3454
                                                return -1;
 
3455
                                        }
 
3456
                                        nblanks = 0;
 
3457
                                        while(fgets(buffer, sizeof(buffer), fin) != NULL)
 
3458
                                                /*
 
3459
                                                 * Ensure that trailing newlines
 
3460
                                                 * aren't copied
 
3461
                                                 */
 
3462
                                                if(buffer[0] == '\n')
 
3463
                                                        nblanks++;
 
3464
                                                else {
 
3465
                                                        if(nblanks)
 
3466
                                                                do
 
3467
                                                                        putc('\n', fout);
 
3468
                                                                while(--nblanks > 0);
 
3469
                                                        fputs(buffer, fout);
 
3470
                                                }
 
3471
                                        fclose(fin);
 
3472
 
 
3473
                                        /* don't unlink if leave temps */
 
3474
                                        if(!cli_leavetemps_flag)
 
3475
                                                unlink(filename);
 
3476
                                        break;
 
3477
                                }
 
3478
                                rewinddir(dd);
 
3479
                        }
 
3480
                        closedir(dd);
 
3481
                        fclose(fout);
 
3482
                }
 
3483
        }
 
3484
        free(number);
 
3485
        free(id);
 
3486
 
 
3487
        return 0;
 
3488
}
 
3489
#endif
 
3490
 
 
3491
#if     defined(FOLLOWURLS) && (FOLLOWURLS > 0)
 
3492
static void
 
3493
checkURLs(message *m, const char *dir)
 
3494
{
 
3495
        blob *b = messageToBlob(m);
 
3496
        size_t len;
 
3497
        table_t *t;
 
3498
        int i, n;
 
3499
#if     defined(WITH_CURL) && defined(CL_THREAD_SAFE)
 
3500
        pthread_t tid[FOLLOWURLS];
 
3501
        struct arg args[FOLLOWURLS];
 
3502
#endif
 
3503
        tag_arguments_t hrefs;
 
3504
 
 
3505
        if(b == NULL)
 
3506
                return;
 
3507
 
 
3508
        len = blobGetDataSize(b);
 
3509
 
 
3510
        if(len == 0) {
 
3511
                blobDestroy(b);
 
3512
                return;
 
3513
        }
 
3514
 
 
3515
        /* TODO: make this size customisable */
 
3516
        if(len > 100*1024) {
 
3517
                cli_warnmsg("Viruses pointed to by URL not scanned in large message\n");
 
3518
                blobDestroy(b);
 
3519
                return;
 
3520
        }
 
3521
 
 
3522
        blobClose(b);
 
3523
        t = tableCreate();
 
3524
        if(t == NULL) {
 
3525
                blobDestroy(b);
 
3526
                return;
 
3527
        }
 
3528
 
 
3529
        hrefs.count = 0;
 
3530
        hrefs.tag = hrefs.value = NULL;
 
3531
 
 
3532
        cli_dbgmsg("checkURLs: calling html_normalise_mem\n");
 
3533
        if(!html_normalise_mem(blobGetData(b), len, NULL, &hrefs)) {
 
3534
                blobDestroy(b);
 
3535
                tableDestroy(t);
 
3536
                return;
 
3537
        }
 
3538
        cli_dbgmsg("checkURLs: html_normalise_mem returned\n");
 
3539
 
 
3540
        /* TODO: Do we need to call remove_html_comments? */
 
3541
 
 
3542
        n = 0;
 
3543
 
 
3544
        for(i = 0; i < hrefs.count; i++) {
 
3545
                const char *url = (const char *)hrefs.value[i];
 
3546
 
 
3547
                if(strncasecmp("http://", url, 7) == 0) {
 
3548
                        char *ptr;
 
3549
#ifdef  WITH_CURL
 
3550
#ifndef CL_THREAD_SAFE
 
3551
                        struct arg arg;
 
3552
#endif
 
3553
 
 
3554
#else   /*!WITH_CURL*/
 
3555
#ifdef  CL_THREAD_SAFE
 
3556
                        static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER;
 
3557
#endif
 
3558
                        struct stat statb;
 
3559
                        char cmd[512];
 
3560
#endif  /*WITH_CURL*/
 
3561
                        char name[NAME_MAX + 1];
 
3562
 
 
3563
                        if(tableFind(t, url) == 1) {
 
3564
                                cli_dbgmsg("URL %s already downloaded\n", url);
 
3565
                                continue;
 
3566
                        }
 
3567
                        if(n == FOLLOWURLS) {
 
3568
                                cli_warnmsg("Not all URLs will be scanned\n");
 
3569
                                break;
 
3570
                        }
 
3571
                        (void)tableInsert(t, url, 1);
 
3572
                        cli_dbgmsg("Downloading URL %s to be scanned\n", url);
 
3573
                        strncpy(name, url, sizeof(name) - 1);
 
3574
                        name[sizeof(name) - 1] = '\0';
 
3575
                        for(ptr = name; *ptr; ptr++)
 
3576
                                if(*ptr == '/')
 
3577
                                        *ptr = '_';
 
3578
 
 
3579
#ifdef  WITH_CURL
 
3580
#ifdef  CL_THREAD_SAFE
 
3581
                        args[n].dir = dir;
 
3582
                        args[n].url = url;
 
3583
                        args[n].filename = strdup(name);
 
3584
                        pthread_create(&tid[n], NULL, getURL, &args[n]);
 
3585
#else
 
3586
                        arg.url = url;
 
3587
                        arg.dir = dir;
 
3588
                        arg.filename = name;
 
3589
                        getURL(&arg);
 
3590
#endif
 
3591
 
 
3592
#else
 
3593
                        /*
 
3594
                         * TODO: maximum size and timeouts
 
3595
                         */
 
3596
                        len = sizeof(cmd) - 26 - strlen(dir) - strlen(name);
 
3597
#ifdef  CL_DEBUG
 
3598
                        snprintf(cmd, sizeof(cmd) - 1, "GET -t10 %.*s >%s/%s", len, url, dir, name);
 
3599
#else
 
3600
                        snprintf(cmd, sizeof(cmd) - 1, "GET -t10 %.*s >%s/%s 2>/dev/null", len, url, dir, name);
 
3601
#endif
 
3602
                        cmd[sizeof(cmd) - 1] = '\0';
 
3603
 
 
3604
#ifndef WITH_CURL
 
3605
                        for(ptr = cmd; *ptr; ptr++)
 
3606
                                if(strchr(";&", *ptr))
 
3607
                                        *ptr = '_';
 
3608
#endif
 
3609
 
 
3610
                        cli_dbgmsg("%s\n", cmd);
 
3611
#ifdef  CL_THREAD_SAFE
 
3612
                        pthread_mutex_lock(&system_mutex);
 
3613
#endif
 
3614
                        system(cmd);
 
3615
#ifdef  CL_THREAD_SAFE
 
3616
                        pthread_mutex_unlock(&system_mutex);
 
3617
#endif
 
3618
                        snprintf(cmd, sizeof(cmd), "%s/%s", dir, name);
 
3619
                        if(stat(cmd, &statb) >= 0)
 
3620
                                if(statb.st_size == 0) {
 
3621
                                        cli_warnmsg("URL %s failed to download\n", url);
 
3622
                                        /*
 
3623
                                         * Don't bother scanning an empty file
 
3624
                                         */
 
3625
                                        (void)unlink(cmd);
 
3626
                                }
 
3627
#endif
 
3628
                        ++n;
 
3629
                }
 
3630
        }
 
3631
        blobDestroy(b);
 
3632
        tableDestroy(t);
 
3633
 
 
3634
#if     defined(WITH_CURL) && defined(CL_THREAD_SAFE)
 
3635
        assert(n <= FOLLOWURLS);
 
3636
        cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n);
 
3637
        while(--n >= 0) {
 
3638
                pthread_join(tid[n], NULL);
 
3639
                free(args[n].filename);
 
3640
        }
 
3641
#endif
 
3642
        html_tag_arg_free(&hrefs);
 
3643
}
 
3644
 
 
3645
#ifdef  WITH_CURL
 
3646
static void *
 
3647
#ifdef  CL_THREAD_SAFE
 
3648
getURL(void *a)
 
3649
#else
 
3650
getURL(struct arg *arg)
 
3651
#endif
 
3652
{
 
3653
        CURL *curl;
 
3654
        FILE *fp;
 
3655
        struct curl_slist *headers;
 
3656
        static int initialised = 0;
 
3657
#ifdef  CL_THREAD_SAFE
 
3658
        static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
 
3659
        struct arg *arg = (struct arg *)a;
 
3660
#endif
 
3661
        const char *url = arg->url;
 
3662
        const char *dir = arg->dir;
 
3663
        const char *filename = arg->filename;
 
3664
        char fout[NAME_MAX + 1];
 
3665
#ifdef  CURLOPT_ERRORBUFFER
 
3666
        char errorbuffer[128];
 
3667
#endif
 
3668
 
 
3669
#ifdef  CL_THREAD_SAFE
 
3670
        pthread_mutex_lock(&init_mutex);
 
3671
#endif
 
3672
        if(!initialised) {
 
3673
                if(curl_global_init(CURL_GLOBAL_NOTHING) != 0) {
 
3674
#ifdef  CL_THREAD_SAFE
 
3675
                        pthread_mutex_unlock(&init_mutex);
 
3676
#endif
 
3677
                        return NULL;
 
3678
                }
 
3679
                initialised = 1;
 
3680
        }
 
3681
#ifdef  CL_THREAD_SAFE
 
3682
        pthread_mutex_unlock(&init_mutex);
 
3683
#endif
 
3684
 
 
3685
        /* easy isn't the word I'd use... */
 
3686
        curl = curl_easy_init();
 
3687
        if(curl == NULL)
 
3688
                return NULL;
 
3689
 
 
3690
        (void)curl_easy_setopt(curl, CURLOPT_USERAGENT, "www.clamav.net");
 
3691
 
 
3692
        if(curl_easy_setopt(curl, CURLOPT_URL, url) != 0)
 
3693
                return NULL;
 
3694
 
 
3695
        snprintf(fout, NAME_MAX, "%s/%s", dir, filename);
 
3696
 
 
3697
        fp = fopen(fout, "w");
 
3698
 
 
3699
        if(fp == NULL) {
 
3700
                cli_errmsg("Can't open '%s' for writing", fout);
 
3701
                curl_easy_cleanup(curl);
 
3702
                return NULL;
 
3703
        }
 
3704
#ifdef  CURLOPT_WRITEDATA
 
3705
        if(curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp) != 0) {
 
3706
                fclose(fp);
 
3707
                curl_easy_cleanup(curl);
 
3708
                return NULL;
 
3709
        }
 
3710
#else
 
3711
        if(curl_easy_setopt(curl, CURLOPT_FILE, fp) != 0) {
 
3712
                fclose(fp);
 
3713
                curl_easy_cleanup(curl);
 
3714
                return NULL;
 
3715
        }
 
3716
#endif
 
3717
 
 
3718
        /*
 
3719
         * If an item is in squid's cache get it from there (TCP_HIT/200)
 
3720
         * by default curl doesn't (TCP_CLIENT_REFRESH_MISS/200)
 
3721
         */
 
3722
        headers = curl_slist_append(NULL, "Pragma:");
 
3723
        curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
 
3724
 
 
3725
        /* These should be customisable */
 
3726
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30);
 
3727
        curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10);
 
3728
#ifdef  CURLOPT_MAXFILESIZE
 
3729
        curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, 50*1024);
 
3730
#endif
 
3731
 
 
3732
#ifdef  CL_THREAD_SAFE
 
3733
#ifdef  CURLOPT_DNS_USE_GLOBAL_CACHE
 
3734
        curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
 
3735
#endif
 
3736
#endif
 
3737
 
 
3738
        /*
 
3739
         * Prevent password: prompting with older versions
 
3740
         * FIXME: a better username?
 
3741
         */
 
3742
        curl_easy_setopt(curl, CURLOPT_USERPWD, "username:password");
 
3743
 
 
3744
#ifdef  CURLOPT_ERRORBUFFER
 
3745
        curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errorbuffer);
 
3746
#endif
 
3747
 
 
3748
        /*
 
3749
         * FIXME: valgrind reports "pthread_mutex_unlock: mutex is not locked"
 
3750
         * from gethostbyaddr_r within this. It may be a bug in libcurl
 
3751
         * rather than this code, but I need to check, see Curl_resolv()
 
3752
         * If pushed really hard it will sometimes say
 
3753
         * Conditional jump or move depends on uninitialised value(s) and
 
3754
         * quit. But the program seems to work OK without valgrind...
 
3755
         * Perhaps Curl_resolv() isn't thread safe?
 
3756
         */
 
3757
        /*
 
3758
         * On some C libraries (notably with FC3, glibc-2.3.3-74) you get a
 
3759
         * memory leak * here in getaddrinfo(), see
 
3760
         *      https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=139559
 
3761
         */
 
3762
 
 
3763
        if(curl_easy_perform(curl) != CURLE_OK) {
 
3764
#ifdef  CURLOPT_ERRORBUFFER
 
3765
                cli_warnmsg("URL %s failed to download: %s\n", url, errorbuffer);
 
3766
#else
 
3767
                cli_warnmsg("URL %s failed to download\n", url);
 
3768
#endif
 
3769
        }
 
3770
 
 
3771
        fclose(fp);
 
3772
        curl_slist_free_all(headers);
 
3773
        curl_easy_cleanup(curl);
 
3774
 
 
3775
        return NULL;
 
3776
}
 
3777
#endif
 
3778
 
 
3779
#else
 
3780
static void
 
3781
checkURLs(message *m, const char *dir)
 
3782
{
 
3783
}
 
3784
#endif
 
3785
 
 
3786
#ifdef HAVE_BACKTRACE
 
3787
/*
 * Handler for SIGSEGV: log a backtrace through syslog and terminate.
 *
 * The default disposition is put back before anything else is done, so a
 * second fault raised while the trace is being produced is not delivered
 * to this handler again.
 */
static void
sigsegv(int sig)
{
	(void)sig;

	signal(SIGSEGV, SIG_DFL);

	/* non-zero argument selects syslog output */
	print_trace(1);

	exit(SIGSEGV);
}
 
3794
 
 
3795
/*
 * Report the current call stack (at most 10 frames).
 *
 * use_syslog == 0: the trace is written with cli_dbgmsg()
 * use_syslog != 0: the trace is written to syslog at LOG_ERR
 *
 * Only the header line is produced if the symbol names can't be obtained.
 */
static void
print_trace(int use_syslog)
{
	void *array[10];
	int nframes;
	char **strings;
	unsigned int i;
	const pid_t pid = getpid();

	/* backtrace() reports the number of frames as an int */
	nframes = backtrace(array, (int)(sizeof(array) / sizeof(array[0])));
	strings = backtrace_symbols(array, nframes);

	if(use_syslog == 0)
		cli_dbgmsg("Backtrace of pid %d:\n", (int)pid);
	else
		syslog(LOG_ERR, "Backtrace of pid %d:", (int)pid);

	/* backtrace_symbols() returns NULL when it can't allocate memory */
	if(strings == NULL)
		return;

	for(i = 0; (nframes > 0) && (i < (unsigned int)nframes); i++)
		if(use_syslog)
			syslog(LOG_ERR, "bt[%u]: %s", i, strings[i]);
		else
			cli_dbgmsg("%s\n", strings[i]);

	/* TODO: dump the current email */

	/* one free() releases the whole array returned by backtrace_symbols() */
	free(strings);
}
 
3822
#endif
 
3823
 
 
3824
static bool
 
3825
usefulHeader(int commandNumber, const char *cmd)
 
3826
{
 
3827
        switch(commandNumber) {
 
3828
                case CONTENT_TRANSFER_ENCODING:
 
3829
                case CONTENT_DISPOSITION:
 
3830
                case CONTENT_TYPE:
 
3831
                        return TRUE;
 
3832
                default:
 
3833
                        if(strcasecmp(cmd, "From") == 0)
 
3834
                                return TRUE;
 
3835
                        else if(strcasecmp(cmd, "Received") == 0)
 
3836
                                return TRUE;
 
3837
                        else if(strcasecmp(cmd, "De") == 0)
 
3838
                                return TRUE;
 
3839
        }
 
3840
 
 
3841
        return FALSE;
 
3842
}
 
3843
 
 
3844
/*
 
3845
 * Save the uuencoded part of the file as it is read in since there's no need
 
3846
 * to include it in the parse tree. Saves memory and parse time.
 
3847
 * Return < 0 for failure
 
3848
 */
 
3849
static int
 
3850
uufasttrack(message *m, const char *firstline, const char *dir, FILE *fin)
 
3851
{
 
3852
        fileblob *fb = fileblobCreate();
 
3853
        char buffer[LINE_LENGTH + 1];
 
3854
        char *filename = cli_strtok(firstline, 2, " ");
 
3855
 
 
3856
        if(filename == NULL)
 
3857
                return -1;
 
3858
 
 
3859
        fileblobSetFilename(fb, dir, filename);
 
3860
        cli_dbgmsg("Fast track uudecode %s\n", filename);
 
3861
        free(filename);
 
3862
 
 
3863
        while(fgets(buffer, sizeof(buffer) - 1, fin) != NULL) {
 
3864
                unsigned char data[1024];
 
3865
                const unsigned char *uptr;
 
3866
                size_t len;
 
3867
 
 
3868
                cli_chomp(buffer);
 
3869
                if(strcasecmp(buffer, "end") == 0)
 
3870
                        break;
 
3871
                if(buffer[0] == '\0')
 
3872
                        break;
 
3873
 
 
3874
                uptr = decodeLine(m, UUENCODE, buffer, data, sizeof(data));
 
3875
                if(uptr == NULL)
 
3876
                        break;
 
3877
 
 
3878
                len = (size_t)(uptr - data);
 
3879
                if((len > 62) || (len == 0))
 
3880
                        break;
 
3881
 
 
3882
                if(fileblobAddData(fb, data, len) < 0)
 
3883
                        break;
 
3884
        }
 
3885
 
 
3886
        fileblobDestroy(fb);
 
3887
        return 1;
 
3888
}
 
3889
 
 
3890
/*
 * Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r"
 *
 * Whatever the line ending found, a single '\n' is stored in its place.
 *
 * buffer: where the line is stored. Up to len characters may be written
 *	followed by the terminating NUL, so it needs room for len + 1 bytes
 * len: the maximum number of characters to store
 * fin: stream to read from
 *
 * Returns buffer, or NULL if the stream is already at end of file, the
 * arguments are invalid, a read error occurs, or len characters were read
 * without reaching the end of the line.
 */
static char *
getline_from_mbox(char *buffer, size_t len, FILE *fin)
{
	char *out;

	if(feof(fin))
		return NULL;

	if((buffer == NULL) || (len == 0)) {
		cli_errmsg("Invalid call to getline_from_mbox(). Report to bugs@clamav.net\n");
		return NULL;
	}

	out = buffer;

	for(;;) {
		const int c = getc(fin);

		if(ferror(fin))
			return NULL;

		if(c == EOF)
			break;

		if((c == '\n') || (c == '\r')) {
			/* swallow the other half of a two character line ending */
			const int partner = (c == '\n') ? '\r' : '\n';
			int next;

			*out++ = '\n';
			next = getc(fin);
			if((next != partner) && !feof(fin))
				ungetc(next, fin);
			break;
		}

		*out++ = c;

		if(--len == 0) {
			/* probably, the email breaks RFC821 */
			cli_dbgmsg("getline_from_mbox: buffer overflow stopped\n");
			return NULL;
		}
	}

	*out = '\0';

	return buffer;
}
 
3945
 
 
3946
#ifdef  NEW_WORLD
 
3947
/*
 * like cli_memstr - but returns the location of the match
 * FIXME: need a case insensitive version
 *
 * haystack, hs: the memory to search and its length in bytes
 * needle, ns: the bytes to look for and their length
 *
 * Returns a pointer to the first occurrence of needle within haystack, or
 * NULL if there is none. If haystack and needle are the same pointer,
 * haystack is returned without looking at the lengths.
 */
static const char *
cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
{
	const char *pt, *hay;
	size_t n;

	if(haystack == needle)
		return haystack;

	if(hs < ns)
		return NULL;

	/* also deals with ns == 0, so needle[0] is never read for an empty needle */
	if(memcmp(haystack, needle, ns) == 0)
		return haystack;

	hay = haystack;
	n = hs;	/* bytes left, counted from hay */

	while((pt = memchr(hay, needle[0], n)) != NULL) {
		/*
		 * Take the distance as a pointer difference: casting the
		 * pointers to int loses bits where pointers are wider than int
		 */
		n -= (size_t)(pt - hay);
		if(n < ns)
			break;

		if(memcmp(pt, needle, ns) == 0)
			return pt;

		/* no match at pt, carry on from the next byte */
		hay = pt + 1;
		n--;
	}

	return NULL;
}
 
3986
#endif