1
/* Copyright (c) 2013 Dovecot authors, see the included COPYING file */
4
#include "istream-private.h"
9
#include "hash-format.h"
10
#include "rfc822-parser.h"
11
#include "message-parser.h"
12
#include "istream-attachment-extractor.h"
14
/* Maximum number of bytes that may follow the valid base64 data of a MIME
   part. astream_decode_base64() treats a part with more trailing bytes than
   this as being only partially base64-encoded. */
#define BASE64_ATTACHMENT_MAX_EXTRA_BYTES 1024
16
/* How the body of the current MIME part is being handled
   (see astream_add_body()). */
enum mail_attachment_state {
17
/* not an attachment: body data is added to the output stream as-is */
MAIL_ATTACHMENT_STATE_NO,
18
/* attachment candidate: body data is buffered in memory until it reaches
   the configured min_size */
MAIL_ATTACHMENT_STATE_MAYBE,
19
/* attachment: body data is written to a temp file */
MAIL_ATTACHMENT_STATE_YES
32
/* State collected for the MIME part that is currently being processed. */
struct attachment_istream_part {
33
/* heap copies of the part's Content-Type / Content-Disposition values,
   set while parsing headers and freed in astream_part_reset() */
char *content_type, *content_disposition;
34
/* how the part's body is currently being handled */
enum mail_attachment_state state;
35
/* start offset of the message part in the original input stream */
38
/* for saving attachments base64-decoded: */
39
/* current position of the base64 validation state machine */
enum base64_state base64_state;
40
/* base64_line_blocks: number of base64 blocks on a full line, taken from
   the first line; cur_base64_blocks: blocks counted on the current line */
unsigned int base64_line_blocks, cur_base64_blocks;
42
bool base64_have_crlf; /* CRLF linefeeds */
46
/* output stream writing the part's body to the temp file */
struct ostream *temp_output;
50
/* The attachment extractor istream. */
struct attachment_istream {
51
/* the stream callbacks cast their stream pointer back to
   struct attachment_istream *, so this member has to come first */
struct istream_private istream;
54
/* extractor settings and callbacks (min_size, hash_format, want_attachment,
   open_temp_fd, open/close_attachment_ostream, drain_parent_input) */
struct istream_attachment_settings set;
57
/* message parser reading the parent stream; deinitialized on close */
struct message_parser_ctx *parser;
58
/* MIME part that the most recently handled block belonged to */
struct message_part *cur_part;
59
/* per-part state, cleared by astream_part_reset() */
struct attachment_istream_part part;
65
/* Append a copy of size bytes from data to astream's own stream buffer
   (space is obtained with i_stream_alloc()) and advance the buffer's end
   position accordingly. */
static void stream_add_data(struct attachment_istream *astream,
66
const void *data, size_t size)
69
memcpy(i_stream_alloc(&astream->istream, size), data, size);
70
astream->istream.pos += size;
74
/* Parse the Content-Type header from hdr's full value. When
   rfc822_parse_content_type() succeeds, a heap copy of the parsed type is
   stored in astream->part.content_type, replacing any earlier value. */
static void parse_content_type(struct attachment_istream *astream,
75
const struct message_header_line *hdr)
77
struct rfc822_parser_context parser;
78
string_t *content_type;
80
rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
81
rfc822_skip_lwsp(&parser);
84
/* parse into a scratch string; what is kept is the i_strdup() copy below */
content_type = t_str_new(64);
85
if (rfc822_parse_content_type(&parser, content_type) >= 0) {
86
i_free(astream->part.content_type);
87
astream->part.content_type =
88
i_strdup(str_c(content_type));
94
/* Remember the raw Content-Disposition value: a heap copy of hdr's full value
   is stored in astream->part.content_disposition, replacing any earlier
   value. */
parse_content_disposition(struct attachment_istream *astream,
95
const struct message_header_line *hdr)
97
/* just pass it without parsing to is_attachment() callback */
98
i_free(astream->part.content_disposition);
99
astream->part.content_disposition =
100
i_strndup(hdr->full_value, hdr->full_value_len);
103
/* Copy one header line from the message parser into the output stream and
   hand Content-Type / Content-Disposition headers over to their parse
   helpers. */
static void astream_parse_header(struct attachment_istream *astream,
104
struct message_header_line *hdr)
106
/* the header name and the "middle" separator are written only when this
   line is not marked as a continuation of an earlier one */
if (!hdr->continued) {
107
stream_add_data(astream, hdr->name, hdr->name_len);
108
stream_add_data(astream, hdr->middle, hdr->middle_len);
110
stream_add_data(astream, hdr->value, hdr->value_len);
111
/* re-add the line ending in the same form (CRLF or LF) the line had */
if (!hdr->no_newline) {
112
if (hdr->crlf_newline)
113
stream_add_data(astream, "\r\n", 2);
115
stream_add_data(astream, "\n", 1);
118
/* the header continues on further lines: request full_value, which is
   what the parse_*() helpers read */
if (hdr->continues) {
119
hdr->use_full_value = TRUE;
123
/* header names are compared case-insensitively */
if (strcasecmp(hdr->name, "Content-Type") == 0)
124
parse_content_type(astream, hdr);
125
else if (strcasecmp(hdr->name, "Content-Disposition") == 0)
126
parse_content_disposition(astream, hdr);
129
/* Decide whether the given MIME part should be extracted as an attachment.
   When the set.want_attachment callback is set, it is called with the
   Content-Type and Content-Disposition collected for the current part and
   its answer is returned. */
static bool astream_want_attachment(struct attachment_istream *astream,
130
struct message_part *part)
132
struct istream_attachment_header ahdr;
134
if ((part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0) {
135
/* multiparts may contain attachments as children,
136
but they're never themselves */
139
if (astream->set.want_attachment == NULL)
142
/* fields other than the two set below are left zeroed */
memset(&ahdr, 0, sizeof(ahdr));
144
ahdr.content_type = astream->part.content_type;
145
ahdr.content_disposition = astream->part.content_disposition;
146
return astream->set.want_attachment(&ahdr, astream->context);
149
/* Handle a linefeed inside the base64 data: compare the number of base64
   blocks on the finished line (cur_base64_blocks) with the expected number
   of blocks per line (base64_line_blocks), then start counting a new line. */
static int astream_base64_decode_lf(struct attachment_istream_part *part)
151
part->base64_state = BASE64_STATE_0;
152
if (part->cur_base64_blocks < part->base64_line_blocks) {
154
/* a line shorter than the established length ends the base64 data */
part->base64_state = BASE64_STATE_EOM;
156
} else if (part->base64_line_blocks == 0) {
158
/* first line: its block count becomes the expected line length */
if (part->cur_base64_blocks == 0)
160
part->base64_line_blocks = part->cur_base64_blocks;
161
} else if (part->cur_base64_blocks == part->base64_line_blocks) {
166
/* begin counting blocks for the next line */
part->cur_base64_blocks = 0;
171
/* Advance the base64 validation state machine (part->base64_state) by one
   input character. pos is the character's index within the data currently
   being scanned; together with temp_output->offset it is used to record in
   base64_bytes where the valid base64 data ends so far. */
astream_try_base64_decode_char(struct attachment_istream_part *part,
172
size_t pos, char chr)
174
switch (part->base64_state) {
176
if (base64_is_valid_char(chr))
177
part->base64_state++;
178
else if (chr == '\r')
179
part->base64_state = BASE64_STATE_CR;
180
else if (chr == '\n') {
181
return astream_base64_decode_lf(part);
187
if (!base64_is_valid_char(chr))
189
part->base64_state++;
192
if (base64_is_valid_char(chr))
193
part->base64_state++;
195
part->base64_state = BASE64_STATE_EOB;
200
/* offset just past this character, counted from the start of the data
   written to the temp file */
part->base64_bytes = part->temp_output->offset + pos + 1;
201
if (base64_is_valid_char(chr)) {
202
/* a block was completed; continue with the next one */
part->base64_state = BASE64_STATE_0;
203
part->cur_base64_blocks++;
204
} else if (chr == '=') {
205
/* padding completes the block and ends the base64 data */
part->base64_state = BASE64_STATE_EOM;
206
part->cur_base64_blocks++;
212
case BASE64_STATE_CR:
215
part->base64_have_crlf = TRUE;
216
return astream_base64_decode_lf(part);
217
case BASE64_STATE_EOB:
221
part->base64_bytes = part->temp_output->offset + pos + 1;
222
part->base64_state = BASE64_STATE_EOM;
223
part->cur_base64_blocks++;
225
case BASE64_STATE_EOM:
232
/* Run size bytes of body data through astream_try_base64_decode_char(), one
   character at a time. It first checks whether validation has already failed
   or the end of the base64 data (BASE64_STATE_EOM) was already reached. */
astream_try_base64_decode(struct attachment_istream_part *part,
233
const unsigned char *data, size_t size)
238
if (part->base64_failed || part->base64_state == BASE64_STATE_EOM)
241
for (i = 0; i < size; i++) {
242
ret = astream_try_base64_decode_char(part, i, (char)data[i]);
245
/* from now on the part is no longer considered base64-decodable */
part->base64_failed = TRUE;
251
/* Obtain a temp file descriptor from the set.open_temp_fd callback and create
   a corked output stream (part.temp_output) that writes to it. */
static int astream_open_output(struct attachment_istream *astream)
255
/* -1 is the "no temp file open" value for temp_fd */
i_assert(astream->part.temp_fd == -1);
257
fd = astream->set.open_temp_fd(astream->context);
261
astream->part.temp_fd = fd;
262
astream->part.temp_output = o_stream_create_fd(fd, 0, FALSE);
263
o_stream_cork(astream->part.temp_output);
267
/* Handle one block of MIME body data according to the current part's
   attachment state: pass it through to the output stream, buffer it in
   memory, or write it to the part's temp file. */
static void astream_add_body(struct attachment_istream *astream,
268
const struct message_block *block)
270
struct attachment_istream_part *part = &astream->part;
274
switch (part->state) {
275
case MAIL_ATTACHMENT_STATE_NO:
276
stream_add_data(astream, block->data, block->size);
278
case MAIL_ATTACHMENT_STATE_MAYBE:
279
/* we'll write data to in-memory buffer until we reach
280
attachment min_size */
281
if (part->part_buf == NULL) {
283
buffer_create_dynamic(default_pool,
284
astream->set.min_size);
286
part_buf = part->part_buf;
287
new_size = part_buf->used + block->size;
288
if (new_size < astream->set.min_size) {
289
buffer_append(part_buf, block->data, block->size);
292
/* attachment is large enough. we'll first copy the buffered
293
data from memory to temp file */
294
if (astream_open_output(astream) < 0) {
295
/* failed, fallback to just saving it inline */
296
part->state = MAIL_ATTACHMENT_STATE_NO;
297
stream_add_data(astream, part_buf->data, part_buf->used);
298
stream_add_data(astream, block->data, block->size);
301
part->state = MAIL_ATTACHMENT_STATE_YES;
302
/* the buffered bytes get the same treatment as later blocks: base64
   validation, hashing and writing to the temp file */
astream_try_base64_decode(part, part_buf->data, part_buf->used);
303
hash_format_loop(astream->set.hash_format,
304
part_buf->data, part_buf->used);
305
o_stream_nsend(part->temp_output,
306
part_buf->data, part_buf->used);
307
buffer_set_used_size(part_buf, 0);
308
/* fall through to write the new data to temp file */
309
case MAIL_ATTACHMENT_STATE_YES:
310
astream_try_base64_decode(part, block->data, block->size);
311
hash_format_loop(astream->set.hash_format,
312
block->data, block->size);
313
o_stream_nsend(part->temp_output, block->data, block->size);
318
/* Decode the base64 data collected in the part's temp file into a second temp
   file and, on success, make that file the part's temp file. The attachment
   hash is reset and recomputed over the decoded bytes. Any bytes found in the
   old temp file after the base64 data are gathered in extra_buf and added
   back to the message stream. The caller treats a negative return value as
   "could not decode". */
static int astream_decode_base64(struct attachment_istream *astream)
320
struct attachment_istream_part *part = &astream->part;
321
buffer_t *extra_buf = NULL;
322
struct istream *input, *base64_input;
323
struct ostream *output;
324
const unsigned char *data;
331
if (part->base64_bytes < astream->set.min_size ||
332
part->temp_output->offset > part->base64_bytes +
333
BASE64_ATTACHMENT_MAX_EXTRA_BYTES) {
334
/* only a small part of the MIME part is base64-encoded. */
338
if (part->base64_line_blocks == 0) {
339
/* only one line of base64 */
340
part->base64_line_blocks = part->cur_base64_blocks;
341
i_assert(part->base64_line_blocks > 0);
344
/* decode base64 data and write it to another temp file */
345
outfd = astream->set.open_temp_fd(astream->context);
349
buf = buffer_create_dynamic(default_pool, 1024);
350
input = i_stream_create_fd(part->temp_fd, IO_BLOCK_SIZE, FALSE);
351
/* base64_input exposes only the first base64_bytes bytes of input */
base64_input = i_stream_create_limit(input, part->base64_bytes);
352
output = o_stream_create_fd_file(outfd, 0, FALSE);
353
o_stream_cork(output);
355
/* the hash is recomputed over the decoded data */
hash_format_reset(astream->set.hash_format);
356
while ((ret = i_stream_read(base64_input)) > 0) {
357
data = i_stream_get_data(base64_input, &size);
358
buffer_set_used_size(buf, 0);
359
if (base64_decode(data, size, &size, buf) < 0) {
360
i_error("istream-attachment: BUG: "
361
"Attachment base64 data unexpectedly broke");
365
i_stream_skip(base64_input, size);
366
o_stream_nsend(output, buf->data, buf->used);
367
hash_format_loop(astream->set.hash_format,
368
buf->data, buf->used);
372
} else if (base64_input->stream_errno != 0) {
373
/* report the stream's own error text rather than errno (%m) */
i_error("istream-attachment: read(%s) failed: %s",
374
i_stream_get_name(base64_input), i_stream_get_error(base64_input));
377
if (o_stream_nfinish(output) < 0) {
378
i_error("istream-attachment: write(%s) failed: %s",
379
o_stream_get_name(output), o_stream_get_error(output));
384
i_stream_unref(&base64_input);
385
o_stream_unref(&output);
387
if (input->v_offset != part->temp_output->offset && !failed) {
388
/* write the rest of the data to the message stream */
389
extra_buf = buffer_create_dynamic(default_pool, 1024);
390
while ((ret = i_stream_read_data(input, &data, &size, 0)) > 0) {
391
buffer_append(extra_buf, data, size);
392
i_stream_skip(input, size);
395
if (input->stream_errno != 0) {
396
/* base64_input was already unreferenced above and the stream that
   failed here is input, so the error must be reported using input */
i_error("istream-attachment: read(%s) failed: %s",
397
i_stream_get_name(input), i_stream_get_error(input));
401
i_stream_unref(&input);
408
/* successfully wrote it. switch to using it. */
409
o_stream_destroy(&part->temp_output);
410
i_close_fd(&part->temp_fd);
411
part->temp_fd = outfd;
413
if (extra_buf != NULL) {
414
stream_add_data(astream, extra_buf->data, extra_buf->used);
415
buffer_free(&extra_buf);
421
/* Finish a MIME part that was saved to a temp file as an attachment: finalize
   the temp file output, base64-decode it when the data looks cleanly
   decodable, and copy the temp file contents to the attachment output stream
   obtained from set.open_attachment_ostream. Error messages are returned
   through *error_r. */
astream_part_finish(struct attachment_istream *astream, const char **error_r)
423
struct attachment_istream_part *part = &astream->part;
424
struct istream_attachment_info info;
425
struct istream *input;
426
struct ostream *output;
427
string_t *digest_str;
428
const unsigned char *data;
432
if (o_stream_nfinish(part->temp_output) < 0) {
433
*error_r = t_strdup_printf("write(%s) failed: %s",
434
o_stream_get_name(part->temp_output),
435
o_stream_get_error(part->temp_output));
439
memset(&info, 0, sizeof(info));
440
info.start_offset = astream->part.start_offset;
441
/* base64_bytes contains how many valid base64 bytes there are so far.
442
if the base64 ends properly, it'll specify how much of the MIME part
443
is saved as an attachment. the rest of the data (typically
444
linefeeds) is added back to main stream */
445
info.encoded_size = part->base64_bytes;
446
/* get the hash before base64-decoder resets it */
447
digest_str = t_str_new(128);
448
hash_format_write(astream->set.hash_format, digest_str);
449
info.hash = str_c(digest_str);
451
/* if it looks like we can decode base64 without any data loss,
452
do it and write the decoded data to another temp file. */
453
if (!part->base64_failed) {
454
if (part->base64_state == BASE64_STATE_0 &&
455
part->base64_bytes > 0) {
456
/* there is no trailing LF or '=' characters,
457
but it's not completely empty */
458
part->base64_state = BASE64_STATE_EOM;
460
if (part->base64_state == BASE64_STATE_EOM) {
461
/* base64 data looks ok. */
462
if (astream_decode_base64(astream) < 0)
463
part->base64_failed = TRUE;
465
part->base64_failed = TRUE;
469
/* open attachment output file */
470
info.part = astream->cur_part;
471
if (!part->base64_failed) {
472
/* record the line layout of the original base64 data */
info.base64_blocks_per_line = part->base64_line_blocks;
473
info.base64_have_crlf = part->base64_have_crlf;
474
/* base64-decoder updated the hash, use it */
475
str_truncate(digest_str, 0);
476
hash_format_write(astream->set.hash_format, digest_str);
477
info.hash = str_c(digest_str);
479
/* couldn't decode base64, so write the entire MIME part
481
info.encoded_size = part->temp_output->offset;
483
if (astream->set.open_attachment_ostream(&info, &output, error_r,
484
astream->context) < 0)
487
/* copy data to attachment from temp file */
488
input = i_stream_create_fd(part->temp_fd, IO_BLOCK_SIZE, FALSE);
489
while (i_stream_read_data(input, &data, &size, 0) > 0) {
490
o_stream_nsend(output, data, size);
491
i_stream_skip(input, size);
494
if (input->stream_errno != 0) {
495
*error_r = t_strdup_printf("read(%s) failed: %s",
496
i_stream_get_name(input), i_stream_get_error(input));
499
i_stream_destroy(&input);
501
/* the callback is told whether everything up to here succeeded (ret == 0) */
if (astream->set.close_attachment_ostream(output, ret == 0, error_r,
502
astream->context) < 0)
507
/* Release everything held for the current part (temp output stream and fd,
   header strings, in-memory buffer), zero the per-part state and reset the
   attachment hash. */
static void astream_part_reset(struct attachment_istream *astream)
509
struct attachment_istream_part *part = &astream->part;
511
if (part->temp_output != NULL)
512
o_stream_destroy(&part->temp_output);
513
if (part->temp_fd != -1)
514
i_close_fd(&part->temp_fd);
516
i_free_and_null(part->content_type);
517
i_free_and_null(part->content_disposition);
518
if (part->part_buf != NULL)
519
buffer_free(&part->part_buf);
521
/* NOTE(review): zeroing the struct leaves temp_fd at 0, while -1 is the
   "not open" value checked elsewhere in this file; confirm that temp_fd is
   set back to -1 after this */
memset(part, 0, sizeof(*part));
523
hash_format_reset(astream->set.hash_format);
527
/* Called when the current MIME part has ended. A buffered part that stayed
   below the attachment size limit is flushed into the output stream, an
   extracted attachment is finished with astream_part_finish(), and the
   per-part state is reset afterwards. */
astream_end_of_part(struct attachment_istream *astream, const char **error_r)
529
struct attachment_istream_part *part = &astream->part;
533
/* MIME part changed. we're now parsing the end of a boundary,
534
possibly followed by message epilogue */
535
switch (part->state) {
536
case MAIL_ATTACHMENT_STATE_NO:
538
case MAIL_ATTACHMENT_STATE_MAYBE:
539
/* MIME part wasn't large enough to be an attachment */
540
if (part->part_buf != NULL) {
541
stream_add_data(astream, part->part_buf->data,
542
part->part_buf->used);
543
/* 1 if any buffered bytes were added to the stream, otherwise 0 */
ret = part->part_buf->used > 0 ? 1 : 0;
546
case MAIL_ATTACHMENT_STATE_YES:
547
/* remember the buffered size to measure what finishing adds */
old_size = astream->istream.pos - astream->istream.skip;
548
if (astream_part_finish(astream, error_r) < 0)
551
/* finished base64 may have added a few more trailing
552
bytes to the stream */
553
ret = astream->istream.pos -
554
astream->istream.skip - old_size;
558
part->state = MAIL_ATTACHMENT_STATE_NO;
559
astream_part_reset(astream);
563
/* Fetch the next block from the message parser and feed it to the header or
   body handlers. On the normal path the return value is the number of bytes
   the block added to the stream buffer, and *retry_r is set to TRUE when it
   added none. */
static int astream_read_next(struct attachment_istream *astream, bool *retry_r)
565
struct istream_private *stream = &astream->istream;
566
struct message_block block;
567
size_t old_size, new_size;
573
/* unread data already fills the buffer up to max_buffer_size */
if (stream->pos - stream->skip >= stream->max_buffer_size)
576
/* an earlier failure is sticky: keep reporting EINVAL */
if (astream->failed) {
577
stream->istream.stream_errno = EINVAL;
581
old_size = stream->pos - stream->skip;
582
switch (message_parser_parse_next_block(astream->parser, &block)) {
585
ret = astream_end_of_part(astream, &error);
588
new_size = stream->pos - stream->skip;
589
return new_size - old_size;
591
stream->istream.eof = TRUE;
592
/* pass on the parent stream's error state */
stream->istream.stream_errno = stream->parent->stream_errno;
595
io_stream_set_error(&stream->iostream, "%s", error);
596
stream->istream.stream_errno = EINVAL;
597
astream->failed = TRUE;
599
astream->cur_part = NULL;
608
if (block.part != astream->cur_part && astream->cur_part != NULL) {
609
/* end of a MIME part */
610
if (astream_end_of_part(astream, &error) < 0) {
611
io_stream_set_error(&stream->iostream, "%s", error);
612
stream->istream.stream_errno = EINVAL;
613
astream->failed = TRUE;
617
astream->cur_part = block.part;
619
if (block.hdr != NULL) {
620
/* parsing a header */
621
astream_parse_header(astream, block.hdr);
622
} else if (block.size == 0) {
624
/* no header and no data in this block: decide here whether the part's
   body is an attachment candidate */
if (astream_want_attachment(astream, block.part)) {
625
astream->part.state = MAIL_ATTACHMENT_STATE_MAYBE;
626
astream->part.start_offset = stream->parent->v_offset;
629
astream_add_body(astream, &block);
631
new_size = stream->pos - stream->skip;
632
*retry_r = new_size == old_size;
633
return new_size - old_size;
637
/* istream read callback: calls astream_read_next(), repeating while it
   reports a retry and set.drain_parent_input is enabled. The final retry
   flag is kept in astream->retry_read. */
i_stream_attachment_extractor_read(struct istream_private *stream)
639
struct attachment_istream *astream =
640
(struct attachment_istream *)stream;
645
ret = astream_read_next(astream, &retry);
646
} while (retry && astream->set.drain_parent_input);
648
astream->retry_read = retry;
652
/* iostream close callback: deinitializes the message parser if it is still
   set, frees the hash format and the memory pool, and calls i_stream_close()
   on the parent stream. */
static void i_stream_attachment_extractor_close(struct iostream_private *stream,
655
struct attachment_istream *astream =
656
(struct attachment_istream *)stream;
657
struct message_part *parts;
660
if (astream->parser != NULL) {
661
ret = message_parser_deinit(&astream->parser, &parts);
662
i_assert(ret == 0); /* we didn't use preparsed message_parts */
664
hash_format_deinit_free(&astream->set.hash_format);
665
if (astream->pool != NULL)
666
pool_unref(&astream->pool);
668
i_stream_close(astream->istream.parent);
672
/* Create an attachment-extracting istream on top of input. The message is
   read through a message parser that also returns multipart blocks and
   boundaries. set must have min_size > 0 and non-NULL hash_format,
   open_attachment_ostream and close_attachment_ostream. The caller's
   set->hash_format pointer is cleared, and the stream frees the hash format
   when it is closed. */
i_stream_create_attachment_extractor(struct istream *input,
673
struct istream_attachment_settings *set,
676
struct attachment_istream *astream;
678
i_assert(set->min_size > 0);
679
i_assert(set->hash_format != NULL);
680
i_assert(set->open_attachment_ostream != NULL);
681
i_assert(set->close_attachment_ostream != NULL);
683
astream = i_new(struct attachment_istream, 1);
684
/* -1 marks "no temp file open" */
astream->part.temp_fd = -1;
686
astream->context = context;
687
astream->retry_read = TRUE;
689
/* make sure the caller doesn't try to double-free this */
690
set->hash_format = NULL;
692
astream->istream.max_buffer_size = input->real_stream->max_buffer_size;
694
astream->istream.read = i_stream_attachment_extractor_read;
695
astream->istream.iostream.close = i_stream_attachment_extractor_close;
697
astream->istream.istream.readable_fd = FALSE;
698
/* blocking behavior is inherited from the parent stream */
astream->istream.istream.blocking = input->blocking;
699
astream->istream.istream.seekable = FALSE;
701
astream->pool = pool_alloconly_create("istream attachment", 1024);
702
astream->parser = message_parser_init(astream->pool, input, 0,
703
MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS |
704
MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES);
705
return i_stream_create(&astream->istream, input,
706
i_stream_get_fd(input));
709
/* Return the retry flag stored by the most recent read on this attachment
   extractor stream (TRUE for a stream that has not been read yet). input is
   expected to be a stream created by i_stream_create_attachment_extractor(),
   since its real_stream is cast to struct attachment_istream. */
bool i_stream_attachment_extractor_can_retry(struct istream *input)
711
struct attachment_istream *astream =
712
(struct attachment_istream *)input->real_stream;
714
return astream->retry_read;