1
/* Copyright (c) 2013 Dovecot authors, see the included COPYING file */
4
#include "istream-private.h"
9
#include "hash-format.h"
10
#include "rfc822-parser.h"
11
#include "message-parser.h"
12
#include "istream-attachment-extractor.h"
14
#define BASE64_ATTACHMENT_MAX_EXTRA_BYTES 1024
16
/* How the body of the MIME part currently being read is handled. */
enum mail_attachment_state {
17
/* body data is copied to the output stream as-is */
MAIL_ATTACHMENT_STATE_NO,
18
/* body data is buffered in memory until it reaches the minimum
   attachment size */
MAIL_ATTACHMENT_STATE_MAYBE,
19
/* body data is written to a temp file so it can be saved as an
   attachment */
MAIL_ATTACHMENT_STATE_YES
32
/* State kept for the MIME part that is currently being read. */
struct attachment_istream_part {
33
/* header values of this part; heap-allocated copies that are freed in
   astream_part_reset() */
char *content_type, *content_disposition;
34
enum mail_attachment_state state;
35
/* start offset of the message part in the original input stream */
38
/* for saving attachments base64-decoded: */
39
/* current state of the base64 scanner, see
   astream_try_base64_decode_char() */
enum base64_state base64_state;
40
/* base64_line_blocks: number of base64 blocks on a full line, taken
   from the first line that was seen (0 = not known yet).
   cur_base64_blocks: number of blocks counted so far on the current
   line. */
unsigned int base64_line_blocks, cur_base64_blocks;
42
bool base64_have_crlf; /* CRLF linefeeds */
46
/* output stream created for the temp fd in astream_open_output() */
struct ostream *temp_output;
50
/* The attachment extractor stream. Body data of parts that are accepted as
   attachments is written to a temp file and handed to the attachment
   output callbacks instead of being added to this stream's buffer. */
struct attachment_istream {
51
/* stream base; the read/close callbacks cast their stream pointer to
   struct attachment_istream */
struct istream_private istream;
54
/* extractor settings: callbacks, min_size, hash_format, ... */
struct istream_attachment_settings set;
57
/* message parser reading the parent input */
struct message_parser_ctx *parser;
58
/* MIME part of the most recently handled parser block */
struct message_part *cur_part;
59
/* state of the MIME part currently being read */
struct attachment_istream_part part;
64
/* Append size bytes from data to the stream's buffer: the space is obtained
   with i_stream_alloc() and the buffer's end position is advanced by size. */
static void stream_add_data(struct attachment_istream *astream,
65
const void *data, size_t size)
68
memcpy(i_stream_alloc(&astream->istream, size), data, size);
69
astream->istream.pos += size;
73
/* Parse the full value of a Content-Type header and remember the parsed
   content type for the current part. A previously stored value is freed
   and replaced only when parsing succeeds. */
static void parse_content_type(struct attachment_istream *astream,
74
const struct message_header_line *hdr)
76
struct rfc822_parser_context parser;
77
string_t *content_type;
79
rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
80
rfc822_skip_lwsp(&parser);
83
content_type = t_str_new(64);
84
if (rfc822_parse_content_type(&parser, content_type) >= 0) {
85
i_free(astream->part.content_type);
86
/* keep a heap-allocated copy of the parsed string */
astream->part.content_type =
87
i_strdup(str_c(content_type));
93
/* Remember the full, unparsed value of a Content-Disposition header for the
   current part, replacing any previously stored value. */
parse_content_disposition(struct attachment_istream *astream,
94
const struct message_header_line *hdr)
96
/* just pass it without parsing to is_attachment() callback */
97
i_free(astream->part.content_disposition);
98
astream->part.content_disposition =
99
i_strndup(hdr->full_value, hdr->full_value_len);
102
/* Handle one header line from the message parser: copy it to the output
   stream and pick up the Content-Type and Content-Disposition values of
   the current part. */
static void astream_parse_header(struct attachment_istream *astream,
103
struct message_header_line *hdr)
105
/* the header name and the middle part are written only when this line
   isn't a continuation line */
if (!hdr->continued) {
106
stream_add_data(astream, hdr->name, hdr->name_len);
107
stream_add_data(astream, hdr->middle, hdr->middle_len);
109
stream_add_data(astream, hdr->value, hdr->value_len);
110
/* write the same kind of linefeed that the input had, if any */
if (!hdr->no_newline) {
111
if (hdr->crlf_newline)
112
stream_add_data(astream, "\r\n", 2);
114
stream_add_data(astream, "\n", 1);
117
/* the header continues on following lines: ask for the full value */
if (hdr->continues) {
118
hdr->use_full_value = TRUE;
122
/* header names are compared case-insensitively */
if (strcasecmp(hdr->name, "Content-Type") == 0)
123
parse_content_type(astream, hdr);
124
else if (strcasecmp(hdr->name, "Content-Disposition") == 0)
125
parse_content_disposition(astream, hdr);
128
/* Decide whether the given MIME part should be handled as an attachment.
   Multipart parts and a missing want_attachment() callback are
   special-cased; otherwise the decision is delegated to the
   want_attachment() callback, which is given the part's stored
   Content-Type and Content-Disposition values. */
static bool astream_want_attachment(struct attachment_istream *astream,
129
struct message_part *part)
131
struct istream_attachment_header ahdr;
133
if ((part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0) {
134
/* multiparts may contain attachments as children,
135
but they're never themselves */
138
if (astream->set.want_attachment == NULL)
141
/* start from a zeroed header struct so unset fields are 0/NULL */
memset(&ahdr, 0, sizeof(ahdr));
143
ahdr.content_type = astream->part.content_type;
144
ahdr.content_disposition = astream->part.content_disposition;
145
return astream->set.want_attachment(&ahdr, astream->context);
148
/* Update the base64 line bookkeeping when a linefeed ends the current line
   of base64 data. The number of blocks on the line is compared against
   the line length established by the first line. */
static int astream_base64_decode_lf(struct attachment_istream_part *part)
150
part->base64_state = BASE64_STATE_0;
151
/* the line is shorter than the established line length: treat it as
   the end of the base64 data */
if (part->cur_base64_blocks < part->base64_line_blocks) {
153
part->base64_state = BASE64_STATE_EOM;
155
/* no line length has been established yet */
} else if (part->base64_line_blocks == 0) {
157
if (part->cur_base64_blocks == 0)
159
part->base64_line_blocks = part->cur_base64_blocks;
160
/* the line has exactly the established length */
} else if (part->cur_base64_blocks == part->base64_line_blocks) {
165
/* start counting the blocks of the next line */
part->cur_base64_blocks = 0;
170
/* Feed one character of the part's body to the base64 scanner and update
   the scanner state. pos is the character's index within the data chunk
   being scanned; together with the temp output's current offset it is used
   to record in base64_bytes how far the base64 data reaches. */
astream_try_base64_decode_char(struct attachment_istream_part *part,
171
size_t pos, char chr)
173
switch (part->base64_state) {
175
if (base64_is_valid_char(chr))
176
part->base64_state++;
177
else if (chr == '\r')
178
part->base64_state = BASE64_STATE_CR;
179
else if (chr == '\n') {
180
return astream_base64_decode_lf(part);
186
if (!base64_is_valid_char(chr))
188
part->base64_state++;
191
if (base64_is_valid_char(chr))
192
part->base64_state++;
194
part->base64_state = BASE64_STATE_EOB;
199
/* record the offset just past this character */
part->base64_bytes = part->temp_output->offset + pos + 1;
200
if (base64_is_valid_char(chr)) {
201
/* a full block was read, continue with the next one */
part->base64_state = BASE64_STATE_0;
202
part->cur_base64_blocks++;
203
/* '=' ends the block and the base64 data */
} else if (chr == '=') {
204
part->base64_state = BASE64_STATE_EOM;
205
part->cur_base64_blocks++;
211
/* the previous character was CR */
case BASE64_STATE_CR:
214
part->base64_have_crlf = TRUE;
215
return astream_base64_decode_lf(part);
216
case BASE64_STATE_EOB:
220
part->base64_bytes = part->temp_output->offset + pos + 1;
221
part->base64_state = BASE64_STATE_EOM;
222
part->cur_base64_blocks++;
224
case BASE64_STATE_EOM:
231
/* Run the base64 scanner over a chunk of body data, one character at a
   time. Once base64_failed is set or the scanner is in the end-of-data
   state, the early check below applies and the data isn't scanned. */
astream_try_base64_decode(struct attachment_istream_part *part,
232
const unsigned char *data, size_t size)
237
if (part->base64_failed || part->base64_state == BASE64_STATE_EOM)
240
for (i = 0; i < size; i++) {
241
/* i is passed as the character's position within this chunk */
ret = astream_try_base64_decode_char(part, i, (char)data[i]);
244
part->base64_failed = TRUE;
250
/* Open a temp file for the current part through the open_temp_fd()
   callback and create a corked output stream for writing to it. The part
   must not have a temp file open already. */
static int astream_open_output(struct attachment_istream *astream)
254
i_assert(astream->part.temp_fd == -1);
256
fd = astream->set.open_temp_fd(astream->context);
260
astream->part.temp_fd = fd;
261
astream->part.temp_output = o_stream_create_fd(fd, 0, FALSE);
262
o_stream_cork(astream->part.temp_output);
266
/* Handle a block of MIME part body data according to the part's attachment
   state: pass it to the output stream, buffer it in memory, or write it to
   the part's temp file. */
static void astream_add_body(struct attachment_istream *astream,
267
const struct message_block *block)
269
struct attachment_istream_part *part = &astream->part;
273
switch (part->state) {
274
case MAIL_ATTACHMENT_STATE_NO:
275
stream_add_data(astream, block->data, block->size);
277
case MAIL_ATTACHMENT_STATE_MAYBE:
278
/* we'll write data to in-memory buffer until we reach
279
attachment min_size */
280
if (part->part_buf == NULL) {
282
buffer_create_dynamic(default_pool,
283
astream->set.min_size);
285
part_buf = part->part_buf;
286
new_size = part_buf->used + block->size;
287
if (new_size < astream->set.min_size) {
288
buffer_append(part_buf, block->data, block->size);
291
/* attachment is large enough. we'll first copy the buffered
292
data from memory to temp file */
293
if (astream_open_output(astream) < 0) {
294
/* failed, fallback to just saving it inline */
295
part->state = MAIL_ATTACHMENT_STATE_NO;
296
stream_add_data(astream, part_buf->data, part_buf->used);
297
stream_add_data(astream, block->data, block->size);
300
part->state = MAIL_ATTACHMENT_STATE_YES;
301
/* the buffered data goes through the same steps as new data does
   below: base64 scan, hash update and write to the temp file */
astream_try_base64_decode(part, part_buf->data, part_buf->used);
302
hash_format_loop(astream->set.hash_format,
303
part_buf->data, part_buf->used);
304
o_stream_nsend(part->temp_output,
305
part_buf->data, part_buf->used);
306
buffer_set_used_size(part_buf, 0);
307
/* fall through to write the new data to temp file */
308
case MAIL_ATTACHMENT_STATE_YES:
309
astream_try_base64_decode(part, block->data, block->size);
310
hash_format_loop(astream->set.hash_format,
311
block->data, block->size);
312
o_stream_nsend(part->temp_output, block->data, block->size);
317
/* Decode the base64 data that was written to the part's temp file into a
   second temp file and, on success, switch the part over to the decoded
   file. Only the first base64_bytes bytes of the temp file are decoded;
   whatever follows them is read back and appended to the message stream.
   The hash is reset and recalculated over the decoded data. */
static int astream_decode_base64(struct attachment_istream *astream)
319
struct attachment_istream_part *part = &astream->part;
320
buffer_t *extra_buf = NULL;
321
struct istream *input, *base64_input;
322
struct ostream *output;
323
const unsigned char *data;
330
if (part->base64_bytes < astream->set.min_size ||
331
part->temp_output->offset > part->base64_bytes +
332
BASE64_ATTACHMENT_MAX_EXTRA_BYTES) {
333
/* only a small part of the MIME part is base64-encoded. */
337
if (part->base64_line_blocks == 0) {
338
/* only one line of base64 */
339
part->base64_line_blocks = part->cur_base64_blocks;
340
i_assert(part->base64_line_blocks > 0);
343
/* decode base64 data and write it to another temp file */
344
outfd = astream->set.open_temp_fd(astream->context);
348
buf = buffer_create_dynamic(default_pool, 1024);
349
input = i_stream_create_fd(part->temp_fd, IO_BLOCK_SIZE, FALSE);
350
/* limit reading to the part of the temp file that is base64 data */
base64_input = i_stream_create_limit(input, part->base64_bytes);
351
output = o_stream_create_fd_file(outfd, 0, FALSE);
352
o_stream_cork(output);
354
/* the hash is recalculated from the decoded data */
hash_format_reset(astream->set.hash_format);
355
while ((ret = i_stream_read(base64_input)) > 0) {
356
data = i_stream_get_data(base64_input, &size);
357
buffer_set_used_size(buf, 0);
358
if (base64_decode(data, size, &size, buf) < 0) {
359
i_error("istream-attachment: BUG: "
360
"Attachment base64 data unexpectedly broke");
364
/* size was updated by base64_decode(); skip that much input */
i_stream_skip(base64_input, size);
365
o_stream_nsend(output, buf->data, buf->used);
366
hash_format_loop(astream->set.hash_format,
367
buf->data, buf->used);
371
} else if (base64_input->stream_errno != 0) {
372
i_error("istream-attachment: read(%s) failed: %m",
373
i_stream_get_name(base64_input));
376
if (o_stream_nfinish(output) < 0) {
377
i_error("istream-attachment: write(%s) failed: %m",
378
o_stream_get_name(output));
383
i_stream_unref(&base64_input);
384
o_stream_unref(&output);
386
if (input->v_offset != part->temp_output->offset && !failed) {
387
/* write the rest of the data to the message stream */
388
extra_buf = buffer_create_dynamic(default_pool, 1024);
389
while ((ret = i_stream_read_data(input, &data, &size, 0)) > 0) {
390
buffer_append(extra_buf, data, size);
391
i_stream_skip(input, size);
394
/* report the stream whose error is being checked. base64_input was
   already unreferenced above and must not be used here anymore. */
if (input->stream_errno != 0) {
395
i_error("istream-attachment: read(%s) failed: %m",
396
i_stream_get_name(input));
400
i_stream_unref(&input);
407
/* successfully wrote it. switch to using it. */
408
o_stream_destroy(&part->temp_output);
409
i_close_fd(&part->temp_fd);
410
part->temp_fd = outfd;
412
if (extra_buf != NULL) {
413
stream_add_data(astream, extra_buf->data, extra_buf->used);
414
buffer_free(&extra_buf);
419
/* Finish a part that was written to a temp file: flush the temp file,
   try to base64-decode its contents, and copy the resulting temp file
   contents to the attachment output stream obtained from the
   open_attachment_ostream() callback. The output is finally handed to the
   close_attachment_ostream() callback. */
static int astream_part_finish(struct attachment_istream *astream)
421
struct attachment_istream_part *part = &astream->part;
422
struct istream_attachment_info info;
423
struct istream *input;
424
struct ostream *output;
425
string_t *digest_str;
426
const unsigned char *data;
430
if (o_stream_nfinish(part->temp_output) < 0) {
431
i_error("istream-attachment: write(%s) failed: %m",
432
o_stream_get_name(part->temp_output));
436
memset(&info, 0, sizeof(info));
437
info.start_offset = astream->part.start_offset;
438
/* base64_bytes contains how many valid base64 bytes there are so far.
439
if the base64 ends properly, it'll specify how much of the MIME part
440
is saved as an attachment. the rest of the data (typically
441
linefeeds) is added back to main stream */
442
info.encoded_size = part->base64_bytes;
443
/* get the hash before base64-decoder resets it */
444
digest_str = t_str_new(128);
445
hash_format_write(astream->set.hash_format, digest_str);
446
info.hash = str_c(digest_str);
448
/* if it looks like we can decode base64 without any data loss,
449
do it and write the decoded data to another temp file. */
450
if (!part->base64_failed) {
451
if (part->base64_state == BASE64_STATE_0 &&
452
part->base64_bytes > 0) {
453
/* there is no trailing LF or '=' characters,
454
but it's not completely empty */
455
part->base64_state = BASE64_STATE_EOM;
457
if (part->base64_state == BASE64_STATE_EOM) {
458
/* base64 data looks ok. */
459
if (astream_decode_base64(astream) < 0)
460
part->base64_failed = TRUE;
462
part->base64_failed = TRUE;
466
/* open attachment output file */
467
info.part = astream->cur_part;
468
if (!part->base64_failed) {
469
info.base64_blocks_per_line = part->base64_line_blocks;
470
info.base64_have_crlf = part->base64_have_crlf;
471
/* base64-decoder updated the hash, use it */
472
str_truncate(digest_str, 0);
473
hash_format_write(astream->set.hash_format, digest_str);
474
info.hash = str_c(digest_str);
476
/* couldn't decode base64, so write the entire MIME part
478
info.encoded_size = part->temp_output->offset;
480
if (astream->set.open_attachment_ostream(&info, &output,
481
astream->context) < 0)
484
/* copy data to attachment from temp file */
485
input = i_stream_create_fd(part->temp_fd, IO_BLOCK_SIZE, FALSE);
486
while (i_stream_read_data(input, &data, &size, 0) > 0) {
487
o_stream_nsend(output, data, size);
488
i_stream_skip(input, size);
491
if (input->stream_errno != 0) {
492
i_error("istream-attachment: read(%s) failed: %m",
493
i_stream_get_name(input));
496
i_stream_destroy(&input);
498
/* the callback's second argument is whether everything succeeded
   (ret == 0) */
if (astream->set.close_attachment_ostream(output, ret == 0,
499
astream->context) < 0)
504
/* Release everything held for the current part (temp output stream, temp
   fd, header value copies, in-memory buffer), clear the part state and
   reset the hash. */
static void astream_part_reset(struct attachment_istream *astream)
506
struct attachment_istream_part *part = &astream->part;
508
if (part->temp_output != NULL)
509
o_stream_destroy(&part->temp_output);
510
if (part->temp_fd != -1)
511
i_close_fd(&part->temp_fd);
513
i_free_and_null(part->content_type);
514
i_free_and_null(part->content_disposition);
515
if (part->part_buf != NULL)
516
buffer_free(&part->part_buf);
518
/* everything owned by the part was released above, so the whole struct
   can be zeroed */
memset(part, 0, sizeof(*part));
520
hash_format_reset(astream->set.hash_format);
524
/* Finish handling of the MIME part that just ended. Data that was only
   buffered in memory is added to the output stream; a part that was
   written to a temp file is finished as an attachment. Afterwards the part
   state is reset for the next part. */
astream_end_of_part(struct attachment_istream *astream)
526
struct attachment_istream_part *part = &astream->part;
530
/* MIME part changed. we're now parsing the end of a boundary,
531
possibly followed by message epilogue */
532
switch (part->state) {
533
case MAIL_ATTACHMENT_STATE_NO:
535
case MAIL_ATTACHMENT_STATE_MAYBE:
536
/* MIME part wasn't large enough to be an attachment */
537
if (part->part_buf != NULL) {
538
stream_add_data(astream, part->part_buf->data,
539
part->part_buf->used);
540
/* ret tells whether any data was added to the stream */
ret = part->part_buf->used > 0 ? 1 : 0;
543
case MAIL_ATTACHMENT_STATE_YES:
544
/* remember the buffered size to see how much finishing adds */
old_size = astream->istream.pos - astream->istream.skip;
545
if (astream_part_finish(astream) < 0)
548
/* finished base64 may have added a few more trailing
549
bytes to the stream */
550
ret = astream->istream.pos -
551
astream->istream.skip - old_size;
555
part->state = MAIL_ATTACHMENT_STATE_NO;
556
astream_part_reset(astream);
560
/* Get the next block from the message parser and handle it as a header
   line or as body data. On the normal path the return value is the number
   of bytes that were added to the stream's buffer, and *retry_r is set to
   whether nothing was added. */
static int astream_read_next(struct attachment_istream *astream, bool *retry_r)
562
struct istream_private *stream = &astream->istream;
563
struct message_block block;
564
size_t old_size, new_size;
569
/* the buffer already holds at least max_buffer_size bytes */
if (stream->pos - stream->skip >= stream->max_buffer_size)
572
old_size = stream->pos - stream->skip;
573
switch (message_parser_parse_next_block(astream->parser, &block)) {
576
ret = astream_end_of_part(astream);
579
new_size = stream->pos - stream->skip;
580
return new_size - old_size;
582
stream->istream.eof = TRUE;
583
/* pass on the parent stream's error, if any */
stream->istream.stream_errno = stream->parent->stream_errno;
586
stream->istream.stream_errno = EINVAL;
587
astream->cur_part = NULL;
596
if (block.part != astream->cur_part && astream->cur_part != NULL) {
597
/* end of a MIME part */
598
if (astream_end_of_part(astream) < 0) {
599
stream->istream.stream_errno = EINVAL;
603
astream->cur_part = block.part;
605
if (block.hdr != NULL) {
606
/* parsing a header */
607
astream_parse_header(astream, block.hdr);
608
/* no header line and no data: presumably the end of the part's headers
   (TODO confirm against the message parser). the attachment decision
   is made at this point. */
} else if (block.size == 0) {
610
if (astream_want_attachment(astream, block.part)) {
611
astream->part.state = MAIL_ATTACHMENT_STATE_MAYBE;
612
astream->part.start_offset = stream->parent->v_offset;
615
astream_add_body(astream, &block);
617
new_size = stream->pos - stream->skip;
618
*retry_r = new_size == old_size;
619
return new_size - old_size;
623
/* read() callback of the stream. Calls astream_read_next() repeatedly for
   as long as it asks for a retry and the drain_parent_input setting is
   enabled. The last retry flag is saved in retry_read, which
   i_stream_attachment_extractor_can_retry() returns. */
i_stream_attachment_extractor_read(struct istream_private *stream)
625
struct attachment_istream *astream =
626
(struct attachment_istream *)stream;
631
ret = astream_read_next(astream, &retry);
632
} while (retry && astream->set.drain_parent_input);
634
astream->retry_read = retry;
638
/* close() callback of the stream: deinitializes the message parser if it
   still exists, frees the hash format and the memory pool. The parent
   stream may also be closed. */
static void i_stream_attachment_extractor_close(struct iostream_private *stream,
641
struct attachment_istream *astream =
642
(struct attachment_istream *)stream;
643
struct message_part *parts;
646
if (astream->parser != NULL) {
647
ret = message_parser_deinit(&astream->parser, &parts);
648
i_assert(ret == 0); /* we didn't use preparsed message_parts */
650
/* the stream owns the hash format, see
   i_stream_create_attachment_extractor() */
hash_format_deinit_free(&astream->set.hash_format);
651
if (astream->pool != NULL)
652
pool_unref(&astream->pool);
654
i_stream_close(astream->istream.parent);
658
/* Create an attachment extractor stream that reads the message from input.
   set must have min_size > 0, a hash_format and the attachment ostream
   open/close callbacks. set->hash_format is set to NULL in the caller's
   struct; the stream frees the hash format when it is closed. The created
   stream is not seekable, has no readable fd, and is blocking if input
   is. */
i_stream_create_attachment_extractor(struct istream *input,
659
struct istream_attachment_settings *set,
662
struct attachment_istream *astream;
664
i_assert(set->min_size > 0);
665
i_assert(set->hash_format != NULL);
666
i_assert(set->open_attachment_ostream != NULL);
667
i_assert(set->close_attachment_ostream != NULL);
669
astream = i_new(struct attachment_istream, 1);
670
/* no temp file is open yet */
astream->part.temp_fd = -1;
672
astream->context = context;
673
astream->retry_read = TRUE;
675
/* make sure the caller doesn't try to double-free this */
676
set->hash_format = NULL;
678
astream->istream.max_buffer_size = input->real_stream->max_buffer_size;
680
astream->istream.read = i_stream_attachment_extractor_read;
681
astream->istream.iostream.close = i_stream_attachment_extractor_close;
683
astream->istream.istream.readable_fd = FALSE;
684
astream->istream.istream.blocking = input->blocking;
685
astream->istream.istream.seekable = FALSE;
687
/* the message parser allocates from the stream's own pool */
astream->pool = pool_alloconly_create("istream attachment", 1024);
688
astream->parser = message_parser_init(astream->pool, input, 0,
689
MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS |
690
MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES);
691
return i_stream_create(&astream->istream, input,
692
i_stream_get_fd(input));
695
/* Returns the retry flag saved by the stream's last read. It is TRUE when
   that read added nothing to the stream's buffer, and also initially,
   before any reads. input must be a stream created with
   i_stream_create_attachment_extractor(), since its real_stream is cast
   to the extractor's struct. */
bool i_stream_attachment_extractor_can_retry(struct istream *input)
697
struct attachment_istream *astream =
698
(struct attachment_istream *)input->real_stream;
700
return astream->retry_read;