1
# Copyright (C) 2001-2010 Python Software Foundation
2
# Contact: email-sig@python.org
3
# email package unit tests
11
from io import StringIO, BytesIO
12
from itertools import chain
17
from email.charset import Charset
18
from email.header import Header, decode_header, make_header
19
from email.parser import Parser, HeaderParser
20
from email.generator import Generator, DecodedGenerator, BytesGenerator
21
from email.message import Message
22
from email.mime.application import MIMEApplication
23
from email.mime.audio import MIMEAudio
24
from email.mime.text import MIMEText
25
from email.mime.image import MIMEImage
26
from email.mime.base import MIMEBase
27
from email.mime.message import MIMEMessage
28
from email.mime.multipart import MIMEMultipart
29
from email import utils
30
from email import errors
31
from email import encoders
32
from email import iterators
33
from email import base64mime
34
from email import quoprimime
36
from test.support import unlink
37
from test.test_email import openfile, TestEmailBase
39
# These imports are documented to work, but we are testing them using a
40
# different path, so we import them here just to make sure they are importable.
41
from email.parser import FeedParser, BytesFeedParser
48
# Test various aspects of the Message class's API
49
class TestMessageAPI(TestEmailBase):
50
def test_get_all(self):
    # get_all() returns every value of a repeated header, or the supplied
    # failobj when the header is absent.
    eq = self.assertEqual
    msg = self._msgobj('msg_20.txt')
    eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
    eq(msg.get_all('xx', 'n/a'), 'n/a')
56
def test_getset_charset(self):
59
eq(msg.get_charset(), None)
60
charset = Charset('iso-8859-1')
61
msg.set_charset(charset)
62
eq(msg['mime-version'], '1.0')
63
eq(msg.get_content_type(), 'text/plain')
64
eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
65
eq(msg.get_param('charset'), 'iso-8859-1')
66
eq(msg['content-transfer-encoding'], 'quoted-printable')
67
eq(msg.get_charset().input_charset, 'iso-8859-1')
70
eq(msg.get_charset(), None)
71
eq(msg['content-type'], 'text/plain')
72
# Try adding a charset when there's already MIME headers present
74
msg['MIME-Version'] = '2.0'
75
msg['Content-Type'] = 'text/x-weird'
76
msg['Content-Transfer-Encoding'] = 'quinted-puntable'
77
msg.set_charset(charset)
78
eq(msg['mime-version'], '2.0')
79
eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
80
eq(msg['content-transfer-encoding'], 'quinted-puntable')
82
def test_set_charset_from_string(self):
85
msg.set_charset('us-ascii')
86
eq(msg.get_charset().input_charset, 'us-ascii')
87
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
89
def test_set_payload_with_charset(self):
91
charset = Charset('iso-8859-1')
92
msg.set_payload('This is a string payload', charset)
93
self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
95
def test_get_charsets(self):
    # get_charsets() reports one entry per part; parts without a charset
    # yield the failobj (None by default).
    eq = self.assertEqual

    msg = self._msgobj('msg_08.txt')
    charsets = msg.get_charsets()
    eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

    msg = self._msgobj('msg_09.txt')
    charsets = msg.get_charsets('dingbat')
    eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                  'koi8-r'])

    msg = self._msgobj('msg_12.txt')
    charsets = msg.get_charsets()
    eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                  'iso-8859-3', 'us-ascii', 'koi8-r'])
112
def test_get_filename(self):
    # get_filename() on each subpart of a multipart, and on one subpart
    # fetched by index.
    eq = self.assertEqual

    msg = self._msgobj('msg_04.txt')
    filenames = [p.get_filename() for p in msg.get_payload()]
    eq(filenames, ['msg.txt', 'msg.txt'])

    msg = self._msgobj('msg_07.txt')
    subpart = msg.get_payload(1)
    eq(subpart.get_filename(), 'dingusfish.gif')
123
def test_get_filename_with_name_parameter(self):
    # Same check as test_get_filename, against the msg_44.txt fixture.
    eq = self.assertEqual

    msg = self._msgobj('msg_44.txt')
    filenames = [p.get_filename() for p in msg.get_payload()]
    eq(filenames, ['msg.txt', 'msg.txt'])
130
def test_get_boundary(self):
    eq = self.assertEqual
    msg = self._msgobj('msg_07.txt')
    # The returned boundary carries no surrounding quotes.
    eq(msg.get_boundary(), 'BOUNDARY')
136
def test_set_boundary(self):
    eq = self.assertEqual
    # This one has no existing boundary parameter, but the Content-Type:
    # header appears fifth.
    msg = self._msgobj('msg_01.txt')
    msg.set_boundary('BOUNDARY')
    header, value = msg.items()[4]
    eq(header.lower(), 'content-type')
    eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
    # This one has a Content-Type: header, with a boundary, stuck in the
    # middle of its headers.  Make sure the order is preserved; it should
    # still be found at index 4.
    msg = self._msgobj('msg_04.txt')
    msg.set_boundary('BOUNDARY')
    header, value = msg.items()[4]
    eq(header.lower(), 'content-type')
    eq(value, 'multipart/mixed; boundary="BOUNDARY"')
    # And this one has no Content-Type: header at all.
    msg = self._msgobj('msg_03.txt')
    self.assertRaises(errors.HeaderParseError,
                      msg.set_boundary, 'BOUNDARY')
158
def test_make_boundary(self):
159
msg = MIMEMultipart('form-data')
160
# Note that when the boundary gets created is an implementation
161
# detail and might change.
162
self.assertEqual(msg.items()[0][1], 'multipart/form-data')
163
# Trigger creation of boundary
165
self.assertEqual(msg.items()[0][1][:33],
166
'multipart/form-data; boundary="==')
167
# XXX: there ought to be tests of the uniqueness of the boundary, too.
169
def test_message_rfc822_only(self):
    # Issue 7970: message/rfc822 not in multipart parsed by
    # HeaderParser caused an exception when flattened.
    with openfile('msg_46.txt') as fp:
        msgdata = fp.read()
    parser = HeaderParser()
    msg = parser.parsestr(msgdata)
    out = StringIO()
    gen = Generator(out, True, 0)
    gen.flatten(msg, False)
    self.assertEqual(out.getvalue(), msgdata)
181
def test_byte_message_rfc822_only(self):
    # Make sure new bytes header parser also passes this.
    with openfile('msg_46.txt') as fp:
        msgdata = fp.read().encode('ascii')
    parser = email.parser.BytesHeaderParser()
    msg = parser.parsebytes(msgdata)
    out = BytesIO()
    gen = email.generator.BytesGenerator(out)
    gen.flatten(msg)
    self.assertEqual(out.getvalue(), msgdata)
192
def test_get_decoded_payload(self):
    # get_payload(decode=True) for each Content-Transfer-Encoding used by
    # the subparts of msg_10.txt.
    eq = self.assertEqual
    msg = self._msgobj('msg_10.txt')
    # The outer message is a multipart
    eq(msg.get_payload(decode=True), None)
    # Subpart 1 is 7bit encoded
    eq(msg.get_payload(0).get_payload(decode=True),
       b'This is a 7bit encoded message.\n')
    # Subpart 2 is quopri
    eq(msg.get_payload(1).get_payload(decode=True),
       b'\xa1This is a Quoted Printable encoded message!\n')
    # Subpart 3 is base64
    eq(msg.get_payload(2).get_payload(decode=True),
       b'This is a Base64 encoded message.')
    # Subpart 4 is base64 with a trailing newline, which
    # used to be stripped (issue 7143).
    eq(msg.get_payload(3).get_payload(decode=True),
       b'This is a Base64 encoded message.\n')
    # Subpart 5 has no Content-Transfer-Encoding: header.
    eq(msg.get_payload(4).get_payload(decode=True),
       b'This has no Content-Transfer-Encoding: header.\n')
214
def test_get_decoded_uu_payload(self):
215
eq = self.assertEqual
217
msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
218
for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
219
msg['content-transfer-encoding'] = cte
220
eq(msg.get_payload(decode=True), b'hello world')
221
# Now try some bogus data
222
msg.set_payload('foo')
223
eq(msg.get_payload(decode=True), b'foo')
225
def test_get_payload_n_raises_on_non_multipart(self):
227
self.assertRaises(TypeError, msg.get_payload, 1)
229
def test_decoded_generator(self):
    # Flatten msg_07.txt through DecodedGenerator and compare with the
    # text stored in msg_17.txt.
    eq = self.assertEqual
    msg = self._msgobj('msg_07.txt')
    with openfile('msg_17.txt') as fp:
        text = fp.read()
    s = StringIO()
    g = DecodedGenerator(s)
    g.flatten(msg)
    eq(s.getvalue(), text)
239
def test__contains__(self):
243
# Check for case insensitivity
244
self.assertIn('from', msg)
245
self.assertIn('From', msg)
246
self.assertIn('FROM', msg)
247
self.assertIn('to', msg)
248
self.assertIn('To', msg)
249
self.assertIn('TO', msg)
251
def test_as_string(self):
    # str(msg) reproduces the source text; with unixfrom=True an extra
    # leading 'From ' line is added.
    msg = self._msgobj('msg_01.txt')
    with openfile('msg_01.txt') as fp:
        text = fp.read()
    self.assertEqual(text, str(msg))
    fullrepr = msg.as_string(unixfrom=True)
    lines = fullrepr.split('\n')
    self.assertTrue(lines[0].startswith('From '))
    self.assertEqual(text, NL.join(lines[1:]))
261
def test_as_string_policy(self):
    # as_string(policy=...) must match a Generator using the same policy.
    msg = self._msgobj('msg_01.txt')
    newpolicy = msg.policy.clone(linesep='\r\n')
    fullrepr = msg.as_string(policy=newpolicy)
    s = StringIO()
    g = Generator(s, policy=newpolicy)
    g.flatten(msg)
    self.assertEqual(fullrepr, s.getvalue())
270
def test_as_bytes(self):
    # Bytes counterpart of test_as_string.
    msg = self._msgobj('msg_01.txt')
    with openfile('msg_01.txt') as fp:
        data = fp.read().encode('ascii')
    self.assertEqual(data, bytes(msg))
    fullrepr = msg.as_bytes(unixfrom=True)
    lines = fullrepr.split(b'\n')
    self.assertTrue(lines[0].startswith(b'From '))
    self.assertEqual(data, b'\n'.join(lines[1:]))
280
def test_as_bytes_policy(self):
    # as_bytes(policy=...) must match a BytesGenerator using the same policy.
    msg = self._msgobj('msg_01.txt')
    newpolicy = msg.policy.clone(linesep='\r\n')
    fullrepr = msg.as_bytes(policy=newpolicy)
    s = BytesIO()
    g = BytesGenerator(s, policy=newpolicy)
    g.flatten(msg)
    self.assertEqual(fullrepr, s.getvalue())
289
# test_headerregistry.TestContentTypeHeader.bad_params
290
def test_bad_param(self):
291
msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
292
self.assertEqual(msg.get_param('baz'), '')
294
def test_missing_filename(self):
295
msg = email.message_from_string("From: foo\n")
296
self.assertEqual(msg.get_filename(), None)
298
def test_bogus_filename(self):
299
msg = email.message_from_string(
300
"Content-Disposition: blarg; filename\n")
301
self.assertEqual(msg.get_filename(), '')
303
def test_missing_boundary(self):
304
msg = email.message_from_string("From: foo\n")
305
self.assertEqual(msg.get_boundary(), None)
307
def test_get_params(self):
308
eq = self.assertEqual
309
msg = email.message_from_string(
310
'X-Header: foo=one; bar=two; baz=three\n')
311
eq(msg.get_params(header='x-header'),
312
[('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
313
msg = email.message_from_string(
314
'X-Header: foo; bar=one; baz=two\n')
315
eq(msg.get_params(header='x-header'),
316
[('foo', ''), ('bar', 'one'), ('baz', 'two')])
317
eq(msg.get_params(), None)
318
msg = email.message_from_string(
319
'X-Header: foo; bar="one"; baz=two\n')
320
eq(msg.get_params(header='x-header'),
321
[('foo', ''), ('bar', 'one'), ('baz', 'two')])
323
# test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
324
def test_get_param_liberal(self):
326
msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
327
self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
329
def test_get_param(self):
330
eq = self.assertEqual
331
msg = email.message_from_string(
332
"X-Header: foo=one; bar=two; baz=three\n")
333
eq(msg.get_param('bar', header='x-header'), 'two')
334
eq(msg.get_param('quuz', header='x-header'), None)
335
eq(msg.get_param('quuz'), None)
336
msg = email.message_from_string(
337
'X-Header: foo; bar="one"; baz=two\n')
338
eq(msg.get_param('foo', header='x-header'), '')
339
eq(msg.get_param('bar', header='x-header'), 'one')
340
eq(msg.get_param('baz', header='x-header'), 'two')
341
# XXX: We are not RFC-2045 compliant! We cannot parse:
342
# msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
343
# msg.get_param("weird")
346
# test_headerregistry.TestContentTypeHeader.spaces_around_semis
347
def test_get_param_funky_continuation_lines(self):
    # The 'name' parameter of the second part of msg_22.txt.
    msg = self._msgobj('msg_22.txt')
    self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
351
# test_headerregistry.TestContentTypeHeader.semis_inside_quotes
352
def test_get_param_with_semis_in_quotes(self):
353
msg = email.message_from_string(
354
'Content-Type: image/pjpeg; name="Jim&&Jill"\n')
355
self.assertEqual(msg.get_param('name'), 'Jim&&Jill')
356
self.assertEqual(msg.get_param('name', unquote=False),
357
'"Jim&&Jill"')
359
# test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
360
def test_get_param_with_quotes(self):
361
msg = email.message_from_string(
362
'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
363
self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
364
msg = email.message_from_string(
365
"Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
366
self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
368
def test_field_containment(self):
369
msg = email.message_from_string('Header: exists')
370
self.assertIn('header', msg)
371
self.assertIn('Header', msg)
372
self.assertIn('HEADER', msg)
373
self.assertNotIn('headerx', msg)
375
def test_set_param(self):
376
eq = self.assertEqual
378
msg.set_param('charset', 'iso-2022-jp')
379
eq(msg.get_param('charset'), 'iso-2022-jp')
380
msg.set_param('importance', 'high value')
381
eq(msg.get_param('importance'), 'high value')
382
eq(msg.get_param('importance', unquote=False), '"high value"')
383
eq(msg.get_params(), [('text/plain', ''),
384
('charset', 'iso-2022-jp'),
385
('importance', 'high value')])
386
eq(msg.get_params(unquote=False), [('text/plain', ''),
387
('charset', '"iso-2022-jp"'),
388
('importance', '"high value"')])
389
msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
390
eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
392
def test_del_param(self):
    # Delete a parameter, then re-add it; it comes back at the end.
    eq = self.assertEqual
    msg = self._msgobj('msg_05.txt')
    eq(msg.get_params(),
       [('multipart/report', ''), ('report-type', 'delivery-status'),
        ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
    old_val = msg.get_param("report-type")
    msg.del_param("report-type")
    eq(msg.get_params(),
       [('multipart/report', ''),
        ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
    msg.set_param("report-type", old_val)
    eq(msg.get_params(),
       [('multipart/report', ''),
        ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
        ('report-type', old_val)])
409
def test_del_param_on_other_header(self):
411
msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
412
msg.del_param('filename', 'content-disposition')
413
self.assertEqual(msg['content-disposition'], 'attachment')
415
def test_del_param_on_nonexistent_header(self):
417
# Deleting param on empty msg should not raise exception.
418
msg.del_param('filename', 'content-disposition')
420
def test_del_nonexistent_param(self):
422
msg.add_header('Content-Type', 'text/plain', charset='utf-8')
423
existing_header = msg['Content-Type']
424
msg.del_param('foobar', header='Content-Type')
425
self.assertEqual(msg['Content-Type'], existing_header)
427
def test_set_type(self):
428
eq = self.assertEqual
430
self.assertRaises(ValueError, msg.set_type, 'text')
431
msg.set_type('text/plain')
432
eq(msg['content-type'], 'text/plain')
433
msg.set_param('charset', 'us-ascii')
434
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
435
msg.set_type('text/html')
436
eq(msg['content-type'], 'text/html; charset="us-ascii"')
438
def test_set_type_on_other_header(self):
440
msg['X-Content-Type'] = 'text/plain'
441
msg.set_type('application/octet-stream', 'X-Content-Type')
442
self.assertEqual(msg['x-content-type'], 'application/octet-stream')
444
def test_get_content_type_missing(self):
446
self.assertEqual(msg.get_content_type(), 'text/plain')
448
def test_get_content_type_missing_with_default_type(self):
450
msg.set_default_type('message/rfc822')
451
self.assertEqual(msg.get_content_type(), 'message/rfc822')
453
def test_get_content_type_from_message_implicit(self):
    # Expected value matches the maintype/subtype tests on the same fixture.
    msg = self._msgobj('msg_30.txt')
    self.assertEqual(msg.get_payload(0).get_content_type(),
                     'message/rfc822')
458
def test_get_content_type_from_message_explicit(self):
    # Expected value matches the maintype/subtype tests on the same fixture.
    msg = self._msgobj('msg_28.txt')
    self.assertEqual(msg.get_payload(0).get_content_type(),
                     'message/rfc822')
463
def test_get_content_type_from_message_text_plain_implicit(self):
    # msg_03.txt is expected to report text/plain.
    msg = self._msgobj('msg_03.txt')
    self.assertEqual(msg.get_content_type(), 'text/plain')
467
def test_get_content_type_from_message_text_plain_explicit(self):
    # msg_01.txt is expected to report text/plain.
    msg = self._msgobj('msg_01.txt')
    self.assertEqual(msg.get_content_type(), 'text/plain')
471
def test_get_content_maintype_missing(self):
473
self.assertEqual(msg.get_content_maintype(), 'text')
475
def test_get_content_maintype_missing_with_default_type(self):
477
msg.set_default_type('message/rfc822')
478
self.assertEqual(msg.get_content_maintype(), 'message')
480
def test_get_content_maintype_from_message_implicit(self):
    # First subpart of msg_30.txt.
    msg = self._msgobj('msg_30.txt')
    self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
484
def test_get_content_maintype_from_message_explicit(self):
    # First subpart of msg_28.txt.
    msg = self._msgobj('msg_28.txt')
    self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
488
def test_get_content_maintype_from_message_text_plain_implicit(self):
    # msg_03.txt is expected to report main type 'text'.
    msg = self._msgobj('msg_03.txt')
    self.assertEqual(msg.get_content_maintype(), 'text')
492
def test_get_content_maintype_from_message_text_plain_explicit(self):
    # msg_01.txt is expected to report main type 'text'.
    msg = self._msgobj('msg_01.txt')
    self.assertEqual(msg.get_content_maintype(), 'text')
496
def test_get_content_subtype_missing(self):
498
self.assertEqual(msg.get_content_subtype(), 'plain')
500
def test_get_content_subtype_missing_with_default_type(self):
502
msg.set_default_type('message/rfc822')
503
self.assertEqual(msg.get_content_subtype(), 'rfc822')
505
def test_get_content_subtype_from_message_implicit(self):
    # First subpart of msg_30.txt.
    msg = self._msgobj('msg_30.txt')
    self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
509
def test_get_content_subtype_from_message_explicit(self):
    # First subpart of msg_28.txt.
    msg = self._msgobj('msg_28.txt')
    self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
513
def test_get_content_subtype_from_message_text_plain_implicit(self):
    # msg_03.txt is expected to report subtype 'plain'.
    msg = self._msgobj('msg_03.txt')
    self.assertEqual(msg.get_content_subtype(), 'plain')
517
def test_get_content_subtype_from_message_text_plain_explicit(self):
    # msg_01.txt is expected to report subtype 'plain'.
    msg = self._msgobj('msg_01.txt')
    self.assertEqual(msg.get_content_subtype(), 'plain')
521
def test_get_content_maintype_error(self):
523
msg['Content-Type'] = 'no-slash-in-this-string'
524
self.assertEqual(msg.get_content_maintype(), 'text')
526
def test_get_content_subtype_error(self):
528
msg['Content-Type'] = 'no-slash-in-this-string'
529
self.assertEqual(msg.get_content_subtype(), 'plain')
531
def test_replace_header(self):
532
eq = self.assertEqual
534
msg.add_header('First', 'One')
535
msg.add_header('Second', 'Two')
536
msg.add_header('Third', 'Three')
537
eq(msg.keys(), ['First', 'Second', 'Third'])
538
eq(msg.values(), ['One', 'Two', 'Three'])
539
msg.replace_header('Second', 'Twenty')
540
eq(msg.keys(), ['First', 'Second', 'Third'])
541
eq(msg.values(), ['One', 'Twenty', 'Three'])
542
msg.add_header('First', 'Eleven')
543
msg.replace_header('First', 'One Hundred')
544
eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
545
eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
546
self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
548
# test_defect_handling:test_invalid_chars_in_base64_payload
549
def test_broken_base64_payload(self):
550
x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
552
msg['content-type'] = 'audio/x-midi'
553
msg['content-transfer-encoding'] = 'base64'
555
self.assertEqual(msg.get_payload(decode=True),
556
(b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
557
b'\xa1\x00p\xf6\xbf\xe9\x0f'))
558
self.assertIsInstance(msg.defects[0],
559
errors.InvalidBase64CharactersDefect)
561
def test_broken_unicode_payload(self):
562
# This test improves coverage but is not a compliance test.
563
# The behavior in this situation is currently undefined by the API.
564
x = 'this is a br\xf6ken thing to do'
566
msg['content-type'] = 'text/plain'
567
msg['content-transfer-encoding'] = '8bit'
569
self.assertEqual(msg.get_payload(decode=True),
570
bytes(x, 'raw-unicode-escape'))
572
def test_questionable_bytes_payload(self):
573
# This test improves coverage but is not a compliance test,
574
# since it involves poking inside the black box.
575
x = 'this is a quƩstionable thing to do'.encode('utf-8')
577
msg['content-type'] = 'text/plain; charset="utf-8"'
578
msg['content-transfer-encoding'] = '8bit'
580
self.assertEqual(msg.get_payload(decode=True), x)
583
def test_ascii_add_header(self):
585
msg.add_header('Content-Disposition', 'attachment',
587
self.assertEqual('attachment; filename="bud.gif"',
588
msg['Content-Disposition'])
590
def test_noascii_add_header(self):
592
msg.add_header('Content-Disposition', 'attachment',
593
filename="FuĆballer.ppt")
595
'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
596
msg['Content-Disposition'])
598
def test_nonascii_add_header_via_triple(self):
600
msg.add_header('Content-Disposition', 'attachment',
601
filename=('iso-8859-1', '', 'FuĆballer.ppt'))
603
'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
604
msg['Content-Disposition'])
606
def test_ascii_add_header_with_tspecial(self):
608
msg.add_header('Content-Disposition', 'attachment',
609
filename="windows [filename].ppt")
611
'attachment; filename="windows [filename].ppt"',
612
msg['Content-Disposition'])
614
def test_nonascii_add_header_with_tspecial(self):
616
msg.add_header('Content-Disposition', 'attachment',
617
filename="FuĆballer [filename].ppt")
619
"attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
620
msg['Content-Disposition'])
622
def test_binary_quopri_payload(self):
623
for charset in ('latin-1', 'ascii'):
625
msg['content-type'] = 'text/plain; charset=%s' % charset
626
msg['content-transfer-encoding'] = 'quoted-printable'
627
msg.set_payload(b'foo=e6=96=87bar')
629
msg.get_payload(decode=True),
630
b'foo\xe6\x96\x87bar',
631
'get_payload returns wrong result with charset %s.' % charset)
633
def test_binary_base64_payload(self):
634
for charset in ('latin-1', 'ascii'):
636
msg['content-type'] = 'text/plain; charset=%s' % charset
637
msg['content-transfer-encoding'] = 'base64'
638
msg.set_payload(b'Zm9v5paHYmFy')
640
msg.get_payload(decode=True),
641
b'foo\xe6\x96\x87bar',
642
'get_payload returns wrong result with charset %s.' % charset)
644
def test_binary_uuencode_payload(self):
645
for charset in ('latin-1', 'ascii'):
646
for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
648
msg['content-type'] = 'text/plain; charset=%s' % charset
649
msg['content-transfer-encoding'] = encoding
650
msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
652
msg.get_payload(decode=True),
653
b'foo\xe6\x96\x87bar',
654
str(('get_payload returns wrong result ',
655
'with charset {0} and encoding {1}.')).\
656
format(charset, encoding))
658
def test_add_header_with_name_only_param(self):
660
msg.add_header('Content-Disposition', 'inline', foo_bar=None)
661
self.assertEqual("inline; foo-bar", msg['Content-Disposition'])
663
def test_add_header_with_no_value(self):
665
msg.add_header('X-Status', None)
666
self.assertEqual('', msg['X-Status'])
668
# Issue 5871: reject an attempt to embed a header inside a header value
669
# (header injection attack).
670
def test_embeded_header_via_Header_rejected(self):
672
msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
673
self.assertRaises(errors.HeaderParseError, msg.as_string)
675
def test_embeded_header_via_string_rejected(self):
677
msg['Dummy'] = 'dummy\nX-Injected-Header: test'
678
self.assertRaises(errors.HeaderParseError, msg.as_string)
680
def test_unicode_header_defaults_to_utf8_encoding(self):
682
m = MIMEText('abc\n')
683
m['Subject'] = 'Ć test'
684
self.assertEqual(str(m),textwrap.dedent("""\
685
Content-Type: text/plain; charset="us-ascii"
687
Content-Transfer-Encoding: 7bit
688
Subject: =?utf-8?q?=C3=89_test?=
693
def test_unicode_body_defaults_to_utf8_encoding(self):
695
m = MIMEText('Ć testabc\n')
696
self.assertEqual(str(m),textwrap.dedent("""\
697
Content-Type: text/plain; charset="utf-8"
699
Content-Transfer-Encoding: base64
705
# Test the email.encoders module
706
class TestEncoders(unittest.TestCase):
708
def test_EncodersEncode_base64(self):
    with openfile('PyBanner048.gif', 'rb') as fp:
        bindata = fp.read()
    mimed = email.mime.image.MIMEImage(bindata)
    base64ed = mimed.get_payload()
    # the transfer-encoded body lines should all be <=76 characters
    lines = base64ed.split('\n')
    self.assertLessEqual(max(len(x) for x in lines), 76)
717
def test_encode_empty_payload(self):
718
eq = self.assertEqual
720
msg.set_charset('us-ascii')
721
eq(msg['content-transfer-encoding'], '7bit')
723
def test_default_cte(self):
724
eq = self.assertEqual
725
# 7bit data and the default us-ascii _charset
726
msg = MIMEText('hello world')
727
eq(msg['content-transfer-encoding'], '7bit')
728
# Similar, but with 8bit data
729
msg = MIMEText('hello \xf8 world')
730
eq(msg['content-transfer-encoding'], 'base64')
731
# And now with a different charset
732
msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
733
eq(msg['content-transfer-encoding'], 'quoted-printable')
735
def test_encode7or8bit(self):
736
# Make sure a charset whose input character set is 8bit but
737
# whose output character set is 7bit gets a transfer-encoding
739
eq = self.assertEqual
740
msg = MIMEText('ę', _charset='euc-jp')
741
eq(msg['content-transfer-encoding'], '7bit')
743
def test_qp_encode_latin1(self):
744
msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
745
self.assertEqual(str(msg), textwrap.dedent("""\
747
Content-Type: text/text; charset="iso-8859-1"
748
Content-Transfer-Encoding: quoted-printable
753
def test_qp_encode_non_latin1(self):
755
msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
756
self.assertEqual(str(msg), textwrap.dedent("""\
758
Content-Type: text/text; charset="iso-8859-2"
759
Content-Transfer-Encoding: quoted-printable
765
# Test long header wrapping
766
class TestLongHeaders(TestEmailBase):
770
def test_split_long_continuation(self):
    # An unsplittable over-long continuation line must pass through the
    # generator unchanged.
    eq = self.ndiffAssertEqual
    msg = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
    sfp = StringIO()
    g = Generator(sfp)
    g.flatten(msg)
    eq(sfp.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
790
def test_another_long_almost_unsplittable_header(self):
    # Header.encode() keeps existing folds, with tab and with space
    # continuation whitespace.
    eq = self.ndiffAssertEqual
    hstr = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
    h = Header(hstr, continuation_ws='\t')
    eq(h.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
    h = Header(hstr.replace('\t', ' '))
    eq(h.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")
807
def test_long_nonstring(self):
    # A Header built from three differently-encoded chunks, checked both
    # when flattened inside a message and when encoded on its own.
    eq = self.ndiffAssertEqual
    g = Charset("iso-8859-1")
    cz = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
              b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              b'bef\xf6rdert. ')
    cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
               b'd\xf9vtipu.. ')
    utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    h = Header(g_head, g, header_name='Subject')
    h.append(cz_head, cz)
    h.append(utf8_head, utf8)
    msg = Message()
    msg['Subject'] = h
    sfp = StringIO()
    g = Generator(sfp)
    g.flatten(msg)
    eq(sfp.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
    eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
861
def test_long_header_encode(self):
    # A long parameter list is folded at a ';' boundary.
    eq = self.ndiffAssertEqual
    h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
               'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
               header_name='X-Foobar-Spoink-Defrobnit')
    eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
870
def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
    # continuation_ws='\t' does not replace whitespace already present at
    # the fold point: the source text has a space there, so a space stays.
    eq = self.ndiffAssertEqual
    h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
               'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
               header_name='X-Foobar-Spoink-Defrobnit',
               continuation_ws='\t')
    eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
880
def test_long_header_encode_with_tab_continuation(self):
    # Here the source text has a tab at the fold point, so the folded
    # line starts with that tab.
    eq = self.ndiffAssertEqual
    h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
               'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
               header_name='X-Foobar-Spoink-Defrobnit',
               continuation_ws='\t')
    eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
890
def test_header_encode_with_different_output_charset(self):
891
h = Header('ę', 'euc-jp')
892
self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
894
def test_long_header_encode_with_different_output_charset(self):
895
h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
896
b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
897
b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
898
b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
900
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
901
=?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
902
self.assertEqual(h.encode(), res)
904
def test_header_splitter(self):
    eq = self.ndiffAssertEqual
    msg = MIMEText('')
    # It'd be great if we could use add_header() here, but that doesn't
    # guarantee an order of the parameters.
    msg['X-Foobar-Spoink-Defrobnit'] = (
        'wasnipoop; giraffes="very-long-necked-animals"; '
        'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    sfp = StringIO()
    g = Generator(sfp)
    g.flatten(msg)
    eq(sfp.getvalue(), '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

''')
924
def test_no_semis_header_splitter(self):
925
eq = self.ndiffAssertEqual
927
msg['From'] = 'test@dom.ain'
928
msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
929
msg.set_payload('Test')
933
eq(sfp.getvalue(), """\
935
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
936
<5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
940
def test_last_split_chunk_does_not_fit(self):
941
eq = self.ndiffAssertEqual
942
h = Header('Subject: the first part of this is short, but_the_second'
943
'_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
946
Subject: the first part of this is short,
947
but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
949
def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
950
eq = self.ndiffAssertEqual
951
h = Header(', but_the_second'
952
'_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
956
but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
958
def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
959
eq = self.ndiffAssertEqual
960
h = Header(', , but_the_second'
961
'_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
965
but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
967
def test_trailing_splitable_on_overlong_unsplitable(self):
968
eq = self.ndiffAssertEqual
969
h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
970
'be_on_a_line_all_by_itself;')
971
eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
972
"be_on_a_line_all_by_itself;")
974
def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
975
eq = self.ndiffAssertEqual
977
'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
978
'be_on_a_line_all_by_itself; ')
981
this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
983
def test_long_header_with_multiple_sequential_split_chars(self):
984
eq = self.ndiffAssertEqual
985
h = Header('This is a long line that has two whitespaces in a row. '
986
'This used to cause truncation of the header when folded')
988
This is a long line that has two whitespaces in a row. This used to cause
989
truncation of the header when folded""")
991
def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
992
eq = self.ndiffAssertEqual
993
h = Header('thisverylongheaderhas;semicolons;and,commas,but'
994
'they;arenotlegal;fold,points')
995
eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
996
"arenotlegal;fold,points")
998
def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
999
eq = self.ndiffAssertEqual
1000
h = Header('this is a test where we need to have more than one line '
1001
'before; our final line that is just too big to fit;; '
1002
'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1003
'be_on_a_line_all_by_itself;')
1005
this is a test where we need to have more than one line before;
1006
our final line that is just too big to fit;;
1007
this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
1009
def test_overlong_last_part_followed_by_split_point(self):
1010
eq = self.ndiffAssertEqual
1011
h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1012
'be_on_a_line_all_by_itself ')
1013
eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
1014
"should_be_on_a_line_all_by_itself ")
1016
def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
1017
eq = self.ndiffAssertEqual
1018
h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
1019
'before_our_final_line_; ; '
1020
'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1021
'be_on_a_line_all_by_itself; ')
1023
this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
1025
this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1027
def test_multiline_with_overlong_last_part_followed_by_split_point(self):
1028
eq = self.ndiffAssertEqual
1029
h = Header('this is a test where we need to have more than one line '
1030
'before our final line; ; '
1031
'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1032
'be_on_a_line_all_by_itself; ')
1034
this is a test where we need to have more than one line before our final line;
1036
this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1038
def test_long_header_with_whitespace_runs(self):
1039
eq = self.ndiffAssertEqual
1041
msg['From'] = 'test@dom.ain'
1042
msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
1043
msg.set_payload('Test')
1047
eq(sfp.getvalue(), """\
1049
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1050
<foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1051
<foo@dom.ain> <foo@dom.ain>\x20\x20
1055
def test_long_run_with_semi_header_splitter(self):
1056
eq = self.ndiffAssertEqual
1058
msg['From'] = 'test@dom.ain'
1059
msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
1060
msg.set_payload('Test')
1064
eq(sfp.getvalue(), """\
1066
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1067
<foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1072
def test_splitter_split_on_punctuation_only_if_fws(self):
1073
eq = self.ndiffAssertEqual
1075
msg['From'] = 'test@dom.ain'
1076
msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1077
'they;arenotlegal;fold,points')
1078
msg.set_payload('Test')
1082
# XXX the space after the header should not be there.
1083
eq(sfp.getvalue(), """\
1086
thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1090
def test_no_split_long_header(self):
1091
eq = self.ndiffAssertEqual
1092
hstr = 'References: ' + 'x' * 80
1094
# These come on two lines because Headers are really field value
1095
# classes and don't really know about their field names.
1098
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1099
h = Header('x' * 80)
1100
eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
1102
def test_splitting_multiple_long_lines(self):
1103
eq = self.ndiffAssertEqual
1105
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1106
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1107
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1109
h = Header(hstr, continuation_ws='\t')
1111
from babylon.socal-raves.org (localhost [127.0.0.1]);
1112
by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1113
for <mailman-admin@babylon.socal-raves.org>;
1114
Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1115
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1116
by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1117
for <mailman-admin@babylon.socal-raves.org>;
1118
Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1119
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1120
by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1121
for <mailman-admin@babylon.socal-raves.org>;
1122
Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1124
def test_splitting_first_line_only_is_long(self):
1125
eq = self.ndiffAssertEqual
1127
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1128
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1129
\tid 17k4h5-00034i-00
1130
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1131
h = Header(hstr, maxlinelen=78, header_name='Received',
1132
continuation_ws='\t')
1134
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1135
helo=cthulhu.gerg.ca)
1136
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1137
\tid 17k4h5-00034i-00
1138
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1140
def test_long_8bit_header(self):
1141
eq = self.ndiffAssertEqual
1143
h = Header('Britische Regierung gibt', 'iso-8859-1',
1144
header_name='Subject')
1145
h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
1146
eq(h.encode(maxlinelen=76), """\
1147
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1148
=?iso-8859-1?q?hore-Windkraftprojekte?=""")
1150
eq(msg.as_string(maxheaderlen=76), """\
1151
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1152
=?iso-8859-1?q?hore-Windkraftprojekte?=
1155
eq(msg.as_string(maxheaderlen=0), """\
1156
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
1160
def test_long_8bit_header_no_charset(self):
1161
eq = self.ndiffAssertEqual
1163
header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1164
'f\xfcr Offshore-Windkraftprojekte '
1165
'<a-very-long-address@example.com>')
1166
msg['Reply-To'] = header_string
1167
eq(msg.as_string(maxheaderlen=78), """\
1168
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1169
=?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1173
msg['Reply-To'] = Header(header_string,
1174
header_name='Reply-To')
1175
eq(msg.as_string(maxheaderlen=78), """\
1176
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1177
=?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1181
def test_long_to_header(self):
1182
eq = self.ndiffAssertEqual
1183
to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
1184
'<someone@eecs.umich.edu>, '
1185
'"Someone Test #B" <someone@umich.edu>, '
1186
'"Someone Test #C" <someone@eecs.umich.edu>, '
1187
'"Someone Test #D" <someone@eecs.umich.edu>')
1190
eq(msg.as_string(maxheaderlen=78), '''\
1191
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
1192
"Someone Test #B" <someone@umich.edu>,
1193
"Someone Test #C" <someone@eecs.umich.edu>,
1194
"Someone Test #D" <someone@eecs.umich.edu>
1198
def test_long_line_after_append(self):
1199
eq = self.ndiffAssertEqual
1200
s = 'This is an example of string which has almost the limit of header length.'
1202
h.append('Add another line.')
1203
eq(h.encode(maxlinelen=76), """\
1204
This is an example of string which has almost the limit of header length.
1205
Add another line.""")
1207
def test_shorter_line_with_append(self):
1208
eq = self.ndiffAssertEqual
1209
s = 'This is a shorter line.'
1211
h.append('Add another sentence. (Surprise?)')
1213
'This is a shorter line. Add another sentence. (Surprise?)')
1215
def test_long_field_name(self):
1216
eq = self.ndiffAssertEqual
1217
fn = 'X-Very-Very-Very-Long-Header-Name'
1218
gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1219
'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1220
'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1222
h = Header(gs, 'iso-8859-1', header_name=fn)
1223
# BAW: this seems broken because the first line is too long
1224
eq(h.encode(maxlinelen=76), """\
1225
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1226
=?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1227
=?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1228
=?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
1230
def test_long_received_header(self):
1231
h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1232
'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1233
'Wed, 05 Mar 2003 18:10:18 -0700')
1235
msg['Received-1'] = Header(h, continuation_ws='\t')
1236
msg['Received-2'] = h
1237
# This should be splitting on spaces not semicolons.
1238
self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
1239
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1240
hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
1241
Wed, 05 Mar 2003 18:10:18 -0700
1242
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1243
hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
1244
Wed, 05 Mar 2003 18:10:18 -0700
1248
def test_string_headerinst_eq(self):
1249
h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1250
'tu-muenchen.de> (David Bremner\'s message of '
1251
'"Thu, 6 Mar 2003 13:58:21 +0100")')
1253
msg['Received-1'] = Header(h, header_name='Received-1',
1254
continuation_ws='\t')
1255
msg['Received-2'] = h
1256
# XXX The space after the ':' should not be there.
1257
self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
1259
<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1260
Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1262
<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1263
Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1267
def test_long_unbreakable_lines_with_continuation(self):
1268
eq = self.ndiffAssertEqual
1271
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1272
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1274
msg['Face-2'] = Header(t, header_name='Face-2')
1275
msg['Face-3'] = ' ' + t
1276
# XXX This splitting is all wrong. It the first value line should be
1277
# snug against the field name or the space after the header not there.
1278
eq(msg.as_string(maxheaderlen=78), """\
1280
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1281
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1283
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1284
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1286
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1287
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1291
def test_another_long_multiline_header(self):
1292
eq = self.ndiffAssertEqual
1293
m = ('Received: from siimage.com '
1294
'([172.25.1.3]) by zima.siliconimage.com with '
1295
'Microsoft SMTPSVC(5.0.2195.4905); '
1296
'Wed, 16 Oct 2002 07:41:11 -0700')
1297
msg = email.message_from_string(m)
1298
eq(msg.as_string(maxheaderlen=78), '''\
1299
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1300
Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
1304
def test_long_lines_with_different_header(self):
1305
eq = self.ndiffAssertEqual
1306
h = ('List-Unsubscribe: '
1307
'<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1308
' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1309
'?subject=unsubscribe>')
1312
msg['List'] = Header(h, header_name='List')
1313
eq(msg.as_string(maxheaderlen=78), """\
1314
List: List-Unsubscribe:
1315
<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
1316
<mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
1317
List: List-Unsubscribe:
1318
<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
1319
<mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
1323
def test_long_rfc2047_header_with_embedded_fws(self):
1324
h = Header(textwrap.dedent("""\
1325
We're going to pretend this header is in a non-ascii character set
1326
\tto see if line wrapping with encoded words and embedded
1327
folding white space works"""),
1330
self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1331
=?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1332
=?utf-8?q?cter_set?=
1333
=?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1334
=?utf-8?q?_folding_white_space_works?=""")+'\n')
1338
# Test mangling of "From " lines in the body of a message
1339
class TestFromMangling(unittest.TestCase):
1341
self.msg = Message()
1342
self.msg['From'] = 'aaa@bbb.org'
1343
self.msg.set_payload("""\
1344
From the desk of A.A.A.:
1348
def test_mangled_from(self):
1350
g = Generator(s, mangle_from_=True)
1352
self.assertEqual(s.getvalue(), """\
1355
>From the desk of A.A.A.:
1359
def test_dont_mangle_from(self):
1361
g = Generator(s, mangle_from_=False)
1363
self.assertEqual(s.getvalue(), """\
1366
From the desk of A.A.A.:
1370
def test_mangle_from_in_preamble_and_epilog(self):
1372
g = Generator(s, mangle_from_=True)
1373
msg = email.message_from_string(textwrap.dedent("""\
1376
Content-Type: multipart/mixed; boundary=XXX
1378
From somewhere unknown
1381
Content-Type: text/plain
1387
From somewhere unknowable
1390
self.assertEqual(len([1 for x in s.getvalue().split('\n')
1391
if x.startswith('>From ')]), 2)
1393
def test_mangled_from_with_bad_bytes(self):
1394
source = textwrap.dedent("""\
1395
Content-Type: text/plain; charset="utf-8"
1397
Content-Transfer-Encoding: 8bit
1400
""").encode('utf-8')
1401
msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
1403
g = BytesGenerator(b, mangle_from_=True)
1405
self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n')
1408
# Test the basic MIMEAudio class
1409
class TestMIMEAudio(unittest.TestCase):
1411
with openfile('audiotest.au', 'rb') as fp:
1412
self._audiodata = fp.read()
1413
self._au = MIMEAudio(self._audiodata)
1415
def test_guess_minor_type(self):
1416
self.assertEqual(self._au.get_content_type(), 'audio/basic')
1418
def test_encoding(self):
1419
payload = self._au.get_payload()
1420
self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1423
def test_checkSetMinor(self):
1424
au = MIMEAudio(self._audiodata, 'fish')
1425
self.assertEqual(au.get_content_type(), 'audio/fish')
1427
def test_add_header(self):
1428
eq = self.assertEqual
1429
self._au.add_header('Content-Disposition', 'attachment',
1430
filename='audiotest.au')
1431
eq(self._au['content-disposition'],
1432
'attachment; filename="audiotest.au"')
1433
eq(self._au.get_params(header='content-disposition'),
1434
[('attachment', ''), ('filename', 'audiotest.au')])
1435
eq(self._au.get_param('filename', header='content-disposition'),
1438
eq(self._au.get_param('attachment', header='content-disposition'), '')
1439
self.assertIs(self._au.get_param('foo', failobj=missing,
1440
header='content-disposition'), missing)
1441
# Try some missing stuff
1442
self.assertIs(self._au.get_param('foobar', missing), missing)
1443
self.assertIs(self._au.get_param('attachment', missing,
1444
header='foobar'), missing)
1448
# Test the basic MIMEImage class
1449
class TestMIMEImage(unittest.TestCase):
1451
with openfile('PyBanner048.gif', 'rb') as fp:
1452
self._imgdata = fp.read()
1453
self._im = MIMEImage(self._imgdata)
1455
def test_guess_minor_type(self):
1456
self.assertEqual(self._im.get_content_type(), 'image/gif')
1458
def test_encoding(self):
1459
payload = self._im.get_payload()
1460
self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1463
def test_checkSetMinor(self):
1464
im = MIMEImage(self._imgdata, 'fish')
1465
self.assertEqual(im.get_content_type(), 'image/fish')
1467
def test_add_header(self):
1468
eq = self.assertEqual
1469
self._im.add_header('Content-Disposition', 'attachment',
1470
filename='dingusfish.gif')
1471
eq(self._im['content-disposition'],
1472
'attachment; filename="dingusfish.gif"')
1473
eq(self._im.get_params(header='content-disposition'),
1474
[('attachment', ''), ('filename', 'dingusfish.gif')])
1475
eq(self._im.get_param('filename', header='content-disposition'),
1478
eq(self._im.get_param('attachment', header='content-disposition'), '')
1479
self.assertIs(self._im.get_param('foo', failobj=missing,
1480
header='content-disposition'), missing)
1481
# Try some missing stuff
1482
self.assertIs(self._im.get_param('foobar', missing), missing)
1483
self.assertIs(self._im.get_param('attachment', missing,
1484
header='foobar'), missing)
1488
# Test the basic MIMEApplication class
1489
class TestMIMEApplication(unittest.TestCase):
    # Builds MIMEApplication parts from six non-ASCII bytes with the default
    # encoder and with each explicit encoder, and checks both the in-memory
    # payload and the payload recovered after a flatten/parse round trip.

    def _reparse(self, msg):
        # Serialise msg with BytesGenerator and parse the bytes back into a
        # fresh message object.
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        return email.message_from_bytes(wireform)

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._reparse(msg)
        # NOTE(review): this re-checks msg rather than msg2; kept as found.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._reparse(msg)
        # NOTE(review): this re-checks msg rather than msg2; kept as found.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # The trailing space is part of the data: it must be encoded as =20.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._reparse(msg)
        # NOTE(review): this re-checks msg rather than msg2; kept as found.
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._reparse(msg)
        # NOTE(review): this re-checks msg rather than msg2; kept as found.
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1568
# Test the basic MIMEText class
1569
class TestMIMEText(unittest.TestCase):
    # Checks the content type, charset parameter and payload that MIMEText
    # produces for ASCII and UTF-8 input.

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # Unique sentinel, so "the failobj came back" is checked by identity.
        missing = object()
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        # The charset object is compared directly against the charset name.
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1621
# Test complicated multipart/* messages
1622
class TestMultipart(TestEmailBase):
1624
with openfile('PyBanner048.gif', 'rb') as fp:
1626
container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1627
image = MIMEImage(data, name='dingusfish.gif')
1628
image.add_header('content-disposition', 'attachment',
1629
filename='dingusfish.gif')
1630
intro = MIMEText('''\
1633
This is the dingus fish.
1635
container.attach(intro)
1636
container.attach(image)
1637
container['From'] = 'Barry <barry@digicool.com>'
1638
container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1639
container['Subject'] = 'Here is your dingus fish'
1641
now = 987809702.54848599
1642
timetuple = time.localtime(now)
1643
if timetuple[-1] == 0:
1644
tzsecs = time.timezone
1646
tzsecs = time.altzone
1651
tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1652
container['Date'] = time.strftime(
1653
'%a, %d %b %Y %H:%M:%S',
1654
time.localtime(now)) + tzoffset
1655
self._msg = container
1659
def test_hierarchy(self):
1661
eq = self.assertEqual
1662
raises = self.assertRaises
1665
self.assertTrue(m.is_multipart())
1666
eq(m.get_content_type(), 'multipart/mixed')
1667
eq(len(m.get_payload()), 2)
1668
raises(IndexError, m.get_payload, 2)
1669
m0 = m.get_payload(0)
1670
m1 = m.get_payload(1)
1671
self.assertIs(m0, self._txt)
1672
self.assertIs(m1, self._im)
1673
eq(m.get_payload(), [m0, m1])
1674
self.assertFalse(m0.is_multipart())
1675
self.assertFalse(m1.is_multipart())
1677
def test_empty_multipart_idempotent(self):
1679
Content-Type: multipart/mixed; boundary="BOUNDARY"
1683
From: bperson@dom.ain
1691
msg = Parser().parsestr(text)
1692
self.ndiffAssertEqual(text, msg.as_string())
1694
def test_no_parts_in_a_multipart_with_none_epilogue(self):
1695
outer = MIMEBase('multipart', 'mixed')
1696
outer['Subject'] = 'A subject'
1697
outer['To'] = 'aperson@dom.ain'
1698
outer['From'] = 'bperson@dom.ain'
1699
outer.set_boundary('BOUNDARY')
1700
self.ndiffAssertEqual(outer.as_string(), '''\
1701
Content-Type: multipart/mixed; boundary="BOUNDARY"
1705
From: bperson@dom.ain
1711
def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1712
outer = MIMEBase('multipart', 'mixed')
1713
outer['Subject'] = 'A subject'
1714
outer['To'] = 'aperson@dom.ain'
1715
outer['From'] = 'bperson@dom.ain'
1718
outer.set_boundary('BOUNDARY')
1719
self.ndiffAssertEqual(outer.as_string(), '''\
1720
Content-Type: multipart/mixed; boundary="BOUNDARY"
1724
From: bperson@dom.ain
1732
def test_one_part_in_a_multipart(self):
1733
eq = self.ndiffAssertEqual
1734
outer = MIMEBase('multipart', 'mixed')
1735
outer['Subject'] = 'A subject'
1736
outer['To'] = 'aperson@dom.ain'
1737
outer['From'] = 'bperson@dom.ain'
1738
outer.set_boundary('BOUNDARY')
1739
msg = MIMEText('hello world')
1741
eq(outer.as_string(), '''\
1742
Content-Type: multipart/mixed; boundary="BOUNDARY"
1746
From: bperson@dom.ain
1749
Content-Type: text/plain; charset="us-ascii"
1751
Content-Transfer-Encoding: 7bit
1756
def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1757
eq = self.ndiffAssertEqual
1758
outer = MIMEBase('multipart', 'mixed')
1759
outer['Subject'] = 'A subject'
1760
outer['To'] = 'aperson@dom.ain'
1761
outer['From'] = 'bperson@dom.ain'
1763
msg = MIMEText('hello world')
1765
outer.set_boundary('BOUNDARY')
1766
eq(outer.as_string(), '''\
1767
Content-Type: multipart/mixed; boundary="BOUNDARY"
1771
From: bperson@dom.ain
1775
Content-Type: text/plain; charset="us-ascii"
1777
Content-Transfer-Encoding: 7bit
1783
def test_seq_parts_in_a_multipart_with_none_preamble(self):
1784
eq = self.ndiffAssertEqual
1785
outer = MIMEBase('multipart', 'mixed')
1786
outer['Subject'] = 'A subject'
1787
outer['To'] = 'aperson@dom.ain'
1788
outer['From'] = 'bperson@dom.ain'
1789
outer.preamble = None
1790
msg = MIMEText('hello world')
1792
outer.set_boundary('BOUNDARY')
1793
eq(outer.as_string(), '''\
1794
Content-Type: multipart/mixed; boundary="BOUNDARY"
1798
From: bperson@dom.ain
1801
Content-Type: text/plain; charset="us-ascii"
1803
Content-Transfer-Encoding: 7bit
1809
def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1810
eq = self.ndiffAssertEqual
1811
outer = MIMEBase('multipart', 'mixed')
1812
outer['Subject'] = 'A subject'
1813
outer['To'] = 'aperson@dom.ain'
1814
outer['From'] = 'bperson@dom.ain'
1815
outer.epilogue = None
1816
msg = MIMEText('hello world')
1818
outer.set_boundary('BOUNDARY')
1819
eq(outer.as_string(), '''\
1820
Content-Type: multipart/mixed; boundary="BOUNDARY"
1824
From: bperson@dom.ain
1827
Content-Type: text/plain; charset="us-ascii"
1829
Content-Transfer-Encoding: 7bit
1835
def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1836
eq = self.ndiffAssertEqual
1837
outer = MIMEBase('multipart', 'mixed')
1838
outer['Subject'] = 'A subject'
1839
outer['To'] = 'aperson@dom.ain'
1840
outer['From'] = 'bperson@dom.ain'
1842
msg = MIMEText('hello world')
1844
outer.set_boundary('BOUNDARY')
1845
eq(outer.as_string(), '''\
1846
Content-Type: multipart/mixed; boundary="BOUNDARY"
1850
From: bperson@dom.ain
1853
Content-Type: text/plain; charset="us-ascii"
1855
Content-Transfer-Encoding: 7bit
1862
def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1863
eq = self.ndiffAssertEqual
1864
outer = MIMEBase('multipart', 'mixed')
1865
outer['Subject'] = 'A subject'
1866
outer['To'] = 'aperson@dom.ain'
1867
outer['From'] = 'bperson@dom.ain'
1868
outer.epilogue = '\n'
1869
msg = MIMEText('hello world')
1871
outer.set_boundary('BOUNDARY')
1872
eq(outer.as_string(), '''\
1873
Content-Type: multipart/mixed; boundary="BOUNDARY"
1877
From: bperson@dom.ain
1880
Content-Type: text/plain; charset="us-ascii"
1882
Content-Transfer-Encoding: 7bit
1889
def test_message_external_body(self):
1890
eq = self.assertEqual
1891
msg = self._msgobj('msg_36.txt')
1892
eq(len(msg.get_payload()), 2)
1893
msg1 = msg.get_payload(1)
1894
eq(msg1.get_content_type(), 'multipart/alternative')
1895
eq(len(msg1.get_payload()), 2)
1896
for subpart in msg1.get_payload():
1897
eq(subpart.get_content_type(), 'message/external-body')
1898
eq(len(subpart.get_payload()), 1)
1899
subsubpart = subpart.get_payload(0)
1900
eq(subsubpart.get_content_type(), 'text/plain')
1902
def test_double_boundary(self):
1903
# msg_37.txt is a multipart that contains two dash-boundary's in a
1904
# row. Our interpretation of RFC 2046 calls for ignoring the second
1905
# and subsequent boundaries.
1906
msg = self._msgobj('msg_37.txt')
1907
self.assertEqual(len(msg.get_payload()), 3)
1909
def test_nested_inner_contains_outer_boundary(self):
1910
eq = self.ndiffAssertEqual
1911
# msg_38.txt has an inner part that contains outer boundaries. My
1912
# interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) says
1913
# these are illegal and should be interpreted as unterminated inner
1915
msg = self._msgobj('msg_38.txt')
1917
iterators._structure(msg, sfp)
1918
eq(sfp.getvalue(), """\
1921
multipart/alternative
1928
def test_nested_with_same_boundary(self):
1929
eq = self.ndiffAssertEqual
1930
# msg_39.txt is similarly evil in that it's got inner parts that use
1931
# the same boundary as outer parts. Again, I believe the way this is
1932
# parsed is closest to the spirit of RFC 2046
1933
msg = self._msgobj('msg_39.txt')
1935
iterators._structure(msg, sfp)
1936
eq(sfp.getvalue(), """\
1939
multipart/alternative
1940
application/octet-stream
1941
application/octet-stream
1945
def test_boundary_in_non_multipart(self):
1946
msg = self._msgobj('msg_40.txt')
1947
self.assertEqual(msg.as_string(), '''\
1949
Content-Type: text/html; boundary="--961284236552522269"
1951
----961284236552522269
1952
Content-Type: text/html;
1953
Content-Transfer-Encoding: 7Bit
1957
----961284236552522269--
1960
def test_boundary_with_leading_space(self):
1961
eq = self.assertEqual
1962
msg = email.message_from_string('''\
1964
Content-Type: multipart/mixed; boundary=" XXXX"
1967
Content-Type: text/plain
1971
Content-Type: text/plain
1975
self.assertTrue(msg.is_multipart())
1976
eq(msg.get_boundary(), ' XXXX')
1977
eq(len(msg.get_payload()), 2)
1979
def test_boundary_without_trailing_newline(self):
1980
m = Parser().parsestr("""\
1981
Content-Type: multipart/mixed; boundary="===============0012394164=="
1984
--===============0012394164==
1985
Content-Type: image/file1.jpg
1987
Content-Transfer-Encoding: base64
1990
--===============0012394164==--""")
1991
self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
1995
# Test some badly formatted messages
1996
class TestNonConformant(TestEmailBase):
1998
def test_parse_missing_minor_type(self):
    # msg_14.txt must be reported as text/plain, both as a whole type
    # and as its main/sub halves.
    parsed = self._msgobj('msg_14.txt')
    self.assertEqual(parsed.get_content_type(), 'text/plain')
    self.assertEqual(parsed.get_content_maintype(), 'text')
    self.assertEqual(parsed.get_content_subtype(), 'plain')
2005
# test_defect_handling
2006
def test_same_boundary_inner_outer(self):
    # XXX We can probably eventually do better
    first_part = self._msgobj('msg_15.txt').get_payload(0)
    self.assertTrue(hasattr(first_part, 'defects'))
    found = first_part.defects
    self.assertEqual(len(found), 1)
    self.assertIsInstance(found[0], errors.StartBoundaryNotFoundDefect)
2015
# test_defect_handling
2016
def test_multipart_no_boundary(self):
    # msg_25.txt: the payload is left as a string and exactly two
    # defects are recorded, in the order checked below.
    parsed = self._msgobj('msg_25.txt')
    self.assertIsInstance(parsed.get_payload(), str)
    found = parsed.defects
    self.assertEqual(len(found), 2)
    self.assertIsInstance(found[0], errors.NoBoundaryInMultipartDefect)
    self.assertIsInstance(found[1],
                          errors.MultipartInvariantViolationDefect)
2025
multipart_msg = textwrap.dedent("""\
2026
Date: Wed, 14 Nov 2007 12:56:23 GMT
2027
From: foo@bar.invalid
2029
Subject: Content-Transfer-Encoding: base64 and multipart
2031
Content-Type: multipart/mixed;
2032
boundary="===============3344438784458119861=="{}
2034
--===============3344438784458119861==
2035
Content-Type: text/plain
2039
--===============3344438784458119861==
2040
Content-Type: application/octet-stream
2041
Content-Transfer-Encoding: base64
2045
--===============3344438784458119861==--
2048
# test_defect_handling
2049
def test_multipart_invalid_cte(self):
    # Slot a base64 Content-Transfer-Encoding header into the multipart
    # template; exactly one defect of the expected class must result.
    extra_header = "\nContent-Transfer-Encoding: base64"
    parsed = self._str_msg(self.multipart_msg.format(extra_header))
    found = parsed.defects
    self.assertEqual(len(found), 1)
    self.assertIsInstance(
        found[0], errors.InvalidMultipartContentTransferEncodingDefect)
2056
# test_defect_handling
2057
def test_multipart_no_cte_no_defect(self):
    # The template's placeholder is filled with an empty string, so no
    # extra header is added; no defect may be recorded.
    source = self.multipart_msg.format('')
    parsed = self._str_msg(source)
    self.assertEqual(len(parsed.defects), 0)
2061
# test_defect_handling
2062
def test_multipart_valid_cte_no_defect(self):
    # None of these Content-Transfer-Encoding values may record a defect
    # ('BINary' is deliberately mixed case).
    for cte in ('7bit', '8bit', 'BINary'):
        extra_header = "\nContent-Transfer-Encoding: {}".format(cte)
        parsed = self._str_msg(self.multipart_msg.format(extra_header))
        self.assertEqual(len(parsed.defects), 0)
2069
# test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
2070
def test_invalid_content_type(self):
2071
eq = self.assertEqual
2072
neq = self.ndiffAssertEqual
2074
# RFC 2045, $5.2 says invalid yields text/plain
2075
msg['Content-Type'] = 'text'
2076
eq(msg.get_content_maintype(), 'text')
2077
eq(msg.get_content_subtype(), 'plain')
2078
eq(msg.get_content_type(), 'text/plain')
2079
# Clear the old value and try something /really/ invalid
2080
del msg['content-type']
2081
msg['Content-Type'] = 'foo'
2082
eq(msg.get_content_maintype(), 'text')
2083
eq(msg.get_content_subtype(), 'plain')
2084
eq(msg.get_content_type(), 'text/plain')
2085
# Still, make sure that the message is idempotently generated
2089
neq(s.getvalue(), 'Content-Type: foo\n\n')
2091
def test_no_start_boundary(self):
2092
eq = self.ndiffAssertEqual
2093
msg = self._msgobj('msg_31.txt')
2094
eq(msg.get_payload(), """\
2096
Content-Type: text/plain
2101
Content-Type: text/plain
2108
def test_no_separating_blank_line(self):
2109
eq = self.ndiffAssertEqual
2110
msg = self._msgobj('msg_35.txt')
2111
eq(msg.as_string(), """\
2112
From: aperson@dom.ain
2114
Subject: here's something interesting
2116
counter to RFC 2822, there's no separating newline here
2119
# test_defect_handling
2120
def test_lying_multipart(self):
    # msg_41.txt must carry exactly two defects, in the order checked
    # below.
    parsed = self._msgobj('msg_41.txt')
    self.assertTrue(hasattr(parsed, 'defects'))
    found = parsed.defects
    self.assertEqual(len(found), 2)
    self.assertIsInstance(found[0], errors.NoBoundaryInMultipartDefect)
    self.assertIsInstance(found[1],
                          errors.MultipartInvariantViolationDefect)
2129
# test_defect_handling
2130
def test_missing_start_boundary(self):
2131
outer = self._msgobj('msg_42.txt')
2132
# The message structure is:
2137
# multipart/mixed [*]
2139
# [*] This message is missing its start boundary
2140
bad = outer.get_payload(1).get_payload(0)
2141
self.assertEqual(len(bad.defects), 1)
2142
self.assertIsInstance(bad.defects[0],
2143
errors.StartBoundaryNotFoundDefect)
2145
# test_defect_handling
2146
def test_first_line_is_continuation_header(self):
    # The source opens with an indented line, i.e. something shaped like
    # a continuation line with no header in front of it.
    source = ' Line 1\nSubject: test\n\nbody'
    parsed = email.message_from_string(source)
    self.assertEqual(parsed.keys(), ['Subject'])
    self.assertEqual(parsed.get_payload(), 'body')
    self.assertEqual(len(parsed.defects), 1)
    self.assertDefectsEqual(parsed.defects,
                            [errors.FirstHeaderLineIsContinuationDefect])
    # The defect keeps the offending line, newline included.
    self.assertEqual(parsed.defects[0].line, ' Line 1\n')
2157
# test_defect_handling
2158
def test_missing_header_body_separator(self):
    # Heuristic under test: a line with no leading whitespace and no ':'
    # does not look like a header, so the parser assumes the blank
    # header/body separator was omitted, stops reading headers and
    # treats everything from that line on as the body.
    source = 'Subject: test\nnot a header\nTo: abc\n\nb\n'
    parsed = self._str_msg(source)
    self.assertEqual(parsed.keys(), ['Subject'])
    self.assertEqual(parsed.get_payload(),
                     'not a header\nTo: abc\n\nb\n')
    self.assertDefectsEqual(parsed.defects,
                            [errors.MissingHeaderBodySeparatorDefect])
2170
# Test RFC 2047 header encoding and decoding
2171
class TestRFC2047(TestEmailBase):
2172
def test_rfc2047_multiline(self):
2173
eq = self.assertEqual
2174
s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
2175
foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
2176
dh = decode_header(s)
2179
(b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
2180
(b' baz foo bar ', None),
2181
(b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
2182
header = make_header(dh)
2184
'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
2185
self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
2186
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
2187
=?mac-iceland?q?=9Arg=8Cs?=""")
2189
def test_whitespace_keeper_unicode(self):
2190
eq = self.assertEqual
2191
s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
2192
dh = decode_header(s)
2193
eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
2194
(b' Pirard <pirard@dom.ain>', None)])
2195
header = str(make_header(dh))
2196
eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')
2198
def test_whitespace_keeper_unicode_2(self):
2199
eq = self.assertEqual
2200
s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
2201
dh = decode_header(s)
2202
eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
2203
(b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
2204
hu = str(make_header(dh))
2205
eq(hu, 'The quick brown fox jumped over the lazy dog')
2207
def test_rfc2047_missing_whitespace(self):
2208
s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
2209
dh = decode_header(s)
2210
self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
2211
(b'rg', None), (b'\xe5', 'iso-8859-1'),
2214
def test_rfc2047_with_whitespace(self):
2215
s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
2216
dh = decode_header(s)
2217
self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
2218
(b' rg ', None), (b'\xe5', 'iso-8859-1'),
2221
def test_rfc2047_B_bad_padding(self):
2222
s = '=?iso-8859-1?B?%s?='
2223
data = [ # only test complete bytes
2224
('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
2225
('dmk=', b'vi'), ('dmk', b'vi')
2228
dh = decode_header(s % q)
2229
self.assertEqual(dh, [(a, 'iso-8859-1')])
2231
def test_rfc2047_Q_invalid_digits(self):
2233
s = '=?iso-8659-1?Q?andr=e9=zz?='
2234
self.assertEqual(decode_header(s),
2235
[(b'andr\xe9=zz', 'iso-8659-1')])
2237
def test_rfc2047_rfc2047_1(self):
2238
# 1st testcase at end of rfc2047
2239
s = '(=?ISO-8859-1?Q?a?=)'
2240
self.assertEqual(decode_header(s),
2241
[(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])
2243
def test_rfc2047_rfc2047_2(self):
2244
# 2nd testcase at end of rfc2047
2245
s = '(=?ISO-8859-1?Q?a?= b)'
2246
self.assertEqual(decode_header(s),
2247
[(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])
2249
def test_rfc2047_rfc2047_3(self):
2250
# 3rd testcase at end of rfc2047
2251
s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
2252
self.assertEqual(decode_header(s),
2253
[(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2255
def test_rfc2047_rfc2047_4(self):
2256
# 4th testcase at end of rfc2047
2257
s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
2258
self.assertEqual(decode_header(s),
2259
[(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2261
def test_rfc2047_rfc2047_5a(self):
2262
# 5th testcase at end of rfc2047 newline is \r\n
2263
s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
2264
self.assertEqual(decode_header(s),
2265
[(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2267
def test_rfc2047_rfc2047_5b(self):
2268
# 5th testcase at end of rfc2047 newline is \n
2269
s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
2270
self.assertEqual(decode_header(s),
2271
[(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2273
def test_rfc2047_rfc2047_6(self):
2274
# 6th testcase at end of rfc2047
2275
s = '(=?ISO-8859-1?Q?a_b?=)'
2276
self.assertEqual(decode_header(s),
2277
[(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])
2279
def test_rfc2047_rfc2047_7(self):
2280
# 7th testcase at end of rfc2047
2281
s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
2282
self.assertEqual(decode_header(s),
2283
[(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
2285
self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
2286
self.assertEqual(str(make_header(decode_header(s))), '(a b)')
2288
def test_multiline_header(self):
2289
s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
2290
self.assertEqual(decode_header(s),
2291
[(b'"M\xfcller T"', 'windows-1252'),
2292
(b'<T.Mueller@xxx.com>', None)])
2293
self.assertEqual(make_header(decode_header(s)).encode(),
2294
''.join(s.splitlines()))
2295
self.assertEqual(str(make_header(decode_header(s))),
2296
'"MĆ¼ller T" <T.Mueller@xxx.com>')
2299
# Test the MIMEMessage class
2300
class TestMIMEMessage(TestEmailBase):
2302
with openfile('msg_11.txt') as fp:
2303
self._text = fp.read()
2305
def test_type_error(self):
2306
self.assertRaises(TypeError, MIMEMessage, 'a plain string')
2308
def test_valid_argument(self):
2309
eq = self.assertEqual
2310
subject = 'A sub-message'
2312
m['Subject'] = subject
2314
eq(r.get_content_type(), 'message/rfc822')
2315
payload = r.get_payload()
2316
self.assertIsInstance(payload, list)
2318
subpart = payload[0]
2319
self.assertIs(subpart, m)
2320
eq(subpart['subject'], subject)
2322
def test_bad_multipart(self):
2324
msg1['Subject'] = 'subpart 1'
2326
msg2['Subject'] = 'subpart 2'
2327
r = MIMEMessage(msg1)
2328
self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
2330
def test_generate(self):
2331
# First craft the message to be encapsulated
2333
m['Subject'] = 'An enclosed message'
2334
m.set_payload('Here is the body of the message.\n')
2336
r['Subject'] = 'The enclosing message'
2340
self.assertEqual(s.getvalue(), """\
2341
Content-Type: message/rfc822
2343
Subject: The enclosing message
2345
Subject: An enclosed message
2347
Here is the body of the message.
2350
def test_parse_message_rfc822(self):
2351
eq = self.assertEqual
2352
msg = self._msgobj('msg_11.txt')
2353
eq(msg.get_content_type(), 'message/rfc822')
2354
payload = msg.get_payload()
2355
self.assertIsInstance(payload, list)
2358
self.assertIsInstance(submsg, Message)
2359
eq(submsg['subject'], 'An enclosed message')
2360
eq(submsg.get_payload(), 'Here is the body of the message.\n')
2363
eq = self.assertEqual
2364
# msg 16 is a Delivery Status Notification, see RFC 1894
2365
msg = self._msgobj('msg_16.txt')
2366
eq(msg.get_content_type(), 'multipart/report')
2367
self.assertTrue(msg.is_multipart())
2368
eq(len(msg.get_payload()), 3)
2369
# Subpart 1 is a text/plain, human readable section
2370
subpart = msg.get_payload(0)
2371
eq(subpart.get_content_type(), 'text/plain')
2372
eq(subpart.get_payload(), """\
2373
This report relates to a message you sent with the following header fields:
2375
Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
2376
Date: Sun, 23 Sep 2001 20:10:55 -0700
2377
From: "Ian T. Henry" <henryi@oxy.edu>
2378
To: SoCal Raves <scr@socal-raves.org>
2379
Subject: [scr] yeah for Ians!!
2381
Your message cannot be delivered to the following recipients:
2383
Recipient address: jangel1@cougar.noc.ucla.edu
2384
Reason: recipient reached disk quota
2387
# Subpart 2 contains the machine parsable DSN information. It
2388
# consists of two blocks of headers, represented by two nested Message
2390
subpart = msg.get_payload(1)
2391
eq(subpart.get_content_type(), 'message/delivery-status')
2392
eq(len(subpart.get_payload()), 2)
2393
# message/delivery-status should treat each block as a bunch of
2394
# headers, i.e. a bunch of Message objects.
2395
dsn1 = subpart.get_payload(0)
2396
self.assertIsInstance(dsn1, Message)
2397
eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
2398
eq(dsn1.get_param('dns', header='reporting-mta'), '')
2399
# Try a missing one <wink>
2400
eq(dsn1.get_param('nsd', header='reporting-mta'), None)
2401
dsn2 = subpart.get_payload(1)
2402
self.assertIsInstance(dsn2, Message)
2403
eq(dsn2['action'], 'failed')
2404
eq(dsn2.get_params(header='original-recipient'),
2405
[('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
2406
eq(dsn2.get_param('rfc822', header='final-recipient'), '')
2407
# Subpart 3 is the original message
2408
subpart = msg.get_payload(2)
2409
eq(subpart.get_content_type(), 'message/rfc822')
2410
payload = subpart.get_payload()
2411
self.assertIsInstance(payload, list)
2413
subsubpart = payload[0]
2414
self.assertIsInstance(subsubpart, Message)
2415
eq(subsubpart.get_content_type(), 'text/plain')
2416
eq(subsubpart['message-id'],
2417
'<002001c144a6$8752e060$56104586@oxy.edu>')
2419
def test_epilogue(self):
2420
eq = self.ndiffAssertEqual
2421
with openfile('msg_21.txt') as fp:
2424
msg['From'] = 'aperson@dom.ain'
2425
msg['To'] = 'bperson@dom.ain'
2426
msg['Subject'] = 'Test'
2427
msg.preamble = 'MIME message'
2428
msg.epilogue = 'End of MIME message\n'
2429
msg1 = MIMEText('One')
2430
msg2 = MIMEText('Two')
2431
msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2437
eq(sfp.getvalue(), text)
2439
def test_no_nl_preamble(self):
2440
eq = self.ndiffAssertEqual
2442
msg['From'] = 'aperson@dom.ain'
2443
msg['To'] = 'bperson@dom.ain'
2444
msg['Subject'] = 'Test'
2445
msg.preamble = 'MIME message'
2447
msg1 = MIMEText('One')
2448
msg2 = MIMEText('Two')
2449
msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2452
eq(msg.as_string(), """\
2453
From: aperson@dom.ain
2456
Content-Type: multipart/mixed; boundary="BOUNDARY"
2460
Content-Type: text/plain; charset="us-ascii"
2462
Content-Transfer-Encoding: 7bit
2466
Content-Type: text/plain; charset="us-ascii"
2468
Content-Transfer-Encoding: 7bit
2474
def test_default_type(self):
    with openfile('msg_30.txt') as fp:
        outer = email.message_from_file(fp)
    # Both top-level parts report message/rfc822 as default and as
    # actual content type ...
    wrappers = []
    for index in (0, 1):
        wrapper = outer.get_payload(index)
        self.assertEqual(wrapper.get_default_type(), 'message/rfc822')
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        wrappers.append(wrapper)
    # ... and the first message inside each of them reports text/plain.
    for wrapper in wrappers:
        enclosed = wrapper.get_payload(0)
        self.assertEqual(enclosed.get_default_type(), 'text/plain')
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
2491
def test_default_type_with_explicit_container_type(self):
    with openfile('msg_28.txt') as fp:
        outer = email.message_from_file(fp)
    # Both top-level parts report message/rfc822 as default and as
    # actual content type ...
    wrappers = []
    for index in (0, 1):
        wrapper = outer.get_payload(index)
        self.assertEqual(wrapper.get_default_type(), 'message/rfc822')
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        wrappers.append(wrapper)
    # ... and the first message inside each of them reports text/plain.
    for wrapper in wrappers:
        enclosed = wrapper.get_payload(0)
        self.assertEqual(enclosed.get_default_type(), 'text/plain')
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
2508
def test_default_type_non_parsed(self):
2509
eq = self.assertEqual
2510
neq = self.ndiffAssertEqual
2512
container = MIMEMultipart('digest', 'BOUNDARY')
2513
container.epilogue = ''
2515
subpart1a = MIMEText('message 1\n')
2516
subpart2a = MIMEText('message 2\n')
2517
subpart1 = MIMEMessage(subpart1a)
2518
subpart2 = MIMEMessage(subpart2a)
2519
container.attach(subpart1)
2520
container.attach(subpart2)
2521
eq(subpart1.get_content_type(), 'message/rfc822')
2522
eq(subpart1.get_default_type(), 'message/rfc822')
2523
eq(subpart2.get_content_type(), 'message/rfc822')
2524
eq(subpart2.get_default_type(), 'message/rfc822')
2525
neq(container.as_string(0), '''\
2526
Content-Type: multipart/digest; boundary="BOUNDARY"
2530
Content-Type: message/rfc822
2533
Content-Type: text/plain; charset="us-ascii"
2535
Content-Transfer-Encoding: 7bit
2540
Content-Type: message/rfc822
2543
Content-Type: text/plain; charset="us-ascii"
2545
Content-Transfer-Encoding: 7bit
2551
del subpart1['content-type']
2552
del subpart1['mime-version']
2553
del subpart2['content-type']
2554
del subpart2['mime-version']
2555
eq(subpart1.get_content_type(), 'message/rfc822')
2556
eq(subpart1.get_default_type(), 'message/rfc822')
2557
eq(subpart2.get_content_type(), 'message/rfc822')
2558
eq(subpart2.get_default_type(), 'message/rfc822')
2559
neq(container.as_string(0), '''\
2560
Content-Type: multipart/digest; boundary="BOUNDARY"
2565
Content-Type: text/plain; charset="us-ascii"
2567
Content-Transfer-Encoding: 7bit
2573
Content-Type: text/plain; charset="us-ascii"
2575
Content-Transfer-Encoding: 7bit
2582
def test_mime_attachments_in_constructor(self):
2583
eq = self.assertEqual
2584
text1 = MIMEText('')
2585
text2 = MIMEText('')
2586
msg = MIMEMultipart(_subparts=(text1, text2))
2587
eq(len(msg.get_payload()), 2)
2588
eq(msg.get_payload(0), text1)
2589
eq(msg.get_payload(1), text2)
2591
def test_default_multipart_constructor(self):
2592
msg = MIMEMultipart()
2593
self.assertTrue(msg.is_multipart())
2596
# A general test of parser->model->generator idempotency. IOW, read a message
2597
# in, parse it into a message object tree, then without touching the tree,
2598
# regenerate the plain text. The original text and the transformed text
2599
# should be identical. Note: that we ignore the Unix-From since that may
2600
# contain a changed date.
2601
class TestIdempotent(TestEmailBase):
2605
def _msgobj(self, filename):
2606
with openfile(filename) as fp:
2608
msg = email.message_from_string(data)
2611
def _idempotent(self, msg, text, unixfrom=False):
2612
eq = self.ndiffAssertEqual
2614
g = Generator(s, maxheaderlen=0)
2615
g.flatten(msg, unixfrom=unixfrom)
2616
eq(text, s.getvalue())
2618
def test_parse_text_message(self):
    # msg_01.txt: text/plain with a us-ascii charset parameter and
    # neither preamble nor epilogue; it must also regenerate unchanged.
    parsed, original_text = self._msgobj('msg_01.txt')
    self.assertEqual(parsed.get_content_type(), 'text/plain')
    self.assertEqual(parsed.get_content_maintype(), 'text')
    self.assertEqual(parsed.get_content_subtype(), 'plain')
    self.assertEqual(parsed.get_params()[1], ('charset', 'us-ascii'))
    self.assertEqual(parsed.get_param('charset'), 'us-ascii')
    self.assertEqual(parsed.preamble, None)
    self.assertEqual(parsed.epilogue, None)
    self._idempotent(parsed, original_text)
2630
def test_parse_untyped_message(self):
    # msg_03.txt: reported as text/plain while get_params() and the
    # charset lookup both give None; it must also regenerate unchanged.
    parsed, original_text = self._msgobj('msg_03.txt')
    self.assertEqual(parsed.get_content_type(), 'text/plain')
    self.assertEqual(parsed.get_params(), None)
    self.assertEqual(parsed.get_param('charset'), None)
    self._idempotent(parsed, original_text)
2638
def test_simple_multipart(self):
    # Regenerating the parsed msg_04.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_04.txt')
    self._idempotent(parsed, original_text)
2642
def test_MIME_digest(self):
    # Regenerating the parsed msg_02.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_02.txt')
    self._idempotent(parsed, original_text)
2646
def test_long_header(self):
    # Regenerating the parsed msg_27.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_27.txt')
    self._idempotent(parsed, original_text)
2650
def test_MIME_digest_with_part_headers(self):
    # Regenerating the parsed msg_28.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_28.txt')
    self._idempotent(parsed, original_text)
2654
def test_mixed_with_image(self):
    # Regenerating the parsed msg_06.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_06.txt')
    self._idempotent(parsed, original_text)
2658
def test_multipart_report(self):
    # Regenerating the parsed msg_05.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_05.txt')
    self._idempotent(parsed, original_text)
2663
msg, text = self._msgobj('msg_16.txt')
2664
self._idempotent(msg, text)
2666
def test_preamble_epilogue(self):
    # Regenerating the parsed msg_21.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_21.txt')
    self._idempotent(parsed, original_text)
2670
def test_multipart_one_part(self):
    # Regenerating the parsed msg_23.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_23.txt')
    self._idempotent(parsed, original_text)
2674
def test_multipart_no_parts(self):
    # Regenerating the parsed msg_24.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_24.txt')
    self._idempotent(parsed, original_text)
2678
def test_no_start_boundary(self):
    # Regenerating the parsed msg_31.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_31.txt')
    self._idempotent(parsed, original_text)
2682
def test_rfc2231_charset(self):
    # Regenerating the parsed msg_32.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_32.txt')
    self._idempotent(parsed, original_text)
2686
def test_more_rfc2231_parameters(self):
    # Regenerating the parsed msg_33.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_33.txt')
    self._idempotent(parsed, original_text)
2690
def test_text_plain_in_a_multipart_digest(self):
    # Regenerating the parsed msg_34.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_34.txt')
    self._idempotent(parsed, original_text)
2694
def test_nested_multipart_mixeds(self):
    # Regenerating the parsed msg_12a.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_12a.txt')
    self._idempotent(parsed, original_text)
2698
def test_message_external_body_idempotent(self):
    # Regenerating the parsed msg_36.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_36.txt')
    self._idempotent(parsed, original_text)
2702
def test_message_delivery_status(self):
    # Regenerating the parsed msg_43.txt must reproduce its text; this
    # is the one round trip that asks the generator for the Unix-From
    # line as well.
    parsed, original_text = self._msgobj('msg_43.txt')
    self._idempotent(parsed, original_text, unixfrom=True)
2706
def test_message_signed_idempotent(self):
    # Regenerating the parsed msg_45.txt must reproduce its text.
    parsed, original_text = self._msgobj('msg_45.txt')
    self._idempotent(parsed, original_text)
2710
def test_content_type(self):
2711
eq = self.assertEqual
2712
# Get a message object and reset the seek pointer for other tests
2713
msg, text = self._msgobj('msg_05.txt')
2714
eq(msg.get_content_type(), 'multipart/report')
2715
# Test the Content-Type: parameters
2717
for pk, pv in msg.get_params():
2719
eq(params['report-type'], 'delivery-status')
2720
eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
2721
eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
2722
eq(msg.epilogue, self.linesep)
2723
eq(len(msg.get_payload()), 3)
2724
# Make sure the subparts are what we expect
2725
msg1 = msg.get_payload(0)
2726
eq(msg1.get_content_type(), 'text/plain')
2727
eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
2728
msg2 = msg.get_payload(1)
2729
eq(msg2.get_content_type(), 'text/plain')
2730
eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
2731
msg3 = msg.get_payload(2)
2732
eq(msg3.get_content_type(), 'message/rfc822')
2733
self.assertIsInstance(msg3, Message)
2734
payload = msg3.get_payload()
2735
self.assertIsInstance(payload, list)
2738
self.assertIsInstance(msg4, Message)
2739
eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)
2741
def test_parser(self):
2742
eq = self.assertEqual
2743
msg, text = self._msgobj('msg_06.txt')
2744
# Check some of the outer headers
2745
eq(msg.get_content_type(), 'message/rfc822')
2746
# Make sure the payload is a list of exactly one sub-Message, and that
2747
# that submessage has a type of text/plain
2748
payload = msg.get_payload()
2749
self.assertIsInstance(payload, list)
2752
self.assertIsInstance(msg1, Message)
2753
eq(msg1.get_content_type(), 'text/plain')
2754
self.assertIsInstance(msg1.get_payload(), str)
2755
eq(msg1.get_payload(), self.linesep)
2759
# Test various other bits of the package's functionality
2760
class TestMiscellaneous(TestEmailBase):
2761
def test_message_from_string(self):
2762
with openfile('msg_01.txt') as fp:
2764
msg = email.message_from_string(text)
2766
# Don't wrap/continue long headers since we're trying to test
2768
g = Generator(s, maxheaderlen=0)
2770
self.assertEqual(text, s.getvalue())
2772
def test_message_from_file(self):
2773
with openfile('msg_01.txt') as fp:
2776
msg = email.message_from_file(fp)
2778
# Don't wrap/continue long headers since we're trying to test
2780
g = Generator(s, maxheaderlen=0)
2782
self.assertEqual(text, s.getvalue())
2784
def test_message_from_string_with_class(self):
2785
with openfile('msg_01.txt') as fp:
2789
class MyMessage(Message):
2792
msg = email.message_from_string(text, MyMessage)
2793
self.assertIsInstance(msg, MyMessage)
2794
# Try something more complicated
2795
with openfile('msg_02.txt') as fp:
2797
msg = email.message_from_string(text, MyMessage)
2798
for subpart in msg.walk():
2799
self.assertIsInstance(subpart, MyMessage)
2801
def test_message_from_file_with_class(self):
2803
class MyMessage(Message):
2806
with openfile('msg_01.txt') as fp:
2807
msg = email.message_from_file(fp, MyMessage)
2808
self.assertIsInstance(msg, MyMessage)
2809
# Try something more complicated
2810
with openfile('msg_02.txt') as fp:
2811
msg = email.message_from_file(fp, MyMessage)
2812
for subpart in msg.walk():
2813
self.assertIsInstance(subpart, MyMessage)
2815
def test_custom_message_does_not_require_arguments(self):
2816
class MyMessage(Message):
2819
msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2820
self.assertIsInstance(msg, MyMessage)
2822
def test__all__(self):
2823
module = __import__('email')
2824
self.assertEqual(sorted(module.__all__), [
2825
'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2826
'generator', 'header', 'iterators', 'message',
2827
'message_from_binary_file', 'message_from_bytes',
2828
'message_from_file', 'message_from_string', 'mime', 'parser',
2829
'quoprimime', 'utils',
2832
def test_formatdate(self):
2834
self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2835
time.gmtime(now)[:6])
2837
def test_formatdate_localtime(self):
2840
utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2841
time.localtime(now)[:6])
2843
def test_formatdate_usegmt(self):
2846
utils.formatdate(now, localtime=False),
2847
time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2849
utils.formatdate(now, localtime=False, usegmt=True),
2850
time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2852
# parsedate and parsedate_tz will become deprecated interfaces someday
2853
def test_parsedate_returns_None_for_invalid_strings(self):
2854
self.assertIsNone(utils.parsedate(''))
2855
self.assertIsNone(utils.parsedate_tz(''))
2856
self.assertIsNone(utils.parsedate('0'))
2857
self.assertIsNone(utils.parsedate_tz('0'))
2858
self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2859
self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2860
# Not a part of the spec but, but this has historically worked:
2861
self.assertIsNone(utils.parsedate(None))
2862
self.assertIsNone(utils.parsedate_tz(None))
2864
def test_parsedate_compact(self):
2865
# The FWS after the comma is optional
2866
self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2867
utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2869
def test_parsedate_no_dayofweek(self):
2870
eq = self.assertEqual
2871
eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2872
(2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2874
def test_parsedate_compact_no_dayofweek(self):
2875
eq = self.assertEqual
2876
eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2877
(2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2879
def test_parsedate_no_space_before_positive_offset(self):
2880
self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2881
(2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2883
def test_parsedate_no_space_before_negative_offset(self):
2884
# Issue 1155362: we already handled '+' for this case.
2885
self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2886
(2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2889
def test_parsedate_accepts_time_with_dots(self):
2890
eq = self.assertEqual
2891
eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2892
(2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2893
eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2894
(2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2896
def test_parsedate_acceptable_to_time_functions(self):
2897
eq = self.assertEqual
2898
timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2899
t = int(time.mktime(timetup))
2900
eq(time.localtime(t)[:6], timetup[:6])
2901
eq(int(time.strftime('%Y', timetup)), 2003)
2902
timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2903
t = int(time.mktime(timetup[:9]))
2904
eq(time.localtime(t)[:6], timetup[:6])
2905
eq(int(time.strftime('%Y', timetup[:9])), 2003)
2907
def test_mktime_tz(self):
2908
self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2910
self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2911
-1, -1, -1, 1234)), -1234)
2913
def test_parsedate_y2k(self):
2914
"""Test for parsing a date with a two-digit year.
2916
Parsing a date with a two-digit year should return the correct
2917
four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2918
obsoletes RFC822) requires four-digit years.
2921
self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2922
utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2923
self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2924
utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2926
def test_parseaddr_empty(self):
2927
self.assertEqual(utils.parseaddr('<>'), ('', ''))
2928
self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2930
def test_noquote_dump(self):
2932
utils.formataddr(('A Silly Person', 'person@dom.ain')),
2933
'A Silly Person <person@dom.ain>')
2935
def test_escape_dump(self):
2937
utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2938
r'"A (Very) Silly Person" <person@dom.ain>')
2940
utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2941
('A (Very) Silly Person', 'person@dom.ain'))
2942
a = r'A \(Special\) Person'
2943
b = 'person@dom.ain'
2944
self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2946
def test_escape_backslashes(self):
2948
utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2949
r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2950
a = r'Arthur \Backslash\ Foobar'
2951
b = 'person@dom.ain'
2952
self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2954
def test_quotes_unicode_names(self):
2955
# issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2956
name = "H\u00e4ns W\u00fcrst"
2957
addr = 'person@dom.ain'
2958
utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2959
latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2960
self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2961
self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2964
def test_accepts_any_charset_like_object(self):
2965
# issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2966
name = "H\u00e4ns W\u00fcrst"
2967
addr = 'person@dom.ain'
2968
utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2971
def header_encode(self, string):
2973
mock = CharsetMock()
2974
mock_expected = "%s <%s>" % (foobar, addr)
2975
self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2976
self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2979
def test_invalid_charset_like_object_raises_error(self):
2980
# issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2981
name = "H\u00e4ns W\u00fcrst"
2982
addr = 'person@dom.ain'
2983
# A object without a header_encode method:
2984
bad_charset = object()
2985
self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2988
def test_unicode_address_raises_error(self):
2989
# issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2990
addr = 'pers\u00f6n@dom.in'
2991
self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2992
self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2994
def test_name_with_dot(self):
2995
x = 'John X. Doe <jxd@example.com>'
2996
y = '"John X. Doe" <jxd@example.com>'
2997
a, b = ('John X. Doe', 'jxd@example.com')
2998
self.assertEqual(utils.parseaddr(x), (a, b))
2999
self.assertEqual(utils.parseaddr(y), (a, b))
3000
# formataddr() quotes the name if there's a dot in it
3001
self.assertEqual(utils.formataddr((a, b)), y)
3003
def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
3004
# issue 10005. Note that in the third test the second pair of
3005
# backslashes is not actually a quoted pair because it is not inside a
3006
# comment or quoted string: the address being parsed has a quoted
3007
# string containing a quoted backslash, followed by 'example' and two
3008
# backslashes, followed by another quoted string containing a space and
3009
# the word 'example'. parseaddr copies those two backslashes
3010
# literally. Per rfc5322 this is not technically correct since a \ may
3011
# not appear in an address outside of a quoted string. It is probably
3012
# a sensible Postel interpretation, though.
3013
eq = self.assertEqual
3014
eq(utils.parseaddr('""example" example"@example.com'),
3015
('', '""example" example"@example.com'))
3016
eq(utils.parseaddr('"\\"example\\" example"@example.com'),
3017
('', '"\\"example\\" example"@example.com'))
3018
eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
3019
('', '"\\\\"example\\\\" example"@example.com'))
3021
def test_parseaddr_preserves_spaces_in_local_part(self):
3022
# issue 9286. A normal RFC5322 local part should not contain any
3023
# folding white space, but legacy local parts can (they are a sequence
3024
# of atoms, not dotatoms). On the other hand we strip whitespace from
3025
# before the @ and around dots, on the assumption that the whitespace
3026
# around the punctuation is a mistake in what would otherwise be
3027
# an RFC5322 local part. Leading whitespace is, usual, stripped as well.
3028
self.assertEqual(('', "merwok wok@xample.com"),
3029
utils.parseaddr("merwok wok@xample.com"))
3030
self.assertEqual(('', "merwok wok@xample.com"),
3031
utils.parseaddr("merwok wok@xample.com"))
3032
self.assertEqual(('', "merwok wok@xample.com"),
3033
utils.parseaddr(" merwok wok @xample.com"))
3034
self.assertEqual(('', 'merwok"wok" wok@xample.com'),
3035
utils.parseaddr('merwok"wok" wok@xample.com'))
3036
self.assertEqual(('', 'merwok.wok.wok@xample.com'),
3037
utils.parseaddr('merwok. wok . wok@xample.com'))
3039
def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3040
addr = ("'foo@example.com' (foo@example.com)",
3042
addrstr = ('"\'foo@example.com\' '
3043
'(foo@example.com)" <foo@example.com>')
3044
self.assertEqual(utils.parseaddr(addrstr), addr)
3045
self.assertEqual(utils.formataddr(addr), addrstr)
3048
def test_multiline_from_comment(self):
3051
\tBar <foo@example.com>"""
3052
self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3054
def test_quote_dump(self):
3056
utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3057
r'"A Silly; Person" <person@dom.ain>')
3059
def test_charset_richcomparisons(self):
3060
eq = self.assertEqual
3061
ne = self.assertNotEqual
3064
eq(cset1, 'us-ascii')
3065
eq(cset1, 'US-ASCII')
3066
eq(cset1, 'Us-AsCiI')
3067
eq('us-ascii', cset1)
3068
eq('US-ASCII', cset1)
3069
eq('Us-AsCiI', cset1)
3070
ne(cset1, 'usascii')
3071
ne(cset1, 'USASCII')
3072
ne(cset1, 'UsAsCiI')
3073
ne('usascii', cset1)
3074
ne('USASCII', cset1)
3075
ne('UsAsCiI', cset1)
3079
def test_getaddresses(self):
3080
eq = self.assertEqual
3081
eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3082
'Bud Person <bperson@dom.ain>']),
3083
[('Al Person', 'aperson@dom.ain'),
3084
('Bud Person', 'bperson@dom.ain')])
3086
def test_getaddresses_nasty(self):
3087
eq = self.assertEqual
3088
eq(utils.getaddresses(['foo: ;']), [('', '')])
3089
eq(utils.getaddresses(
3091
[('', ''), ('', ''), ('', '*--')])
3092
eq(utils.getaddresses(
3093
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3094
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3096
def test_getaddresses_embedded_comment(self):
3097
"""Test proper handling of a nested comment"""
3098
eq = self.assertEqual
3099
addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3100
eq(addrs[0][1], 'foo@bar.com')
3102
def test_utils_quote_unquote(self):
3103
eq = self.assertEqual
3105
msg.add_header('content-disposition', 'attachment',
3106
filename='foo\\wacky"name')
3107
eq(msg.get_filename(), 'foo\\wacky"name')
3109
def test_get_body_encoding_with_bogus_charset(self):
3110
charset = Charset('not a charset')
3111
self.assertEqual(charset.get_body_encoding(), 'base64')
3113
def test_get_body_encoding_with_uppercase_charset(self):
3114
eq = self.assertEqual
3116
msg['Content-Type'] = 'text/plain; charset=UTF-8'
3117
eq(msg['content-type'], 'text/plain; charset=UTF-8')
3118
charsets = msg.get_charsets()
3119
eq(len(charsets), 1)
3120
eq(charsets[0], 'utf-8')
3121
charset = Charset(charsets[0])
3122
eq(charset.get_body_encoding(), 'base64')
3123
msg.set_payload(b'hello world', charset=charset)
3124
eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3125
eq(msg.get_payload(decode=True), b'hello world')
3126
eq(msg['content-transfer-encoding'], 'base64')
3129
msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3130
charsets = msg.get_charsets()
3131
eq(len(charsets), 1)
3132
eq(charsets[0], 'us-ascii')
3133
charset = Charset(charsets[0])
3134
eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3135
msg.set_payload('hello world', charset=charset)
3136
eq(msg.get_payload(), 'hello world')
3137
eq(msg['content-transfer-encoding'], '7bit')
3139
def test_charsets_case_insensitive(self):
3140
lc = Charset('us-ascii')
3141
uc = Charset('US-ASCII')
3142
self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3144
def test_partial_falls_inside_message_delivery_status(self):
3145
eq = self.ndiffAssertEqual
3146
# The Parser interface provides chunks of data to FeedParser in 8192
3147
# byte gulps. SF bug #1076485 found one of those chunks inside
3148
# message/delivery-status header block, which triggered an
3149
# unreadline() of NeedMoreData.
3150
msg = self._msgobj('msg_43.txt')
3152
iterators._structure(msg, sfp)
3153
eq(sfp.getvalue(), """\
3156
message/delivery-status
3186
def test_make_msgid_domain(self):
3188
email.utils.make_msgid(domain='testdomain-string')[-19:],
3189
'@testdomain-string>')
3191
def test_Generator_linend(self):
3193
with openfile('msg_26.txt', newline='\n') as f:
3195
msgtxt_nl = msgtxt.replace('\r\n', '\n')
3196
msg = email.message_from_string(msgtxt)
3198
g = email.generator.Generator(s)
3200
self.assertEqual(s.getvalue(), msgtxt_nl)
3202
def test_BytesGenerator_linend(self):
3204
with openfile('msg_26.txt', newline='\n') as f:
3206
msgtxt_nl = msgtxt.replace('\r\n', '\n')
3207
msg = email.message_from_string(msgtxt_nl)
3209
g = email.generator.BytesGenerator(s)
3210
g.flatten(msg, linesep='\r\n')
3211
self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
3213
def test_BytesGenerator_linend_with_non_ascii(self):
3215
with openfile('msg_26.txt', 'rb') as f:
3217
msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
3218
msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
3219
msg = email.message_from_bytes(msgtxt_nl)
3221
g = email.generator.BytesGenerator(s)
3222
g.flatten(msg, linesep='\r\n')
3223
self.assertEqual(s.getvalue(), msgtxt)
3226
# Test the iterator/generators
3227
class TestIterators(TestEmailBase):
3228
def test_body_line_iterator(self):
3229
eq = self.assertEqual
3230
neq = self.ndiffAssertEqual
3231
# First a simple non-multipart message
3232
msg = self._msgobj('msg_01.txt')
3233
it = iterators.body_line_iterator(msg)
3236
neq(EMPTYSTRING.join(lines), msg.get_payload())
3237
# Now a more complicated multipart
3238
msg = self._msgobj('msg_02.txt')
3239
it = iterators.body_line_iterator(msg)
3242
with openfile('msg_19.txt') as fp:
3243
neq(EMPTYSTRING.join(lines), fp.read())
3245
def test_typed_subpart_iterator(self):
3246
eq = self.assertEqual
3247
msg = self._msgobj('msg_04.txt')
3248
it = iterators.typed_subpart_iterator(msg, 'text')
3253
lines.append(subpart.get_payload())
3255
eq(EMPTYSTRING.join(lines), """\
3256
a simple kind of mirror
3257
to reflect upon our own
3258
a simple kind of mirror
3259
to reflect upon our own
3262
def test_typed_subpart_iterator_default_type(self):
3263
eq = self.assertEqual
3264
msg = self._msgobj('msg_03.txt')
3265
it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
3270
lines.append(subpart.get_payload())
3272
eq(EMPTYSTRING.join(lines), """\
3276
Do you like this message?
3281
def test_pushCR_LF(self):
3282
'''FeedParser BufferedSubFile.push() assumed it received complete
3283
line endings. A CR ending one push() followed by a LF starting
3284
the next push() added an empty line.
3296
from email.feedparser import BufferedSubFile, NeedMoreData
3297
bsf = BufferedSubFile()
3306
if ol == NeedMoreData:
3310
self.assertEqual(n, n1)
3311
self.assertEqual(len(om), nt)
3312
self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
3316
class TestParsers(TestEmailBase):
3318
def test_header_parser(self):
    """HeaderParser reads the headers and leaves the body as one string."""
    eq = self.assertEqual
    # Parse only the headers of a complex multipart MIME document
    with openfile('msg_02.txt') as fp:
        msg = HeaderParser().parse(fp)
    eq(msg['from'], 'ppp-request@zzz.org')
    eq(msg['to'], 'ppp@zzz.org')
    eq(msg.get_content_type(), 'multipart/mixed')
    # The declared type is multipart, yet the payload was not split up.
    self.assertFalse(msg.is_multipart())
    self.assertIsInstance(msg.get_payload(), str)
3329
def test_bytes_header_parser(self):
    """BytesHeaderParser behaves like HeaderParser on a binary file."""
    eq = self.assertEqual
    # Parse only the headers of a complex multipart MIME document
    with openfile('msg_02.txt', 'rb') as fp:
        msg = email.parser.BytesHeaderParser().parse(fp)
    eq(msg['from'], 'ppp-request@zzz.org')
    eq(msg['to'], 'ppp@zzz.org')
    eq(msg.get_content_type(), 'multipart/mixed')
    # The declared type is multipart, yet the payload was not split up.
    self.assertFalse(msg.is_multipart())
    # The payload is exposed as str, and as bytes when decoding is asked for.
    self.assertIsInstance(msg.get_payload(), str)
    self.assertIsInstance(msg.get_payload(decode=True), bytes)
3341
def test_whitespace_continuation(self):
3342
eq = self.assertEqual
3343
# This message contains a line after the Subject: header that has only
3344
# whitespace, but it is not empty!
3345
msg = email.message_from_string("""\
3346
From: aperson@dom.ain
3348
Subject: the next line has a space on it
3350
Date: Mon, 8 Apr 2002 15:09:19 -0400
3353
Here's the message body
3355
eq(msg['subject'], 'the next line has a space on it\n ')
3356
eq(msg['message-id'], 'spam')
3357
eq(msg.get_payload(), "Here's the message body\n")
3359
def test_whitespace_continuation_last_header(self):
3360
eq = self.assertEqual
3361
# Like the previous test, but the subject line is the last
3363
msg = email.message_from_string("""\
3364
From: aperson@dom.ain
3366
Date: Mon, 8 Apr 2002 15:09:19 -0400
3368
Subject: the next line has a space on it
3371
Here's the message body
3373
eq(msg['subject'], 'the next line has a space on it\n ')
3374
eq(msg['message-id'], 'spam')
3375
eq(msg.get_payload(), "Here's the message body\n")
3377
def test_crlf_separation(self):
    """A message file opened with newline='\\n' parses into two parts."""
    eq = self.assertEqual
    with openfile('msg_26.txt', newline='\n') as fp:
        msg = Parser().parse(fp)
    eq(len(msg.get_payload()), 2)
    part1 = msg.get_payload(0)
    eq(part1.get_content_type(), 'text/plain')
    # The CRLF pairs are still present in the first part's payload.
    eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
    part2 = msg.get_payload(1)
    eq(part2.get_content_type(), 'application/riscos')
3388
def test_crlf_flatten(self):
3389
# Using newline='\n' preserves the crlfs in this input file.
3390
with openfile('msg_26.txt', newline='\n') as fp:
3392
msg = email.message_from_string(text)
3395
g.flatten(msg, linesep='\r\n')
3396
self.assertEqual(s.getvalue(), text)
3400
def test_multipart_digest_with_extra_mime_headers(self):
    """msg_28.txt parses into two message/rfc822 parts, each text/plain."""
    eq = self.assertEqual
    neq = self.ndiffAssertEqual
    with openfile('msg_28.txt') as fp:
        msg = email.message_from_file(fp)
    eq(msg.is_multipart(), 1)
    eq(len(msg.get_payload()), 2)
    # first message/rfc822
    part1 = msg.get_payload(0)
    eq(part1.get_content_type(), 'message/rfc822')
    eq(part1.is_multipart(), 1)
    eq(len(part1.get_payload()), 1)
    part1a = part1.get_payload(0)
    eq(part1a.is_multipart(), 0)
    eq(part1a.get_content_type(), 'text/plain')
    neq(part1a.get_payload(), 'message 1\n')
    # next message/rfc822
    part2 = msg.get_payload(1)
    eq(part2.get_content_type(), 'message/rfc822')
    eq(part2.is_multipart(), 1)
    eq(len(part2.get_payload()), 1)
    part2a = part2.get_payload(0)
    eq(part2a.is_multipart(), 0)
    eq(part2a.get_content_type(), 'text/plain')
    neq(part2a.get_payload(), 'message 2\n')
3431
def test_three_lines(self):
3432
# A bug report by Andrew McNamara
3433
lines = ['From: Andrew Person <aperson@dom.ain',
3435
'Date: Tue, 20 Aug 2002 16:43:45 +1000']
3436
msg = email.message_from_string(NL.join(lines))
3437
self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
3439
def test_strip_line_feed_and_carriage_return_in_headers(self):
3440
eq = self.assertEqual
3441
# For [ 1002475 ] email message parser doesn't handle \r\n correctly
3443
value2 = 'more text'
3444
m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
3446
msg = email.message_from_string(m)
3447
eq(msg.get('Header'), value1)
3448
eq(msg.get('Next-Header'), value2)
3450
def test_rfc2822_header_syntax(self):
3451
eq = self.assertEqual
3452
m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3453
msg = email.message_from_string(m)
3455
eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
3456
eq(msg.get_payload(), 'body')
3458
def test_rfc2822_space_not_allowed_in_header(self):
3459
eq = self.assertEqual
3460
m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3461
msg = email.message_from_string(m)
3462
eq(len(msg.keys()), 0)
3464
def test_rfc2822_one_character_header(self):
3465
eq = self.assertEqual
3466
m = 'A: first header\nB: second header\nCC: third header\n\nbody'
3467
msg = email.message_from_string(m)
3468
headers = msg.keys()
3470
eq(headers, ['A', 'B', 'CC'])
3471
eq(msg.get_payload(), 'body')
3473
def test_CRLFLF_at_end_of_part(self):
3474
# issue 5610: feedparser should not eat two chars from body part ending
3477
"From: foo@bar.com\n"
3479
"Mime-Version: 1.0\n"
3480
"Content-Type: multipart/mixed; boundary=BOUNDARY\n"
3483
"Content-Type: text/plain\n"
3485
"body ending with CRLF newline\r\n"
3489
msg = email.message_from_string(m)
3490
self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
3493
class Test8BitBytesHandling(unittest.TestCase):
3494
# In Python3 all input is string, but that doesn't work if the actual input
3495
# uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3496
# decode byte streams using the surrogateescape error handler, and
3497
# reconvert to binary at appropriate places if we detect surrogates. This
3498
# doesn't allow us to transform headers with 8bit bytes (they get munged),
3499
# but it does allow us to parse and preserve them, and to decode body
3500
# parts that use an 8bit CTE.
3502
bodytest_msg = textwrap.dedent("""\
3506
Content-Type: text/plain; charset={charset}
3507
Content-Transfer-Encoding: {cte}
3512
def test_known_8bit_CTE(self):
3513
m = self.bodytest_msg.format(charset='utf-8',
3515
bodyline='pƶstal').encode('utf-8')
3516
msg = email.message_from_bytes(m)
3517
self.assertEqual(msg.get_payload(), "pƶstal\n")
3518
self.assertEqual(msg.get_payload(decode=True),
3519
"pƶstal\n".encode('utf-8'))
3521
def test_unknown_8bit_CTE(self):
3522
m = self.bodytest_msg.format(charset='notavalidcharset',
3524
bodyline='pƶstal').encode('utf-8')
3525
msg = email.message_from_bytes(m)
3526
self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
3527
self.assertEqual(msg.get_payload(decode=True),
3528
"pƶstal\n".encode('utf-8'))
3530
def test_8bit_in_quopri_body(self):
3531
# This is non-RFC compliant data...without 'decode' the library code
3532
# decodes the body using the charset from the headers, and because the
3533
# source byte really is utf-8 this works. This is likely to fail
3534
# against real dirty data (ie: produce mojibake), but the data is
3535
# invalid anyway so it is as good a guess as any. But this means that
3536
# this test just confirms the current behavior; that behavior is not
3537
# necessarily the best possible behavior. With 'decode' it is
3538
# returning the raw bytes, so that test should be of correct behavior,
3539
# or at least produce the same result that email4 did.
3540
m = self.bodytest_msg.format(charset='utf-8',
3541
cte='quoted-printable',
3542
bodyline='p=C3=B6stƔl').encode('utf-8')
3543
msg = email.message_from_bytes(m)
3544
self.assertEqual(msg.get_payload(), 'p=C3=B6stƔl\n')
3545
self.assertEqual(msg.get_payload(decode=True),
3546
'pƶstƔl\n'.encode('utf-8'))
3548
def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3549
# This is similar to the previous test, but proves that if the 8bit
3550
# byte is undecodeable in the specified charset, it gets replaced
3551
# by the unicode 'unknown' character. Again, this may or may not
3552
# be the ideal behavior. Note that if decode=False none of the
3553
# decoders will get involved, so this is the only test we need
3554
# for this behavior.
3555
m = self.bodytest_msg.format(charset='ascii',
3556
cte='quoted-printable',
3557
bodyline='p=C3=B6stƔl').encode('utf-8')
3558
msg = email.message_from_bytes(m)
3559
self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
3560
self.assertEqual(msg.get_payload(decode=True),
3561
'pƶstƔl\n'.encode('utf-8'))
3563
# test_defect_handling:test_invalid_chars_in_base64_payload
3564
def test_8bit_in_base64_body(self):
3565
# If we get 8bit bytes in a base64 body, we can just ignore them
3566
# as being outside the base64 alphabet and decode anyway. But
3567
# we register a defect.
3568
m = self.bodytest_msg.format(charset='utf-8',
3570
bodyline='cMO2c3RhbAĆ”=').encode('utf-8')
3571
msg = email.message_from_bytes(m)
3572
self.assertEqual(msg.get_payload(decode=True),
3573
'pƶstal'.encode('utf-8'))
3574
self.assertIsInstance(msg.defects[0],
3575
errors.InvalidBase64CharactersDefect)
3577
def test_8bit_in_uuencode_body(self):
3578
# Sticking an 8bit byte in a uuencode block makes it undecodable by
3579
# normal means, so the block is returned undecoded, but as bytes.
3580
m = self.bodytest_msg.format(charset='utf-8',
3582
bodyline='<,.V<W1A; Ć” ').encode('utf-8')
3583
msg = email.message_from_bytes(m)
3584
self.assertEqual(msg.get_payload(decode=True),
3585
'<,.V<W1A; Ć” \n'.encode('utf-8'))
3588
headertest_headers = (
3589
('From: foo@bar.com', ('From', 'foo@bar.com')),
3590
('To: bƔz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3591
('Subject: Maintenant je vous prĆ©sente mon collĆØgue, le pouf cĆ©lĆØbre\n'
3593
('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3594
'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3595
' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3596
('From: gƶst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3598
# Source bytes of the header-test message: the raw form of each entry in
# headertest_headers, one per line, followed directly by a single text line,
# all encoded as UTF-8.
headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
                  '\nYes, they are flying.\n').encode('utf-8')
3601
def test_get_8bit_header(self):
3602
msg = email.message_from_bytes(self.headertest_msg)
3603
self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3604
self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
3606
def test_print_8bit_headers(self):
3607
msg = email.message_from_bytes(self.headertest_msg)
3608
self.assertEqual(str(msg),
3609
textwrap.dedent("""\
3615
Yes, they are flying.
3616
""").format(*[expected[1] for (_, expected) in
3617
self.headertest_headers]))
3619
def test_values_with_8bit_headers(self):
3620
msg = email.message_from_bytes(self.headertest_msg)
3621
self.assertListEqual([str(x) for x in msg.values()],
3624
'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3625
'coll\uFFFD\uFFFDgue, le pouf '
3626
'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3630
def test_items_with_8bit_headers(self):
3631
msg = email.message_from_bytes(self.headertest_msg)
3632
self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
3633
[('From', 'foo@bar.com'),
3634
('To', 'b\uFFFD\uFFFDz'),
3635
('Subject', 'Maintenant je vous '
3636
'pr\uFFFD\uFFFDsente '
3637
'mon coll\uFFFD\uFFFDgue, le pouf '
3638
'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3639
'\tJean de Baddie'),
3640
('From', 'g\uFFFD\uFFFDst')])
3642
def test_get_all_with_8bit_headers(self):
3643
msg = email.message_from_bytes(self.headertest_msg)
3644
self.assertListEqual([str(x) for x in msg.get_all('from')],
3648
def test_get_content_type_with_8bit(self):
3649
msg = email.message_from_bytes(textwrap.dedent("""\
3650
Content-Type: text/pl\xA7in; charset=utf-8
3651
""").encode('latin-1'))
3652
self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3653
self.assertEqual(msg.get_content_maintype(), "text")
3654
self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3656
# test_headerregistry.TestContentTypeHeader.non_ascii_in_params
3657
def test_get_params_with_8bit(self):
3658
msg = email.message_from_bytes(
3659
'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3660
self.assertEqual(msg.get_params(header='x-header'),
3661
[('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3662
self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3663
# XXX: someday you might be able to get 'b\xa7r', for now you can't.
3664
self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3666
# test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
3667
def test_get_rfc2231_params_with_8bit(self):
3668
msg = email.message_from_bytes(textwrap.dedent("""\
3669
Content-Type: text/plain; charset=us-ascii;
3670
title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3671
).encode('latin-1'))
3672
self.assertEqual(msg.get_param('title'),
3673
('us-ascii', 'en', 'This is not f\uFFFDn'))
3675
def test_set_rfc2231_params_with_8bit(self):
3676
msg = email.message_from_bytes(textwrap.dedent("""\
3677
Content-Type: text/plain; charset=us-ascii;
3678
title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3679
).encode('latin-1'))
3680
msg.set_param('title', 'test')
3681
self.assertEqual(msg.get_param('title'), 'test')
3683
def test_del_rfc2231_params_with_8bit(self):
3684
msg = email.message_from_bytes(textwrap.dedent("""\
3685
Content-Type: text/plain; charset=us-ascii;
3686
title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3687
).encode('latin-1'))
3688
msg.del_param('title')
3689
self.assertEqual(msg.get_param('title'), None)
3690
self.assertEqual(msg.get_content_maintype(), 'text')
3692
def test_get_payload_with_8bit_cte_header(self):
3693
msg = email.message_from_bytes(textwrap.dedent("""\
3694
Content-Transfer-Encoding: b\xa7se64
3695
Content-Type: text/plain; charset=latin-1
3698
""").encode('latin-1'))
3699
self.assertEqual(msg.get_payload(), 'payload\n')
3700
self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3702
non_latin_bin_msg = textwrap.dedent("""\
3705
Subject: Maintenant je vous prĆ©sente mon collĆØgue, le pouf cĆ©lĆØbre
3708
Content-Type: text/plain; charset="utf-8"
3709
Content-Transfer-Encoding: 8bit
3711
ŠŠ°, Š¾Š½Šø Š»ŠµŃŃŃ.
3712
""").encode('utf-8')
3714
def test_bytes_generator(self):
3715
msg = email.message_from_bytes(self.non_latin_bin_msg)
3717
email.generator.BytesGenerator(out).flatten(msg)
3718
self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3720
def test_bytes_generator_handles_None_body(self):
3722
msg = email.message.Message()
3724
email.generator.BytesGenerator(out).flatten(msg)
3725
self.assertEqual(out.getvalue(), b"\n")
3727
non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
3729
To: =?unknown-8bit?q?b=C3=A1z?=
3730
Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3731
=?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3732
=?unknown-8bit?q?_Jean_de_Baddie?=
3734
Content-Type: text/plain; charset="utf-8"
3735
Content-Transfer-Encoding: base64
3737
0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3740
def test_generator_handles_8bit(self):
3741
msg = email.message_from_bytes(self.non_latin_bin_msg)
3743
email.generator.Generator(out).flatten(msg)
3744
self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
3746
def test_bytes_generator_with_unix_from(self):
3747
# The unixfrom contains a current date, so we can't check it
3748
# literally. Just make sure the first word is 'From' and the
3749
# rest of the message matches the input.
3750
msg = email.message_from_bytes(self.non_latin_bin_msg)
3752
email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3753
lines = out.getvalue().split(b'\n')
3754
self.assertEqual(lines[0].split()[0], b'From')
3755
self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3757
# Derive the unwrapped fixture from the wrapped one: split it into lines,
# replace the lines at indexes 2 and 3 with a single Subject line, and join
# the lines back together.
non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
non_latin_bin_msg_as7bit[2:4] = [
    'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
    'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3763
def test_message_from_binary_file(self):
3765
self.addCleanup(unlink, fn)
3766
with open(fn, 'wb') as testfile:
3767
testfile.write(self.non_latin_bin_msg)
3768
with open(fn, 'rb') as testfile:
3769
m = email.parser.BytesParser().parse(testfile)
3770
self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3772
latin_bin_msg = textwrap.dedent("""\
3775
Subject: Nudge nudge, wink, wink
3777
Content-Type: text/plain; charset="latin-1"
3778
Content-Transfer-Encoding: 8bit
3780
oh lĆ lĆ , know what I mean, know what I mean?
3781
""").encode('latin-1')
3783
latin_bin_msg_as7bit = textwrap.dedent("""\
3786
Subject: Nudge nudge, wink, wink
3788
Content-Type: text/plain; charset="iso-8859-1"
3789
Content-Transfer-Encoding: quoted-printable
3791
oh l=E0 l=E0, know what I mean, know what I mean?
3794
def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3795
m = email.message_from_bytes(self.latin_bin_msg)
3796
self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3798
def test_decoded_generator_emits_unicode_body(self):
3799
m = email.message_from_bytes(self.latin_bin_msg)
3801
email.generator.DecodedGenerator(out).flatten(m)
3802
#DecodedHeader output contains an extra blank line compared
3803
#to the input message. RDM: not sure if this is a bug or not,
3804
#but it is not specific to the 8bit->7bit conversion.
3805
self.assertEqual(out.getvalue(),
3806
self.latin_bin_msg.decode('latin-1')+'\n')
3808
def test_bytes_feedparser(self):
3809
bfp = email.feedparser.BytesFeedParser()
3810
for i in range(0, len(self.latin_bin_msg), 10):
3811
bfp.feed(self.latin_bin_msg[i:i+10])
3813
self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3815
def test_crlf_flatten(self):
3816
with openfile('msg_26.txt', 'rb') as fp:
3818
msg = email.message_from_bytes(text)
3820
g = email.generator.BytesGenerator(s)
3821
g.flatten(msg, linesep='\r\n')
3822
self.assertEqual(s.getvalue(), text)
3824
def test_8bit_multipart(self):
3826
source = textwrap.dedent("""\
3827
Date: Fri, 18 Mar 2011 17:15:43 +0100
3829
From: foodwatch-Newsletter <bar@example.com>
3830
Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3831
Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3833
Content-Type: multipart/alternative;
3834
boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3836
--b1_76a486bee62b0d200f33dc2ca08220ad
3837
Content-Type: text/plain; charset="utf-8"
3838
Content-Transfer-Encoding: 8bit
3842
mit groĆer Betroffenheit verfolgen auch wir im foodwatch-Team die
3843
Nachrichten aus Japan.
3846
--b1_76a486bee62b0d200f33dc2ca08220ad
3847
Content-Type: text/html; charset="utf-8"
3848
Content-Transfer-Encoding: 8bit
3850
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3851
"http://www.w3.org/TR/html4/loose.dtd">
3854
<title>foodwatch - Newsletter</title>
3857
<p>mit großer Betroffenheit verfolgen auch wir im foodwatch-Team
3858
die Nachrichten aus Japan.</p>
3861
--b1_76a486bee62b0d200f33dc2ca08220ad--
3863
""").encode('utf-8')
3864
msg = email.message_from_bytes(source)
3866
g = email.generator.BytesGenerator(s)
3868
self.assertEqual(s.getvalue(), source)
3870
def test_bytes_generator_b_encoding_linesep(self):
3871
# Issue 14062: b encoding was tacking on an extra \n.
3873
# This has enough non-ascii that it should always end up b encoded.
3874
m['Subject'] = Header('žluÅ„ouÄkĆ½ kÅÆÅ')
3876
g = email.generator.BytesGenerator(s)
3877
g.flatten(m, linesep='\r\n')
3880
b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3882
def test_generator_b_encoding_linesep(self):
3883
# Since this broke in ByteGenerator, test Generator for completeness.
3885
# This has enough non-ascii that it should always end up b encoded.
3886
m['Subject'] = Header('žluÅ„ouÄkĆ½ kÅÆÅ')
3888
g = email.generator.Generator(s)
3889
g.flatten(m, linesep='\r\n')
3892
'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3897
class BaseTestBytesGeneratorIdempotent:
3901
def _msgobj(self, filename):
3902
with openfile(filename, 'rb') as fp:
3904
data = self.normalize_linesep_regex.sub(self.blinesep, data)
3905
msg = email.message_from_bytes(data)
3908
def _idempotent(self, msg, data, unixfrom=False):
3910
g = email.generator.BytesGenerator(b, maxheaderlen=0)
3911
g.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
3912
self.assertEqual(data, b.getvalue())
3915
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
3919
normalize_linesep_regex = re.compile(br'\r\n')
3922
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
3926
normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3929
class TestBase64(unittest.TestCase):
3931
eq = self.assertEqual
3932
eq(base64mime.header_length('hello'),
3933
len(base64mime.body_encode(b'hello', eol='')))
3934
for size in range(15):
3935
if size == 0 : bsize = 0
3936
elif size <= 3 : bsize = 4
3937
elif size <= 6 : bsize = 8
3938
elif size <= 9 : bsize = 12
3939
elif size <= 12: bsize = 16
3941
eq(base64mime.header_length('x' * size), bsize)
3943
def test_decode(self):
3944
eq = self.assertEqual
3945
eq(base64mime.decode(''), b'')
3946
eq(base64mime.decode('aGVsbG8='), b'hello')
3948
def test_encode(self):
3949
eq = self.assertEqual
3950
eq(base64mime.body_encode(b''), b'')
3951
eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
3952
# Test the binary flag
3953
eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
3954
# Test the maxlinelen arg
3955
eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
3956
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
3957
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
3958
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
3961
# Test the eol argument
3962
eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3964
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
3965
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
3966
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
3970
def test_header_encode(self):
3971
eq = self.assertEqual
3972
he = base64mime.header_encode
3973
eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
3974
eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
3975
eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
3976
# Test the charset option
3977
eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
3978
eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
3982
class TestQuopri(unittest.TestCase):
3984
# Set of characters (as byte integers) that don't need to be encoded
3986
self.hlit = list(chain(
3987
range(ord('a'), ord('z') + 1),
3988
range(ord('A'), ord('Z') + 1),
3989
range(ord('0'), ord('9') + 1),
3990
(c for c in b'!*+-/')))
3991
# Set of characters (as byte integers) that do need to be encoded in
3993
self.hnon = [c for c in range(256) if c not in self.hlit]
3994
assert len(self.hlit) + len(self.hnon) == 256
3995
# Set of characters (as byte integers) that don't need to be encoded
3997
self.blit = list(range(ord(' '), ord('~') + 1))
3998
self.blit.append(ord('\t'))
3999
self.blit.remove(ord('='))
4000
# Set of characters (as byte integers) that do need to be encoded in
4002
self.bnon = [c for c in range(256) if c not in self.blit]
4003
assert len(self.blit) + len(self.bnon) == 256
4005
def test_quopri_header_check(self):
4007
self.assertFalse(quoprimime.header_check(c),
4008
'Should not be header quopri encoded: %s' % chr(c))
4010
self.assertTrue(quoprimime.header_check(c),
4011
'Should be header quopri encoded: %s' % chr(c))
4013
def test_quopri_body_check(self):
4015
self.assertFalse(quoprimime.body_check(c),
4016
'Should not be body quopri encoded: %s' % chr(c))
4018
self.assertTrue(quoprimime.body_check(c),
4019
'Should be body quopri encoded: %s' % chr(c))
4021
def test_header_quopri_len(self):
4022
eq = self.assertEqual
4023
eq(quoprimime.header_length(b'hello'), 5)
4024
# RFC 2047 chrome is not included in header_length().
4025
eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
4026
quoprimime.header_length(b'hello') +
4027
# =?xxx?q?...?= means 10 extra characters
4029
eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
4030
# RFC 2047 chrome is not included in header_length().
4031
eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
4032
quoprimime.header_length(b'h@e@l@l@o@') +
4033
# =?xxx?q?...?= means 10 extra characters
4036
eq(quoprimime.header_length(bytes([c])), 1,
4037
'expected length 1 for %r' % chr(c))
4039
# Space is special; it's encoded to _
4042
eq(quoprimime.header_length(bytes([c])), 3,
4043
'expected length 3 for %r' % chr(c))
4044
eq(quoprimime.header_length(b' '), 1)
4046
def test_body_quopri_len(self):
4047
eq = self.assertEqual
4049
eq(quoprimime.body_length(bytes([c])), 1)
4051
eq(quoprimime.body_length(bytes([c])), 3)
4053
def test_quote_unquote_idempotent(self):
4054
for x in range(256):
4056
self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
4058
def _test_header_encode(self, header, expected_encoded_header, charset=None):
4060
encoded_header = quoprimime.header_encode(header)
4062
encoded_header = quoprimime.header_encode(header, charset)
4063
self.assertEqual(encoded_header, expected_encoded_header)
4065
def test_header_encode_null(self):
4066
self._test_header_encode(b'', '')
4068
def test_header_encode_one_word(self):
4069
self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
4071
def test_header_encode_two_lines(self):
4072
self._test_header_encode(b'hello\nworld',
4073
'=?iso-8859-1?q?hello=0Aworld?=')
4075
def test_header_encode_non_ascii(self):
4076
self._test_header_encode(b'hello\xc7there',
4077
'=?iso-8859-1?q?hello=C7there?=')
4079
def test_header_encode_alt_charset(self):
4080
self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
4081
charset='iso-8859-2')
4083
def _test_header_decode(self, encoded_header, expected_decoded_header):
4084
decoded_header = quoprimime.header_decode(encoded_header)
4085
self.assertEqual(decoded_header, expected_decoded_header)
4087
def test_header_decode_null(self):
4088
self._test_header_decode('', '')
4090
def test_header_decode_one_word(self):
4091
self._test_header_decode('hello', 'hello')
4093
def test_header_decode_two_lines(self):
4094
self._test_header_decode('hello=0Aworld', 'hello\nworld')
4096
def test_header_decode_non_ascii(self):
4097
self._test_header_decode('hello=C7there', 'hello\xc7there')
4099
def test_header_decode_re_bug_18380(self):
4100
# Issue 18380: Call re.sub with a positional argument for flags in the wrong position
4101
self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)
4103
def _test_decode(self, encoded, expected_decoded, eol=None):
4105
decoded = quoprimime.decode(encoded)
4107
decoded = quoprimime.decode(encoded, eol=eol)
4108
self.assertEqual(decoded, expected_decoded)
4110
def test_decode_null_word(self):
4111
self._test_decode('', '')
4113
def test_decode_null_line_null_word(self):
4114
self._test_decode('\r\n', '\n')
4116
def test_decode_one_word(self):
4117
self._test_decode('hello', 'hello')
4119
def test_decode_one_word_eol(self):
4120
self._test_decode('hello', 'hello', eol='X')
4122
def test_decode_one_line(self):
4123
self._test_decode('hello\r\n', 'hello\n')
4125
def test_decode_one_line_lf(self):
4126
self._test_decode('hello\n', 'hello\n')
4128
def test_decode_one_line_cr(self):
4129
self._test_decode('hello\r', 'hello\n')
4131
def test_decode_one_line_nl(self):
4132
self._test_decode('hello\n', 'helloX', eol='X')
4134
def test_decode_one_line_crnl(self):
4135
self._test_decode('hello\r\n', 'helloX', eol='X')
4137
def test_decode_one_line_one_word(self):
4138
self._test_decode('hello\r\nworld', 'hello\nworld')
4140
def test_decode_one_line_one_word_eol(self):
4141
self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
4143
def test_decode_two_lines(self):
4144
self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
4146
def test_decode_two_lines_eol(self):
4147
self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
4149
def test_decode_one_long_line(self):
4150
self._test_decode('Spam' * 250, 'Spam' * 250)
4152
def test_decode_one_space(self):
4153
self._test_decode(' ', '')
4155
def test_decode_multiple_spaces(self):
4156
self._test_decode(' ' * 5, '')
4158
def test_decode_one_line_trailing_spaces(self):
4159
self._test_decode('hello \r\n', 'hello\n')
4161
def test_decode_two_lines_trailing_spaces(self):
4162
self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
4164
def test_decode_quoted_word(self):
4165
self._test_decode('=22quoted=20words=22', '"quoted words"')
4167
def test_decode_uppercase_quoting(self):
4168
self._test_decode('ab=CD=EF', 'ab\xcd\xef')
4170
def test_decode_lowercase_quoting(self):
4171
self._test_decode('ab=cd=ef', 'ab\xcd\xef')
4173
def test_decode_soft_line_break(self):
4174
self._test_decode('soft line=\r\nbreak', 'soft linebreak')
4176
def test_decode_false_quoting(self):
4177
self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
4179
def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
4181
if maxlinelen is None:
4182
# Use body_encode's default.
4185
kwargs['maxlinelen'] = maxlinelen
4187
# Use body_encode's default.
4191
encoded_body = quoprimime.body_encode(body, **kwargs)
4192
self.assertEqual(encoded_body, expected_encoded_body)
4193
if eol == '\n' or eol == '\r\n':
4194
# We know how to split the result back into lines, so maxlinelen
4196
for line in encoded_body.splitlines():
4197
self.assertLessEqual(len(line), maxlinelen)
4199
def test_encode_null(self):
4200
self._test_encode('', '')
4202
def test_encode_null_lines(self):
4203
self._test_encode('\n\n', '\n\n')
4205
def test_encode_one_line(self):
4206
self._test_encode('hello\n', 'hello\n')
4208
def test_encode_one_line_crlf(self):
4209
self._test_encode('hello\r\n', 'hello\n')
4211
def test_encode_one_line_eol(self):
4212
self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
4214
def test_encode_one_space(self):
4215
self._test_encode(' ', '=20')
4217
def test_encode_one_line_one_space(self):
4218
self._test_encode(' \n', '=20\n')
4220
# XXX: body_encode() expects strings, but uses ord(char) from these strings
4221
# to index into a 256-entry list. For code points above 255, this will fail.
4222
# Should there be a check for 8-bit only ord() values in body, or at least
4223
# a comment about the expected input?
4225
def test_encode_two_lines_one_space(self):
4226
self._test_encode(' \n \n', '=20\n=20\n')
4228
def test_encode_one_word_trailing_spaces(self):
4229
self._test_encode('hello ', 'hello =20')
4231
def test_encode_one_line_trailing_spaces(self):
4232
self._test_encode('hello \n', 'hello =20\n')
4234
def test_encode_one_word_trailing_tab(self):
4235
self._test_encode('hello \t', 'hello =09')
4237
def test_encode_one_line_trailing_tab(self):
4238
self._test_encode('hello \t\n', 'hello =09\n')
4240
def test_encode_trailing_space_before_maxlinelen(self):
4241
self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4243
def test_encode_trailing_space_at_maxlinelen(self):
4244
self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4246
def test_encode_trailing_space_beyond_maxlinelen(self):
4247
self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4249
def test_encode_whitespace_lines(self):
4250
self._test_encode(' \n' * 5, '=20\n' * 5)
4252
def test_encode_quoted_equals(self):
4253
self._test_encode('a = b', 'a =3D b')
4255
def test_encode_one_long_string(self):
4256
self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4258
def test_encode_one_long_line(self):
4259
self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4261
def test_encode_one_very_long_line(self):
4262
self._test_encode('x' * 200 + '\n',
4263
2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4265
def test_encode_shortest_maxlinelen(self):
4266
self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
4268
def test_encode_maxlinelen_too_small(self):
4269
self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4271
def test_encode(self):
4272
eq = self.assertEqual
4273
eq(quoprimime.body_encode(''), '')
4274
eq(quoprimime.body_encode('hello'), 'hello')
4275
# Test the binary flag
4276
eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
4277
# Test the maxlinelen arg
4278
eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
4279
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4280
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4281
x xxxx xxxx xxxx xxxx=20""")
4282
# Test the eol argument
4283
eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4285
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4286
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4287
x xxxx xxxx xxxx xxxx=20""")
4288
eq(quoprimime.body_encode("""\
4298
# Test the Charset class
4299
class TestCharset(unittest.TestCase):
4301
from email import charset as CharsetModule
4303
del CharsetModule.CHARSETS['fake']
4307
def test_codec_encodeable(self):
4308
eq = self.assertEqual
4309
# Make sure us-ascii = no Unicode conversion
4310
c = Charset('us-ascii')
4311
eq(c.header_encode('Hello World!'), 'Hello World!')
4312
# Test 8-bit idempotency with us-ascii
4313
s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
4314
self.assertRaises(UnicodeError, c.header_encode, s)
4315
c = Charset('utf-8')
4316
eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
4318
def test_body_encode(self):
4319
eq = self.assertEqual
4320
# Try a charset with QP body encoding
4321
c = Charset('iso-8859-1')
4322
eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
4323
# Try a charset with Base64 body encoding
4324
c = Charset('utf-8')
4325
eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
4326
# Try a charset with None body encoding
4327
c = Charset('us-ascii')
4328
eq('hello world', c.body_encode('hello world'))
4329
# Try the convert argument, where input codec != output codec
4330
c = Charset('euc-jp')
4331
# With apologies to Tokio Kikuchi ;)
4334
## eq('\x1b$B5FCO;~IW\x1b(B',
4335
## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
4336
## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
4337
## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
4338
## except LookupError:
4339
## # We probably don't have the Japanese codecs installed
4341
# Testing SF bug #625509, which we have to fake, since there are no
4342
# built-in encodings where the header encoding is QP but the body
4344
from email import charset as CharsetModule
4345
CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
4347
eq('hello world', c.body_encode('hello world'))
4349
def test_unicode_charset_name(self):
4350
charset = Charset('us-ascii')
4351
self.assertEqual(str(charset), 'us-ascii')
4352
self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4356
# Test multilingual MIME headers.
4357
class TestHeader(TestEmailBase):
4358
def test_simple(self):
4359
eq = self.ndiffAssertEqual
4360
h = Header('Hello World!')
4361
eq(h.encode(), 'Hello World!')
4362
h.append(' Goodbye World!')
4363
eq(h.encode(), 'Hello World! Goodbye World!')
4365
def test_simple_surprise(self):
4366
eq = self.ndiffAssertEqual
4367
h = Header('Hello World!')
4368
eq(h.encode(), 'Hello World!')
4369
h.append('Goodbye World!')
4370
eq(h.encode(), 'Hello World! Goodbye World!')
4372
def test_header_needs_no_decoding(self):
4373
h = 'no decoding needed'
4374
self.assertEqual(decode_header(h), [(h, None)])
4376
def test_long(self):
4377
h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4379
for l in h.encode(splitchars=' ').split('\n '):
4380
self.assertLessEqual(len(l), 76)
4382
def test_multilingual(self):
4383
eq = self.ndiffAssertEqual
4384
g = Charset("iso-8859-1")
4385
cz = Charset("iso-8859-2")
4386
utf8 = Charset("utf-8")
4387
g_head = (b'Die Mieter treten hier ein werden mit einem '
4388
b'Foerderband komfortabel den Korridor entlang, '
4389
b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
4390
b'gegen die rotierenden Klingen bef\xf6rdert. ')
4391
cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
4393
utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
4394
'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
4395
'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
4396
'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
4397
'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
4398
'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
4399
'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
4400
'\u3044\u307e\u3059\u3002')
4401
h = Header(g_head, g)
4402
h.append(cz_head, cz)
4403
h.append(utf8_head, utf8)
4404
enc = h.encode(maxlinelen=76)
4406
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
4407
=?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
4408
=?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
4409
=?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
4410
=?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
4411
=?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
4412
=?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
4413
=?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
4414
=?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
4415
=?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
4416
=?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
4417
decoded = decode_header(enc)
4419
eq(decoded[0], (g_head, 'iso-8859-1'))
4420
eq(decoded[1], (cz_head, 'iso-8859-2'))
4421
eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
4424
(b'Die Mieter treten hier ein werden mit einem Foerderband '
4425
b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
4426
b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
4427
b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
4428
b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
4429
b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
4430
b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
4431
b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
4432
b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
4433
b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
4434
b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
4435
b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
4436
b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
4437
b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
4438
b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
4439
b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
4441
# Test make_header()
4442
newh = make_header(decode_header(enc))
4445
def test_empty_header_encode(self):
4447
self.assertEqual(h.encode(), '')
4449
def test_header_ctor_default_args(self):
4450
eq = self.ndiffAssertEqual
4453
h.append('foo', Charset('iso-8859-1'))
4456
def test_explicit_maxlinelen(self):
4457
eq = self.ndiffAssertEqual
4458
hstr = ('A very long line that must get split to something other '
4459
'than at the 76th character boundary to test the non-default '
4463
A very long line that must get split to something other than at the 76th
4464
character boundary to test the non-default behavior''')
4466
h = Header(hstr, header_name='Subject')
4468
A very long line that must get split to something other than at the
4469
76th character boundary to test the non-default behavior''')
4471
h = Header(hstr, maxlinelen=1024, header_name='Subject')
4472
eq(h.encode(), hstr)
4475
def test_quopri_splittable(self):
4476
eq = self.ndiffAssertEqual
4477
h = Header(charset='iso-8859-1', maxlinelen=20)
4482
=?iso-8859-1?q?xxx?=
4531
=?iso-8859-1?q?_?=""")
4532
eq(x, str(make_header(decode_header(s))))
4533
h = Header(charset='iso-8859-1', maxlinelen=40)
4534
h.append('xxxx ' * 20)
4537
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
4538
=?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
4539
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
4540
=?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
4541
=?iso-8859-1?q?_xxxx_xxxx_?=""")
4542
eq(x, str(make_header(decode_header(s))))
4544
def test_base64_splittable(self):
4545
eq = self.ndiffAssertEqual
4546
h = Header(charset='koi8-r', maxlinelen=20)
4584
=?koi8-r?b?IA==?=""")
4585
eq(x, str(make_header(decode_header(s))))
4586
h = Header(charset='koi8-r', maxlinelen=40)
4590
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
4591
=?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
4592
=?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
4593
=?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
4594
=?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
4595
=?koi8-r?b?eHh4eCB4eHh4IA==?=""")
4596
eq(x, str(make_header(decode_header(s))))
4598
def test_us_ascii_header(self):
4599
eq = self.assertEqual
4601
x = decode_header(s)
4602
eq(x, [('hello', None)])
4606
def test_string_charset(self):
4607
eq = self.assertEqual
4609
h.append('hello', 'iso-8859-1')
4612
## def test_unicode_error(self):
4613
## raises = self.assertRaises
4614
## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4615
## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4617
## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4618
## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4619
## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4621
def test_utf8_shortest(self):
4622
eq = self.assertEqual
4623
h = Header('p\xf6stal', 'utf-8')
4624
eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4625
h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4626
eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4628
def test_bad_8bit_header(self):
4629
raises = self.assertRaises
4630
eq = self.assertEqual
4631
x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4632
raises(UnicodeError, Header, x)
4634
raises(UnicodeError, h.append, x)
4635
e = x.decode('utf-8', 'replace')
4636
eq(str(Header(x, errors='replace')), e)
4637
h.append(x, errors='replace')
4640
def test_escaped_8bit_header(self):
4641
x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4642
e = x.decode('ascii', 'surrogateescape')
4643
h = Header(e, charset=email.charset.UNKNOWN8BIT)
4644
self.assertEqual(str(h),
4645
'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4646
self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4648
def test_header_handles_binary_unknown8bit(self):
4649
x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4650
h = Header(x, charset=email.charset.UNKNOWN8BIT)
4651
self.assertEqual(str(h),
4652
'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4653
self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4655
def test_make_header_handles_binary_unknown8bit(self):
4656
x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4657
h = Header(x, charset=email.charset.UNKNOWN8BIT)
4658
h2 = email.header.make_header(email.header.decode_header(h))
4659
self.assertEqual(str(h2),
4660
'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4661
self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4663
def test_modify_returned_list_does_not_change_header(self):
4665
chunks = email.header.decode_header(h)
4666
chunks.append(('ascii', 'test2'))
4667
self.assertEqual(str(h), 'test')
4669
def test_encoded_adjacent_nonencoded(self):
4670
eq = self.assertEqual
4672
h.append('hello', 'iso-8859-1')
4675
eq(s, '=?iso-8859-1?q?hello?= world')
4676
h = make_header(decode_header(s))
4679
def test_whitespace_keeper(self):
4680
eq = self.assertEqual
4681
s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4682
parts = decode_header(s)
4683
eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
4684
hdr = make_header(parts)
4686
'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4688
def test_broken_base64_header(self):
4689
raises = self.assertRaises
4690
s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
4691
raises(errors.HeaderParseError, decode_header, s)
4693
def test_shift_jis_charset(self):
4694
h = Header('ę', charset='shift_jis')
4695
self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4697
def test_flatten_header_with_no_value(self):
4698
# Issue 11401 (regression from email 4.x) Note that the space after
4699
# the header doesn't reflect the input, but this is also the way
4700
# email 4.x behaved. At some point it would be nice to fix that.
4701
msg = email.message_from_string("EmptyHeader:")
4702
self.assertEqual(str(msg), "EmptyHeader: \n\n")
4704
def test_encode_preserves_leading_ws_on_value(self):
4706
msg['SomeHeader'] = ' value with leading ws'
4707
self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4711
# Test RFC 2231 header parameters (en/de)coding
4712
class TestRFC2231(TestEmailBase):
4714
# test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
4715
# test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
4716
def test_get_param(self):
    """Check get_param('title') on msg_29.txt, unquoted and with quotes kept."""
    msg = self._msgobj('msg_29.txt')
    unquoted = msg.get_param('title')
    self.assertEqual(
        unquoted,
        ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    quoted = msg.get_param('title', unquote=False)
    self.assertEqual(
        quoted,
        ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
4724
def test_set_param(self):
4725
eq = self.ndiffAssertEqual
4727
msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
4729
eq(msg.get_param('title'),
4730
('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
4731
msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
4732
charset='us-ascii', language='en')
4733
eq(msg.get_param('title'),
4734
('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
4735
msg = self._msgobj('msg_01.txt')
4736
msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
4737
charset='us-ascii', language='en')
4738
eq(msg.as_string(maxheaderlen=78), """\
4739
Return-Path: <bbb@zzz.org>
4740
Delivered-To: bbb@zzz.org
4741
Received: by mail.zzz.org (Postfix, from userid 889)
4742
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
4744
Content-Transfer-Encoding: 7bit
4745
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
4746
From: bbb@ddd.com (John X. Doe)
4748
Subject: This is a test message
4749
Date: Fri, 4 May 2001 14:05:44 -0400
4750
Content-Type: text/plain; charset=us-ascii;
4751
title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
4756
Do you like this message?
4761
def test_set_param_requote(self):
4763
msg.set_param('title', 'foo')
4764
self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4765
msg.set_param('title', 'bar', requote=False)
4766
self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4767
# tspecial is still quoted.
4768
msg.set_param('title', "(bar)bell", requote=False)
4769
self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4771
def test_del_param(self):
4772
eq = self.ndiffAssertEqual
4773
msg = self._msgobj('msg_01.txt')
4774
msg.set_param('foo', 'bar', charset='us-ascii', language='en')
4775
msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
4776
charset='us-ascii', language='en')
4777
msg.del_param('foo', header='Content-Type')
4778
eq(msg.as_string(maxheaderlen=78), """\
4779
Return-Path: <bbb@zzz.org>
4780
Delivered-To: bbb@zzz.org
4781
Received: by mail.zzz.org (Postfix, from userid 889)
4782
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
4784
Content-Transfer-Encoding: 7bit
4785
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
4786
From: bbb@ddd.com (John X. Doe)
4788
Subject: This is a test message
4789
Date: Fri, 4 May 2001 14:05:44 -0400
4790
Content-Type: text/plain; charset="us-ascii";
4791
title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
4796
Do you like this message?
4801
# test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
4802
# I changed the charset name, though, because the one in the file isn't
4803
# a legal charset name. Should add a test for an illegal charset.
4804
def test_rfc2231_get_content_charset(self):
    """get_content_charset() on msg_32.txt is expected to be 'us-ascii'."""
    msg = self._msgobj('msg_32.txt')
    charset = msg.get_content_charset()
    self.assertEqual(charset, 'us-ascii')
4809
# test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
4810
def test_rfc2231_parse_rfc_quoting(self):
4811
m = textwrap.dedent('''\
4812
Content-Disposition: inline;
4813
\tfilename*0*=''This%20is%20even%20more%20;
4814
\tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4815
\tfilename*2="is it not.pdf"
4818
msg = email.message_from_string(m)
4819
self.assertEqual(msg.get_filename(),
4820
'This is even more ***fun*** is it not.pdf')
4821
self.assertEqual(m, msg.as_string())
4823
# test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
4824
def test_rfc2231_parse_extra_quoting(self):
4825
m = textwrap.dedent('''\
4826
Content-Disposition: inline;
4827
\tfilename*0*="''This%20is%20even%20more%20";
4828
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4829
\tfilename*2="is it not.pdf"
4832
msg = email.message_from_string(m)
4833
self.assertEqual(msg.get_filename(),
4834
'This is even more ***fun*** is it not.pdf')
4835
self.assertEqual(m, msg.as_string())
4837
# test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
4838
# but new test uses *0* because otherwise lang/charset is not valid.
4839
# test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
4840
def test_rfc2231_no_language_or_charset(self):
4842
Content-Transfer-Encoding: 8bit
4843
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4844
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4847
msg = email.message_from_string(m)
4848
param = msg.get_param('NAME')
4849
self.assertNotIsInstance(param, tuple)
4852
'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4854
# test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
4855
def test_rfc2231_no_language_or_charset_in_filename(self):
4857
Content-Disposition: inline;
4858
\tfilename*0*="''This%20is%20even%20more%20";
4859
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4860
\tfilename*2="is it not.pdf"
4863
msg = email.message_from_string(m)
4864
self.assertEqual(msg.get_filename(),
4865
'This is even more ***fun*** is it not.pdf')
4867
# Duplicate of previous test?
4868
def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4870
Content-Disposition: inline;
4871
\tfilename*0*="''This%20is%20even%20more%20";
4872
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4873
\tfilename*2="is it not.pdf"
4876
msg = email.message_from_string(m)
4877
self.assertEqual(msg.get_filename(),
4878
'This is even more ***fun*** is it not.pdf')
4880
# test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
4881
# but the test below is wrong (the first part should be decoded).
4882
def test_rfc2231_partly_encoded(self):
4884
Content-Disposition: inline;
4885
\tfilename*0="''This%20is%20even%20more%20";
4886
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4887
\tfilename*2="is it not.pdf"
4890
msg = email.message_from_string(m)
4893
'This%20is%20even%20more%20***fun*** is it not.pdf')
4895
def test_rfc2231_partly_nonencoded(self):
4897
Content-Disposition: inline;
4898
\tfilename*0="This%20is%20even%20more%20";
4899
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4900
\tfilename*2="is it not.pdf"
4903
msg = email.message_from_string(m)
4906
'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4908
def test_rfc2231_no_language_or_charset_in_boundary(self):
4910
Content-Type: multipart/alternative;
4911
\tboundary*0*="''This%20is%20even%20more%20";
4912
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4913
\tboundary*2="is it not.pdf"
4916
msg = email.message_from_string(m)
4917
self.assertEqual(msg.get_boundary(),
4918
'This is even more ***fun*** is it not.pdf')
4920
def test_rfc2231_no_language_or_charset_in_charset(self):
4921
# This is a nonsensical charset value, but tests the code anyway
4923
Content-Type: text/plain;
4924
\tcharset*0*="This%20is%20even%20more%20";
4925
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4926
\tcharset*2="is it not.pdf"
4929
msg = email.message_from_string(m)
4930
self.assertEqual(msg.get_content_charset(),
4931
'this is even more ***fun*** is it not.pdf')
4933
# test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
4934
def test_rfc2231_bad_encoding_in_filename(self):
4936
Content-Disposition: inline;
4937
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4938
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4939
\tfilename*2="is it not.pdf"
4942
msg = email.message_from_string(m)
4943
self.assertEqual(msg.get_filename(),
4944
'This is even more ***fun*** is it not.pdf')
4946
def test_rfc2231_bad_encoding_in_charset(self):
4948
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4951
msg = email.message_from_string(m)
4952
# This should return None because non-ascii characters in the charset
4954
self.assertEqual(msg.get_content_charset(), None)
4956
def test_rfc2231_bad_character_in_charset(self):
4958
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4961
msg = email.message_from_string(m)
4962
# This should return None because non-ascii characters in the charset
4964
self.assertEqual(msg.get_content_charset(), None)
4966
def test_rfc2231_bad_character_in_filename(self):
4968
Content-Disposition: inline;
4969
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4970
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4971
\tfilename*2*="is it not.pdf%E2"
4974
msg = email.message_from_string(m)
4975
self.assertEqual(msg.get_filename(),
4976
'This is even more ***fun*** is it not.pdf\ufffd')
4978
def test_rfc2231_unknown_encoding(self):
4980
Content-Transfer-Encoding: 8bit
4981
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4984
msg = email.message_from_string(m)
4985
self.assertEqual(msg.get_filename(), 'myfile.txt')
4987
def test_rfc2231_single_tick_in_filename_extended(self):
4988
eq = self.assertEqual
4990
Content-Type: application/x-foo;
4991
\tname*0*=\"Frank's\"; name*1*=\" Document\"
4994
msg = email.message_from_string(m)
4995
charset, language, s = msg.get_param('name')
4998
eq(s, "Frank's Document")
5000
# test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5001
def test_rfc2231_single_tick_in_filename(self):
5003
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
5006
msg = email.message_from_string(m)
5007
param = msg.get_param('name')
5008
self.assertNotIsInstance(param, tuple)
5009
self.assertEqual(param, "Frank's Document")
5011
# test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
5012
def test_rfc2231_tick_attack_extended(self):
5013
eq = self.assertEqual
5015
Content-Type: application/x-foo;
5016
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
5019
msg = email.message_from_string(m)
5020
charset, language, s = msg.get_param('name')
5021
eq(charset, 'us-ascii')
5022
eq(language, 'en-us')
5023
eq(s, "Frank's Document")
5025
# test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
5026
def test_rfc2231_tick_attack(self):
5028
Content-Type: application/x-foo;
5029
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
5032
msg = email.message_from_string(m)
5033
param = msg.get_param('name')
5034
self.assertNotIsInstance(param, tuple)
5035
self.assertEqual(param, "us-ascii'en-us'Frank's Document")
5037
# test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
5038
def test_rfc2231_no_extended_values(self):
5039
eq = self.assertEqual
5041
Content-Type: application/x-foo; name=\"Frank's Document\"
5044
msg = email.message_from_string(m)
5045
eq(msg.get_param('name'), "Frank's Document")
5047
# test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
5048
def test_rfc2231_encoded_then_unencoded_segments(self):
5049
eq = self.assertEqual
5051
Content-Type: application/x-foo;
5052
\tname*0*=\"us-ascii'en-us'My\";
5053
\tname*1=\" Document\";
5054
\tname*2*=\" For You\"
5057
msg = email.message_from_string(m)
5058
charset, language, s = msg.get_param('name')
5059
eq(charset, 'us-ascii')
5060
eq(language, 'en-us')
5061
eq(s, 'My Document For You')
5063
# test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
5064
# test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
5065
def test_rfc2231_unencoded_then_encoded_segments(self):
5066
eq = self.assertEqual
5068
Content-Type: application/x-foo;
5069
\tname*0=\"us-ascii'en-us'My\";
5070
\tname*1*=\" Document\";
5071
\tname*2*=\" For You\"
5074
msg = email.message_from_string(m)
5075
charset, language, s = msg.get_param('name')
5076
eq(charset, 'us-ascii')
5077
eq(language, 'en-us')
5078
eq(s, 'My Document For You')
5082
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        # Return (raw text, parsed Message) for the test data file
        # *filename*, opened through the openfile() helper.
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message
        # (everything between the first boundary line and the next
        # occurrence of that same boundary) and require them to be equal.
        import re  # local import: no module-level one is visible here
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        in_match = repart.search(original)
        out_match = repart.search(result)
        # Fail with a readable message rather than an AttributeError on
        # None if either text has no boundary-delimited part.
        self.assertIsNotNone(in_match, 'no MIME part found in original text')
        self.assertIsNotNone(out_match, 'no MIME part found in generated text')
        self.assertEqual(out_match.group(2), in_match.group(2))

    def test_long_headers_as_string(self):
        # Round-trip through Message.as_string() with default settings.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Round-trip through as_string() with an explicit maxheaderlen of 60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Round-trip through Generator.flatten() into an in-memory buffer.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
if __name__ == '__main__':