~ubuntu-branches/ubuntu/trusty/python3.4/trusty-proposed

from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)

1106

\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)

1107

1108

"""

1109

h = Header(hstr, continuation_ws='\t')

1110

eq(h.encode(), """\

1111

from babylon.socal-raves.org (localhost [127.0.0.1]);

1112

by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;

1113

for <mailman-admin@babylon.socal-raves.org>;

1114

Sat, 2 Feb 2002 17:00:06 -0800 (PST)

1115

\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);

1116

by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;

1117

for <mailman-admin@babylon.socal-raves.org>;

1118

Sat, 2 Feb 2002 17:00:06 -0800 (PST)

1119

\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);

1120

by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;

1121

for <mailman-admin@babylon.socal-raves.org>;

1122

Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")

1123

1124

def test_splitting_first_line_only_is_long(self):

1125

eq = self.ndiffAssertEqual

1126

hstr = """\

1127

from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)

1128

\tby kronos.mems-exchange.org with esmtp (Exim 4.05)

1129

\tid 17k4h5-00034i-00

1130

\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""

1131

h = Header(hstr, maxlinelen=78, header_name='Received',

1132

continuation_ws='\t')

1133

eq(h.encode(), """\

1134

from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]

1135

helo=cthulhu.gerg.ca)

1136

\tby kronos.mems-exchange.org with esmtp (Exim 4.05)

1137

\tid 17k4h5-00034i-00

1138

\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")

1139

1140

def test_long_8bit_header(self):

1141

eq = self.ndiffAssertEqual

1142

msg = Message()

1143

h = Header('Britische Regierung gibt', 'iso-8859-1',

1144

header_name='Subject')

1145

h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')

1146

eq(h.encode(maxlinelen=76), """\

1147

=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=

1148

=?iso-8859-1?q?hore-Windkraftprojekte?=""")

1149

msg['Subject'] = h

1150

eq(msg.as_string(maxheaderlen=76), """\

1151

Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=

1152

=?iso-8859-1?q?hore-Windkraftprojekte?=

1153

1154

""")

1155

eq(msg.as_string(maxheaderlen=0), """\

1156

Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

1157

1158

""")

1159

1160

def test_long_8bit_header_no_charset(self):

1161

eq = self.ndiffAssertEqual

1162

msg = Message()

1163

header_string = ('Britische Regierung gibt gr\xfcnes Licht '

1164

'f\xfcr Offshore-Windkraftprojekte '

1165

'<a-very-long-address@example.com>')

1166

msg['Reply-To'] = header_string

1167

eq(msg.as_string(maxheaderlen=78), """\

1168

Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=

1169

=?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

1170

1171

""")

1172

msg = Message()

1173

msg['Reply-To'] = Header(header_string,

1174

header_name='Reply-To')

1175

eq(msg.as_string(maxheaderlen=78), """\

1176

Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=

1177

=?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

1178

1179

""")

1180

1181

def test_long_to_header(self):

1182

eq = self.ndiffAssertEqual

1183

to = ('"Someone Test #A" <someone@eecs.umich.edu>,'

1184

'<someone@eecs.umich.edu>, '

1185

'"Someone Test #B" <someone@umich.edu>, '

1186

'"Someone Test #C" <someone@eecs.umich.edu>, '

1187

'"Someone Test #D" <someone@eecs.umich.edu>')

1188

msg = Message()

1189

msg['To'] = to

1190

eq(msg.as_string(maxheaderlen=78), '''\

1191

To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,

1192

"Someone Test #B" <someone@umich.edu>,

1193

"Someone Test #C" <someone@eecs.umich.edu>,

1194

"Someone Test #D" <someone@eecs.umich.edu>

1195

1196

''')

1197

1198

def test_long_line_after_append(self):

1199

eq = self.ndiffAssertEqual

1200

s = 'This is an example of string which has almost the limit of header length.'

1201

h = Header(s)

1202

h.append('Add another line.')

1203

eq(h.encode(maxlinelen=76), """\

1204

This is an example of string which has almost the limit of header length.

1205

Add another line.""")

1206

1207

def test_shorter_line_with_append(self):

1208

eq = self.ndiffAssertEqual

1209

s = 'This is a shorter line.'

1210

h = Header(s)

1211

h.append('Add another sentence. (Surprise?)')

1212

eq(h.encode(),

1213

'This is a shorter line. Add another sentence. (Surprise?)')

1214

1215

def test_long_field_name(self):

1216

eq = self.ndiffAssertEqual

1217

fn = 'X-Very-Very-Very-Long-Header-Name'

1218

gs = ('Die Mieter treten hier ein werden mit einem Foerderband '

1219

'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '

1220

'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '

1221

'bef\xf6rdert. ')

1222

h = Header(gs, 'iso-8859-1', header_name=fn)

1223

# BAW: this seems broken because the first line is too long

1224

eq(h.encode(maxlinelen=76), """\

1225

=?iso-8859-1?q?Die_Mieter_treten_hier_e?=

1226

=?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=

1227

=?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=

1228

=?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")

1229

1230

def test_long_received_header(self):

1231

h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '

1232

'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '

1233

'Wed, 05 Mar 2003 18:10:18 -0700')

1234

msg = Message()

1235

msg['Received-1'] = Header(h, continuation_ws='\t')

1236

msg['Received-2'] = h

1237

# This should be splitting on spaces not semicolons.

1238

self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\

1239

Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by

1240

hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;

1241

Wed, 05 Mar 2003 18:10:18 -0700

1242

Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by

1243

hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;

1244

Wed, 05 Mar 2003 18:10:18 -0700

1245

1246

""")

1247

1248

def test_string_headerinst_eq(self):

1249

h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'

1250

'tu-muenchen.de> (David Bremner\'s message of '

1251

'"Thu, 6 Mar 2003 13:58:21 +0100")')

1252

msg = Message()

1253

msg['Received-1'] = Header(h, header_name='Received-1',

1254

continuation_ws='\t')

1255

msg['Received-2'] = h

1256

# XXX The space after the ':' should not be there.

1257

self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\

1258

Received-1:\x20

1259

<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David

1260

Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")

1261

Received-2:\x20

1262

<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David

1263

Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")

1264

1265

""")

1266

1267

def test_long_unbreakable_lines_with_continuation(self):

1268

eq = self.ndiffAssertEqual

1269

msg = Message()

1270

t = """\

1271

iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9

1272

locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""

1273

msg['Face-1'] = t

1274

msg['Face-2'] = Header(t, header_name='Face-2')

1275

msg['Face-3'] = ' ' + t

1276

# XXX This splitting is all wrong. It the first value line should be

1277

# snug against the field name or the space after the header not there.

1278

eq(msg.as_string(maxheaderlen=78), """\

1279

Face-1:\x20

1280

iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9

1281

locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

1282

Face-2:\x20

1283

iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9

1284

locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

1285

Face-3:\x20

1286

iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9

1287

locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

1288

1289

""")

1290

1291

def test_another_long_multiline_header(self):

1292

eq = self.ndiffAssertEqual

1293

m = ('Received: from siimage.com '

1294

'([172.25.1.3]) by zima.siliconimage.com with '

1295

'Microsoft SMTPSVC(5.0.2195.4905); '

1296

'Wed, 16 Oct 2002 07:41:11 -0700')

1297

msg = email.message_from_string(m)

1298

eq(msg.as_string(maxheaderlen=78), '''\

1299

Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with

1300

Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700

1301

1302

''')

1303

1304

def test_long_lines_with_different_header(self):

1305

eq = self.ndiffAssertEqual

1306

h = ('List-Unsubscribe: '

1307

'<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'

1308

' <mailto:spamassassin-talk-request@lists.sourceforge.net'

1309

'?subject=unsubscribe>')

1310

msg = Message()

1311

msg['List'] = h

1312

msg['List'] = Header(h, header_name='List')

1313

eq(msg.as_string(maxheaderlen=78), """\

1314

List: List-Unsubscribe:

1315

<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,

1316

<mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

1317

List: List-Unsubscribe:

1318

<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,

1319

<mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

1320

1321

""")

1322

1323

def test_long_rfc2047_header_with_embedded_fws(self):

1324

h = Header(textwrap.dedent("""\

1325

We're going to pretend this header is in a non-ascii character set

1326

\tto see if line wrapping with encoded words and embedded

1327

folding white space works"""),

1328

charset='utf-8',

1329

header_name='Test')

1330

self.assertEqual(h.encode()+'\n', textwrap.dedent("""\

1331

=?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=

1332

=?utf-8?q?cter_set?=

1333

=?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=

1334

=?utf-8?q?_folding_white_space_works?=""")+'\n')

1335

1336

1337

1338

# Test mangling of "From " lines in the body of a message

1339

class TestFromMangling(unittest.TestCase):
    """Check how generators treat body lines that start with "From "."""

    def setUp(self):
        """Build a message whose payload's first line starts with "From "."""
        self.msg = Message()
        self.msg['From'] = 'aaa@bbb.org'
        self.msg.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangled_from(self):
        """With mangle_from_=True the body "From " line gains a ">" prefix."""
        s = StringIO()
        g = Generator(s, mangle_from_=True)
        g.flatten(self.msg)
        self.assertEqual(s.getvalue(), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        """With mangle_from_=False the body is written unchanged."""
        s = StringIO()
        g = Generator(s, mangle_from_=False)
        g.flatten(self.msg)
        self.assertEqual(s.getvalue(), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        """Both the preamble and the epilogue "From " lines get mangled."""
        s = StringIO()
        g = Generator(s, mangle_from_=True)
        msg = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        g.flatten(msg)
        self.assertEqual(len([1 for x in s.getvalue().split('\n')
                              if x.startswith('>From ')]), 2)

    def test_mangled_from_with_bad_bytes(self):
        """Mangling also applies when the body holds non-ASCII bytes."""
        source = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

        """).encode('utf-8')
        msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        b = BytesIO()
        g = BytesGenerator(b, mangle_from_=True)
        g.flatten(msg)
        self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n')

1406

1407

1408

# Test the basic MIMEAudio class

1409

class TestMIMEAudio(unittest.TestCase):
    """Exercise the basic MIMEAudio class on the ``audiotest.au`` sample."""

    def setUp(self):
        """Read the sample audio bytes and wrap them in a MIMEAudio part."""
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        """The subtype is guessed from the data when none is given."""
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        """The payload base64-decodes back to the original bytes."""
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        """An explicit subtype argument overrides the guessed one."""
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        """add_header() parameters are readable through get_param(s)."""
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A unique object, so identity checks prove failobj was returned.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'), missing)

1445

1446

1447

1448

# Test the basic MIMEImage class

1449

class TestMIMEImage(unittest.TestCase):
    """Exercise the basic MIMEImage class on the ``PyBanner048.gif`` sample."""

    def setUp(self):
        """Read the sample image bytes and wrap them in a MIMEImage part."""
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        """The subtype is guessed from the data when none is given."""
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        """The payload base64-decodes back to the original bytes."""
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        """An explicit subtype argument overrides the guessed one."""
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        """add_header() parameters are readable through get_param(s)."""
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A unique object, so identity checks prove failobj was returned.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'), missing)

1485

1486

1487

1488

# Test the basic MIMEApplication class

1489

class TestMIMEApplication(unittest.TestCase):
    """Exercise MIMEApplication with the default and alternative encoders."""

    def test_headers(self):
        """Defaults: application/octet-stream with base64 transfer encoding."""
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        """The default payload is base64 text that decodes to the input."""
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        """Binary data with encode_7or8bit survives a bytes round trip."""
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): this re-checks ``msg`` rather than the re-parsed
        # ``msg2`` -- possibly ``msg2`` was intended; left as found.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        """Binary data with encode_noop survives a bytes round trip."""
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary. But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): this re-checks ``msg`` rather than the re-parsed
        # ``msg2`` -- possibly ``msg2`` was intended; left as found.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        """Binary data with encode_quopri survives a bytes round trip."""
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): this re-checks ``msg`` rather than the re-parsed
        # ``msg2`` -- possibly ``msg2`` was intended; left as found.
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        """Binary data with encode_base64 survives a bytes round trip."""
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): this re-checks ``msg`` rather than the re-parsed
        # ``msg2`` -- possibly ``msg2`` was intended; left as found.
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

1566

1567

1568

# Test the basic MIMEText class

1569

class TestMIMEText(unittest.TestCase):
    """Exercise the basic MIMEText class."""

    def setUp(self):
        """Create a plain ASCII text part shared by the simple tests."""
        self._msg = MIMEText('hello there')

    def test_types(self):
        """Content type and charset defaults, plus failobj for missing params."""
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A unique object, so identity checks prove failobj was returned.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        """The payload is the original text and the part is not multipart."""
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        """An explicit us-ascii charset shows up in the Content-Type header."""
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        """7-bit text with an explicit us-ascii charset keeps that charset."""
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        """7-bit text without a charset argument is labelled us-ascii."""
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        """Non-ASCII text with _charset='utf-8' decodes to its utf-8 bytes."""
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        """Skipped: non-ASCII text without a charset should raise."""
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)

1618

1619

1620

1621

# Test complicated multipart/* messages

1622

class TestMultipart(TestEmailBase):

1623

def setUp(self):

1624

with openfile('PyBanner048.gif', 'rb') as fp:

1625

data = fp.read()

1626

container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')

1627

image = MIMEImage(data, name='dingusfish.gif')

1628

image.add_header('content-disposition', 'attachment',

1629

filename='dingusfish.gif')

1630

intro = MIMEText('''\

1631

Hi there,

1632

1633

This is the dingus fish.

1634

''')

1635

container.attach(intro)

1636

container.attach(image)

1637

container['From'] = 'Barry <barry@digicool.com>'

1638

container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'

1639

container['Subject'] = 'Here is your dingus fish'

1640

1641

now = 987809702.54848599

1642

timetuple = time.localtime(now)

1643

if timetuple[-1] == 0:

1644

tzsecs = time.timezone

1645

else:

1646

tzsecs = time.altzone

1647

if tzsecs > 0:

1648

sign = '-'

1649

else:

1650

sign = '+'

1651

tzoffset = ' %s%04d' % (sign, tzsecs / 36)

1652

container['Date'] = time.strftime(

1653

'%a, %d %b %Y %H:%M:%S',

1654

time.localtime(now)) + tzoffset

1655

self._msg = container

1656

self._im = image

1657

self._txt = intro

1658

1659

def test_hierarchy(self):

1660

# convenience

1661

eq = self.assertEqual

1662

raises = self.assertRaises

1663

# tests

1664

m = self._msg

1665

self.assertTrue(m.is_multipart())

1666

eq(m.get_content_type(), 'multipart/mixed')

1667

eq(len(m.get_payload()), 2)

1668

raises(IndexError, m.get_payload, 2)

1669

m0 = m.get_payload(0)

1670

m1 = m.get_payload(1)

1671

self.assertIs(m0, self._txt)

1672

self.assertIs(m1, self._im)

1673

eq(m.get_payload(), [m0, m1])

1674

self.assertFalse(m0.is_multipart())

1675

self.assertFalse(m1.is_multipart())

1676

1677

def test_empty_multipart_idempotent(self):

1678

text = """\

1679

Content-Type: multipart/mixed; boundary="BOUNDARY"

1680

MIME-Version: 1.0

1681

Subject: A subject

1682

To: aperson@dom.ain

1683

From: bperson@dom.ain

1684

1685

1686

--BOUNDARY

1687

1688

1689

--BOUNDARY--

1690

"""

1691

msg = Parser().parsestr(text)

1692

self.ndiffAssertEqual(text, msg.as_string())

1693

1694

def test_no_parts_in_a_multipart_with_none_epilogue(self):

1695

outer = MIMEBase('multipart', 'mixed')

1696

outer['Subject'] = 'A subject'

1697

outer['To'] = 'aperson@dom.ain'

1698

outer['From'] = 'bperson@dom.ain'

1699

outer.set_boundary('BOUNDARY')

1700

self.ndiffAssertEqual(outer.as_string(), '''\

1701

Content-Type: multipart/mixed; boundary="BOUNDARY"

1702

MIME-Version: 1.0

1703

Subject: A subject

1704

To: aperson@dom.ain

1705

From: bperson@dom.ain

1706

1707

--BOUNDARY

1708

1709

--BOUNDARY--''')

1710

1711

def test_no_parts_in_a_multipart_with_empty_epilogue(self):

1712

outer = MIMEBase('multipart', 'mixed')

1713

outer['Subject'] = 'A subject'

1714

outer['To'] = 'aperson@dom.ain'

1715

outer['From'] = 'bperson@dom.ain'

1716

outer.preamble = ''

1717

outer.epilogue = ''

1718

outer.set_boundary('BOUNDARY')

1719

self.ndiffAssertEqual(outer.as_string(), '''\

1720

Content-Type: multipart/mixed; boundary="BOUNDARY"

1721

MIME-Version: 1.0

1722

Subject: A subject

1723

To: aperson@dom.ain

1724

From: bperson@dom.ain

1725

1726

1727

--BOUNDARY

1728

1729

--BOUNDARY--

1730

''')

1731

1732

def test_one_part_in_a_multipart(self):

1733

eq = self.ndiffAssertEqual

1734

outer = MIMEBase('multipart', 'mixed')

1735

outer['Subject'] = 'A subject'

1736

outer['To'] = 'aperson@dom.ain'

1737

outer['From'] = 'bperson@dom.ain'

1738

outer.set_boundary('BOUNDARY')

1739

msg = MIMEText('hello world')

1740

outer.attach(msg)

1741

eq(outer.as_string(), '''\

1742

Content-Type: multipart/mixed; boundary="BOUNDARY"

1743

MIME-Version: 1.0

1744

Subject: A subject

1745

To: aperson@dom.ain

1746

From: bperson@dom.ain

1747

1748

--BOUNDARY

1749

Content-Type: text/plain; charset="us-ascii"

1750

MIME-Version: 1.0

1751

Content-Transfer-Encoding: 7bit

1752

1753

hello world

1754

--BOUNDARY--''')

1755

1756

def test_seq_parts_in_a_multipart_with_empty_preamble(self):

1757

eq = self.ndiffAssertEqual

1758

outer = MIMEBase('multipart', 'mixed')

1759

outer['Subject'] = 'A subject'

1760

outer['To'] = 'aperson@dom.ain'

1761

outer['From'] = 'bperson@dom.ain'

1762

outer.preamble = ''

1763

msg = MIMEText('hello world')

1764

outer.attach(msg)

1765

outer.set_boundary('BOUNDARY')

1766

eq(outer.as_string(), '''\

1767

Content-Type: multipart/mixed; boundary="BOUNDARY"

1768

MIME-Version: 1.0

1769

Subject: A subject

1770

To: aperson@dom.ain

1771

From: bperson@dom.ain

1772

1773

1774

--BOUNDARY

1775

Content-Type: text/plain; charset="us-ascii"

1776

MIME-Version: 1.0

1777

Content-Transfer-Encoding: 7bit

1778

1779

hello world

1780

--BOUNDARY--''')

1781

1782

1783

def test_seq_parts_in_a_multipart_with_none_preamble(self):

1784

eq = self.ndiffAssertEqual

1785

outer = MIMEBase('multipart', 'mixed')

1786

outer['Subject'] = 'A subject'

1787

outer['To'] = 'aperson@dom.ain'

1788

outer['From'] = 'bperson@dom.ain'

1789

outer.preamble = None

1790

msg = MIMEText('hello world')

1791

outer.attach(msg)

1792

outer.set_boundary('BOUNDARY')

1793

eq(outer.as_string(), '''\

1794

Content-Type: multipart/mixed; boundary="BOUNDARY"

1795

MIME-Version: 1.0

1796

Subject: A subject

1797

To: aperson@dom.ain

1798

From: bperson@dom.ain

1799

1800

--BOUNDARY

1801

Content-Type: text/plain; charset="us-ascii"

1802

MIME-Version: 1.0

1803

Content-Transfer-Encoding: 7bit

1804

1805

hello world

1806

--BOUNDARY--''')

1807

1808

1809

def test_seq_parts_in_a_multipart_with_none_epilogue(self):

1810

eq = self.ndiffAssertEqual

1811

outer = MIMEBase('multipart', 'mixed')

1812

outer['Subject'] = 'A subject'

1813

outer['To'] = 'aperson@dom.ain'

1814

outer['From'] = 'bperson@dom.ain'

1815

outer.epilogue = None

1816

msg = MIMEText('hello world')

1817

outer.attach(msg)

1818

outer.set_boundary('BOUNDARY')

1819

eq(outer.as_string(), '''\

1820

Content-Type: multipart/mixed; boundary="BOUNDARY"

1821

MIME-Version: 1.0

1822

Subject: A subject

1823

To: aperson@dom.ain

1824

From: bperson@dom.ain

1825

1826

--BOUNDARY

1827

Content-Type: text/plain; charset="us-ascii"

1828

MIME-Version: 1.0

1829

Content-Transfer-Encoding: 7bit

1830

1831

hello world

1832

--BOUNDARY--''')

1833

1834

1835

def test_seq_parts_in_a_multipart_with_empty_epilogue(self):

1836

eq = self.ndiffAssertEqual

1837

outer = MIMEBase('multipart', 'mixed')

1838

outer['Subject'] = 'A subject'

1839

outer['To'] = 'aperson@dom.ain'

1840

outer['From'] = 'bperson@dom.ain'

1841

outer.epilogue = ''

1842

msg = MIMEText('hello world')

1843

outer.attach(msg)

1844

outer.set_boundary('BOUNDARY')

1845

eq(outer.as_string(), '''\

1846

Content-Type: multipart/mixed; boundary="BOUNDARY"

1847

MIME-Version: 1.0

1848

Subject: A subject

1849

To: aperson@dom.ain

1850

From: bperson@dom.ain

1851

1852

--BOUNDARY

1853

Content-Type: text/plain; charset="us-ascii"

1854

MIME-Version: 1.0

1855

Content-Transfer-Encoding: 7bit

1856

1857

hello world

1858

--BOUNDARY--

1859

''')

1860

1861

1862

def test_seq_parts_in_a_multipart_with_nl_epilogue(self):

1863

eq = self.ndiffAssertEqual

1864

outer = MIMEBase('multipart', 'mixed')

1865

outer['Subject'] = 'A subject'

1866

outer['To'] = 'aperson@dom.ain'

1867

outer['From'] = 'bperson@dom.ain'

1868

outer.epilogue = '\n'

1869

msg = MIMEText('hello world')

1870

outer.attach(msg)

1871

outer.set_boundary('BOUNDARY')

1872

eq(outer.as_string(), '''\

1873

Content-Type: multipart/mixed; boundary="BOUNDARY"

1874

MIME-Version: 1.0

1875

Subject: A subject

1876

To: aperson@dom.ain

1877

From: bperson@dom.ain

1878

1879

--BOUNDARY

1880

Content-Type: text/plain; charset="us-ascii"

1881

MIME-Version: 1.0

1882

Content-Transfer-Encoding: 7bit

1883

1884

hello world

1885

--BOUNDARY--

1886

1887

''')

1888

1889

def test_message_external_body(self):

1890

eq = self.assertEqual

1891

msg = self._msgobj('msg_36.txt')

1892

eq(len(msg.get_payload()), 2)

1893

msg1 = msg.get_payload(1)

1894

eq(msg1.get_content_type(), 'multipart/alternative')

1895

eq(len(msg1.get_payload()), 2)

1896

for subpart in msg1.get_payload():

1897

eq(subpart.get_content_type(), 'message/external-body')

1898

eq(len(subpart.get_payload()), 1)

1899

subsubpart = subpart.get_payload(0)

1900

eq(subsubpart.get_content_type(), 'text/plain')

1901

1902

def test_double_boundary(self):

1903

# msg_37.txt is a multipart that contains two dash-boundary's in a

1904

# row. Our interpretation of RFC 2046 calls for ignoring the second

1905

# and subsequent boundaries.

1906

msg = self._msgobj('msg_37.txt')

1907

self.assertEqual(len(msg.get_payload()), 3)

1908

1909

def test_nested_inner_contains_outer_boundary(self):

1910

eq = self.ndiffAssertEqual

1911

# msg_38.txt has an inner part that contains outer boundaries. My

1912

# interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say

1913

# these are illegal and should be interpreted as unterminated inner

1914

# parts.

1915

msg = self._msgobj('msg_38.txt')

1916

sfp = StringIO()

1917

iterators._structure(msg, sfp)

1918

eq(sfp.getvalue(), """\

1919

multipart/mixed

1920

multipart/mixed

1921

multipart/alternative

1922

text/plain

1923

text/plain

1924

text/plain

1925

text/plain

1926

""")

1927

1928

def test_nested_with_same_boundary(self):

1929

eq = self.ndiffAssertEqual

1930

# msg 39.txt is similarly evil in that it's got inner parts that use

1931

# the same boundary as outer parts. Again, I believe the way this is

1932

# parsed is closest to the spirit of RFC 2046

1933

msg = self._msgobj('msg_39.txt')

1934

sfp = StringIO()

1935

iterators._structure(msg, sfp)

1936

eq(sfp.getvalue(), """\

1937

multipart/mixed

1938

multipart/mixed

1939

multipart/alternative

1940

application/octet-stream

1941

application/octet-stream

1942

text/plain

1943

""")

1944

1945

def test_boundary_in_non_multipart(self):
    # msg_40.txt declares text/html yet carries a boundary parameter.  The
    # flattened message is compared against the full text below, boundary
    # lines included.
    msg = self._msgobj('msg_40.txt')
    # NOTE(review): the '<html></html>' body line was missing from the copy
    # of this test under review (three consecutive blank lines); it is
    # restored here from the upstream test -- confirm against msg_40.txt.
    self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

1959

1960

def test_boundary_with_leading_space(self):

1961

eq = self.assertEqual

1962

msg = email.message_from_string('''\

1963

MIME-Version: 1.0

1964

Content-Type: multipart/mixed; boundary=" XXXX"

1965

1966

-- XXXX

1967

Content-Type: text/plain

1968

1969

1970

-- XXXX

1971

Content-Type: text/plain

1972

1973

-- XXXX--

1974

''')

1975

self.assertTrue(msg.is_multipart())

1976

eq(msg.get_boundary(), ' XXXX')

1977

eq(len(msg.get_payload()), 2)

1978

1979

def test_boundary_without_trailing_newline(self):

1980

m = Parser().parsestr("""\

1981

Content-Type: multipart/mixed; boundary="===============0012394164=="

1982

MIME-Version: 1.0

1983

1984

--===============0012394164==

1985

Content-Type: image/file1.jpg

1986

MIME-Version: 1.0

1987

Content-Transfer-Encoding: base64

1988

1989

YXNkZg==

1990

--===============0012394164==--""")

1991

self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')

1992

1993

1994

1995

# Test some badly formatted messages

1996

class TestNonConformant(TestEmailBase):
    # Each test feeds the parser a malformed message and checks either the
    # recovered content or the defects recorded on the message object.

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # The payload stays a plain string instead of a list of parts.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template for the Content-Transfer-Encoding tests below.  The '{}' slot
    # sits at the end of the header block so a test can append one more
    # header line (or nothing).  The 'boundary=' line is a folded
    # continuation of Content-Type and therefore must keep leading
    # whitespace after dedent(); without it the line would not be part of
    # the Content-Type header.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        # 'BINary' checks that the value is matched case-insensitively.
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, section 5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        # The payload is the raw text, boundary lines included.
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                 [errors.FirstHeaderLineIsContinuationDefect])
        # The defect records the offending line.
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])

2168

2169

2170

# Test RFC 2047 header encoding and decoding

2171

class TestRFC2047(TestEmailBase):
    # decode_header() returns a list of (bytes, charset) pairs, with charset
    # None for unencoded runs; make_header() turns such a list back into a
    # Header object.

    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        # The second physical line is a folded continuation, so it begins
        # with whitespace.
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # When re-encoded with a 76 column limit, the last encoded word is
        # split and the remainder moves to a continuation line that starts
        # with a space.
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that directly abut plain text are still recognised.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        # Whitespace between plain text and encoded words stays with the
        # plain text chunk.
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        # '=zz' is not a valid hex escape and is passed through unchanged.
        s = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                        [(b'andr\xe9=zz', 'iso-8659-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047: same as the 3rd but with TWO
        # spaces between the encoded words, which must also be ignored.
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        # Re-encoding gives the input back, but in lower case.
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        # Encoding the decoded header yields the original with the line
        # break removed (the continuation's leading space is kept).
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')

2297

2298

2299

# Test the MIMEMessage class

2300

class TestMIMEMessage(TestEmailBase):
    # Covers MIMEMessage (message/rfc822 wrappers) plus a few related
    # multipart construction and generation cases.

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # MIMEMessage only accepts a message object, not a str.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message is stored as-is, not copied.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # A message/rfc822 container already holding one message rejects a
        # second attach().
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the two-space indentation of the quoted header and
        # recipient lines is restored from the upstream test (the copy under
        # review had it stripped); confirm against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        # Build the same message by hand and check that it flattens to the
        # text stored in msg_21.txt.
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        # The preamble has no trailing newline; the first boundary must
        # still start on its own line in the output.
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the reported content type
        # falls back to the default type, and the generated subparts start
        # with an empty header block.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        # Even with no subparts the container reports itself as multipart.
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())

2594

2595

2596

# A general test of parser->model->generator idempotency. IOW, read a message

2597

# in, parse it into a message object tree, then without touching the tree,

2598

# regenerate the plain text. The original text and the transformed text

2599

# should be identical.  Note that we ignore the Unix-From since that may

2600

# contain a changed date.

2601

class TestIdempotent(TestEmailBase):

    linesep = '\n'

    def _msgobj(self, filename):
        # Unlike the base-class helper, hand back the raw text together
        # with the parsed message so the two can be compared afterwards.
        with openfile(filename) as fp:
            raw = fp.read()
        return email.message_from_string(raw), raw

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and require
        # the result to equal the text it was parsed from.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        check = self.assertEqual
        parsed, raw = self._msgobj('msg_01.txt')
        check(parsed.get_content_type(), 'text/plain')
        check(parsed.get_content_maintype(), 'text')
        check(parsed.get_content_subtype(), 'plain')
        check(parsed.get_params()[1], ('charset', 'us-ascii'))
        check(parsed.get_param('charset'), 'us-ascii')
        check(parsed.preamble, None)
        check(parsed.epilogue, None)
        self._idempotent(parsed, raw)

    def test_parse_untyped_message(self):
        check = self.assertEqual
        parsed, raw = self._msgobj('msg_03.txt')
        check(parsed.get_content_type(), 'text/plain')
        check(parsed.get_params(), None)
        check(parsed.get_param('charset'), None)
        self._idempotent(parsed, raw)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This one is regenerated with the Unix-From line included.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        check = self.assertEqual
        nl = self.linesep
        body = 'Yadda yadda yadda' + nl
        report, _ = self._msgobj('msg_05.txt')
        check(report.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(report.get_params())
        check(params['report-type'], 'delivery-status')
        check(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        check(report.preamble, 'This is a MIME-encapsulated message.' + nl)
        check(report.epilogue, nl)
        check(len(report.get_payload()), 3)
        # The first two subparts are plain text with the same body
        first = report.get_payload(0)
        check(first.get_content_type(), 'text/plain')
        check(first.get_payload(), body)
        second = report.get_payload(1)
        check(second.get_content_type(), 'text/plain')
        check(second.get_payload(), body)
        # The third subpart wraps exactly one enclosed message
        third = report.get_payload(2)
        check(third.get_content_type(), 'message/rfc822')
        self.assertIsInstance(third, Message)
        wrapped = third.get_payload()
        self.assertIsInstance(wrapped, list)
        check(len(wrapped), 1)
        enclosed = wrapped[0]
        self.assertIsInstance(enclosed, Message)
        check(enclosed.get_payload(), body)

    def test_parser(self):
        check = self.assertEqual
        outer, _ = self._msgobj('msg_06.txt')
        # Check the outer content type
        check(outer.get_content_type(), 'message/rfc822')
        # The payload must be a list holding exactly one sub-Message, and
        # that sub-Message must be text/plain with a string payload
        wrapped = outer.get_payload()
        self.assertIsInstance(wrapped, list)
        check(len(wrapped), 1)
        inner = wrapped[0]
        self.assertIsInstance(inner, Message)
        check(inner.get_content_type(), 'text/plain')
        inner_body = inner.get_payload()
        self.assertIsInstance(inner_body, str)
        check(inner_body, self.linesep)

2756

2757

2758

2759

# Test various other bits of the package's functionality

2760

class TestMiscellaneous(TestEmailBase):

2761

def test_message_from_string(self):
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # maxheaderlen=0 turns off wrapping of long headers, which would
    # otherwise defeat the round-trip comparison.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())

2771

2772

def test_message_from_file(self):
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        # Rewind so the parser sees the file from the start again.
        fp.seek(0)
        parsed = email.message_from_file(fp)
    out = StringIO()
    # maxheaderlen=0 turns off wrapping of long headers, which would
    # otherwise defeat the round-trip comparison.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())

2783

2784

def test_message_from_string_with_class(self):
    # Message subclass passed to the parser as the class to instantiate
    class MyMessage(Message):
        pass

    def parse(filename):
        with openfile(filename) as fp:
            contents = fp.read()
        return email.message_from_string(contents, MyMessage)

    self.assertIsInstance(parse('msg_01.txt'), MyMessage)
    # Try something more complicated: every part found by walk() must be
    # an instance of the subclass too
    for part in parse('msg_02.txt').walk():
        self.assertIsInstance(part, MyMessage)

2800

2801

def test_message_from_file_with_class(self):
    # Message subclass passed to the parser as the class to instantiate
    class MyMessage(Message):
        pass

    def parse(filename):
        with openfile(filename) as fp:
            return email.message_from_file(fp, MyMessage)

    self.assertIsInstance(parse('msg_01.txt'), MyMessage)
    # Try something more complicated: every part found by walk() must be
    # an instance of the subclass too
    for part in parse('msg_02.txt').walk():
        self.assertIsInstance(part, MyMessage)

2814

2815

def test_custom_message_does_not_require_arguments(self):
    # A Message subclass whose __init__ takes no arguments besides self
    # must still be usable as the message class.
    class MyMessage(Message):
        def __init__(self):
            super().__init__()

    source = "Subject: test\n\ntest"
    parsed = self._str_msg(source, MyMessage)
    self.assertIsInstance(parsed, MyMessage)

2821

2822

def test__all__(self):

2823

module = __import__('email')

2824

self.assertEqual(sorted(module.__all__), [

2825

'base64mime', 'charset', 'encoders', 'errors', 'feedparser',

2826

'generator', 'header', 'iterators', 'message',

2827

'message_from_binary_file', 'message_from_bytes',

2828

'message_from_file', 'message_from_string', 'mime', 'parser',

2829

'quoprimime', 'utils',

2830

])

2831

2832

def test_formatdate(self):

2833

now = time.time()

2834

self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],

2835

time.gmtime(now)[:6])

2836

2837

def test_formatdate_localtime(self):

2838

now = time.time()

2839

self.assertEqual(

2840

utils.parsedate(utils.formatdate(now, localtime=True))[:6],

2841

time.localtime(now)[:6])

2842

2843

def test_formatdate_usegmt(self):

2844

now = time.time()

2845

self.assertEqual(

2846

utils.formatdate(now, localtime=False),

2847

time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))

2848

self.assertEqual(

2849

utils.formatdate(now, localtime=False, usegmt=True),

2850

time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))

2851

2852

# parsedate and parsedate_tz will become deprecated interfaces someday

2853

def test_parsedate_returns_None_for_invalid_strings(self):

2854

self.assertIsNone(utils.parsedate(''))

2855

self.assertIsNone(utils.parsedate_tz(''))

2856

self.assertIsNone(utils.parsedate('0'))

2857

self.assertIsNone(utils.parsedate_tz('0'))

2858

self.assertIsNone(utils.parsedate('A Complete Waste of Time'))

2859

self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))

2860

# Not a part of the spec but, but this has historically worked:

2861

self.assertIsNone(utils.parsedate(None))

2862

self.assertIsNone(utils.parsedate_tz(None))

2863

2864

def test_parsedate_compact(self):

2865

# The FWS after the comma is optional

2866

self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),

2867

utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))

2868

2869

def test_parsedate_no_dayofweek(self):

2870

eq = self.assertEqual

2871

eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),

2872

(2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))

2873

2874

def test_parsedate_compact_no_dayofweek(self):

2875

eq = self.assertEqual

2876

eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),

2877

(2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))

2878

2879

def test_parsedate_no_space_before_positive_offset(self):

2880

self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),

2881

(2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))

2882

2883

def test_parsedate_no_space_before_negative_offset(self):

2884

# Issue 1155362: we already handled '+' for this case.

2885

self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),

2886

(2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))

2887

2888

2889

def test_parsedate_accepts_time_with_dots(self):

2890

eq = self.assertEqual

2891

eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),

2892

(2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))

2893

eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),

2894

(2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))

2895

2896

def test_parsedate_acceptable_to_time_functions(self):

2897

eq = self.assertEqual

2898

timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')

2899

t = int(time.mktime(timetup))

2900

eq(time.localtime(t)[:6], timetup[:6])

2901

eq(int(time.strftime('%Y', timetup)), 2003)

2902

timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')

2903

t = int(time.mktime(timetup[:9]))

2904

eq(time.localtime(t)[:6], timetup[:6])

2905

eq(int(time.strftime('%Y', timetup[:9])), 2003)

2906

2907

def test_mktime_tz(self):

2908

self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,

2909

-1, -1, -1, 0)), 0)

2910

self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,

2911

-1, -1, -1, 1234)), -1234)

2912

2913

def test_parsedate_y2k(self):

2914

"""Test for parsing a date with a two-digit year.

2915

2916

Parsing a date with a two-digit year should return the correct

2917

four-digit year. RFC822 allows two-digit years, but RFC2822 (which

2918

obsoletes RFC822) requires four-digit years.

2919

2920

"""

2921

self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),

2922

utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))

2923

self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),

2924

utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))

2925

2926

def test_parseaddr_empty(self):

2927

self.assertEqual(utils.parseaddr('<>'), ('', ''))

2928

self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')

2929

2930

def test_noquote_dump(self):

2931

self.assertEqual(

2932

utils.formataddr(('A Silly Person', 'person@dom.ain')),

2933

'A Silly Person <person@dom.ain>')

2934

2935

def test_escape_dump(self):

2936

self.assertEqual(

2937

utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),

2938

r'"A (Very) Silly Person" <person@dom.ain>')

2939

self.assertEqual(

2940

utils.parseaddr(r'"A $Very$ Silly Person" <person@dom.ain>'),

2941

('A (Very) Silly Person', 'person@dom.ain'))

2942

a = r'A $Special$ Person'

2943

b = 'person@dom.ain'

2944

self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

2945

2946

def test_escape_backslashes(self):

2947

self.assertEqual(

2948

utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),

2949

r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')

2950

a = r'Arthur \Backslash\ Foobar'

2951

b = 'person@dom.ain'

2952

self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

2953

2954

def test_quotes_unicode_names(self):

2955

# issue 1690608. email.utils.formataddr() should be rfc2047 aware.

2956

name = "H\u00e4ns W\u00fcrst"

2957

addr = 'person@dom.ain'

2958

utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"

2959

latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"

2960

self.assertEqual(utils.formataddr((name, addr)), utf8_base64)

2961

self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),

2962

latin1_quopri)

2963

2964

def test_accepts_any_charset_like_object(self):

2965

# issue 1690608. email.utils.formataddr() should be rfc2047 aware.

2966

name = "H\u00e4ns W\u00fcrst"

2967

addr = 'person@dom.ain'

2968

utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"

2969

foobar = "FOOBAR"

2970

class CharsetMock:

2971

def header_encode(self, string):

2972

return foobar

2973

mock = CharsetMock()

2974

mock_expected = "%s <%s>" % (foobar, addr)

2975

self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)

2976

self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),

2977

utf8_base64)

2978

2979

def test_invalid_charset_like_object_raises_error(self):

2980

# issue 1690608. email.utils.formataddr() should be rfc2047 aware.

2981

name = "H\u00e4ns W\u00fcrst"

2982

addr = 'person@dom.ain'

2983

# A object without a header_encode method:

2984

bad_charset = object()

2985

self.assertRaises(AttributeError, utils.formataddr, (name, addr),

2986

bad_charset)

2987

2988

def test_unicode_address_raises_error(self):

2989

# issue 1690608. email.utils.formataddr() should be rfc2047 aware.

2990

addr = 'pers\u00f6n@dom.in'

2991

self.assertRaises(UnicodeError, utils.formataddr, (None, addr))

2992

self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))

2993

2994

def test_name_with_dot(self):

2995

x = 'John X. Doe <jxd@example.com>'

2996

y = '"John X. Doe" <jxd@example.com>'

2997

a, b = ('John X. Doe', 'jxd@example.com')

2998

self.assertEqual(utils.parseaddr(x), (a, b))

2999

self.assertEqual(utils.parseaddr(y), (a, b))

3000

# formataddr() quotes the name if there's a dot in it

3001

self.assertEqual(utils.formataddr((a, b)), y)

3002

3003

def test_parseaddr_preserves_quoted_pairs_in_addresses(self):

3004

# issue 10005. Note that in the third test the second pair of

3005

# backslashes is not actually a quoted pair because it is not inside a

3006

# comment or quoted string: the address being parsed has a quoted

3007

# string containing a quoted backslash, followed by 'example' and two

3008

# backslashes, followed by another quoted string containing a space and

3009

# the word 'example'. parseaddr copies those two backslashes

3010

# literally. Per rfc5322 this is not technically correct since a \ may

3011

# not appear in an address outside of a quoted string. It is probably

3012

# a sensible Postel interpretation, though.

3013

eq = self.assertEqual

3014

eq(utils.parseaddr('""example" example"@example.com'),

3015

('', '""example" example"@example.com'))

3016

eq(utils.parseaddr('"\\"example\\" example"@example.com'),

3017

('', '"\\"example\\" example"@example.com'))

3018

eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),

3019

('', '"\\\\"example\\\\" example"@example.com'))

3020

3021

def test_parseaddr_preserves_spaces_in_local_part(self):

3022

# issue 9286. A normal RFC5322 local part should not contain any

3023

# folding white space, but legacy local parts can (they are a sequence

3024

# of atoms, not dotatoms). On the other hand we strip whitespace from

3025

# before the @ and around dots, on the assumption that the whitespace

3026

# around the punctuation is a mistake in what would otherwise be

3027

# an RFC5322 local part. Leading whitespace is, usual, stripped as well.

3028

self.assertEqual(('', "merwok wok@xample.com"),

3029

utils.parseaddr("merwok wok@xample.com"))

3030

self.assertEqual(('', "merwok wok@xample.com"),

3031

utils.parseaddr("merwok wok@xample.com"))

3032

self.assertEqual(('', "merwok wok@xample.com"),

3033

utils.parseaddr(" merwok wok @xample.com"))

3034

self.assertEqual(('', 'merwok"wok" wok@xample.com'),

3035

utils.parseaddr('merwok"wok" wok@xample.com'))

3036

self.assertEqual(('', 'merwok.wok.wok@xample.com'),

3037

utils.parseaddr('merwok. wok . wok@xample.com'))

3038

3039

def test_formataddr_does_not_quote_parens_in_quoted_string(self):

3040

addr = ("'foo@example.com' (foo@example.com)",

3041

'foo@example.com')

3042

addrstr = ('"\'foo@example.com\' '

3043

'(foo@example.com)" <foo@example.com>')

3044

self.assertEqual(utils.parseaddr(addrstr), addr)

3045

self.assertEqual(utils.formataddr(addr), addrstr)

3046

3047

3048

def test_multiline_from_comment(self):

3049

x = """\

3050

Foo

3051

\tBar <foo@example.com>"""

3052

self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))

3053

3054

def test_quote_dump(self):

3055

self.assertEqual(

3056

utils.formataddr(('A Silly; Person', 'person@dom.ain')),

3057

r'"A Silly; Person" <person@dom.ain>')

3058

3059

def test_charset_richcomparisons(self):

3060

eq = self.assertEqual

3061

ne = self.assertNotEqual

3062

cset1 = Charset()

3063

cset2 = Charset()

3064

eq(cset1, 'us-ascii')

3065

eq(cset1, 'US-ASCII')

3066

eq(cset1, 'Us-AsCiI')

3067

eq('us-ascii', cset1)

3068

eq('US-ASCII', cset1)

3069

eq('Us-AsCiI', cset1)

3070

ne(cset1, 'usascii')

3071

ne(cset1, 'USASCII')

3072

ne(cset1, 'UsAsCiI')

3073

ne('usascii', cset1)

3074

ne('USASCII', cset1)

3075

ne('UsAsCiI', cset1)

3076

eq(cset1, cset2)

3077

eq(cset2, cset1)

3078

3079

def test_getaddresses(self):

3080

eq = self.assertEqual

3081

eq(utils.getaddresses(['aperson@dom.ain (Al Person)',

3082

'Bud Person <bperson@dom.ain>']),

3083

[('Al Person', 'aperson@dom.ain'),

3084

('Bud Person', 'bperson@dom.ain')])

3085

3086

def test_getaddresses_nasty(self):
    # (header values, expected result) pairs for awkward input.
    cases = (
        (['foo: ;'], [('', '')]),
        (['[]*-- =~$'], [('', ''), ('', ''), ('', '*--')]),
        (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
         [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
    )
    for fields, expected in cases:
        self.assertEqual(utils.getaddresses(fields), expected)

3095

3096

def test_getaddresses_embedded_comment(self):

3097

"""Test proper handling of a nested comment"""

3098

eq = self.assertEqual

3099

addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])

3100

eq(addrs[0][1], 'foo@bar.com')

3101

3102

def test_utils_quote_unquote(self):

3103

eq = self.assertEqual

3104

msg = Message()

3105

msg.add_header('content-disposition', 'attachment',

3106

filename='foo\\wacky"name')

3107

eq(msg.get_filename(), 'foo\\wacky"name')

3108

3109

def test_get_body_encoding_with_bogus_charset(self):

3110

charset = Charset('not a charset')

3111

self.assertEqual(charset.get_body_encoding(), 'base64')

3112

3113

def test_get_body_encoding_with_uppercase_charset(self):

3114

eq = self.assertEqual

3115

msg = Message()

3116

msg['Content-Type'] = 'text/plain; charset=UTF-8'

3117

eq(msg['content-type'], 'text/plain; charset=UTF-8')

3118

charsets = msg.get_charsets()

3119

eq(len(charsets), 1)

3120

eq(charsets[0], 'utf-8')

3121

charset = Charset(charsets[0])

3122

eq(charset.get_body_encoding(), 'base64')

3123

msg.set_payload(b'hello world', charset=charset)

3124

eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')

3125

eq(msg.get_payload(decode=True), b'hello world')

3126

eq(msg['content-transfer-encoding'], 'base64')

3127

# Try another one

3128

msg = Message()

3129

msg['Content-Type'] = 'text/plain; charset="US-ASCII"'

3130

charsets = msg.get_charsets()

3131

eq(len(charsets), 1)

3132

eq(charsets[0], 'us-ascii')

3133

charset = Charset(charsets[0])

3134

eq(charset.get_body_encoding(), encoders.encode_7or8bit)

3135

msg.set_payload('hello world', charset=charset)

3136

eq(msg.get_payload(), 'hello world')

3137

eq(msg['content-transfer-encoding'], '7bit')

3138

3139

def test_charsets_case_insensitive(self):

3140

lc = Charset('us-ascii')

3141

uc = Charset('US-ASCII')

3142

self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())

3143

3144

def test_partial_falls_inside_message_delivery_status(self):
    eq = self.ndiffAssertEqual
    # The Parser interface provides chunks of data to FeedParser in 8192
    # byte gulps.  SF bug #1076485 found one of those chunks inside
    # message/delivery-status header block, which triggered an
    # unreadline() of NeedMoreData.
    msg = self._msgobj('msg_43.txt')
    sfp = StringIO()
    iterators._structure(msg, sfp)
    # _structure() indents each nesting level by four spaces, so the
    # expected dump has to carry that indentation: the report has three
    # parts, and the delivery-status part holds 26 text/plain blocks.
    expected = (
        'multipart/report\n'
        '    text/plain\n'
        '    message/delivery-status\n'
        + '        text/plain\n' * 26
        + '    text/rfc822-headers\n'
    )
    eq(sfp.getvalue(), expected)

3185

3186

def test_make_msgid_domain(self):

3187

self.assertEqual(

3188

email.utils.make_msgid(domain='testdomain-string')[-19:],

3189

'@testdomain-string>')

3190

3191

def test_Generator_linend(self):
    # Issue 14645.
    # newline='\n' keeps the file's CRLF line endings in msgtxt.
    with openfile('msg_26.txt', newline='\n') as f:
        msgtxt = f.read()
    expected = msgtxt.replace('\r\n', '\n')
    out = StringIO()
    email.generator.Generator(out).flatten(email.message_from_string(msgtxt))
    # Flattening with the default linesep must give bare LF endings.
    self.assertEqual(out.getvalue(), expected)

3201

3202

def test_BytesGenerator_linend(self):
    # Issue 14645.
    with openfile('msg_26.txt', newline='\n') as f:
        msgtxt = f.read()
    # Parse the LF-only form, then flatten with CRLF: the output must
    # match the text as read from the file.
    parsed = email.message_from_string(msgtxt.replace('\r\n', '\n'))
    out = BytesIO()
    email.generator.BytesGenerator(out).flatten(parsed, linesep='\r\n')
    self.assertEqual(out.getvalue().decode('ascii'), msgtxt)

3212

3213

def test_BytesGenerator_linend_with_non_ascii(self):
    # Issue 14645.
    with openfile('msg_26.txt', 'rb') as f:
        raw = f.read()
    # Plant a non-ASCII byte in the message text.
    raw = raw.replace(b'with attachment', b'fo\xf6')
    parsed = email.message_from_bytes(raw.replace(b'\r\n', b'\n'))
    out = BytesIO()
    email.generator.BytesGenerator(out).flatten(parsed, linesep='\r\n')
    # Flattening with CRLF must reproduce the modified bytes exactly.
    self.assertEqual(out.getvalue(), raw)

3224

3225

3226

# Test the iterator/generators

3227

class TestIterators(TestEmailBase):
    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        body_lines = list(iterators.body_line_iterator(msg))
        eq(len(body_lines), 6)
        neq(EMPTYSTRING.join(body_lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        body_lines = list(iterators.body_line_iterator(msg))
        eq(len(body_lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(body_lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is text.
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Here both the main type and the subtype are given.
        matches = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        payloads = [part.get_payload() for part in matches]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # (chunk to push, number of lines readable right after the push)
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        collected = []
        expected_total = 0
        for chunk, expected_now in imt:
            bsf.push(chunk)
            expected_total += expected_now
            # Drain readline() until it reports NeedMoreData.
            ready = list(iter(bsf.readline, NeedMoreData))
            collected.extend(ready)
            self.assertEqual(expected_now, len(ready))
        self.assertEqual(len(collected), expected_total)
        # Nothing may be lost or invented along the way.
        self.assertEqual(''.join([chunk for chunk, _ in imt]),
                         ''.join(collected))

3313

3314

3315

3316

class TestParsers(TestEmailBase):

    # Never truncate the diffs printed by failing assertions.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left as one string instead of being split into parts.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        # The payload is a str, and bytes when decoding is requested.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        text = """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
"""
        msg = email.message_from_string(text)
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        text = """\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
"""
        msg = email.message_from_string(text)
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        text_part = msg.get_payload(0)
        eq(text_part.get_content_type(), 'text/plain')
        eq(text_part.get_payload(), 'Simple email with attachment.\r\n\r\n')
        attachment = msg.get_payload(1)
        eq(attachment.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        out = StringIO()
        Generator(out).flatten(email.message_from_string(text), linesep='\r\n')
        self.assertEqual(out.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        # Both message/rfc822 parts have the same shape; only the body
        # text of the single text/plain message inside differs.
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            wrapper = msg.get_payload(index)
            eq(wrapper.get_content_type(), 'message/rfc822')
            eq(wrapper.is_multipart(), 1)
            eq(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            eq(inner.is_multipart(), 0)
            eq(inner.get_content_type(), 'text/plain')
            neq(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        header_lines = [
            'From: Andrew Person <aperson@dom.ain',
            'Subject: Test',
            'Date: Tue, 20 Aug 2002 16:43:45 +1000',
        ]
        msg = email.message_from_string(NL.join(header_lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        template = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n'
        msg = email.message_from_string(template % (value1, value2))
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # All three lines before the blank line are taken as header fields.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so no header at
        # all is expected to be parsed.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # Header names of a single character are accepted.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = ''.join([
            "From: foo@bar.com\n",
            "To: baz\n",
            "Mime-Version: 1.0\n",
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n",
            "\n",
            "--BOUNDARY\n",
            "Content-Type: text/plain\n",
            "\n",
            "body ending with CRLF newline\r\n",
            "\n",
            "--BOUNDARY--\n",
        ])
        msg = email.message_from_string(m)
        first_body = msg.get_payload(0).get_payload()
        self.assertTrue(first_body.endswith('\r\n'))

3491

3492

3493

class Test8BitBytesHandling(unittest.TestCase):

3494

# In Python3 all input is string, but that doesn't work if the actual input

3495

# uses an 8bit transfer encoding. To hack around that, in email 5.1 we

3496

# decode byte streams using the surrogateescape error handler, and

3497

# reconvert to binary at appropriate places if we detect surrogates. This

3498

# doesn't allow us to transform headers with 8bit bytes (they get munged),

3499

# but it does allow us to parse and preserve them, and to decode body

3500

# parts that use an 8bit CTE.

3501

3502

# Message template shared by the body tests; each test fills in the
# charset, the content transfer encoding and the single body line.
bodytest_msg = textwrap.dedent("""\
    From: foo@bar.com
    To: baz
    Mime-Version: 1.0
    Content-Type: text/plain; charset={charset}
    Content-Transfer-Encoding: {cte}

    {bodyline}
    """)

3511

3512

def test_known_8bit_CTE(self):

3513

m = self.bodytest_msg.format(charset='utf-8',

3514

cte='8bit',

3515

bodyline='pöstal').encode('utf-8')

3516

msg = email.message_from_bytes(m)

3517

self.assertEqual(msg.get_payload(), "pöstal\n")

3518

self.assertEqual(msg.get_payload(decode=True),

3519

"pöstal\n".encode('utf-8'))

3520

3521

def test_unknown_8bit_CTE(self):

3522

m = self.bodytest_msg.format(charset='notavalidcharset',

3523

cte='8bit',

3524

bodyline='pöstal').encode('utf-8')

3525

msg = email.message_from_bytes(m)

3526

self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")

3527

self.assertEqual(msg.get_payload(decode=True),

3528

"pöstal\n".encode('utf-8'))

3529

3530

def test_8bit_in_quopri_body(self):

3531

# This is non-RFC compliant data...without 'decode' the library code

3532

# decodes the body using the charset from the headers, and because the

3533

# source byte really is utf-8 this works. This is likely to fail

3534

# against real dirty data (ie: produce mojibake), but the data is

3535

# invalid anyway so it is as good a guess as any. But this means that

3536

# this test just confirms the current behavior; that behavior is not

3537

# necessarily the best possible behavior. With 'decode' it is

3538

# returning the raw bytes, so that test should be of correct behavior,

3539

# or at least produce the same result that email4 did.

3540

m = self.bodytest_msg.format(charset='utf-8',

3541

cte='quoted-printable',

3542

bodyline='p=C3=B6stál').encode('utf-8')

3543

msg = email.message_from_bytes(m)

3544

self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')

3545

self.assertEqual(msg.get_payload(decode=True),

3546

'pöstál\n'.encode('utf-8'))

3547

3548

def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):

3549

# This is similar to the previous test, but proves that if the 8bit

3550

# byte is undecodeable in the specified charset, it gets replaced

3551

# by the unicode 'unknown' character. Again, this may or may not

3552

# be the ideal behavior. Note that if decode=False none of the

3553

# decoders will get involved, so this is the only test we need

3554

# for this behavior.

3555

m = self.bodytest_msg.format(charset='ascii',

3556

cte='quoted-printable',

3557

bodyline='p=C3=B6stál').encode('utf-8')

3558

msg = email.message_from_bytes(m)

3559

self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')

3560

self.assertEqual(msg.get_payload(decode=True),

3561

'pöstál\n'.encode('utf-8'))

3562

3563

# test_defect_handling:test_invalid_chars_in_base64_payload

3564

def test_8bit_in_base64_body(self):

3565

# If we get 8bit bytes in a base64 body, we can just ignore them

3566

# as being outside the base64 alphabet and decode anyway. But

3567

# we register a defect.

3568

m = self.bodytest_msg.format(charset='utf-8',

3569

cte='base64',

3570

bodyline='cMO2c3RhbAá=').encode('utf-8')

3571

msg = email.message_from_bytes(m)

3572

self.assertEqual(msg.get_payload(decode=True),

3573

'pöstal'.encode('utf-8'))

3574

self.assertIsInstance(msg.defects[0],

3575

errors.InvalidBase64CharactersDefect)

3576

3577

def test_8bit_in_uuencode_body(self):

3578

# Sticking an 8bit byte in a uuencode block makes it undecodable by

3579

# normal means, so the block is returned undecoded, but as bytes.

3580

m = self.bodytest_msg.format(charset='utf-8',

3581

cte='uuencode',

3582

bodyline='<,.V<W1A; á ').encode('utf-8')

3583

msg = email.message_from_bytes(m)

3584

self.assertEqual(msg.get_payload(decode=True),

3585

'<,.V<W1A; á \n'.encode('utf-8'))

3586

3587

3588

# Pairs of (source header line, (header name, expected printed value)).
headertest_headers = (
    ('From: foo@bar.com', ('From', 'foo@bar.com')),
    ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
    ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
     '\tJean de Baddie',
     ('Subject',
      '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
      'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
      ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
    ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
)
# The source lines joined by newlines, followed directly by one body
# line, as UTF-8 bytes.
headertest_msg = (
    '\n'.join(source for source, _ in headertest_headers)
    + '\nYes, they are flying.\n'
).encode('utf-8')

3600

3601

def test_get_8bit_header(self):

3602

msg = email.message_from_bytes(self.headertest_msg)

3603

self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')

3604

self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')

3605

3606

def test_print_8bit_headers(self):

3607

msg = email.message_from_bytes(self.headertest_msg)

3608

self.assertEqual(str(msg),

3609

textwrap.dedent("""\

3610

From: {}

3611

To: {}

3612

Subject: {}

3613

From: {}

3614

3615

Yes, they are flying.

3616

""").format(*[expected[1] for (_, expected) in

3617

self.headertest_headers]))

3618

3619

def test_values_with_8bit_headers(self):

3620

msg = email.message_from_bytes(self.headertest_msg)

3621

self.assertListEqual([str(x) for x in msg.values()],

3622

['foo@bar.com',

3623

'b\uFFFD\uFFFDz',

3624

'Maintenant je vous pr\uFFFD\uFFFDsente mon '

3625

'coll\uFFFD\uFFFDgue, le pouf '

3626

'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'

3627

'\tJean de Baddie',

3628

"g\uFFFD\uFFFDst"])

3629

3630

def test_items_with_8bit_headers(self):

3631

msg = email.message_from_bytes(self.headertest_msg)

3632

self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],

3633

[('From', 'foo@bar.com'),

3634

('To', 'b\uFFFD\uFFFDz'),

3635

('Subject', 'Maintenant je vous '

3636

'pr\uFFFD\uFFFDsente '

3637

'mon coll\uFFFD\uFFFDgue, le pouf '

3638

'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'

3639

'\tJean de Baddie'),

3640

('From', 'g\uFFFD\uFFFDst')])

3641

3642

def test_get_all_with_8bit_headers(self):

3643

msg = email.message_from_bytes(self.headertest_msg)

3644

self.assertListEqual([str(x) for x in msg.get_all('from')],

3645

['foo@bar.com',

3646

'g\uFFFD\uFFFDst'])

3647

3648

def test_get_content_type_with_8bit(self):

3649

msg = email.message_from_bytes(textwrap.dedent("""\

3650

Content-Type: text/pl\xA7in; charset=utf-8

3651

""").encode('latin-1'))

3652

self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")

3653

self.assertEqual(msg.get_content_maintype(), "text")

3654

self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")

3655

3656

# test_headerregistry.TestContentTypeHeader.non_ascii_in_params

3657

def test_get_params_with_8bit(self):

3658

msg = email.message_from_bytes(

3659

'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))

3660

self.assertEqual(msg.get_params(header='x-header'),

3661

[('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])

3662

self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')

3663

# XXX: someday you might be able to get 'b\xa7r', for now you can't.

3664

self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)

3665

3666

# test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value

3667

def test_get_rfc2231_params_with_8bit(self):

3668

msg = email.message_from_bytes(textwrap.dedent("""\

3669

Content-Type: text/plain; charset=us-ascii;

3670

title*=us-ascii'en'This%20is%20not%20f\xa7n"""

3671

).encode('latin-1'))

3672

self.assertEqual(msg.get_param('title'),

3673

('us-ascii', 'en', 'This is not f\uFFFDn'))

3674

3675

def test_set_rfc2231_params_with_8bit(self):

3676

msg = email.message_from_bytes(textwrap.dedent("""\

3677

Content-Type: text/plain; charset=us-ascii;

3678

title*=us-ascii'en'This%20is%20not%20f\xa7n"""

3679

).encode('latin-1'))

3680

msg.set_param('title', 'test')

3681

self.assertEqual(msg.get_param('title'), 'test')

3682

3683

def test_del_rfc2231_params_with_8bit(self):

3684

msg = email.message_from_bytes(textwrap.dedent("""\

3685

Content-Type: text/plain; charset=us-ascii;

3686

title*=us-ascii'en'This%20is%20not%20f\xa7n"""

3687

).encode('latin-1'))

3688

msg.del_param('title')

3689

self.assertEqual(msg.get_param('title'), None)

3690

self.assertEqual(msg.get_content_maintype(), 'text')

3691

3692

def test_get_payload_with_8bit_cte_header(self):

3693

msg = email.message_from_bytes(textwrap.dedent("""\

3694

Content-Transfer-Encoding: b\xa7se64

3695

Content-Type: text/plain; charset=latin-1

3696

3697

payload

3698

""").encode('latin-1'))

3699

self.assertEqual(msg.get_payload(), 'payload\n')

3700

self.assertEqual(msg.get_payload(decode=True), b'payload\n')

3701

3702

# UTF-8 encoded message with raw 8-bit bytes in both headers and body.
non_latin_bin_msg = '\n'.join((
    'From: foo@bar.com',
    'To: báz',
    'Subject: Maintenant je vous présente mon collègue, le pouf célèbre',
    '\tJean de Baddie',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="utf-8"',
    'Content-Transfer-Encoding: 8bit',
    '',
    'Да, они летят.',
    '',
)).encode('utf-8')

3713

3714

def test_bytes_generator(self):
    # Parsing then flattening with BytesGenerator reproduces the input.
    parsed = email.message_from_bytes(self.non_latin_bin_msg)
    sink = BytesIO()
    generator = email.generator.BytesGenerator(sink)
    generator.flatten(parsed)
    self.assertEqual(sink.getvalue(), self.non_latin_bin_msg)

3719

3720

def test_bytes_generator_handles_None_body(self):

3721

#Issue 11019

3722

msg = email.message.Message()

3723

out = BytesIO()

3724

email.generator.BytesGenerator(out).flatten(msg)

3725

self.assertEqual(out.getvalue(), b"\n")

3726

3727

# Expected 7-bit rendering of non_latin_bin_msg: 8-bit headers become
# unknown-8bit encoded words and the body is base64.  The folded Subject
# continuation lines start with a single space; each line is spelled out
# as its own string so that leading whitespace cannot be lost.
non_latin_bin_msg_as7bit_wrapped = '\n'.join((
    'From: foo@bar.com',
    'To: =?unknown-8bit?q?b=C3=A1z?=',
    'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=',
    ' =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=',
    ' =?unknown-8bit?q?_Jean_de_Baddie?=',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="utf-8"',
    'Content-Transfer-Encoding: base64',
    '',
    '0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==',
    '',
))

3739

3740

def test_generator_handles_8bit(self):
    # The str Generator must emit the 7-bit (wrapped) form of the message.
    parsed = email.message_from_bytes(self.non_latin_bin_msg)
    sink = StringIO()
    generator = email.generator.Generator(sink)
    generator.flatten(parsed)
    self.assertEqual(sink.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)

3745

3746

def test_bytes_generator_with_unix_from(self):
    # The unixfrom line contains the current date, so it cannot be
    # compared literally.  Check only that it begins with the word 'From'
    # and that everything after it equals the input message.
    parsed = email.message_from_bytes(self.non_latin_bin_msg)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, unixfrom=True)
    envelope, _, remainder = sink.getvalue().partition(b'\n')
    self.assertEqual(envelope.split()[0], b'From')
    self.assertEqual(remainder, self.non_latin_bin_msg)

3756

3757

# Same as the wrapped form, except that the first two Subject lines are
# merged into a single unfolded line.
non_latin_bin_msg_as7bit = '\n'.join(
    non_latin_bin_msg_as7bit_wrapped.split('\n')[:2]
    + ['Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
       'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    + non_latin_bin_msg_as7bit_wrapped.split('\n')[4:])

3762

3763

def test_message_from_binary_file(self):
    # Write the 8-bit message to disk, parse it back in binary mode and
    # compare its str() form with the expected 7-bit rendering.
    path = 'test.msg'
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)

3771

3772

# Latin-1 encoded message with an 8bit body and ASCII-only headers.
latin_bin_msg = '\n'.join((
    'From: foo@bar.com',
    'To: Dinsdale',
    'Subject: Nudge nudge, wink, wink',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="latin-1"',
    'Content-Transfer-Encoding: 8bit',
    '',
    'oh là là, know what I mean, know what I mean?',
    '',
)).encode('latin-1')

3782

3783

# Expected 7-bit rendering of latin_bin_msg: the charset is normalised to
# iso-8859-1 and the body is quoted-printable.
latin_bin_msg_as7bit = '\n'.join((
    'From: foo@bar.com',
    'To: Dinsdale',
    'Subject: Nudge nudge, wink, wink',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="iso-8859-1"',
    'Content-Transfer-Encoding: quoted-printable',
    '',
    'oh l=E0 l=E0, know what I mean, know what I mean?',
    '',
))

3793

3794

def test_string_generator_reencodes_to_quopri_when_appropriate(self):
    # str() of the 8bit latin-1 message yields the quoted-printable form.
    parsed = email.message_from_bytes(self.latin_bin_msg)
    rendered = str(parsed)
    self.assertEqual(rendered, self.latin_bin_msg_as7bit)

3797

3798

def test_decoded_generator_emits_unicode_body(self):
    parsed = email.message_from_bytes(self.latin_bin_msg)
    sink = StringIO()
    email.generator.DecodedGenerator(sink).flatten(parsed)
    # DecodedGenerator output contains an extra blank line compared to
    # the input message.  RDM: not sure if this is a bug or not, but it
    # is not specific to the 8bit->7bit conversion.
    expected = self.latin_bin_msg.decode('latin-1') + '\n'
    self.assertEqual(sink.getvalue(), expected)

3807

3808

def test_bytes_feedparser(self):
    # Feed the message to BytesFeedParser in 10-byte slices.
    feeder = email.feedparser.BytesFeedParser()
    raw = self.latin_bin_msg
    for start in range(0, len(raw), 10):
        feeder.feed(raw[start:start + 10])
    parsed = feeder.close()
    self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

3814

3815

def test_crlf_flatten(self):
    # Flattening msg_26.txt with a CRLF line separator reproduces the
    # file's bytes exactly.
    with openfile('msg_26.txt', 'rb') as source:
        raw = source.read()
    parsed = email.message_from_bytes(raw)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), raw)

3823

3824

def test_8bit_multipart(self):

3825

# Issue 11605

3826

source = textwrap.dedent("""\

3827

Date: Fri, 18 Mar 2011 17:15:43 +0100

3828

To: foo@example.com

3829

From: foodwatch-Newsletter <bar@example.com>

3830

Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System

3831

Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>

3832

MIME-Version: 1.0

3833

Content-Type: multipart/alternative;

3834

boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

3835

3836

--b1_76a486bee62b0d200f33dc2ca08220ad

3837

Content-Type: text/plain; charset="utf-8"

3838

Content-Transfer-Encoding: 8bit

3839

3840

Guten Tag, ,

3841

3842

mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die

3843

Nachrichten aus Japan.

3844

3845

3846

--b1_76a486bee62b0d200f33dc2ca08220ad

3847

Content-Type: text/html; charset="utf-8"

3848

Content-Transfer-Encoding: 8bit

3849

3850

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"

3851

"http://www.w3.org/TR/html4/loose.dtd">

3852

3853

<head>

3854

<title>foodwatch - Newsletter</title>

3855

</head>

3856

<body>

3857

<p>mit großer Betroffenheit verfolgen auch wir im foodwatch-Team

3858

die Nachrichten aus Japan.</p>

3859

</body>

3860

</html>

3861

--b1_76a486bee62b0d200f33dc2ca08220ad--

3862

3863

""").encode('utf-8')

3864

msg = email.message_from_bytes(source)

3865

s = BytesIO()

3866

g = email.generator.BytesGenerator(s)

3867

g.flatten(msg)

3868

self.assertEqual(s.getvalue(), source)

3869

3870

def test_bytes_generator_b_encoding_linesep(self):

3871

# Issue 14062: b encoding was tacking on an extra \n.

3872

m = Message()

3873

# This has enough non-ascii that it should always end up b encoded.

3874

m['Subject'] = Header('žluťoučký kůň')

3875

s = BytesIO()

3876

g = email.generator.BytesGenerator(s)

3877

g.flatten(m, linesep='\r\n')

3878

self.assertEqual(

3879

s.getvalue(),

3880

b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

3881

3882

def test_generator_b_encoding_linesep(self):

3883

# Since this broke in ByteGenerator, test Generator for completeness.

3884

m = Message()

3885

# This has enough non-ascii that it should always end up b encoded.

3886

m['Subject'] = Header('žluťoučký kůň')

3887

s = StringIO()

3888

g = email.generator.Generator(s)

3889

g.flatten(m, linesep='\r\n')

3890

self.assertEqual(

3891

s.getvalue(),

3892

'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

3893

3894

# unittest setting: None removes the length limit on failure diffs.
maxDiff = None

3895

3896

3897

class BaseTestBytesGeneratorIdempotent:
    # Mixin with the two helpers used by the bytes idempotency tests.
    # Subclasses supply linesep, blinesep and normalize_linesep_regex.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the test file, rewrite its line endings to self.blinesep,
        # and return (parsed message, normalised bytes).
        with openfile(filename, 'rb') as source:
            raw = source.read()
        normalised = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalised), normalised

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header re-wrapping and require the output
        # to equal data byte for byte.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())

3913

3914

3915

class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Idempotency tests with bare-LF line endings: every CRLF in the
    # source data is rewritten to LF before comparison.
    normalize_linesep_regex = re.compile(br'\r\n')
    linesep = '\n'
    blinesep = linesep.encode('ascii')

3920

3921

3922

class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Idempotency tests with CRLF line endings: every LF not already
    # preceded by CR is rewritten to CRLF before comparison.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    linesep = '\r\n'
    blinesep = linesep.encode('ascii')

3927

3928

3929

class TestBase64(unittest.TestCase):
    # Tests for the email.base64mime helper functions.

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        # Four output characters per started group of three input
        # characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ... 13..14 -> 20.
        for size in range(15):
            expected = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        for encoded, plain in (('', b''), ('aGVsbG8=', b'hello')):
            self.assertEqual(base64mime.decode(encoded), plain)

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Input that already ends in a newline
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        wrapped = [full_line, full_line, full_line, last_line, '']
        # Test the maxlinelen arg
        check(encode(b'xxxx ' * 20, maxlinelen=40), '\n'.join(wrapped))
        # Test the eol argument
        check(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
              '\r\n'.join(wrapped))

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')

3979

3980

3981

3982

class TestQuopri(unittest.TestCase):
    # Tests for the email.quoprimime helper functions.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        header_literals = frozenset(self.hlit)
        self.hnon = [c for c in range(256) if c not in header_literals]
        # Use a real assertion rather than a bare assert statement so the
        # sanity check still runs under "python -O".
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        body_literals = frozenset(self.blit)
        self.bnon = [c for c in range(256) if c not in body_literals]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                             'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                             'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Encode header (with charset only when one is given, so that the
        # library default is exercised otherwise) and compare.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                 '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                 '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                                 charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Decode encoded (passing eol only when one is given) and compare.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello' + ' ' * 4 + '\r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello' + ' ' * 4 + '\r\nworld' + ' ' * 3 + '\r\n',
                          'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Encode body, forwarding maxlinelen/eol only when given, compare
        # with the expectation and, where the output can be split back
        # into lines, check that no line exceeds maxlinelen.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol in ('\n', '\r\n'):
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expect strings, but uses ord(char) from these strings
    # to index into a 256-entry list.  For code points above 255, this will fail.
    # Should there be a check for 8-bit only ord() values in body, or at least
    # a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the final whitespace character of a line is quoted; the
        # spaces before it stay literal.  Counts are written explicitly so
        # the run lengths cannot be altered by whitespace normalisation.
        self._test_encode('hello' + ' ' * 3, 'hello' + ' ' * 2 + '=20')

    def test_encode_one_line_trailing_spaces(self):
        self._test_encode('hello' + ' ' * 3 + '\n',
                          'hello' + ' ' * 2 + '=20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                          2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF in the input is normalised to the default eol
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # The 100-character input wraps after 39 characters plus a soft
        # break, so the second output line begins with the space that
        # follows the eighth "xxxx" group.
        wrapped = ['xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=',
                   ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=',
                   'x xxxx xxxx xxxx xxxx=20']
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40),
           '\n'.join(wrapped))
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           '\r\n'.join(wrapped))
        eq(quoprimime.body_encode('one line\n\ntwo line'),
           'one line\n\ntwo line')

4295

4296

4297

4298

# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # Drop the 'fake' charset that test_body_encode may have registered.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        check = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        check(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, eight_bit)
        utf8_charset = Charset('utf-8')
        check(utf8_charset.header_encode(eight_bit),
              '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        charset = Charset('iso-8859-1')
        check('hello w=F6rld', charset.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        charset = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', charset.body_encode(b'hello world'))
        # Try a charset with None body encoding
        charset = Charset('us-ascii')
        check('hello world', charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        charset = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        charset = Charset('fake')
        check('hello world', charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')

4353

4354

4355

4356

# Test multilingual MIME headers.

4357

class TestHeader(TestEmailBase):

4358

def test_simple(self):

4359

eq = self.ndiffAssertEqual

4360

h = Header('Hello World!')

4361

eq(h.encode(), 'Hello World!')

4362

h.append(' Goodbye World!')

4363

eq(h.encode(), 'Hello World! Goodbye World!')

4364

4365

def test_simple_surprise(self):

4366

eq = self.ndiffAssertEqual

4367

h = Header('Hello World!')

4368

eq(h.encode(), 'Hello World!')

4369

h.append('Goodbye World!')

4370

eq(h.encode(), 'Hello World! Goodbye World!')

4371

4372

def test_header_needs_no_decoding(self):

4373

h = 'no decoding needed'

4374

self.assertEqual(decode_header(h), [(h, None)])

4375

4376

def test_long(self):

4377

h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",

4378

maxlinelen=76)

4379

for l in h.encode(splitchars=' ').split('\n '):

4380

self.assertLessEqual(len(l), 76)

4381

4382

def test_multilingual(self):

4383

eq = self.ndiffAssertEqual

4384

g = Charset("iso-8859-1")

4385

cz = Charset("iso-8859-2")

4386

utf8 = Charset("utf-8")

4387

g_head = (b'Die Mieter treten hier ein werden mit einem '

4388

b'Foerderband komfortabel den Korridor entlang, '

4389

b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '

4390

b'gegen die rotierenden Klingen bef\xf6rdert. ')

4391

cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '

4392

b'd\xf9vtipu.. ')

4393

utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'

4394

'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'

4395

'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'

4396

'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'

4397

'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '

4398

'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '

4399

'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'

4400

'\u3044\u307e\u3059\u3002')

4401

h = Header(g_head, g)

4402

h.append(cz_head, cz)

4403

h.append(utf8_head, utf8)

4404

enc = h.encode(maxlinelen=76)

4405

eq(enc, """\

4406

=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=

4407

=?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=

4408

=?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=

4409

=?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=

4410

=?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=

4411

=?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=

4412

=?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=

4413

=?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=

4414

=?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=

4415

=?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=

4416

=?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")

4417

decoded = decode_header(enc)

4418

eq(len(decoded), 3)

4419

eq(decoded[0], (g_head, 'iso-8859-1'))

4420

eq(decoded[1], (cz_head, 'iso-8859-2'))

4421

eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))

4422

ustr = str(h)

4423

eq(ustr,

4424

(b'Die Mieter treten hier ein werden mit einem Foerderband '

4425

b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '

4426

b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '

4427

b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '

4428

b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'

4429

b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'

4430

b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'

4431

b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'

4432

b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'

4433

b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'

4434

b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'

4435

b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'

4436

b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '

4437

b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '

4438

b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'

4439

b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'

4440

).decode('utf-8'))

4441

# Test make_header()

4442

newh = make_header(decode_header(enc))

4443

eq(newh, h)

4444

4445

def test_empty_header_encode(self):

4446

h = Header()

4447

self.assertEqual(h.encode(), '')

4448

4449

def test_header_ctor_default_args(self):

4450

eq = self.ndiffAssertEqual

4451

h = Header()

4452

eq(h, '')

4453

h.append('foo', Charset('iso-8859-1'))

4454

eq(h, 'foo')

4455

4456

def test_explicit_maxlinelen(self):

4457

eq = self.ndiffAssertEqual

4458

hstr = ('A very long line that must get split to something other '

4459

'than at the 76th character boundary to test the non-default '

4460

'behavior')

4461

h = Header(hstr)

4462

eq(h.encode(), '''\

4463

A very long line that must get split to something other than at the 76th

4464

character boundary to test the non-default behavior''')

4465

eq(str(h), hstr)

4466

h = Header(hstr, header_name='Subject')

4467

eq(h.encode(), '''\

4468

A very long line that must get split to something other than at the

4469

76th character boundary to test the non-default behavior''')

4470

eq(str(h), hstr)

4471

h = Header(hstr, maxlinelen=1024, header_name='Subject')

4472

eq(h.encode(), hstr)

4473

eq(str(h), hstr)

4474

4475

def test_quopri_splittable(self):

4476

eq = self.ndiffAssertEqual

4477

h = Header(charset='iso-8859-1', maxlinelen=20)

4478

x = 'xxxx ' * 20

4479

h.append(x)

4480

s = h.encode()

4481

eq(s, """\

4482

=?iso-8859-1?q?xxx?=

4483

=?iso-8859-1?q?x_?=

4484

=?iso-8859-1?q?xx?=

4485

=?iso-8859-1?q?xx?=

4486

=?iso-8859-1?q?_x?=

4487

=?iso-8859-1?q?xx?=

4488

=?iso-8859-1?q?x_?=

4489

=?iso-8859-1?q?xx?=

4490

=?iso-8859-1?q?xx?=

4491

=?iso-8859-1?q?_x?=

4492

=?iso-8859-1?q?xx?=

4493

=?iso-8859-1?q?x_?=

4494

=?iso-8859-1?q?xx?=

4495

=?iso-8859-1?q?xx?=

4496

=?iso-8859-1?q?_x?=

4497

=?iso-8859-1?q?xx?=

4498

=?iso-8859-1?q?x_?=

4499

=?iso-8859-1?q?xx?=

4500

=?iso-8859-1?q?xx?=

4501

=?iso-8859-1?q?_x?=

4502

=?iso-8859-1?q?xx?=

4503

=?iso-8859-1?q?x_?=

4504

=?iso-8859-1?q?xx?=

4505

=?iso-8859-1?q?xx?=

4506

=?iso-8859-1?q?_x?=

4507

=?iso-8859-1?q?xx?=

4508

=?iso-8859-1?q?x_?=

4509

=?iso-8859-1?q?xx?=

4510

=?iso-8859-1?q?xx?=

4511

=?iso-8859-1?q?_x?=

4512

=?iso-8859-1?q?xx?=

4513

=?iso-8859-1?q?x_?=

4514

=?iso-8859-1?q?xx?=

4515

=?iso-8859-1?q?xx?=

4516

=?iso-8859-1?q?_x?=

4517

=?iso-8859-1?q?xx?=

4518

=?iso-8859-1?q?x_?=

4519

=?iso-8859-1?q?xx?=

4520

=?iso-8859-1?q?xx?=

4521

=?iso-8859-1?q?_x?=

4522

=?iso-8859-1?q?xx?=

4523

=?iso-8859-1?q?x_?=

4524

=?iso-8859-1?q?xx?=

4525

=?iso-8859-1?q?xx?=

4526

=?iso-8859-1?q?_x?=

4527

=?iso-8859-1?q?xx?=

4528

=?iso-8859-1?q?x_?=

4529

=?iso-8859-1?q?xx?=

4530

=?iso-8859-1?q?xx?=

4531

=?iso-8859-1?q?_?=""")

4532

eq(x, str(make_header(decode_header(s))))

4533

h = Header(charset='iso-8859-1', maxlinelen=40)

4534

h.append('xxxx ' * 20)

4535

s = h.encode()

4536

eq(s, """\

4537

=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=

4538

=?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=

4539

=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=

4540

=?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=

4541

=?iso-8859-1?q?_xxxx_xxxx_?=""")

4542

eq(x, str(make_header(decode_header(s))))

4543

4544

def test_base64_splittable(self):

4545

eq = self.ndiffAssertEqual

4546

h = Header(charset='koi8-r', maxlinelen=20)

4547

x = 'xxxx ' * 20

4548

h.append(x)

4549

s = h.encode()

4550

eq(s, """\

4551

=?koi8-r?b?eHh4?=

4552

=?koi8-r?b?eCB4?=

4553

=?koi8-r?b?eHh4?=

4554

=?koi8-r?b?IHh4?=

4555

=?koi8-r?b?eHgg?=

4556

=?koi8-r?b?eHh4?=

4557

=?koi8-r?b?eCB4?=

4558

=?koi8-r?b?eHh4?=

4559

=?koi8-r?b?IHh4?=

4560

=?koi8-r?b?eHgg?=

4561

=?koi8-r?b?eHh4?=

4562

=?koi8-r?b?eCB4?=

4563

=?koi8-r?b?eHh4?=

4564

=?koi8-r?b?IHh4?=

4565

=?koi8-r?b?eHgg?=

4566

=?koi8-r?b?eHh4?=

4567

=?koi8-r?b?eCB4?=

4568

=?koi8-r?b?eHh4?=

4569

=?koi8-r?b?IHh4?=

4570

=?koi8-r?b?eHgg?=

4571

=?koi8-r?b?eHh4?=

4572

=?koi8-r?b?eCB4?=

4573

=?koi8-r?b?eHh4?=

4574

=?koi8-r?b?IHh4?=

4575

=?koi8-r?b?eHgg?=

4576

=?koi8-r?b?eHh4?=

4577

=?koi8-r?b?eCB4?=

4578

=?koi8-r?b?eHh4?=

4579

=?koi8-r?b?IHh4?=

4580

=?koi8-r?b?eHgg?=

4581

=?koi8-r?b?eHh4?=

4582

=?koi8-r?b?eCB4?=

4583

=?koi8-r?b?eHh4?=

4584

=?koi8-r?b?IA==?=""")

4585

eq(x, str(make_header(decode_header(s))))

4586

h = Header(charset='koi8-r', maxlinelen=40)

4587

h.append(x)

4588

s = h.encode()

4589

eq(s, """\

4590

=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=

4591

=?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=

4592

=?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=

4593

=?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=

4594

=?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=

4595

=?koi8-r?b?eHh4eCB4eHh4IA==?=""")

4596

eq(x, str(make_header(decode_header(s))))

4597

4598

def test_us_ascii_header(self):

4599

eq = self.assertEqual

4600

s = 'hello'

4601

x = decode_header(s)

4602

eq(x, [('hello', None)])

4603

h = make_header(x)

4604

eq(s, h.encode())

4605

4606

def test_string_charset(self):

4607

eq = self.assertEqual

4608

h = Header()

4609

h.append('hello', 'iso-8859-1')

4610

eq(h, 'hello')

4611

4612

## def test_unicode_error(self):

4613

## raises = self.assertRaises

4614

## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')

4615

## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')

4616

## h = Header()

4617

## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')

4618

## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')

4619

## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

4620

4621

def test_utf8_shortest(self):

4622

eq = self.assertEqual

4623

h = Header('p\xf6stal', 'utf-8')

4624

eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')

4625

h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')

4626

eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

4627

4628

def test_bad_8bit_header(self):

4629

raises = self.assertRaises

4630

eq = self.assertEqual

4631

x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'

4632

raises(UnicodeError, Header, x)

4633

h = Header()

4634

raises(UnicodeError, h.append, x)

4635

e = x.decode('utf-8', 'replace')

4636

eq(str(Header(x, errors='replace')), e)

4637

h.append(x, errors='replace')

4638

eq(str(h), e)

4639

4640

def test_escaped_8bit_header(self):

4641

x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'

4642

e = x.decode('ascii', 'surrogateescape')

4643

h = Header(e, charset=email.charset.UNKNOWN8BIT)

4644

self.assertEqual(str(h),

4645

'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')

4646

self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

4647

4648

def test_header_handles_binary_unknown8bit(self):

4649

x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'

4650

h = Header(x, charset=email.charset.UNKNOWN8BIT)

4651

self.assertEqual(str(h),

4652

'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')

4653

self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

4654

4655

def test_make_header_handles_binary_unknown8bit(self):

4656

x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'

4657

h = Header(x, charset=email.charset.UNKNOWN8BIT)

4658

h2 = email.header.make_header(email.header.decode_header(h))

4659

self.assertEqual(str(h2),

4660

'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')

4661

self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

4662

4663

def test_modify_returned_list_does_not_change_header(self):

4664

h = Header('test')

4665

chunks = email.header.decode_header(h)

4666

chunks.append(('ascii', 'test2'))

4667

self.assertEqual(str(h), 'test')

4668

4669

def test_encoded_adjacent_nonencoded(self):

4670

eq = self.assertEqual

4671

h = Header()

4672

h.append('hello', 'iso-8859-1')

4673

h.append('world')

4674

s = h.encode()

4675

eq(s, '=?iso-8859-1?q?hello?= world')

4676

h = make_header(decode_header(s))

4677

eq(h.encode(), s)

4678

4679

def test_whitespace_keeper(self):

4680

eq = self.assertEqual

4681

s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'

4682

parts = decode_header(s)

4683

eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])

4684

hdr = make_header(parts)

4685

eq(hdr.encode(),

4686

'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

4687

4688

def test_broken_base64_header(self):

4689

raises = self.assertRaises

4690

s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='

4691

raises(errors.HeaderParseError, decode_header, s)

4692

4693

def test_shift_jis_charset(self):

4694

h = Header('文', charset='shift_jis')

4695

self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

4696

4697

def test_flatten_header_with_no_value(self):

4698

# Issue 11401 (regression from email 4.x) Note that the space after

4699

# the header doesn't reflect the input, but this is also the way

4700

# email 4.x behaved. At some point it would be nice to fix that.

4701

msg = email.message_from_string("EmptyHeader:")

4702

self.assertEqual(str(msg), "EmptyHeader: \n\n")

4703

4704

def test_encode_preserves_leading_ws_on_value(self):

4705

msg = Message()

4706

msg['SomeHeader'] = ' value with leading ws'

4707

self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")

4708

4709

4710

4711

# Test RFC 2231 header parameters (en/de)coding

4712

class TestRFC2231(TestEmailBase):
    # Exercises RFC 2231 parameter handling through Message.get_param(),
    # set_param(), del_param(), get_filename(), get_boundary() and
    # get_content_charset().  The "test_headerregistry..." comments name the
    # corresponding tests in test_headerregistry.

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        title = 'This is even more ***fun*** isn\'t it!'
        msg = Message()
        # Without a language the middle element of the tuple is ''.
        msg.set_param('title', title, charset='us-ascii')
        eq(msg.get_param('title'), ('us-ascii', '', title))
        msg.set_param('title', title, charset='us-ascii', language='en')
        eq(msg.get_param('title'), ('us-ascii', 'en', title))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', title, charset='us-ascii', language='en')
        # NOTE(review): whitespace in this literal was reconstructed (leading
        # space on the folded title* line, double space after "Fri," in the
        # Received header) -- confirm against msg_01.txt.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): whitespace in this literal was reconstructed (leading
        # space on the folded title* line, double space after "Fri," in the
        # Received header) -- confirm against msg_01.txt.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The parsed message must flatten back to exactly the input text.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The parsed message must flatten back to exactly the input text.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

5079

5080

5081

5082

# Tests to ensure that signed parts of an email are completely preserved, as

5083

# required by RFC1847 section 2.1. Note that these are incomplete, because the

5084

# email package does not currently always preserve the body. See issue 1670765.

5085

class TestSigned(TestEmailBase):
    # Each test parses msg_45.txt, regenerates it, and checks that the first
    # MIME part of the output is identical to the first MIME part of the
    # input text.

    def _msg_and_obj(self, filename):
        # Return (raw text of the file, message object parsed from it).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message
        import re
        # Group 1 is the boundary text after '--'; group 2 is everything up
        # to the next line that repeats exactly that boundary.
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        inmatch = repart.search(original)
        outmatch = repart.search(result)
        # Report a missing part as a test failure instead of letting
        # .group() raise AttributeError on None.
        self.assertIsNotNone(inmatch, 'no MIME part found in original text')
        self.assertIsNotNone(outmatch, 'no MIME part found in result text')
        self.assertEqual(outmatch.group(2), inmatch.group(2))

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)

5117

5118

5119

5120

# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()

Older »