1
# -*- test-case-name: quotient.test.test_mimemessage -*-
3
# This module is part of the Quotient project and is Copyright 2003 Divmod:
4
# http://www.divmod.org/. This is free software. You can redistribute it
5
# and/or modify it under the terms of version 2.1 of the GNU Lesser General
6
# Public License as published by the Free Software Foundation.
15
from cStringIO import StringIO
17
from atop.filepile import symlink
18
from twisted.python.failure import Failure
20
from twisted.internet.error import ConnectionDone
21
from twisted.persisted.styles import Versioned
23
from atop.tpython import iterateInReactor
24
from atop.store import Item, Pool
25
from atop.powerup import Powerup, IPowerStation
27
from twisted.python import components
31
if st[0] == st[-1] == '"':
32
return st[1:-1].replace('\\\\', '\\').replace('\\"', '"')
33
if st.startswith('<') and st.endswith('>'):
37
class HeaderBodyParser:
38
def __init__(self, part, parent):
40
self.parsingHeaders = 1
41
self.prevheader = None
45
self.bodyMode = 'body'
46
self.gotFirstHeader = False
52
def startBody(self, linebegin, lineend):
    """Leave header mode and record where the headers stop and the body starts.

    linebegin -- start position of the current line; the header length is
                 measured from self.part.headersOffset up to this point.
    lineend   -- end position of the current line; stored as the body offset.
    """
    self.parsingHeaders = 0
    part = self.part
    part.headersLength = linebegin - part.headersOffset
    part.bodyOffset = lineend
57
def lineReceived(self, line, linebegin, lineend):
    """Route one line to the header parser or to the body parser.

    While self.parsingHeaders is set the line goes to parseHeaders();
    otherwise it goes to parseBody().  The first line seen in header mode
    also fixes self.part.headersOffset at that line's start.  The chosen
    handler's return value is returned unchanged.
    """
    if not self.parsingHeaders:
        return self.parseBody(line, linebegin, lineend)
    if not self.gotFirstHeader:
        # Remember where the header block begins; startBody() subtracts
        # this from the position where the headers stop.
        self.part.headersOffset = linebegin
        self.gotFirstHeader = True
    return self.parseHeaders(line, linebegin, lineend)
67
self.warnings.append(text)
69
def finishHeader(self):
    """Store the pending header, if there is one, on self.part and clear it.

    The pending header's name and value are held in self.prevheader and
    self.prevvalue; both are reset to None afterwards.
    """
    pending = self.prevheader
    if pending is not None:
        self.part[pending] = self.prevvalue
    self.prevheader = self.prevvalue = None
74
def parseHeaders(self, line, linebegin, lineend):
77
self.startBody(linebegin, lineend)
80
self.prevvalue += '\n' + line
82
h = line.split(': ', 1)
86
self.prevheader = header
87
self.prevvalue = value
88
elif line and line[-1] == ':':
89
# is this even a warning case? need to read the rfc... -glyph
90
self.prevheader = line[:-1]
93
self.warn("perhaps a body line?: %r" % line)
95
self.startBody(linebegin, lineend)
96
self.lineReceived(line, linebegin, lineend)
99
def parseBody(self, line, linebegin, lineend):
    """Dispatch a body line to the parse_<bodyMode> method.

    The method name is built from self.bodyMode (for example 'body' selects
    parse_body); its return value is returned unchanged.
    """
    handler = getattr(self, "parse_" + self.bodyMode)
    return handler(line, linebegin, lineend)
102
class MIMEMessageParser(HeaderBodyParser):
104
def startBody(self, linebegin, lineend):
105
HeaderBodyParser.startBody(self, linebegin, lineend)
106
self.boundary = self._calcBoundary()
108
self.finalBoundary = self.boundary + '--'
109
self.bodyMode = 'preamble'
111
ctyp = self.part['content-type']
112
if ctyp and ctyp.split()[0].strip().lower() == 'message/rfc822':
113
self.bodyMode = 'rfc822'
115
self.bodyMode = 'body'
116
# self.bodyFile = self.part.getBody("wb")
117
# ^ was only used for on-the-fly decoding
121
self.bodyFile.close()
122
HeaderBodyParser.close(self)
124
def _calcBoundary(self):
125
ctype = self.part['content-type']
126
if ctype and ctype.strip().lower().startswith('multipart'):
127
parts = ctype.split(';')
129
ps = part.split('=', 1)
132
key = key.strip().lower()
133
if key.lower() == 'boundary':
134
return '--' + unquote(val.strip())
139
def parse_body(self, line, b, e):
140
# TODO: on-the-fly decoding
143
def parse_rfc822(self, line, b, e):
144
np = self.subpart(parent=self, factory=MIMEMessageParser)
145
np.lineReceived(line, b, e)
148
def subpart(self, parent=None, factory=None):
152
factory = MIMEPartParser
153
newpart = self.part.newChild()
154
nmp = factory(newpart, parent)
157
def parse_preamble(self, line, b, e):
158
if line.strip('\r\n') == self.boundary:
159
self.bodyMode = 'nextpart'
160
return self.subpart()
163
def parse_nextpart(self, line, b, e):
164
if line.strip('\r\n') == self.boundary:
165
# If it's a boundary here, that means that we've seen TWO
166
# boundaries, one right after another! I can only assume that the
167
# sub-human cretins who have thusly encoded their MIME parts are
168
# attempting to convey the idea that the message *really* has a
169
# part-break there...
172
nmp.lineReceived(line, b, e)
175
def parse_postamble(self, line, b, e):
178
class MIMEPartParser(MIMEMessageParser):
179
def parseBody(self, line, linebegin, lineend):
180
if line.strip('\r\n') == self.parent.boundary:
181
# my body is over now - this is a boundary line so don't count it
182
self.part.bodyLength = linebegin - self.part.bodyOffset
184
elif line == self.parent.finalBoundary:
185
self.parent.bodyMode = 'postamble'
186
self.part.bodyLength = linebegin - self.part.bodyOffset
189
return MIMEMessageParser.parseBody(self, line, linebegin, lineend)
191
def parse_rfc822(self, line, linebegin, lineend):
192
np = self.subpart(parent=self.parent)
193
np.lineReceived(line, linebegin, lineend)
197
def __init__(self, parent=None):
202
# for parser use only
203
def setHeadersInfo(self, hoffset, hlength):
    """Record the headers' (offset, length) pair as self.headersInfo."""
    self.headersInfo = (hoffset, hlength)
206
def setBodyInfo(self, boffset, blength):
    """Record the body's (offset, length) pair as self.bodyInfo."""
    self.bodyInfo = (boffset, blength)
209
# email.Message compat: note non-coding-standard-compliant method names
213
for child in self.children:
214
for part in child.walk():
217
def get_all(self, field, failObj=None):
    """email.Message-style get_all(): look `field` up through self.get().

    `failObj` is passed to self.get() as the fallback value.  It defaults to
    None so that callers may omit it, as they can with get_filename() and
    get_param().

    NOTE(review): email.Message.get_all() returns a list of every matching
    value; this returns whatever self.get() returns -- confirm callers
    expect that.
    """
    return self.get(field, failObj)
220
def get_filename(self, failObj=None):
    """Return the 'filename' parameter of the content-disposition header.

    The lookup is delegated to get_param(); `failObj` is handed to it as
    the fallback value.
    """
    header = 'content-disposition'
    return self.get_param('filename', failObj, header)
223
def get_param(self, param, failObj=None, header='content-type', unquote=True):
227
param = param.lower()
228
for pair in [x.split('=', 1) for x in h.split(';')[1:]]:
229
if pair[0].strip().lower() == param:
230
r = len(pair) == 2 and pair[1].strip() or ''
232
return mimeparser.unquote(r)
238
self.children.append(c)
241
# email.Message compat
243
def __setitem__(self, key, val):
    """Append the (key, val) pair to self.headers.

    Nothing is replaced: assigning the same key again adds another entry.
    """
    self.headers.append((key, val))
246
def __getitem__(self, key, failobj=None):
247
for k,v in self.headers:
248
if key.lower() == k.lower():
253
def __contains__(self, name):
    """Report whether self.get(name) yields a true value.

    A header that self.get() returns as an empty (false) value therefore
    counts as absent.
    """
    return bool(self.get(name))
256
def has_key(self, name):
262
def get_charset(self):
265
def get_type(self, failobj=None):
    """Look up the 'content-type' header through self.get().

    `failobj` is passed to self.get() as the fallback value.
    """
    return self.get('content-type', failobj)
268
def get_payload(self, decode=False):
269
"""Get the message payload.
272
offt = self.bodyOffset
273
leng = self.bodyLength
277
ctran = self['content-transfer-encoding']
279
ct = ctran.lower().strip()
280
if ct == 'quoted-printable':
281
return quopri.decodestring(data)
283
return base64.decodestring(data)
288
def _uberparent(self):
295
return open(self._uberparent().filename, 'rb')
297
def get_default_type(self):
300
def get_content_type(self):
302
value = self.get('content-type', missing)
304
return self.get_default_type()
305
ctype = value.split(';', 1)[0].lower().strip()
306
if ctype.count('/') != 1:
310
def get_content_maintype(self):
    """Return the part of get_content_type() that precedes the first '/'."""
    pieces = self.get_content_type().split('/')
    return pieces[0]
314
def get_content_subtype(self):
    """Return the second '/'-separated field of get_content_type().

    Raises IndexError if get_content_type() returns a value without a '/'.
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
318
def get_main_type(self, failobj=None):
319
"""Return the message's main content type if present."""
321
ctype = self.get_type(missing)
324
if ctype.count('/') != 1:
326
return ctype.split('/')[0]
328
def is_multipart(self):
    """Return True when self.children is non-empty, False otherwise."""
    return bool(self.children)
331
def getdate(self, name):
332
data = self.get(name)
334
return rfc822.parsedate(data)
336
def getHeaderParams(self, hdrname):
337
ctype = self[hdrname]
338
typeinfo = ctype.split(';')
339
ctype = typeinfo[0].strip().lower()
341
for t in typeinfo[1:]:
344
k = kv[0].strip().lower()
345
v = kv[1].strip().strip('"')
349
def getAttachmentName(self):
350
params = self.getHeaderParams("content-type")
351
for fnk in 'name', 'filename':
352
if params.has_key(fnk):
355
gtl = self.get_type().split(';')[0].lower()
356
ext = {'text/html': 'html',
357
'text/plain': 'plain',
358
'image/jpeg': 'jpeg',
360
'image/gif': 'gif'}.get(gtl, 'bin')
361
return 'Unknown.'+ext
364
"""Infer a content-type. This will attempt to do something with
365
garbage data that isn't properly typed.
367
ctype = self['content-type']
370
if ctype.lower().startswith("application/octet-stream"):
371
self.getAttachmentName()
372
ext = params['name'].strip().split(".")[-1]
373
if exts.has_key(ext):
377
def getTypedParts(self, *types):
378
for part in self.walk():
379
# possible change: rather than get_content_type, use inferType to
380
# catch parts which are malformed MIME-ly but still valid data.
381
if part.get_content_type() in types:
384
def getAttachments(self):
385
for part in self.walk():
386
cd = part['content-disposition']
388
cd = cd.split(';')[0].strip().lower()
389
if cd == 'attachment':
393
return [k for k, v in self.headers]
395
# STUBBED METHODS: these will prevent spambayes et al. from raising
396
# exceptions, but we should look into how far we want to support them.
398
def __delitem__(self, thing):
401
def add_header(self, header, value):
404
def get_charsets(self, failObj=None):
407
# end stubbed methods
410
class MIMEMessage(MIMEPart, Item, Versioned):
414
# A string describing how this message came to us
417
# Reference to the contact who sent this message
420
# Don't call Item.__init__ - we don't want to initialize the item part of
421
# ourselves until we're filled out enough to exist in the database.
425
for part in self.walk():
429
persistenceVersion = 1
431
def upgradeToVersion1(self):
434
def getPartByID(self, mimeID):
442
def addToStore(self, store):
444
Item.__init__(self, store)
446
def index_name(self):
447
if hasattr(self,'contact'):
448
return self.contact.name
452
def index_subject(self):
    """Return the value of this message's 'subject' header (self['subject'])."""
    return self['subject']
455
def index_date(self):
    """Return self.dateReceived.

    MIMEMessageReceiver.messageDone() assigns that attribute when delivery
    of the message finishes.
    """
    return self.dateReceived
458
def index_pop(self, pool):
461
def getDisplayPart(self):
    """Return the first item produced by getTypedParts() for the text types.

    The types asked for are text/plain, text/html and text/rtf.

    NOTE(review): if getTypedParts() produces nothing, .next() raises
    StopIteration -- confirm callers handle a message with no such part.
    """
    candidates = self.getTypedParts('text/plain', 'text/html', 'text/rtf')
    return candidates.next()
465
# message started - headers begin (begin of line)
467
# headers ended - headers end (begin of line), body begins (end of line)
469
# boundary hit - body ends for previous child (begin of line) headers begin for
470
# next child (end of line)
472
# "rfc822-begin" - headers begin for sub-rfc822-message
474
# subpart headers ended - headers end for child (begin of line), body begins
475
# for child (end of line)
477
# subpart ended - body
479
# message ended (body ends)
481
class MIMEMessageReceiver:
482
def __init__(self, avatar, deliver, trustDateHeaders=False):
484
self.deliver = deliver
485
self.trustDateHeaders = trustDateHeaders
487
self.lineReceived = self.firstLineReceived
489
def makeConnection(self, t):
492
self.connectionMade()
494
def connectionMade(self):
    """Create the per-message state: a message, a file and a parser.

    A fresh MIMEMessage becomes self.message, the avatar supplies a new
    file as self.file, and a MIMEMessageParser for the message (with no
    parent) becomes self.parser.
    """
    message = MIMEMessage()
    self.message = message
    self.file = self.avatar.newFile()
    # The message is deliberately not given a reference to self.file.tell
    # (formerly stored as message._currentsize): that causes problems with
    # pickle.
    self.parser = MIMEMessageParser(message, None)
501
def firstLineReceived(self, line):
502
del self.lineReceived
503
if line.startswith('From '):
505
return self.lineReceived(line)
507
def lineReceived(self, line):
    """Count one line, write it to self.file and feed it to the parser.

    Each line advances self.bytecount by len(line) + 1; the extra one is
    for the '\\n' written after the line.  The parser receives the line with
    the byte counts before and after it.  If the parser returns an object
    other than the current parser, that object becomes self.parser.
    """
    linebegin = self.bytecount
    lineend = linebegin + len(line) + 1
    self.bytecount = lineend
    self.file.write(line + '\n')
    newParser = self.parser.lineReceived(line, linebegin, lineend)
    if newParser is not self.parser:
        self.parser = newParser
517
def connectionLost(self, reason):
522
def messageDone(self):
524
localNow = time.time()
525
gmtDate = time.gmtime(localNow)
526
self.parser.part.bodyLength = (self.bytecount - self.parser.part.bodyOffset)
527
if self.trustDateHeaders:
529
rdate = time.struct_time(rfc822.parsedate(self.message['received'].split(';')[-1]))
534
self.message['x-divmod-processed'] = rfc822.formatdate(localNow)
535
self.message.dateReceived = rdate
539
size = self.file.tell()
540
self.message.size = size
541
self.message.addToStore(self.avatar)
542
dplist = [str(x) for x in rdate[:3]] # Y/M/D
543
dplist.append(str(self.message.storeID))
544
# store/avatarid/Y/M/D/msgid
545
self.file.close(os.path.join(*dplist))
546
self.message.filename = self.file.finalpath
547
self.deliver(self.message)
548
self.avatar.transact(_)
552
def feedFile(self, f):
555
return iterateInReactor(self._deliverer(f)).addCallback(
556
lambda x: self.message)
558
def feedString(self, s):
561
return self.feedFile(StringIO(s))
563
def feedFileNow(self, f):
564
for x in self._deliverer(f):
568
def feedStringNow(self, s):
    """Wrap `s` in a StringIO and return the result of feedFileNow() on it."""
    buf = StringIO(s)
    return self.feedFileNow(buf)
571
def _deliverer(self, f):
572
self.makeConnection(None)
578
line = line.strip('\r\n')
579
self.lineReceived(line)
582
self.connectionLost(Failure())
586
self.connectionLost(Failure(ConnectionDone()))
588
class IMIMEDelivery(components.Interface):
589
"""I am a MIME delivery object. I can wrap a storage avatar.
592
def createMIMEReceiver(self, trustReceivedHeaders):
593
"""Create a MIME receiver. 'trustReceivedHeaders' is an option to
594
specify the primary date index: if it is True, it will use the last
595
'Received' header. If False, it will use the current time of the
596
message's delivery. This is dependent upon the delivery mechanism.
597
For example, SMTP should NOT trustReceivedHeaders, because although the
598
message was received from another mail server whose clock is probably
599
correct, the message is not *finished* being 'received' until the
600
system the user uses to check their mail has got its hands on it (in
601
this case, us). POP3, on the other hand, SHOULD trustReceivedHeaders,
602
because the mail hosting system on the other end of the POP connection
603
has arguably already added a header as to when it arrived at the
604
address represented by the POP account.
606
Finally, file imports should always, always trustReceivedHeaders,
607
otherwise you will end up with a date index with all the imported
608
messages clustered within 5 seconds of each other.
611
class MIMEDeliverator:
612
__implements__ = IMIMEDelivery
613
def __init__(self, avatar, arrivalRef):
615
self.arrivalRef = arrivalRef
617
def getArrivalRef(self):
    """Return the arrival reference stored on this object (self.arrivalRef)."""
    return self.arrivalRef
621
def createMIMEReceiver(self, trustReceivedHeaders):
    """Build a MIMEMessageReceiver that delivers into the arrival pool.

    The pool is obtained from self.arrivalRef.getItem() and its addItem
    method is given to the receiver as the deliver callable.
    `trustReceivedHeaders` is passed through as the receiver's
    trustDateHeaders argument.
    """
    arrivalPool = self.arrivalRef.getItem()
    deliver = arrivalPool.addItem
    return MIMEMessageReceiver(self.avatar, deliver, trustReceivedHeaders)
625
class EmailPowerup(Powerup):
    """Powerup that gives an avatar an 'arrival' pool and MIME delivery."""

    def setUpPools(self, avatar):
        """Create the 'arrival' pool and register MIME delivery for `avatar`.

        The new pool is added to the avatar's root pool, and a
        MIMEDeliverator holding a reference to that pool is set as the
        avatar's IMIMEDelivery component.
        """
        arrival = Pool(avatar, name='arrival')
        avatar.getRootPool().addItem(arrival)
        deliverator = MIMEDeliverator(avatar, arrival.referenceTo())
        avatar.setComponent(IMIMEDelivery, deliverator)
633
"jpeg": "image/jpeg",