~vcs-imports/quotient/main

« back to all changes in this revision

Viewing changes to quotient/mimemessage.py

Committer: glyph
Date: 2003-10-26 23:44:25 UTC
Revision ID: Arch-1:unnamed@bazaar.ubuntu.com%series--4208--patch-749

whitespace

files added:
atop

atop/.cvsignore

atop/__init__.py

atop/credup.py

atop/filepile.py

atop/formless.py

atop/fwd.py

atop/powerup.py

atop/regadapt.py

atop/sched.py

atop/store.py

atop/test_credup.py

atop/test_filepile.py

atop/test_powerup.py

atop/test_regadapt.py

atop/test_sched.py

atop/test_store.py

atop/tpython.py

nevow/events.py

nevow/test/test_passobj.py

quotient/mimemessage.py

quotient/proto

quotient/proto/.cvsignore

quotient/proto/__init__.py

quotient/proto/popup.py

quotient/proto/sip.py

quotient/proto/smtpin.py

quotient/proto/smtpout.py

quotient/stats.py

quotient/test/test_coverage.py

quotient/test/test_throughput.py

quotient/web/webup.py

sandbox/amir

sandbox/amir/bayesfilter.py

sandbox/dash

sandbox/dash/contacts.py

sandbox/dash/extractor.py

sandbox/dash/phones.py

sandbox/exarkun/im.py

sandbox/exarkun/imapbox.py

sandbox/exarkun/test_imap.py

sandbox/exarkun/test_smtpin.py

sandbox/exarkun/test_smtpout.py

sandbox/todo.py

files removed:
quotient/admin.py

quotient/bayesfilter.py

quotient/boxbase.py

quotient/emailtap.py

quotient/events.py

quotient/extractor.py

quotient/ezdb.py

quotient/filepile.py

quotient/filters.py

quotient/formless.py

quotient/im.py

quotient/imapbox.py

quotient/improxy.py

quotient/itempool.py

quotient/items

quotient/items/.cvsignore

quotient/items/__init__.py

quotient/items/appointments.py

quotient/items/contacts.py

quotient/items/conversation.py

quotient/items/grabbers.py

quotient/items/messages.py

quotient/items/mimemessage.py

quotient/items/phones.py

quotient/items/popup.py

quotient/items/powerup.py

quotient/items/provisioner.py

quotient/items/todo.py

quotient/items/webup.py

quotient/itemstore.py

quotient/login.py

quotient/lupytask.py

quotient/mimeparser.py

quotient/nullfilter.py

quotient/passobj.py

quotient/personality.py

quotient/picklejar.py

quotient/popbox.py

quotient/powerup.py

quotient/qq.py

quotient/qqworker.py

quotient/regadapt.py

quotient/sched.py

quotient/siptap.py

quotient/smtpin.py

quotient/smtpout.py

quotient/storcred.py

quotient/storq.py

quotient/tap.py

quotient/telephony

quotient/telephony/.cvsignore

quotient/telephony/__init__.py

quotient/telephony/sip.py

quotient/test/test_calendar.py

quotient/test/test_contacts.py

quotient/test/test_filepile.py

quotient/test/test_glue.py

quotient/test/test_imap.py

quotient/test/test_import.py

quotient/test/test_itempool.py

quotient/test/test_message.py

quotient/test/test_powerup.py

quotient/test/test_qq.py

quotient/test/test_regadapt.py

quotient/test/test_routing.py

quotient/test/test_sched.py

quotient/test/test_smtpin.py

quotient/test/test_smtpout.py

quotient/test/test_storcred.py

quotient/test/test_storq.py

quotient/test/test_todo.py

quotient/util.py

quotient/whitelist.py

files modified:
nevow/test/__init__.py

quotient/components.csv

quotient/grabbers.py

quotient/plugins.tml

quotient/provisioner.py

quotient/taglib.py

quotient/tap2.py

quotient/test/__init__.py

quotient/test/test_mimemessage.py

quotient/test/test_popup.py

quotient/test/test_sip.py

quotient/web/root.py

Show diffs side-by-side

added added

removed removed

quotient/mimemessage.py

# -*- test-case-name: quotient.test.test_mimemessage -*-

# http://www.divmod.org/. This is free software. You can redistribute it

# and/or modify it under the terms of version 2.1 of the GNU Lesser General

# Public License as published by the Free Software Foundation.

import os

import quopri

import base64

import rfc822

import time

import itertools

from cStringIO import StringIO

from atop.filepile import symlink

from twisted.python.failure import Failure

from twisted.internet.error import ConnectionDone

from twisted.persisted.styles import Versioned

from atop.tpython import iterateInReactor

from atop.store import Item, Pool

from atop.powerup import Powerup, IPowerStation

from twisted.python import components

def unquote(st):
    """Remove the surrounding quotes or angle brackets from a string.

    A string wrapped in double quotes has the quotes removed and the
    backslash escapes for backslash and double quote undone.  A string
    wrapped in angle brackets has the brackets removed.  Anything else,
    including strings shorter than two characters, is returned unchanged.
    """
    if len(st) > 1:
        if st[0] == st[-1] == '"':
            # Undo the escapes in a single left-to-right pass.  Splitting on
            # the escaped backslash first keeps a backslash produced by one
            # escape from being combined with a following quote and
            # unescaped a second time.
            pieces = st[1:-1].split('\\\\')
            return '\\'.join([piece.replace('\\"', '"') for piece in pieces])
        if st.startswith('<') and st.endswith('>'):
            return st[1:-1]
    return st

class HeaderBodyParser:
    """Line-oriented parser for a header block followed by a body.

    Headers are stored on ``part`` with item assignment, and the byte
    offsets of the header block and of the body are recorded as attributes
    of ``part`` (``headersOffset``, ``headersLength``, ``bodyOffset``).
    Every line-handling method returns the parser that should be given the
    next line.
    """

    def __init__(self, part, parent):
        """Create a parser filling in ``part``.

        ``parent`` is the enclosing parser, or None for the outermost one.
        """
        self.parent = parent
        self.parsingHeaders = 1
        self.prevheader = None
        self.prevvalue = None
        self.warnings = []
        self.part = part
        self.bodyMode = 'body'
        self.gotFirstHeader = False

    def close(self):
        """Close the enclosing parser, if there is one."""
        if self.parent:
            self.parent.close()

    def startBody(self, linebegin, lineend):
        """Switch to body parsing.

        The header block ends at ``linebegin`` and the body starts at
        ``lineend``.
        """
        self.parsingHeaders = 0
        self.part.headersLength = linebegin - self.part.headersOffset
        self.part.bodyOffset = lineend

    def lineReceived(self, line, linebegin, lineend):
        """Handle one line occupying bytes ``linebegin`` to ``lineend``.

        Returns the parser that should receive the following line.
        """
        if self.parsingHeaders:
            if not self.gotFirstHeader:
                self.part.headersOffset = linebegin
                self.gotFirstHeader = True
            return self.parseHeaders(line, linebegin, lineend)
        else:
            return self.parseBody(line, linebegin, lineend)

    def warn(self, text):
        """Remember a warning about questionable input."""
        self.warnings.append(text)

    def finishHeader(self):
        """Store the header collected so far, if any, on the part."""
        if self.prevheader is not None:
            self.part[self.prevheader] = self.prevvalue
        self.prevheader = self.prevvalue = None

    def parseHeaders(self, line, linebegin, lineend):
        """Handle a line received while still in the header block."""
        if not line:
            # The blank separator line: the body starts right after it.
            self.finishHeader()
            self.startBody(linebegin, lineend)
            return self

        isContinuation = line[0] in ' \t'
        if isContinuation and self.prevheader is not None:
            self.prevvalue += '\n' + line
            return self

        if not isContinuation:
            h = line.split(': ', 1)
            if len(h) == 2:
                self.finishHeader()
                self.prevheader, self.prevvalue = h
                return self
            if line[-1] == ':':
                # is this even a warning case? need to read the rfc... -glyph
                # Flush the pending header first so it is not overwritten.
                self.finishHeader()
                self.prevheader = line[:-1]
                self.prevvalue = ''
                return self

        # Neither a header nor a usable continuation: treat the line as the
        # first line of the body.  The body therefore starts at the beginning
        # of this line, and the parser chosen by the body handler must be
        # passed on to the caller.
        self.warn("perhaps a body line?: %r" % line)
        self.finishHeader()
        self.startBody(linebegin, linebegin)
        return self.lineReceived(line, linebegin, lineend)

    def parseBody(self, line, linebegin, lineend):
        """Dispatch a body line to the ``parse_<bodyMode>`` method."""
        return getattr(self, "parse_" + self.bodyMode)(line, linebegin, lineend)

101

102

class MIMEMessageParser(HeaderBodyParser):
    """Header/body parser that understands MIME structure.

    After the headers the body is parsed in one of several modes: 'body'
    for an opaque body, 'rfc822' for an embedded message, and 'preamble',
    'nextpart' and 'postamble' for the stages of a multipart body.
    """

    # Only ever used for on-the-fly decoding, which is currently disabled.
    bodyFile = None

    def startBody(self, linebegin, lineend):
        """Record the body offsets and choose the body mode from the
        content-type header.
        """
        HeaderBodyParser.startBody(self, linebegin, lineend)
        self.boundary = self._calcBoundary()
        if self.boundary:
            self.finalBoundary = self.boundary + '--'
            self.bodyMode = 'preamble'
            return
        ctyp = self.part['content-type']
        # The media type is everything before the first ';'.  Splitting on
        # whitespace would leave the ';' attached whenever parameters follow.
        if ctyp and ctyp.split(';', 1)[0].strip().lower() == 'message/rfc822':
            self.bodyMode = 'rfc822'
            return
        self.bodyMode = 'body'

    def close(self):
        """Close the body file, if one is open, then the enclosing parser."""
        if self.bodyFile:
            self.bodyFile.close()
        HeaderBodyParser.close(self)

    def _calcBoundary(self):
        """Return the multipart delimiter line ('--' plus the boundary
        parameter), or None if this part is not multipart or names no
        boundary.
        """
        ctype = self.part['content-type']
        if not ctype or not ctype.strip().lower().startswith('multipart'):
            return None
        for param in ctype.split(';'):
            pieces = param.split('=', 1)
            if len(pieces) == 2 and pieces[0].strip().lower() == 'boundary':
                return '--' + unquote(pieces[1].strip())
        return None

    def parse_body(self, line, b, e):
        """Opaque body line: nothing to do."""
        # TODO: on-the-fly decoding
        return self

    def parse_rfc822(self, line, b, e):
        """First line of an embedded message: hand it to a new child
        parser, which also receives all following lines.
        """
        np = self.subpart(parent=self, factory=MIMEMessageParser)
        np.lineReceived(line, b, e)
        return np

    def subpart(self, parent=None, factory=None):
        """Create a child part and return a parser for it.

        ``parent`` is the parser the child should hand control back to
        (this parser by default); ``factory`` is the parser class to use
        (MIMEPartParser by default).
        """
        if parent is None:
            parent = self
        if factory is None:
            factory = MIMEPartParser
        return factory(self.part.newChild(), parent)

    def parse_preamble(self, line, b, e):
        """Skip lines until the first boundary, then start the first part."""
        if line.strip('\r\n') == self.boundary:
            self.bodyMode = 'nextpart'
            return self.subpart()
        return self

    def parse_nextpart(self, line, b, e):
        """Handle the line following the boundary that ended a part."""
        if line.strip('\r\n') == self.boundary:
            # Two boundaries in a row: treat them as a single part break
            # rather than creating an empty part.
            return self
        nmp = self.subpart()
        nmp.lineReceived(line, b, e)
        return nmp

    def parse_postamble(self, line, b, e):
        """Ignore everything after the final boundary."""
        return self

177

178

class MIMEPartParser(MIMEMessageParser):
    """Parser for a part inside a multipart body.

    ``parent`` is the parser of the enclosing multipart entity.  Its
    boundary lines end this part's body and hand control back to it.
    """

    def parseBody(self, line, linebegin, lineend):
        """Handle a body line, watching for the enclosing boundary."""
        # Strip terminators once so both delimiter forms are compared the
        # same way.
        stripped = line.strip('\r\n')
        if stripped == self.parent.boundary:
            # my body is over now - this is a boundary line so don't count it
            self._endBody(linebegin)
            return self.parent
        elif stripped == self.parent.finalBoundary:
            self.parent.bodyMode = 'postamble'
            self._endBody(linebegin)
            return self.parent
        else:
            return MIMEMessageParser.parseBody(self, line, linebegin, lineend)

    def _endBody(self, linebegin):
        """Record that the body ends at ``linebegin``.

        This is recorded for this part and for every part between it and
        the enclosing multipart's part.  Those intermediate parts are
        message/rfc822 containers created by parse_rfc822, whose own
        parsers are bypassed when the boundary arrives.
        """
        container = self.parent.part
        part = self.part
        while part is not None and part is not container:
            part.bodyLength = linebegin - part.bodyOffset
            part = part.parent

    def parse_rfc822(self, line, linebegin, lineend):
        """First line of an embedded message inside a multipart part.

        The new parser's parent is the enclosing multipart parser, so the
        enclosing boundary ends the embedded message as well.
        """
        np = self.subpart(parent=self.parent)
        np.lineReceived(line, linebegin, lineend)
        return np

195

196

class MIMEPart:
    """One entity of a MIME message: a list of headers plus child parts.

    The body is not held in memory.  The parser records ``bodyOffset`` and
    ``bodyLength`` on the part, and get_payload() reads those bytes from
    the file named by the ``filename`` attribute of the topmost part.

    Many methods mirror the email.Message API, hence their names.
    """

    def __init__(self, parent=None):
        self.parent = parent
        self.children = []
        self.headers = []

    # for parser use only

    def setHeadersInfo(self, hoffset, hlength):
        self.headersInfo = hoffset, hlength

    def setBodyInfo(self, boffset, blength):
        self.bodyInfo = boffset, blength

    # email.Message compat: note non-coding-standard-compliant method names

    def walk(self):
        """Yield this part and then all descendants, depth first."""
        yield self
        for child in self.children:
            for part in child.walk():
                yield part

    def get_all(self, field, failObj=None):
        """Return a list of every value of header ``field`` (matched
        case-insensitively), or ``failObj`` if there is none.
        """
        wanted = field.lower()
        values = [v for k, v in self.headers if k.lower() == wanted]
        if not values:
            return failObj
        return values

    def get_filename(self, failObj=None):
        """Return the 'filename' parameter of content-disposition."""
        return self.get_param('filename', failObj, 'content-disposition')

    def get_param(self, param, failObj=None, header='content-type', unquote=True):
        """Return parameter ``param`` of ``header``.

        Returns ``failObj`` if the header or the parameter is missing.  A
        parameter given without a value yields ''.  Unless ``unquote`` is
        false, surrounding quotes are removed from the value.
        """
        h = self[header]
        if not h:
            return failObj
        param = param.lower()
        for chunk in h.split(';')[1:]:
            pair = chunk.split('=', 1)
            if pair[0].strip().lower() == param:
                if len(pair) == 2:
                    r = pair[1].strip()
                else:
                    r = ''
                if unquote:
                    return self._unquoteParam(r)
                return r
        return failObj

    def _unquoteParam(self, value):
        """Apply the module-level unquote() to ``value``.

        get_param() cannot call it directly because its ``unquote`` flag
        shadows the function name.
        """
        return unquote(value)

    def newChild(self):
        """Create a child part, append it to ``children`` and return it."""
        c = MIMEPart(self)
        self.children.append(c)
        return c

    # email.Message compat

    def __setitem__(self, key, val):
        """Append a header; existing headers of that name are kept."""
        self.headers.append((key, val))

    def __getitem__(self, key, failobj=None):
        """Return the first value of header ``key`` (case-insensitive), or
        ``failobj`` if there is no such header.
        """
        wanted = key.lower()
        for k, v in self.headers:
            if wanted == k.lower():
                return v
        return failobj

    get = __getitem__

    def __contains__(self, name):
        # NOTE: a header that is present with an empty value counts as absent.
        return bool(self.get(name))

    def has_key(self, name):
        return name in self

    def items(self):
        return self.headers

    def get_charset(self):
        return None

    def get_type(self, failobj=None):
        """Return the raw content-type header value, or ``failobj``."""
        return self.get('content-type', failobj)

    def get_payload(self, decode=False):
        """Get the message payload.

        Reads ``bodyLength`` bytes at ``bodyOffset`` from the message file.
        With ``decode`` true, a quoted-printable or base64
        content-transfer-encoding is undone; any other encoding is returned
        as stored.
        """
        f = self.openFile()
        try:
            f.seek(self.bodyOffset)
            data = f.read(self.bodyLength)
        finally:
            # Always release the handle, even if the read fails.
            f.close()
        if decode:
            ctran = self['content-transfer-encoding']
            if ctran:
                ct = ctran.lower().strip()
                if ct == 'quoted-printable':
                    return quopri.decodestring(data)
                elif ct == 'base64':
                    return base64.decodestring(data)
        return data

    def _uberparent(self):
        """Return the topmost ancestor of this part."""
        o = self
        while o.parent:
            o = o.parent
        return o

    def openFile(self):
        """Open the file holding the whole message, for binary reading."""
        return open(self._uberparent().filename, 'rb')

    def get_default_type(self):
        return 'text/plain'

    def get_content_type(self):
        """Return the lower-cased 'maintype/subtype' without parameters.

        Returns the default type if the header is missing and 'text/plain'
        if it is malformed.
        """
        missing = object()
        value = self.get('content-type', missing)
        if value is missing:
            return self.get_default_type()
        ctype = value.split(';', 1)[0].lower().strip()
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype

    def get_content_maintype(self):
        return self.get_content_type().split('/')[0]

    def get_content_subtype(self):
        return self.get_content_type().split('/')[1]

    def get_main_type(self, failobj=None):
        """Return the message's main content type if present."""
        missing = object()
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        if ctype.count('/') != 1:
            return failobj
        return ctype.split('/')[0]

    def is_multipart(self):
        return bool(self.children)

    def getdate(self, name):
        """Return header ``name`` parsed by rfc822.parsedate, or None if
        the header is missing or empty.
        """
        data = self.get(name)
        if data:
            return rfc822.parsedate(data)
        return None

    def getHeaderParams(self, hdrname):
        """Return the ';'-separated ``key=value`` parameters of a header.

        Keys are lower-cased; values have surrounding whitespace and double
        quotes stripped.  A missing header yields an empty dictionary.
        """
        value = self[hdrname]
        if not value:
            return {}
        params = {}
        for t in value.split(';')[1:]:
            kv = t.split('=', 1)
            if len(kv) == 2:
                params[kv[0].strip().lower()] = kv[1].strip().strip('"')
        return params

    def getAttachmentName(self):
        """Return a file name for this part.

        The 'name' and 'filename' parameters of content-type are tried
        first, then those of content-disposition.  If neither header names
        the part, 'Unknown.<ext>' is returned, with the extension chosen
        from the content type.
        """
        for hdrname in 'content-type', 'content-disposition':
            params = self.getHeaderParams(hdrname)
            for fnk in 'name', 'filename':
                if fnk in params:
                    return params[fnk]
        rawType = self.get_type() or self.get_default_type()
        gtl = rawType.split(';')[0].strip().lower()
        ext = {'text/html': 'html',
               'text/plain': 'plain',
               'image/jpeg': 'jpeg',
               'image/png': 'png',
               'image/gif': 'gif'}.get(gtl, 'bin')
        return 'Unknown.' + ext

    def inferType(self):
        """Infer a content-type.  This will attempt to do something with
        garbage data that isn't properly typed.

        A missing header yields 'text/plain'.  A generic
        application/octet-stream part is given the type that the
        module-level ``exts`` table lists for its attachment name's
        extension, if any.  Otherwise the header value is returned as is.
        """
        ctype = self['content-type']
        if not ctype:
            return 'text/plain'
        if ctype.lower().startswith("application/octet-stream"):
            ext = self.getAttachmentName().strip().split(".")[-1].lower()
            if ext in exts:
                return exts[ext]
        return ctype

    def getTypedParts(self, *types):
        """Yield every part (this one included) whose content type is one
        of ``types``.
        """
        for part in self.walk():
            # possible change: rather than get_content_type, use inferType to
            # catch parts which are malformed MIME-ly but still valid data.
            if part.get_content_type() in types:
                yield part

    def getAttachments(self):
        """Yield every part whose content-disposition is 'attachment'."""
        for part in self.walk():
            cd = part['content-disposition']
            if cd:
                cd = cd.split(';')[0].strip().lower()
                if cd == 'attachment':
                    yield part

    def keys(self):
        return [k for k, v in self.headers]

    # STUBBED METHODS: these will prevent spambayes et. al. from raising
    # exceptions, but we should look into how far we want to support them.

    def __delitem__(self, thing):
        pass

    def add_header(self, header, value):
        pass

    def get_charsets(self, failObj=None):
        return []

    # end stubbed methods

408

409

410

class MIMEMessage(MIMEPart, Item, Versioned):
    """The top-level part of a message, which can also be kept in a store.

    Item.__init__ is deliberately not called on construction - we don't
    want to initialize the item part of ourselves until we're filled out
    enough to exist in the database.  addToStore() does that.
    """

    parent = None
    smtpInfo = None

    # A string describing how this message came to us
    receivedVia = None

    # Reference to the contact who sent this message
    contactRef = None

    persistenceVersion = 1

    def assignIDs(self):
        """Number every part with a ``mimeID``, in walk() order from 0."""
        for mimeID, part in enumerate(self.walk()):
            part.mimeID = mimeID

    def upgradeToVersion1(self):
        self.assignIDs()

    def getPartByID(self, mimeID):
        """Return the part at position ``mimeID`` in walk() order, or None
        if there are not that many parts.
        """
        for position, part in enumerate(self.walk()):
            if position == mimeID:
                return part
        return None

    def addToStore(self, store):
        """Number the parts, then initialize the Item side with ``store``."""
        self.assignIDs()
        Item.__init__(self, store)

    def index_name(self):
        if hasattr(self, 'contact'):
            return self.contact.name
        else:
            return self['from']

    def index_subject(self):
        return self['subject']

    def index_date(self):
        return self.dateReceived

    def index_pop(self, pool):
        return self.storeID

    def getDisplayPart(self):
        """Return the first text/plain, text/html or text/rtf part in
        walk() order, or None if the message has no such part.
        """
        # A loop is used instead of .next() so that a message with no text
        # part yields None rather than leaking StopIteration to the caller.
        for part in self.getTypedParts('text/plain', 'text/html', 'text/rtf'):
            return part
        return None

463

464

465

# message started - headers begin (beginning of line)
#
# headers ended - headers end (beginning of line), body begins (end of line)
#
# boundary hit - body ends for the previous child (beginning of line), headers
# begin for the next child (end of line)
#
# "rfc822-begin" - headers begin for the sub-rfc822-message
#
# subpart headers ended - headers end for the child (beginning of line), body
# begins for the child (end of line)
#
# subpart ended - body ends for the child
#
# message ended - body ends

480

481

class MIMEMessageReceiver:
    """Receive a message line by line, spool it to a file and parse it.

    Lines are written to a file obtained from ``avatar.newFile()`` while a
    MIMEMessageParser records the structure on a MIMEMessage.  When the
    message is complete it is added to the avatar's store and passed to
    ``deliver``.
    """

    def __init__(self, avatar, deliver, trustDateHeaders=False):
        """Create a receiver.

        ``avatar`` supplies newFile() and transact() and is the store the
        message is added to.  ``deliver`` is called with the finished
        message.  If ``trustDateHeaders`` is true the message date is taken
        from the 'Received' header instead of the current time.
        """
        self.avatar = avatar
        self.deliver = deliver
        self.trustDateHeaders = trustDateHeaders
        self.done = False
        # The first line gets special treatment; firstLineReceived removes
        # this instance attribute again so the method below takes over.
        self.lineReceived = self.firstLineReceived

    def makeConnection(self, t):
        # rhg protocol
        self.bytecount = 0
        self.connectionMade()

    def connectionMade(self):
        self.message = MIMEMessage()
        self.file = self.avatar.newFile()
        # self.message._currentsize = self.file.tell
        # ^ causes problems with pickle, obviously
        self.parser = MIMEMessageParser(self.message, None)

    def firstLineReceived(self, line):
        """Handle the first line, dropping a leading 'From ' line."""
        del self.lineReceived
        if line.startswith('From '):
            return
        return self.lineReceived(line)

    def lineReceived(self, line):
        """Spool one line (given without its terminator) and parse it."""
        linebegin = self.bytecount
        self.bytecount += (len(line) + 1)
        lineend = self.bytecount
        self.file.write(line + '\n')
        self.parser = self.parser.lineReceived(line, linebegin, lineend)

    def connectionLost(self, reason):
        """Abort the spool file unless the message was completed."""
        if self.done:
            return
        self.file.abort()

    def _finishOpenParts(self):
        """Give every part that is still open at end of input a body
        length reaching to the end of the received data.
        """
        parser = self.parser
        part = parser.part
        if parser.parsingHeaders:
            # Input ended before the blank line that ends the headers.
            # Store the pending header and start an empty body at the end
            # of the data so that the offsets below exist.
            if not parser.gotFirstHeader:
                part.headersOffset = self.bytecount
                parser.gotFirstHeader = True
            parser.finishHeader()
            parser.startBody(self.bytecount, self.bytecount)
        part.bodyLength = self.bytecount - part.bodyOffset
        # Enclosing parts never saw a closing boundary either.
        ancestor = part.parent
        while ancestor is not None:
            if (not hasattr(ancestor, 'bodyLength')
                    and hasattr(ancestor, 'bodyOffset')):
                ancestor.bodyLength = self.bytecount - ancestor.bodyOffset
            ancestor = ancestor.parent

    def messageDone(self):
        """Finish the message: date it, store it, file it and deliver it."""
        self.done = True
        localNow = time.time()
        gmtDate = time.gmtime(localNow)
        self._finishOpenParts()
        rdate = gmtDate
        if self.trustDateHeaders:
            try:
                rdate = time.struct_time(
                    rfc822.parsedate(self.message['received'].split(';')[-1]))
            except Exception:
                # Best effort: a missing or unparseable header falls back to
                # the time of delivery.
                rdate = gmtDate
        self.message['x-divmod-processed'] = rfc822.formatdate(localNow)
        self.message.dateReceived = rdate

        def _():
            self.file.flush()
            size = self.file.tell()
            self.message.size = size
            self.message.addToStore(self.avatar)
            dplist = [str(x) for x in rdate[:3]]  # Y/M/D
            dplist.append(str(self.message.storeID))
            # store/avatarid/Y/M/D/msgid
            self.file.close(os.path.join(*dplist))
            self.message.filename = self.file.finalpath
            self.deliver(self.message)
        self.avatar.transact(_)

    # utility methods

    def feedFile(self, f):
        """Feed a file in.

        The lines are processed through iterateInReactor; the result has
        a callback added that yields the message.
        """
        return iterateInReactor(self._deliverer(f)).addCallback(
            lambda x: self.message)

    def feedString(self, s):
        """Feed a string in.
        """
        return self.feedFile(StringIO(s))

    def feedFileNow(self, f):
        """Feed a whole file in synchronously and return the message."""
        for x in self._deliverer(f):
            pass
        return self.message

    def feedStringNow(self, s):
        """Feed a whole string in synchronously and return the message."""
        return self.feedFileNow(StringIO(s))

    def _deliverer(self, f):
        """Generator that feeds ``f`` in one line per iteration."""
        self.makeConnection(None)
        try:
            while True:
                line = f.readline()
                if not line:
                    break
                # Remove only the line terminator; leading characters are
                # part of the line.
                line = line.rstrip('\r\n')
                self.lineReceived(line)
                yield None
        except:
            # Clean up the spool file, then let the error propagate.
            self.connectionLost(Failure())
            raise
        else:
            self.messageDone()
            self.connectionLost(Failure(ConnectionDone()))

587

588

class IMIMEDelivery(components.Interface):
    """I am a MIME delivery object. I can wrap a storage avatar.
    """

    def createMIMEReceiver(self, trustReceivedHeaders):
        """Create a MIME receiver.

        'trustReceivedHeaders' selects the primary date index.  If it is
        True, the last 'Received' header is used.  If it is False, the
        current time of the message's delivery is used.  Which one is right
        depends on the delivery mechanism.

        SMTP should NOT trustReceivedHeaders.  The message was received
        from another mail server whose clock is probably correct, but it is
        not *finished* being 'received' until the system the user uses to
        check their mail (in this case, us) has got its hands on it.

        POP3 SHOULD trustReceivedHeaders, because the mail hosting system
        on the other end of the POP connection has arguably already added a
        header saying when the message arrived at the address represented
        by the POP account.

        File imports should always, always trustReceivedHeaders, otherwise
        you will end up with a date index with all the imported messages
        clustered within 5 seconds of each other.
        """

610

611

class MIMEDeliverator:
    """IMIMEDelivery implementation that delivers into the pool obtained
    from an arrival reference.
    """

    __implements__ = IMIMEDelivery

    def __init__(self, avatar, arrivalRef):
        self.avatar, self.arrivalRef = avatar, arrivalRef

    def getArrivalRef(self):
        "get arrival reference"
        return self.arrivalRef

    def createMIMEReceiver(self, trustReceivedHeaders):
        """Return a MIMEMessageReceiver whose finished messages are passed
        to the addItem method of the item the arrival reference points to.
        """
        deliver = self.arrivalRef.getItem().addItem
        return MIMEMessageReceiver(self.avatar, deliver, trustReceivedHeaders)

624

625

class EmailPowerup(Powerup):
    """Powerup that sets an avatar up for MIME delivery."""

    def setUpPools(self, avatar):
        """Create the 'arrival' pool, add it to the avatar's root pool and
        register a MIMEDeliverator for it as the avatar's IMIMEDelivery
        component.
        """
        arrival = Pool(avatar, name='arrival')
        avatar.getRootPool().addItem(arrival)
        delivery = MIMEDeliverator(avatar, arrival.referenceTo())
        avatar.setComponent(IMIMEDelivery, delivery)

630

631

# File-name extension -> content type, used to type generic
# application/octet-stream attachments.
exts = dict([
    ("jpg", "image/jpeg"),
    ("jpeg", "image/jpeg"),
    ("png", "image/png"),
    ("gif", "image/gif"),
])

Older »