~lfaraone/ubuntu/lucid/python-lamson/lp548998

« back to all changes in this revision

Viewing changes to lamson/encoding.py

Committer: Bazaar Package Importer
Author(s): David Watson
Date: 2009-08-21 14:45:16 UTC
Revision ID: james.westby@ubuntu.com-20090821144516-08tp9e4pyl4t493y

Tags: upstream-1.0pre2

Import upstream version 1.0pre2

files added:

LICENSE

MANIFEST.in

PKG-INFO

README

bin/lamson

build.vel

doc/done.txt

doc/lamsonproject.org

doc/lamsonproject.org/Session.vim

doc/lamsonproject.org/build.vel

doc/lamsonproject.org/config.py

doc/lamsonproject.org/input

doc/lamsonproject.org/input/about.txt

doc/lamsonproject.org/input/blog

doc/lamsonproject.org/input/blog/._html_email_in_gmail.png

doc/lamsonproject.org/input/blog/2009-05-16.txt

doc/lamsonproject.org/input/blog/2009-05-18.txt

doc/lamsonproject.org/input/blog/2009-05-19.txt

doc/lamsonproject.org/input/blog/2009-05-20.txt

doc/lamsonproject.org/input/blog/2009-05-24.txt

doc/lamsonproject.org/input/blog/2009-05-28.txt

doc/lamsonproject.org/input/blog/2009-05-31.txt

doc/lamsonproject.org/input/blog/2009-06-01.txt

doc/lamsonproject.org/input/blog/2009-06-03-2.txt

doc/lamsonproject.org/input/blog/2009-06-03.txt

doc/lamsonproject.org/input/blog/2009-06-04.txt

doc/lamsonproject.org/input/blog/2009-06-06.txt

doc/lamsonproject.org/input/blog/2009-06-08.txt

doc/lamsonproject.org/input/blog/2009-06-09.txt

doc/lamsonproject.org/input/blog/2009-06-14.txt

doc/lamsonproject.org/input/blog/2009-06-20.txt

doc/lamsonproject.org/input/blog/2009-06-22.txt

doc/lamsonproject.org/input/blog/2009-06-26.txt

doc/lamsonproject.org/input/blog/2009-07-03.txt

doc/lamsonproject.org/input/blog/2009-07-07.txt

doc/lamsonproject.org/input/blog/2009-07-09.txt

doc/lamsonproject.org/input/blog/2009-07-14.txt

doc/lamsonproject.org/input/blog/2009-07-19.txt

doc/lamsonproject.org/input/blog/2009-07-20.txt

doc/lamsonproject.org/input/blog/2009-08-03.txt

doc/lamsonproject.org/input/blog/html_email_in_gmail.png

doc/lamsonproject.org/input/blog/index.txt

doc/lamsonproject.org/input/blog/template.html

doc/lamsonproject.org/input/contact.txt

doc/lamsonproject.org/input/docs

doc/lamsonproject.org/input/docs/deferred_processing_to_queues.txt

doc/lamsonproject.org/input/docs/deploying_lamson.txt

doc/lamsonproject.org/input/docs/faq.txt

doc/lamsonproject.org/input/docs/filtering_spam.txt

doc/lamsonproject.org/input/docs/getting_started.txt

doc/lamsonproject.org/input/docs/index.txt

doc/lamsonproject.org/input/docs/introduction_to_finite_state_machines.txt

doc/lamsonproject.org/input/docs/lamson_commands.txt

doc/lamsonproject.org/input/docs/lamson_virtual_env.txt

doc/lamsonproject.org/input/docs/primary_vs_secondary_registration.txt

doc/lamsonproject.org/input/docs/template.html

doc/lamsonproject.org/input/docs/writing_a_state_storage.txt

doc/lamsonproject.org/input/download.txt

doc/lamsonproject.org/input/home_template.html

doc/lamsonproject.org/input/index.txt

doc/lamsonproject.org/input/lists

doc/lamsonproject.org/input/lists/index.txt

doc/lamsonproject.org/input/releases

doc/lamsonproject.org/input/releases/index.txt

doc/lamsonproject.org/input/videos

doc/lamsonproject.org/input/videos/index.txt

doc/lamsonproject.org/output

doc/lamsonproject.org/output/._prettify.js

doc/lamsonproject.org/output/about.html

doc/lamsonproject.org/output/about.txt

doc/lamsonproject.org/output/blog

doc/lamsonproject.org/output/blog/2009-05-16.html

doc/lamsonproject.org/output/blog/2009-05-16.txt

doc/lamsonproject.org/output/blog/2009-05-18.html

doc/lamsonproject.org/output/blog/2009-05-18.txt

doc/lamsonproject.org/output/blog/2009-05-19.html

doc/lamsonproject.org/output/blog/2009-05-19.txt

doc/lamsonproject.org/output/blog/2009-05-20.html

doc/lamsonproject.org/output/blog/2009-05-20.txt

doc/lamsonproject.org/output/blog/2009-05-24.html

doc/lamsonproject.org/output/blog/2009-05-24.txt

doc/lamsonproject.org/output/blog/2009-05-28.html

doc/lamsonproject.org/output/blog/2009-05-28.txt

doc/lamsonproject.org/output/blog/2009-05-31.html

doc/lamsonproject.org/output/blog/2009-05-31.txt

doc/lamsonproject.org/output/blog/2009-06-01.html

doc/lamsonproject.org/output/blog/2009-06-01.txt

doc/lamsonproject.org/output/blog/2009-06-03-2.html

doc/lamsonproject.org/output/blog/2009-06-03-2.txt

doc/lamsonproject.org/output/blog/2009-06-03.html

doc/lamsonproject.org/output/blog/2009-06-03.txt

doc/lamsonproject.org/output/blog/2009-06-04.html

doc/lamsonproject.org/output/blog/2009-06-04.txt

doc/lamsonproject.org/output/blog/2009-06-06.html

doc/lamsonproject.org/output/blog/2009-06-06.txt

doc/lamsonproject.org/output/blog/2009-06-08.html

doc/lamsonproject.org/output/blog/2009-06-08.txt

doc/lamsonproject.org/output/blog/2009-06-09.html

doc/lamsonproject.org/output/blog/2009-06-09.txt

doc/lamsonproject.org/output/blog/2009-06-14.html

doc/lamsonproject.org/output/blog/2009-06-14.txt

doc/lamsonproject.org/output/blog/2009-06-20.html

doc/lamsonproject.org/output/blog/2009-06-20.txt

doc/lamsonproject.org/output/blog/2009-06-22.html

doc/lamsonproject.org/output/blog/2009-06-22.txt

doc/lamsonproject.org/output/blog/2009-06-26.html

doc/lamsonproject.org/output/blog/2009-06-26.txt

doc/lamsonproject.org/output/blog/2009-07-03.html

doc/lamsonproject.org/output/blog/2009-07-03.txt

doc/lamsonproject.org/output/blog/2009-07-07.html

doc/lamsonproject.org/output/blog/2009-07-07.txt

doc/lamsonproject.org/output/blog/2009-07-09.html

doc/lamsonproject.org/output/blog/2009-07-09.txt

doc/lamsonproject.org/output/blog/2009-07-14.html

doc/lamsonproject.org/output/blog/2009-07-14.txt

doc/lamsonproject.org/output/blog/2009-07-19.html

doc/lamsonproject.org/output/blog/2009-07-19.txt

doc/lamsonproject.org/output/blog/2009-07-20.html

doc/lamsonproject.org/output/blog/2009-07-20.txt

doc/lamsonproject.org/output/blog/index.html

doc/lamsonproject.org/output/blog/index.txt

doc/lamsonproject.org/output/blog/template.html

doc/lamsonproject.org/output/contact.html

doc/lamsonproject.org/output/contact.txt

doc/lamsonproject.org/output/css

doc/lamsonproject.org/output/css/code.css

doc/lamsonproject.org/output/css/style.css

doc/lamsonproject.org/output/css/style_ie.css

doc/lamsonproject.org/output/docs

doc/lamsonproject.org/output/docs/deferred_processing_to_queues.html

doc/lamsonproject.org/output/docs/deferred_processing_to_queues.txt

doc/lamsonproject.org/output/docs/deploying_lamson.html

doc/lamsonproject.org/output/docs/deploying_lamson.txt

doc/lamsonproject.org/output/docs/faq.html

doc/lamsonproject.org/output/docs/faq.txt

doc/lamsonproject.org/output/docs/filtering_spam.html

doc/lamsonproject.org/output/docs/filtering_spam.txt

doc/lamsonproject.org/output/docs/getting_started.html

doc/lamsonproject.org/output/docs/getting_started.txt

doc/lamsonproject.org/output/docs/index.html

doc/lamsonproject.org/output/docs/index.txt

doc/lamsonproject.org/output/docs/introduction_to_finite_state_machines.html

doc/lamsonproject.org/output/docs/introduction_to_finite_state_machines.txt

doc/lamsonproject.org/output/docs/lamson_commands.html

doc/lamsonproject.org/output/docs/lamson_commands.txt

doc/lamsonproject.org/output/docs/lamson_virtual_env.html

doc/lamsonproject.org/output/docs/lamson_virtual_env.txt

doc/lamsonproject.org/output/docs/primary_vs_secondary_registration.html

doc/lamsonproject.org/output/docs/primary_vs_secondary_registration.txt

doc/lamsonproject.org/output/docs/template.html

doc/lamsonproject.org/output/docs/writing_a_state_storage.html

doc/lamsonproject.org/output/docs/writing_a_state_storage.txt

doc/lamsonproject.org/output/download.html

doc/lamsonproject.org/output/download.txt

doc/lamsonproject.org/output/favicon.ico

doc/lamsonproject.org/output/feed.xml

doc/lamsonproject.org/output/home_template.html

doc/lamsonproject.org/output/images

doc/lamsonproject.org/output/images/bg.gif

doc/lamsonproject.org/output/images/buttons.png

doc/lamsonproject.org/output/images/capbl.gif

doc/lamsonproject.org/output/images/capbr.gif

doc/lamsonproject.org/output/images/captl.gif

doc/lamsonproject.org/output/images/captr.gif

doc/lamsonproject.org/output/images/dashed.gif

doc/lamsonproject.org/output/images/gradient_bg.gif

doc/lamsonproject.org/output/images/lamson.png

doc/lamsonproject.org/output/images/li_dot1.gif

doc/lamsonproject.org/output/images/li_dot2.gif

doc/lamsonproject.org/output/images/menuact.gif

doc/lamsonproject.org/output/images/menubg.gif

doc/lamsonproject.org/output/images/menucapbr.gif

doc/lamsonproject.org/output/images/menucaptr.gif

doc/lamsonproject.org/output/images/metanavbg.gif

doc/lamsonproject.org/output/images/metanavbl.gif

doc/lamsonproject.org/output/images/metanavbr.gif

doc/lamsonproject.org/output/images/metanavtl.gif

doc/lamsonproject.org/output/images/metanavtr.gif

doc/lamsonproject.org/output/images/quotebg.gif

doc/lamsonproject.org/output/images/rss.png

doc/lamsonproject.org/output/images/smenubg.gif

doc/lamsonproject.org/output/images/smenucapbr.gif

doc/lamsonproject.org/output/images/smenucaptr.gif

doc/lamsonproject.org/output/images/testimg.gif

doc/lamsonproject.org/output/index.html

doc/lamsonproject.org/output/index.txt

doc/lamsonproject.org/output/lists

doc/lamsonproject.org/output/lists/index.html

doc/lamsonproject.org/output/lists/index.txt

doc/lamsonproject.org/output/mailocalypse.py

doc/lamsonproject.org/output/prettify.css

doc/lamsonproject.org/output/prettify.js

doc/lamsonproject.org/output/releases

doc/lamsonproject.org/output/releases/index.html

doc/lamsonproject.org/output/releases/index.txt

doc/lamsonproject.org/output/styles

doc/lamsonproject.org/output/styles/global.css

doc/lamsonproject.org/output/styles/reset.css

doc/lamsonproject.org/output/videos

doc/lamsonproject.org/output/videos/index.html

doc/lamsonproject.org/output/videos/index.txt

doc/lamsonproject.org/template.html

doc/lamsonproject.org/webgen.py

doc/report.txt

doc/todo.txt

doc/todo.txt.bak

examples

examples/librelist

examples/librelist/README

examples/librelist/app

examples/librelist/app/__init__.py

examples/librelist/app/handlers

examples/librelist/app/handlers/__init__.py

examples/librelist/app/handlers/admin.py

examples/librelist/app/handlers/bounce.py

examples/librelist/app/model

examples/librelist/app/model/__init__.py

examples/librelist/app/model/archive.py

examples/librelist/app/model/bounce.py

examples/librelist/app/model/confirmation.py

examples/librelist/app/model/mailinglist.py

examples/librelist/app/model/state_storage.py

examples/librelist/app/templates

examples/librelist/app/templates/mail

examples/librelist/app/templates/mail/bad_list_name.msg

examples/librelist/app/templates/mail/confirmation.msg

examples/librelist/app/templates/mail/create_confirmation.msg

examples/librelist/app/templates/mail/subscribed.msg

examples/librelist/app/templates/mail/unbounce_confirm.msg

examples/librelist/app/templates/mail/unsubscribed.msg

examples/librelist/app/templates/mail/we_have_disabled_you.msg

examples/librelist/app/templates/mail/you_are_unbounced.msg

examples/librelist/app/templates/mail/you_bounced.msg

examples/librelist/config

examples/librelist/config/__init__.py

examples/librelist/config/boot.py

examples/librelist/config/logging.conf

examples/librelist/config/settings.py

examples/librelist/config/test_logging.conf

examples/librelist/config/testing.py

examples/librelist/deploy

examples/librelist/deploy/backup

examples/librelist/deploy/env

examples/librelist/deploy/env/testing

examples/librelist/deploy/forward

examples/librelist/deploy/lib

examples/librelist/deploy/lib/log4sh

examples/librelist/deploy/lib/migrate

examples/librelist/deploy/lib/shunit2

examples/librelist/deploy/log4sh.properties

examples/librelist/deploy/migrations

examples/librelist/deploy/migrations/001

examples/librelist/deploy/migrations/002

examples/librelist/deploy/migrations/003

examples/librelist/deploy/migrations/004

examples/librelist/deploy/rollback

examples/librelist/deploy/scripts

examples/librelist/deploy/scripts/json_convert.py

examples/librelist/lib

examples/librelist/lib/__init__.py

examples/librelist/lib/metaphone.py

examples/librelist/muttrc

examples/librelist/tests

examples/librelist/tests/bounce.msg

examples/librelist/tests/handlers

examples/librelist/tests/handlers/__init__.py

examples/librelist/tests/handlers/admin_tests.py

examples/librelist/tests/handlers/bounce_tests.py

examples/librelist/tests/lots_of_headers.msg

examples/librelist/tests/model

examples/librelist/tests/model/__init__.py

examples/librelist/tests/model/archive_tests.py

examples/librelist/tests/model/bounce_tests.py

examples/librelist/tests/model/confirmation_tests.py

examples/librelist/tests/model/mailinglist_tests.py

examples/librelist/tests/model/state_storage_tests.py

examples/librelist/tests/templates

examples/librelist/tests/templates/__init__.py

examples/librelist/webapp

examples/librelist/webapp/__init__.py

examples/librelist/webapp/librelist

examples/librelist/webapp/librelist/__init__.py

examples/librelist/webapp/librelist/admin.py

examples/librelist/webapp/librelist/migrations

examples/librelist/webapp/librelist/migrations/0001_initial.py

examples/librelist/webapp/librelist/migrations/__init__.py

examples/librelist/webapp/librelist/models.py

examples/librelist/webapp/librelist/urls.py

examples/librelist/webapp/librelist/views.py

examples/librelist/webapp/manage.py

examples/librelist/webapp/settings.py

examples/librelist/webapp/urls.py

examples/osb

examples/osb/README

examples/osb/app

examples/osb/app/__init__.py

examples/osb/app/data

examples/osb/app/data/about.html

examples/osb/app/data/help.html

examples/osb/app/data/jquery.js

examples/osb/app/data/spam.html

examples/osb/app/data/styles

examples/osb/app/data/styles/main.css

examples/osb/app/data/styles/reset.css

examples/osb/app/handlers

examples/osb/app/handlers/__init__.py

examples/osb/app/handlers/comment.py

examples/osb/app/handlers/index.py

examples/osb/app/handlers/post.py

examples/osb/app/model

examples/osb/app/model/__init__.py

examples/osb/app/model/comment.py

examples/osb/app/model/confirmation.py

examples/osb/app/model/post.py

examples/osb/app/templates

examples/osb/app/templates/mail

examples/osb/app/templates/mail/comment_confirm.msg

examples/osb/app/templates/mail/comment_submitted.msg

examples/osb/app/templates/mail/confirm.msg

examples/osb/app/templates/mail/deleted.msg

examples/osb/app/templates/mail/page_ready.msg

examples/osb/app/templates/mail/welcome.msg

examples/osb/app/templates/web

examples/osb/app/templates/web/base.html

examples/osb/app/templates/web/comments.html

examples/osb/app/templates/web/index.html

examples/osb/app/templates/web/post.html

examples/osb/config

examples/osb/config/__init__.py

examples/osb/config/boot.py

examples/osb/config/forward.py

examples/osb/config/logging.conf

examples/osb/config/queue.py

examples/osb/config/settings.py

examples/osb/config/test_logging.conf

examples/osb/config/testing.py

examples/osb/doc

examples/osb/doc/done.txt

examples/osb/doc/report.txt

examples/osb/doc/todo.txt

examples/osb/muttrc

examples/osb/pendingrc

examples/osb/tests

examples/osb/tests/handlers

examples/osb/tests/handlers/__init__.py

examples/osb/tests/handlers/comments_tests.py

examples/osb/tests/handlers/index_tests.py

examples/osb/tests/handlers/post_tests.py

examples/osb/tests/model

examples/osb/tests/model/__init__.py

examples/osb/tests/model/comment.py

examples/osb/tests/model/confirmation_tests.py

examples/osb/tests/model/post_tests.py

examples/osb/tests/spam

examples/osb/tests/templates

examples/osb/tests/templates/__init__.py

examples/osb/tests/templates/osb_tests.py

lamson

lamson.egg-info

lamson.egg-info/PKG-INFO

lamson.egg-info/SOURCES.txt

lamson.egg-info/dependency_links.txt

lamson.egg-info/requires.txt

lamson.egg-info/top_level.txt

lamson/__init__.py

lamson/args.py

lamson/bounce.py

lamson/commands.py

lamson/data

lamson/data/prototype

lamson/data/prototype.zip

lamson/data/prototype/README

lamson/data/prototype/app

lamson/data/prototype/app/__init__.py

lamson/data/prototype/app/handlers

lamson/data/prototype/app/handlers/__init__.py

lamson/data/prototype/app/handlers/sample.py

lamson/data/prototype/app/model

lamson/data/prototype/app/model/__init__.py

lamson/data/prototype/config

lamson/data/prototype/config/__init__.py

lamson/data/prototype/config/boot.py

lamson/data/prototype/config/logging.conf

lamson/data/prototype/config/settings.py

lamson/data/prototype/config/test_logging.conf

lamson/data/prototype/config/testing.py

lamson/data/prototype/muttrc

lamson/data/prototype/tests

lamson/data/prototype/tests/handlers

lamson/data/prototype/tests/handlers/__init__.py

lamson/data/prototype/tests/handlers/open_relay_tests.py

lamson/data/prototype/tests/model

lamson/data/prototype/tests/model/__init__.py

lamson/data/prototype/tests/templates

lamson/data/prototype/tests/templates/__init__.py

lamson/encoding.py

lamson/handlers

lamson/handlers/__init__.py

lamson/handlers/forward.py

lamson/handlers/log.py

lamson/handlers/queue.py

lamson/html.py

lamson/mail.py

lamson/queue.py

lamson/routing.py

lamson/server.py

lamson/spam.py

lamson/testing.py

lamson/utils.py

lamson/version.py

lamson/view.py

scripts

scripts/dist.vel

scripts/sample.vel

scripts/setup.py

scripts/testing.vel

setup.cfg

setup.py

tests

tests/borked.msg

tests/bounce.msg

tests/config

tests/config/__init__.py

tests/config/logging.conf

tests/config/settings.py

tests/config/testing.py

tests/lamson.png

tests/lamson_tests

tests/lamson_tests/__init__.py

tests/lamson_tests/args_tests.py

tests/lamson_tests/bounce_filtered_mod.py

tests/lamson_tests/bounce_tests.py

tests/lamson_tests/command_tests.py

tests/lamson_tests/encoding_tests.py

tests/lamson_tests/handler_tests.py

tests/lamson_tests/html_tests.py

tests/lamson_tests/message_tests.py

tests/lamson_tests/queue_tests.py

tests/lamson_tests/routing_tests.py

tests/lamson_tests/server_tests.py

tests/lamson_tests/simple_fsm_mod.py

tests/lamson_tests/spam_filtered_mod.py

tests/lamson_tests/spam_tests.py

tests/lamson_tests/templates

tests/lamson_tests/templates/content.markdown

tests/lamson_tests/templates/html_test.html

tests/lamson_tests/templates/style.css

tests/lamson_tests/templates/template.html

tests/lamson_tests/templates/template.txt

tests/lamson_tests/templates/unicode.html

tests/lamson_tests/testing_tests.py

tests/lamson_tests/utils_tests.py

tests/lamson_tests/view_tests.py

tests/signed.msg

tests/spam

tests/statesdb.db

Show diffs side-by-side

added added

removed removed

lamson/encoding.py

"""

Lamson takes the policy that email it receives is most likely complete garbage

using bizarre pre-Unicode formats that are irrelevant and unnecessary in today's

modern world. These emails must be cleansed of their unholy stench of

randomness and turned into something nice and clean that a regular Python

programmer can work with: unicode.

That's the receiving end, but on the sending end Lamson wants to make the world

better by not increasing the suffering. To that end, Lamson will canonicalize

all email it sends to be ascii or utf-8 (whichever is simpler and works to

encode the data). When you get an email from Lamson, it is a pristine easily

parseable clean unit of goodness you can count on.

To accomplish these tasks, Lamson goes back to basics and asserts a few simple

rules on each email it receives:

1) NO ENCODING IS TRUSTED, NO LANGUAGE IS SACRED, ALL ARE SUSPECT.

2) Python wants Unicode, it will get Unicode.

3) Any email that CANNOT become Unicode, CANNOT be processed by Lamson or

Python.

4) Email addresses are ESSENTIAL to Lamson's routing and security, and therefore

will be canonicalized and properly encoded.

5) Lamson will therefore try to "upgrade" all email it receives to Unicode

internally, and clean all email addresses.

6) It does this by decoding all codecs, and if the codec LIES, then it will

attempt to statistically detect the codec using chardet.

7) If it can't detect the codec, and the codec lies, then the email is bad.

8) All text bodies and attachments are then converted to Python unicode in the

same way as the headers.

9) All other attachments are converted to raw strings as-is.

Once Lamson has done this, your Python handler can now assume that all

MailRequest objects are happily unicode enabled and ready to go. The rule is:

IF IT CANNOT BE UNICODE, THEN PYTHON CANNOT WORK WITH IT.

On the outgoing end (when you send a MailResponse), Lamson tries to create the

email it wants to receive by canonicalizing it:

1) All email will be encoded in the simplest cleanest way possible without

losing information.

2) All headers are converted to 'ascii', and if that doesn't work, then 'utf-8'.

3) All text/* attachments and bodies are converted to ascii, and if that doesn't

work, 'utf-8'.

4) All other attachments are left alone.

5) All email addresses are normalized and encoded if they have not been already.

The end result is an email that has the highest probability of not containing

any obfuscation techniques, hidden characters, bad characters, improper

formatting, invalid non-characterset headers, or any of the other billions of

things email clients do to the world. The output rule of Lamson is:

ALL EMAIL IS ASCII FIRST, THEN UTF-8, AND IF CANNOT BE EITHER THOSE IT WILL

NOT BE SENT.

Following these simple rules, this module does the work of converting email

to the canonical format and sending the canonical format. The code is

probably the most complex part of Lamson since the job it does is difficult.

Test results show that Lamson can safely canonicalize most email from any

culture (not just English) to the canonical form, and that if it can't then the

email is not formatted right and/or spam.

If you find an instance where this is not the case, then submit it to the

project as a test case.

"""

import string

from email.charset import Charset

import chardet

import re

import email

from email import encoders

from email.mime.base import MIMEBase

from email.utils import parseaddr

import sys

# Charset handed to email.charset.Charset when a header cannot be sent as
# plain ASCII (see header_to_mime_encoding).
DEFAULT_ENCODING = "utf-8"

# Default ``errors`` argument of guess_encoding_and_decode.
DEFAULT_ERROR_HANDLING = "strict"

# Headers that from_message parses into MailBase.content_encoding instead of
# copying into the ordinary header dictionary.
CONTENT_ENCODING_KEYS = set((
    'Content-Type',
    'Content-Transfer-Encoding',
    'Content-Disposition',
    'Mime-Version',
))

# Header parameters that parse_parameter_header strips from its result.
CONTENT_ENCODING_REMOVED_PARAMS = ['boundary']

REGEX_OPTS = re.IGNORECASE | re.MULTILINE

# Opening of an encoded word ("=?charset?b?" or "=?charset?q?"); the groups
# capture the charset name and the B/Q encoding flag.
ENCODING_REGEX = re.compile(r"\=\?([a-z0-9\-]+?)\?([bq])\?", REGEX_OPTS)

# Closing "?=" of an encoded word.
ENCODING_END_REGEX = re.compile(r"\?=", REGEX_OPTS)

# A newline followed by whitespace; _scan uses it to flag a token as
# "continued" on the next line.
INDENT_REGEX = re.compile(r"\n\s+")

class EncodingError(Exception):
    """
    Raised by this module when an email, header or body cannot be decoded
    or canonically encoded.
    """

class MailBase(object):
    """
    MailBase is the basis of lamson.mail and holds the raw pieces of one
    email (or one part of an email): its headers, its body, its child parts
    and the parsed content-encoding headers.  You can do all of your email
    processing with this class directly, it is just more raw.

    Header access through the mapping protocol (``mail[key]``,
    ``key in mail``, ``del mail[key]``) always goes through
    normalize_header, so lookups do not depend on the case of the key.
    """

    def __init__(self, items=()):
        # ordinary headers; ``items`` is anything dict() accepts
        self.headers = dict(items)
        # child MailBase objects, in attachment order
        self.parts = []
        # None means "no body at all"; an empty string is still a body
        self.body = None
        # each entry is a (value, params_dict) pair
        self.content_encoding = {
            'Content-Type': (None, {}),
            'Content-Disposition': (None, {}),
            'Content-Transfer-Encoding': (None, {}),
        }

    def __getitem__(self, key):
        """Returns the header value, or None when the header is not set."""
        name = normalize_header(key)
        return self.headers.get(name, None)

    def __len__(self):
        """Number of ordinary headers."""
        return len(self.headers)

    def __iter__(self):
        """Iterates over the (normalized) header names."""
        return iter(self.headers)

    def __contains__(self, key):
        name = normalize_header(key)
        return name in self.headers

    def __setitem__(self, key, value):
        name = normalize_header(key)
        self.headers[name] = value

    def __delitem__(self, key):
        name = normalize_header(key)
        del self.headers[name]

    def __nonzero__(self):
        """True when there is a body (even an empty one) or any header."""
        has_body = self.body != None
        return has_body or len(self.headers)

    def keys(self):
        """Returns the header names as a sorted list."""
        return sorted(self.headers)

    def attach_file(self, filename, data, ctype, disposition):
        """
        Appends a raw attachment whose Content-Type carries a ``name``
        parameter and whose Content-Disposition carries a ``filename``
        parameter, both set to the given filename.
        """
        assert filename, "You can't attach a file without a filename."

        attachment = MailBase()
        attachment.body = data
        attachment.content_encoding['Content-Type'] = (
            ctype, {'name': filename})
        attachment.content_encoding['Content-Disposition'] = (
            disposition, {'filename': filename})
        self.parts.append(attachment)

    def attach_text(self, data, ctype):
        """
        Appends a simpler part that has only a body and a Content-Type,
        with no filename.
        """
        text_part = MailBase()
        text_part.body = data
        text_part.content_encoding['Content-Type'] = (ctype, {})
        self.parts.append(text_part)

    def walk(self):
        """
        Yields every part below this one, depth first: each child is
        followed immediately by its own descendants.
        """
        for child in self.parts:
            yield child
            for descendant in child.walk():
                yield descendant

164

165

166

class MIMEPart(MIMEBase):
    """
    A reimplementation of nearly everything in email.mime to be more useful
    for actually attaching things.  Rather than one class for every type of
    thing you'd encode, there's just this one, and it figures out how to
    encode what you ask it.
    """

    def __init__(self, type, **params):
        """
        ``type`` is a "maintype/subtype" string; anything that does not
        split into exactly two pieces on '/' raises ValueError.  ``params``
        are passed straight through to MIMEBase as content-type parameters.
        """
        self.maintype, self.subtype = type.split('/')
        MIMEBase.__init__(self, self.maintype, self.subtype, **params)

    def add_text(self, content):
        """
        Sets ``content`` as the payload in canonical form: encoded as ascii
        when that works, otherwise as utf-8, with the charset set to match.
        """
        try:
            encoded = content.encode('ascii')
            charset = 'ascii'
        except UnicodeError:
            encoded = content.encode('utf-8')
            charset = 'utf-8'

        self.set_payload(encoded, charset=charset)

    def extract_payload(self, mail):
        """
        Copies the body of the given MailBase into this part.

        Does nothing when ``mail.body`` is None.  text/* bodies go through
        add_text; everything else is set as the raw payload and base64
        encoded, with the Content-Disposition replicated when one is set.
        """
        if mail.body is None:
            return  # only None, '' is still ok

        ctype, ctype_params = mail.content_encoding['Content-Type']
        cdisp, cdisp_params = mail.content_encoding['Content-Disposition']

        assert ctype, "Extract payload requires that mail.content_encoding have a valid Content-Type."

        if ctype.startswith("text/"):
            self.add_text(mail.body)
        else:
            if cdisp:
                # replicate the content-disposition settings
                self.add_header('Content-Disposition', cdisp, **cdisp_params)

            self.set_payload(mail.body)
            encoders.encode_base64(self)

    def __repr__(self):
        # maintype comes first so the output reads like a real content
        # type ('text/plain'), not the reversed 'plain/text'
        return "<MIMEPart '%s/%s': %r, %r, multipart=%r>" % (
            self.maintype,
            self.subtype,
            self['Content-Type'],
            self['Content-Disposition'],
            self.is_multipart())

211

212

def from_message(message):
    """
    Given a MIMEBase or similar Python email API message object, this
    canonicalizes it and gives you back a pristine MailBase.  Multipart
    messages are converted recursively, one MailBase per sub-message.
    If it can't then it raises an EncodingError.
    """
    mail = MailBase()

    # the content headers are split into (value, params) pairs and kept
    # apart from the ordinary headers
    for header in CONTENT_ENCODING_KEYS:
        mail.content_encoding[header] = parse_parameter_header(message, header)

    # everything that is not a content header gets decoded and copied over
    for header in message.keys():
        if normalize_header(header) in mail.content_encoding:
            continue
        mail[header] = header_from_mime_encoding(message[header])

    decode_message_body(mail, message)

    if not message.is_multipart():
        return mail

    # recursively go through each subpart and decode it in the same way
    for child in message.get_payload():
        if child != message:  # skip the multipart message itself
            mail.parts.append(from_message(child))

    return mail

239

240

241

242

def to_message(mail):
    """
    Given a MailBase message, this constructs a MIMEPart that is
    canonicalized for use with the Python email API.

    A missing Content-Type defaults to multipart/mixed when the mail has
    parts and to text/plain otherwise; the chosen value is written back to
    mail.content_encoding.  Raises EncodingError when MIMEPart rejects the
    Content-Type parameters with a TypeError.
    """
    ctype, params = mail.content_encoding['Content-Type']

    if ctype:
        if mail.parts:
            assert ctype.startswith("multipart") or ctype.startswith("message"), "Content type should be multipart or message, not %r" % ctype
    elif mail.parts:
        ctype = 'multipart/mixed'
    else:
        ctype = 'text/plain'

    # adjust the content type according to what it should be now
    mail.content_encoding['Content-Type'] = (ctype, params)

    try:
        out = MIMEPart(ctype, **params)
    except TypeError:
        raise EncodingError("Content-Type malformed, not allowed: %r; %r" %
                            (ctype, params))

    for name in mail.keys():
        out[name.encode('ascii')] = header_to_mime_encoding(mail[name])

    out.extract_payload(mail)

    # convert and attach the children the same way
    for child in mail.parts:
        out.attach(to_message(child))

    return out

277

278

279

def to_string(mail, envelope_header=True):
    """
    Returns a canonicalized email string you can use to send or store
    somewhere.  ``envelope_header`` is passed on to the message's
    as_string().
    """
    message = to_message(mail)
    return message.as_string(envelope_header)

283

284

285

def from_string(data):
    """Parses a string as an email and cleans it up into a MailBase."""
    message = email.message_from_string(data)
    return from_message(message)

288

289

290

def to_file(mail, fileobj):
    """Writes the canonicalized form of the message to the given file."""
    canonical = to_string(mail)
    fileobj.write(canonical)

293

294

def from_file(fileobj):
    """Reads an email from the file object and cleans it up into a MailBase."""
    message = email.message_from_file(fileobj)
    return from_message(message)

297

298

299

def normalize_header(header):
    """
    Returns the header name in canonical case: lowercased, then each
    '-' separated word capitalized ("content-TYPE" -> "Content-Type").
    """
    words = header.lower().split('-')
    return '-'.join(word.capitalize() for word in words)

301

302

303

def parse_parameter_header(message, header):
    """
    Splits a parameterized header of ``message`` into ``(value, params)``.

    The value is the first element of the first entry returned by
    message.get_params(); the remaining entries become the params dict,
    minus any key listed in CONTENT_ENCODING_REMOVED_PARAMS.  Returns
    ``(None, {})`` when get_params() yields nothing.
    """
    params = message.get_params(header=header)
    if not params:
        return None, {}

    value = params[0][0]
    params_dict = dict(params[1:])

    for unwanted in CONTENT_ENCODING_REMOVED_PARAMS:
        params_dict.pop(unwanted, None)

    return value, params_dict

315

316

def decode_message_body(mail, message):
    """
    Stores the transfer-decoded payload of ``message`` in ``mail.body``.

    A non-empty body is additionally decoded with attempt_decoding when
    the mail has no Content-Type (as ascii) or a text/* Content-Type (using
    its ``charset`` parameter, ascii if absent).  Any other content type is
    left exactly as get_payload(decode=True) returned it.
    """
    mail.body = message.get_payload(decode=True)
    if not mail.body:
        return

    ctype, params = mail.content_encoding['Content-Type']

    if not ctype:
        mail.body = attempt_decoding('ascii', mail.body)
    elif ctype.startswith("text/"):
        declared = params.get('charset', 'ascii')
        mail.body = attempt_decoding(declared, mail.body)
    # anything else is some binary codec: already decoded, left alone

332

333

334

335

def header_to_mime_encoding(value):
    """
    Encodes a header value for sending: plain ascii when the value allows
    it, otherwise a DEFAULT_ENCODING encoded header.  A falsy value comes
    back as the empty string.
    """
    if not value:
        return ""

    try:
        return value.encode("ascii")
    except UnicodeEncodeError:
        pass

    encoder = Charset(DEFAULT_ENCODING)

    if '@' in value:
        # this could have an email address, so only the display name is
        # encoded and the address itself is passed through untouched
        name, address = parseaddr(value)
        encoded_name = encoder.header_encode(name.encode("utf-8"))
        return '"%s" <%s>' % (encoded_name, address)

    return encoder.header_encode(value.encode("utf-8"))

349

350

351

def header_from_mime_encoding(header):
    """
    Decodes a header with properly_decode_header.  None is returned
    unchanged, and a list is decoded element by element into a new list.
    """
    if header is None:
        return header

    if type(header) == list:
        return [properly_decode_header(item) for item in header]

    return properly_decode_header(header)

358

359

360

361

362

def guess_encoding_and_decode(original, data, errors=DEFAULT_ERROR_HANDLING):
    """
    Decodes ``data`` using whatever charset chardet detects for it.

    ``original`` is the charset the email claimed; it is only used in
    error messages.  ``errors`` is handed to ``data.decode``.

    Raises EncodingError when chardet reports no encoding, when decoding
    with the detected encoding fails, or when the detected encoding is not
    a codec Python knows.
    """
    # bound before the try so the error handler can always report it, even
    # when the failure happens before chardet has returned anything
    charset = None

    try:
        charset = chardet.detect(str(data))

        if not charset['encoding']:
            raise EncodingError("Header claimed %r charset, but detection found none. Decoding failed." % original)

        return data.decode(charset["encoding"], errors)
    except (UnicodeError, LookupError):
        # LookupError: chardet named a codec this Python does not ship.
        # sys.exc_info() keeps this handler valid on every Python version.
        exc = sys.exc_info()[1]
        raise EncodingError("Header lied and claimed %r charset, guessing said "
                            "%r charset, neither worked so this is a bad email: "
                            "%s." % (original, charset, exc))

374

375

376

def attempt_decoding(charset, dec):
    """
    Returns ``dec`` as unicode.  Unicode input is returned untouched;
    anything else is decoded with ``charset``, falling back to
    guess_encoding_and_decode when that charset fails or is unknown.
    """
    if isinstance(dec, unicode):
        # it's already unicode so there is nothing to do
        return dec

    try:
        return dec.decode(charset)
    except (UnicodeError, LookupError):
        # UnicodeError: the charset lies; LookupError: it is not a real
        # codec.  Either way try to detect the real one.
        return guess_encoding_and_decode(charset, dec)

389

390

391

def apply_charset_to_header(charset, encoding, data):
    """
    Decodes the data of one encoded word: first the transfer encoding
    ('b'/'B' for base64, 'q'/'Q' for quoted-printable), then the charset
    via attempt_decoding.  Any other ``encoding`` raises EncodingError.
    """
    if encoding in ('b', 'B'):
        decode = email.base64mime.decode
    elif encoding in ('q', 'Q'):
        decode = email.quoprimime.header_decode
    else:
        raise EncodingError("Invalid header encoding %r should be 'Q' or 'B'." % encoding)

    raw = decode(data.encode('ascii'))
    return attempt_decoding(charset, raw)

400

401

402

403

404

def _match(data, pattern, pos):
    """
    Searches ``data`` for ``pattern`` starting at ``pos``.

    Returns ``(left, groups, end)``: the text between ``pos`` and the
    match, the match groups, and the index just past the match.  Without a
    match it returns the rest of the data, None and -1.
    """
    found = pattern.search(data, pos)

    if found is None:
        return data[pos:], None, -1

    return data[pos:found.start()], found.groups(), found.end()

413

414

415

416

def _tokenize(data, next):
    """
    Reads one token from ``data`` starting at index ``next``.

    Returns ``(left, enc_header, enc_data, next)``: the plain text before
    the encoded word, the groups matched by ENCODING_REGEX, the text up to
    the ENCODING_END_REGEX terminator, and the index to resume from.  When
    no encoded word is found, enc_header and enc_data are None and the
    index is -1.
    """
    left, enc_header, next = _match(data, ENCODING_REGEX, next)

    if next == -1:
        return left, enc_header, None, next

    enc_data, _, next = _match(data, ENCODING_END_REGEX, next)
    return left, enc_header, enc_data, next

425

426

427

def _scan(data):
    """
    Generates ``(left, enc_header, enc_data, continued)`` for each token
    _tokenize finds in ``data``.  ``continued`` is True when the token is
    directly followed by a newline plus whitespace (INDENT_REGEX).
    Scanning stops after the first token whose resume index is -1.
    """
    next = 0

    while next != -1:
        left, enc_header, enc_data, next = _tokenize(data, next)
        continued = bool(next != -1 and INDENT_REGEX.match(data, next))
        yield left, enc_header, enc_data, continued

439

440

441

def _parse_charset_header(data):
    """
    Generates the decoded unicode pieces of a header, in order.

    Tokens come from _scan.  Continued tokens that share the charset and
    encoding of the current one have their data merged into it before
    decoding; a token that cannot be merged is held back and processed on
    the next pass of the outer loop.
    """
    scanner = _scan(data)
    pending = None  # token held back from the previous pass, if any

    try:
        while True:
            if pending:
                left, enc_header, enc_data, continued = pending
                pending = None
            else:
                left, enc_header, enc_data, continued = scanner.next()

            while continued:
                more_left, more_header, more_data, continued = scanner.next()

                if not more_header:
                    # trailing plain text; keep it for the next pass with
                    # the leading whitespace collapsed to a single space
                    assert not more_data, "Parsing error, give Zed this: %r" % data
                    pending = (" " + more_left.lstrip(), more_header,
                               more_data, continued)
                elif (more_header[0] == enc_header[0]
                      and more_header[1] == enc_header[1]):
                    # same charset and encoding: one logical encoded word
                    enc_data += more_data
                else:
                    # odd case, it's continued but not from the same
                    # encoding, so stack it for the next pass and drop
                    # the \n\s+ in between
                    pending = ('', more_header, more_data, continued)
                    break

            if left:
                yield attempt_decoding('ascii', left)

            if enc_header:
                yield apply_charset_to_header(enc_header[0], enc_header[1],
                                              enc_data)

    except StopIteration:
        # the scanner ran dry, so the header is fully consumed
        pass

475

476

477

def properly_decode_header(header):
    """Returns the header as one unicode string with encoded words decoded."""
    pieces = _parse_charset_header(header)
    return u"".join(pieces)

479

480

Older »