1
from mailbox import PortableUnixMailbox
2
from email import message_from_file
3
from email.errors import MessageParseError
4
from email.utils import parseaddr, parsedate
5
from calendar import timegm
6
from datetime import datetime
11
from paths import mbox_dir, stamp_dir
13
# Sentinel that message_factory() returns for a message it cannot parse;
# scan() tests for it by identity in order to skip such messages.
MESSAGE_NOT_PARSEABLE = object()
15
class MailResult(object):
    """Details extracted from a single mailing-list message.

    ``msg_id``, ``package`` and ``version`` are supplied by the caller.
    The remaining attributes start out as ``None`` and are assigned by
    ``scan()`` once the message's date and payload have been inspected.
    """

    def __init__(self, msg_id, package=None, version=None):
        # scan() reads msg_id back from the result, so it has to be stored.
        self.msg_id = msg_id
        self.package = package
        self.version = version
        # Assigned later by scan(); declared here so that every instance
        # exposes the same attributes even when scan() leaves one unset
        # (e.g. the signer fields when no Signed-By entry is found).
        self.timestamp = None
        self.name = None
        self.email = None
        self.signer_name = None
        self.signer_email = None
33
def last_msg_id(mbox):
    """Return the message id recorded for *mbox* by a previous run.

    The id is read from ``<stamp_dir>/<mbox>.stamp`` with surrounding
    whitespace removed.  Returns ``None`` when no stamp file exists.
    """
    stamp_file = os.path.join(stamp_dir, mbox + ".stamp")
    if not os.path.exists(stamp_file):
        return None
    # Use a context manager so the handle is closed deterministically.
    with open(stamp_file) as stamp:
        return stamp.read().strip()
39
def write_last_msg_id(mbox, msg_id):
    """Record *msg_id* as the last message processed for *mbox*.

    The id is written to ``<stamp_dir>/<mbox>.stamp``; ``stamp_dir`` is
    created first if it does not exist yet.
    """
    if not os.path.isdir(stamp_dir):
        try:
            os.makedirs(stamp_dir)
        except OSError:
            # Another process may have created the directory between the
            # check and the call; only re-raise if it is really missing.
            if not os.path.isdir(stamp_dir):
                raise
    stamp_file = os.path.join(stamp_dir, mbox + ".stamp")
    # Mode "w" truncates an existing stamp, so it does not need to be
    # removed beforehand; the context manager flushes and closes it.
    with open(stamp_file, "w") as stamp:
        stamp.write(msg_id)
49
# Fetch the archive for the list named `mbox` by shelling out to wget and
# return a tuple of (local archive path, last_msg_id(mbox)).
def download_archive(mbox):
50
# NOTE(review): the statement guarded by this check is not visible here;
# presumably it creates mbox_dir -- confirm.
if not os.path.exists(mbox_dir):
52
url = "http://daniel.holba.ch/lists/%s.mbox" % (mbox)
53
# Local target path: mbox_dir joined with the basename of the URL.
filename = os.path.join(mbox_dir, os.path.basename(url))
56
# NOTE(review): `args` and `pwd`, used in the command below, are not
# assigned anywhere in the visible lines; presumably `args` holds wget
# options that depend on whether a local copy already exists and `pwd`
# is the starting directory -- confirm.
if os.path.exists(filename):
58
# Runs through the shell: cd into mbox_dir, wget the url, cd to pwd.
# os.system() executes the command in a subshell, so the cd calls do not
# change this process's working directory.  The exit status is discarded,
# so a failed download is not reported to the caller.
# NOTE(review): `mbox` is interpolated into the shell command unescaped.
os.system("cd %s; wget %s %s; cd %s" % (mbox_dir, args, url, pwd))
59
# The path is recomputed here; it is the same value as `filename` above.
return (os.path.join(mbox_dir, os.path.basename(url)), last_msg_id(mbox))
61
def message_factory(fp):
    """Parse one message from the file-like object *fp*.

    Returns the parsed message, or the ``MESSAGE_NOT_PARSEABLE`` sentinel
    when the email parser raises ``MessageParseError``.
    """
    try:
        return message_from_file(fp)
    except MessageParseError:
        # Don't return None since that will stop the mailbox iterator.
        return MESSAGE_NOT_PARSEABLE
69
def _find_person(payload, field):
    """Return the person named by the first usable ``<field>: ...`` entry.

    The result is a ``(name, email)`` tuple for the ``Name <email>`` form,
    ``["", value]`` for the degraded forms, or ``None`` when *field* does
    not occur with a value.
    """
    # Preferred form: "Field: Full Name <address>".  re.DOTALL is left out
    # on purpose: with it, an entry lacking "<...>" would match across the
    # line break and swallow the next header's address.
    full = re.findall(r"%s: (.*?) <(.*?)>" % field, payload)
    if full:
        return full[0]
    # Degraded forms, most specific first: a bare "<address>", then
    # whatever follows the field name on the same line.  The last pattern
    # is greedy to end of line; a trailing lazy group would always
    # capture the empty string.
    for pattern in (r"%s: <(.*?)>", r"%s: (.*)"):
        found = re.findall(pattern % field, payload)
        if found:
            value = found[0].strip()
            if value:
                return ["", value]
    return None


def get_persons_from_payload(pl):
    """Extract the ``Changed-By`` and ``Signed-By`` persons from *pl*.

    Returns a ``(changed_by, signed_by)`` tuple.  Each element is a
    two-item sequence ``(name, email)``, or ``None`` when the matching
    entry is absent from the payload text.
    """
    if not pl:
        return (None, None)
    changed_by = _find_person(pl, "Changed-By")
    signed_by = _find_person(pl, "Signed-By")
    return (changed_by, signed_by)
94
def find_in_subject(subject):
    """Pull the package name and version out of an upload subject line.

    The patterns are tried in order and the first match wins.  Returns
    ``[package, version]`` with surrounding whitespace and commas
    removed, or ``(None, None)`` when no pattern matches.
    """
    # NOTE(review): only these three patterns are visible in this view of
    # the file; confirm the list is complete.
    regexes = (
        r"\[ubuntu\/.*?\]\s+(.*)\s+(.+?)\s+\(Accepted\)",
        r"Accepted\:\s+(.*?)\s+(.*)\s+\(source\)",
        r"Accepted\s+(.*?)\s+(.*)\s+\(source\)",
    )
    for regex in regexes:
        ret = re.findall(regex, subject, re.DOTALL)
        if ret:
            # str.strip() works on Python 2 and 3 alike, and the list
            # comprehension yields a real list on both.
            return [part.strip(u"\n\t ,") for part in ret[0]]
    return (None, None)
105
def extract_from_subject(subject):
    """Return ``(package, version)`` for an upload announcement subject.

    Subjects containing a blacklisted marker, and subjects in which
    ``find_in_subject()`` finds neither a package nor a version, yield
    ``(None, None)``.  Anything after the first underscore in the package
    field is dropped.
    """
    for blacklist_item in ["(raw-ddtp-tarball)"]:
        if blacklist_item in subject:
            return (None, None)
    (pkg, version) = find_in_subject(subject)
    if not pkg and not version:
        return (None, None)
    # str.strip() replaces string.strip(), which only exists on Python 2.
    pkg = pkg.split("_")[0].strip(u"\n\t ,")
    return (pkg, version)
116
# Walk `messages`, build a MailResult per message and return a tuple of
# (data, msg_id of the most recently built MailResult).
# NOTE(review): `data` is tested and returned below but never assigned in
# the visible lines; presumably a list of MailResult objects initialised
# before the loop and appended to inside it -- confirm.
def scan(messages, last_msg_id, uploads=True):
118
# `count` is not used in any of the visible lines.
for count, message in enumerate(messages):
119
# Skip broken messages.
120
# message_factory() hands back this sentinel when parsing fails.
if message is MESSAGE_NOT_PARSEABLE:
123
msg_id = message.get('Message-Id')
124
# if we checked this mbox before we only need to start reading
126
# True while a previous id is known, nothing has been collected yet and
# this message is not the one recorded by the previous run.
if last_msg_id and not data and msg_id != last_msg_id:
130
# NOTE(review): message.get("subject") returns None when the header is
# absent, in which case .strip() raises AttributeError.
package, version = extract_from_subject(message.get("subject").strip())
134
mail = MailResult(msg_id, package, version)
136
# NOTE(review): the statement selecting between the two MailResult
# constructions is not visible; presumably this one is the branch taken
# when `uploads` is false -- confirm.
mail = MailResult(msg_id)
138
# NOTE(review): parsedate() returns None for a missing or unparseable
# Date header, which timegm() below cannot handle.
sent_date = parsedate(message.get('date'))
139
# timegm() interprets the time tuple as UTC; the stored datetime is naive.
mail.timestamp = datetime.utcfromtimestamp(timegm(sent_date))
141
pl = message.get_payload()
143
# Takes the payload of the first sub-part.
# NOTE(review): presumably guarded by a multipart/list check that is not
# visible here -- confirm.
pl = pl[0].get_payload()
144
(changed_by, signed_by) = get_persons_from_payload(pl)
145
# If either person is missing, use the address from the From header.
if not changed_by or not signed_by:
146
mail.name, mail.email = parseaddr(message.get('from'))
148
# NOTE(review): the next two assignments presumably belong to an else
# branch that is not visible; note signer_name/signer_email are only
# assigned on this path.
mail.name, mail.email = changed_by
149
mail.signer_name, mail.signer_email = signed_by
151
# NOTE(review): `mail` is unbound here if no MailResult was created above
# (for example when `messages` is empty).
return (data, mail.msg_id)
153
def scan_archive(mailbox_file, last_msg_id, uploads=True):
    """Open the mbox file at *mailbox_file* and run ``scan()`` over it.

    Messages are parsed with ``message_factory``.  Returns whatever
    ``scan()`` returns.
    """
    # scan() finishes iterating over the mailbox before it returns, so the
    # file can be closed as soon as the call completes.
    with open(mailbox_file, 'rb') as fp:
        mbox = PortableUnixMailbox(fp, message_factory)
        return scan(mbox, last_msg_id, uploads)