|
20
by Daniel Holbach
improve our approach to finding people and their email addresses drastically: |
1 |
#!/usr/bin/python
|
|
46
by Daniel Holbach
license |
2 |
#
|
3 |
# Copyright (C) 2008-2009 Canonical Ltd.
|
|
|
20
by Daniel Holbach
improve our approach to finding people and their email addresses drastically: |
4 |
#
|
5 |
# The single argument is the mbox file of the mailing list to scan.
|
|
6 |
#
|
|
7 |
||
8 |
from mailbox import PortableUnixMailbox |
|
9 |
from email import message_from_file |
|
10 |
from email.errors import MessageParseError |
|
11 |
from email.utils import parseaddr |
|
12 |
from email.utils import parsedate |
|
13 |
||
14 |
import time |
|
15 |
import sys |
|
16 |
import os |
|
|
49
by Brian Murray
Switch from using time.mktime() which uses local time to calendar.timegm() which uses UTC time |
17 |
import calendar |
|
20
by Daniel Holbach
improve our approach to finding people and their email addresses drastically: |
18 |
|
19 |
# Sentinel handed back by message_factory() in place of a message that could
# not be parsed; consumers compare against it with ``is``.
MESSAGE_NOT_PARSEABLE = object()

# perhaps you could just get a list of everyone in five-a-day and then find
# their preferred e-mail address, or fall back to display name if it is private


def message_factory(fp):
    """Parse a single e-mail message from the file object *fp*.

    Returns the parsed message on success.  If parsing raises
    MessageParseError, the MESSAGE_NOT_PARSEABLE sentinel is returned
    instead; None is deliberately never returned, because a None from the
    factory would stop the mailbox iterator.
    """
    try:
        parsed = message_from_file(fp)
    except MessageParseError:
        return MESSAGE_NOT_PARSEABLE
    return parsed
|
30 |
||
31 |
def scan_bugs(messages, old_msg_id):
    """Collect Launchpad bug activity from an iterable of parsed messages.

    *messages* yields parsed e-mail messages (or MESSAGE_NOT_PARSEABLE for
    ones that could not be parsed).  *old_msg_id* is the Message-ID up to
    which a previous run already processed the mailbox: every message up to
    and including that one is skipped.  When *old_msg_id* is empty, all
    messages are considered.

    A message is recorded only when its Reply-To address is
    ``<digits>@bugs.launchpad.net``, its subject does not contain ``[NEW]``
    and its Date header can be parsed.  Anything else is skipped rather
    than raising.

    Returns a tuple ``(data, msg_id)``: *data* is a list of
    ``(msg_id, action_date, sender, bug_id)`` tuples, where *action_date*
    is an integer number of seconds since the epoch, and *msg_id* is the
    Message-ID of the last parseable message seen ("" if there was none).
    """
    data = []
    msg_id = ""
    new_msg_id_found = False
    for message in messages:
        # Skip broken messages.
        if message is MESSAGE_NOT_PARSEABLE:
            continue

        msg_id = message['message-id']
        if msg_id == old_msg_id or not old_msg_id:
            new_msg_id_found = True
        # don't re-do bugs over and over again
        if not new_msg_id_found or msg_id == old_msg_id:
            continue

        # Check to ensure it's from a Launchpad bug.
        reply_to = message['reply-to']
        if reply_to is None:
            continue
        reply_address = parseaddr(reply_to)[1]
        # rpartition never raises, unlike unpacking split('@'), so addresses
        # without (or with several) '@' are simply rejected below.
        reply_local_part, _sep, reply_domain = reply_address.rpartition('@')
        if (reply_domain != 'bugs.launchpad.net'
                or not reply_local_part.isdigit()):
            continue

        # Reporting bugs doesn't count as part of five-a-day
        if '[NEW]' in message.get('subject', ''):
            continue

        # Only convert after validation; a non-numeric local part used to
        # raise ValueError here before the domain check had a chance to run.
        try:
            bug_id = int(reply_local_part)
        except ValueError:
            continue

        # parsedate() returns a time tuple and ignores any timezone offset;
        # per the original author's note these dates are all in UTC.
        date_header = message['date']
        sent_date = parsedate(date_header) if date_header else None
        if sent_date is None:
            # Missing or unparseable Date header: nothing sensible to record.
            continue
        # calendar.timegm is seconds since the epoch in UTC
        action_date = int(calendar.timegm(sent_date))

        sender = parseaddr(message.get('from', ''))[1]
        data.append((msg_id, action_date, sender, bug_id))
    return (data, msg_id)
66 |
||
67 |
if __name__ == '__main__':
    # Script entry point: scan the mbox file given as the only argument and
    # append the bug activity found to "<mbox>.data", remembering the last
    # Message-ID seen in "<mbox>.msgid" so the next run can resume after it.
    if len(sys.argv) != 2:
        sys.stderr.write("Please specify mailbox file.\n")
        sys.exit(1)

    mailbox_file = os.path.expanduser(sys.argv[1])
    if not os.path.exists(mailbox_file):
        # Errors go to stderr, like the usage message above.
        sys.stderr.write("%s mailbox file doesn't exist\n" % mailbox_file)
        sys.exit(1)

    msgidfile_name = mailbox_file + ".msgid"
    old_msg_id = ""
    if os.path.exists(msgidfile_name):
        # Exit without doing anything when the msgid file is more than an
        # hour newer than the mailbox itself.
        mtime_gap = (os.stat(msgidfile_name).st_mtime
                     - os.stat(mailbox_file).st_mtime)
        if mtime_gap > 60 * 60:
            sys.exit(0)
        with open(msgidfile_name) as msgidfile:
            old_msg_id = msgidfile.read().strip()

    # scan_bugs() iterates the mailbox, so the underlying file has to stay
    # open until it returns; the with-block closes it even on errors.
    with open(mailbox_file, 'rb') as mailbox_fp:
        mailbox = PortableUnixMailbox(mailbox_fp, message_factory)
        (data, last_msg_id) = scan_bugs(mailbox, old_msg_id)

    # Append one quoted, comma-separated line per recorded bug action.
    with open(mailbox_file + ".data", 'a') as data_file:
        for entry in data:
            data_file.write("\"%s\",\"%s\",\"%s\",\"%s\"\n"
                            % (entry[0], entry[1], entry[2], entry[3]))

    # Replace the msgid file with the last Message-ID seen in this run.
    if os.path.exists(msgidfile_name):
        os.remove(msgidfile_name)
    with open(msgidfile_name, "w") as msgidfile:
        msgidfile.write(last_msg_id)