~5-a-day/five-a-day-stats/trunk

20 by Daniel Holbach
improve our approach to finiding people and their email addresses drastically:
1
#!/usr/bin/python
46 by Daniel Holbach
license
2
#
3
# Copyright (C) 2008-2009  Canonical Ltd.
20 by Daniel Holbach
improve our approach to finiding people and their email addresses drastically:
4
#
5
# The only argument is the mbox file of the mailing list with the team members.
6
#
7
8
from mailbox import PortableUnixMailbox
9
from email import message_from_file
10
from email.errors import MessageParseError
11
from email.utils import parseaddr
12
from email.utils import parsedate
13
14
import time
15
import sys
16
import os
49 by Brian Murray
Switch from using time.mktime() which uses local time to calendar.timegm() which uses UTC time
17
import calendar
20 by Daniel Holbach
improve our approach to finiding people and their email addresses drastically:
18
19
# Sentinel returned by message_factory() in place of a message that could not
# be parsed; scan_bugs() skips any entry that is this exact object.
MESSAGE_NOT_PARSEABLE = object()
20
21
# perhaps you could just get a list of everyone in five-a-day and then find
22
# their preferred e-mail address, or fallback to display name if it is private
23
24
def message_factory(fp):
    """Build a message object from the open file object *fp*.

    Returns whatever message_from_file() produces.  If parsing raises
    MessageParseError, the MESSAGE_NOT_PARSEABLE sentinel is returned
    instead so that the caller can recognise and skip the broken message.
    """
    try:
        parsed = message_from_file(fp)
    except MessageParseError:
        # None must not be returned here: it would stop the mailbox iterator.
        return MESSAGE_NOT_PARSEABLE
    else:
        return parsed
30
31
def scan_bugs(messages, old_msg_id):
    """Collect bug-activity records from a sequence of parsed mail messages.

    messages   -- iterable of message objects; entries that are the
                  MESSAGE_NOT_PARSEABLE sentinel are skipped.
    old_msg_id -- Message-ID at which the previous scan stopped.  Every
                  message up to and including the one carrying this id is
                  ignored.  If it is empty, no message is ignored on that
                  account.

    Returns a tuple (data, last_msg_id).  data is a list of
    (msg_id, action_date, sender, bug_id) tuples, where action_date is the
    Date header converted with calendar.timegm() and bug_id is the numeric
    local part of the Reply-To address.  last_msg_id is the Message-ID of
    the last parseable message that had one, or "" if there was none.

    Only messages whose Reply-To address is <digits>@bugs.launchpad.net and
    whose subject does not contain '[NEW]' are recorded.  Anything else,
    including messages with a missing or unparseable Date header, is
    skipped rather than raising.
    """
    data = []
    last_msg_id = ""
    new_msg_id_found = False
    for message in messages:
        # Skip broken messages.
        if message is MESSAGE_NOT_PARSEABLE:
            continue
        msg_id = message['message-id']
        # Remember only real ids so the caller always gets a string it can
        # store as the resume marker.
        if msg_id is not None:
            last_msg_id = msg_id
        if msg_id == old_msg_id or not old_msg_id:
            new_msg_id_found = True
        # don't re-do bugs over and over again
        if not new_msg_id_found or msg_id == old_msg_id:
            continue
        # Check to ensure it's from a Launchpad bug.  The address is
        # validated *before* it is converted, so unrelated mail is skipped
        # instead of raising ValueError.
        reply_to = message['reply-to']
        if reply_to is None:
            continue
        reply_address = parseaddr(reply_to)[1]
        reply_local_part, _sep, reply_domain = reply_address.rpartition('@')
        if reply_domain != 'bugs.launchpad.net' or \
           not reply_local_part.isdigit():
            continue
        # Reporting bugs doesn't count as part of five-a-day
        if '[NEW]' in message.get('subject', ''):
            continue
        date_header = message['date']
        if not date_header:
            continue
        # parsedate() returns a tuple (or None if the header is malformed)
        # and ignores any timezone offset; the dates are assumed to be UTC.
        sent_date = parsedate(date_header)
        if sent_date is None:
            continue
        # calendar.timegm is seconds since the epoch in UTC
        action_date = int(calendar.timegm(sent_date))
        sender = parseaddr(message.get('from', ''))[1]
        bug_id = int(reply_local_part)
        data.append((msg_id, action_date, sender, bug_id))
    return (data, last_msg_id)
66
67
if __name__ == '__main__':
    # Exactly one argument is expected: the path of the mbox file to scan.
    if len(sys.argv) != 2:
        sys.stderr.write("Please specify mailbox file.\n")
        sys.exit(1)

    mailbox_file = os.path.expanduser(sys.argv[1])
    if not os.path.exists(mailbox_file):
        # Errors go to stderr, like the usage message above.
        sys.stderr.write("%s mailbox file doesn't exist\n" % mailbox_file)
        sys.exit(1)

    # <mailbox>.msgid stores the Message-ID at which the previous run stopped.
    msgidfile_name = mailbox_file + ".msgid"
    # Nothing to do if the marker file is more than an hour newer than the
    # mailbox.
    # NOTE(review): presumably this means the mailbox has not changed since
    # the last scan -- confirm the intended threshold.
    if os.path.exists(msgidfile_name) and \
       os.stat(msgidfile_name).st_mtime - os.stat(mailbox_file).st_mtime > 60*60:
        sys.exit(0)

    if os.path.exists(msgidfile_name):
        msgidfile = open(msgidfile_name)
        try:
            old_msg_id = msgidfile.read().strip()
        finally:
            msgidfile.close()
    else:
        old_msg_id = ""

    # The mailbox is read lazily by scan_bugs(), so the file must stay open
    # until the scan has finished.
    mailbox_fp = open(mailbox_file, 'rb')
    try:
        mailbox = PortableUnixMailbox(mailbox_fp, message_factory)
        (data, last_msg_id) = scan_bugs(mailbox, old_msg_id)
    finally:
        mailbox_fp.close()

    # Append the new records to <mailbox>.data, one quoted CSV-style line per
    # (msg_id, action_date, sender, bug_id) entry.
    data_file = open(mailbox_file + ".data", 'a')
    try:
        for entry in data:
            data_file.write("\"%s\",\"%s\",\"%s\",\"%s\"\n" %
                            (entry[0], entry[1], entry[2], entry[3]))
    finally:
        data_file.close()

    # Opening with "w" truncates an existing marker file, so it does not need
    # to be removed first.
    msgidfile = open(msgidfile_name, "w")
    try:
        msgidfile.write(last_msg_id)
    finally:
        msgidfile.close()