# Branch: ~stefanor/+junk/ubuntu-sponsorship-miner

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env python

import cgi
import cgitb
import re
import urllib

import genshi.template
import genshi.template.text
import psycopg2
import psycopg2.extensions

# Relative URL handed to the templates as 'url' and used as the base of the
# plain-text rendering link (presumably the address of this CGI script).
URL = 'ubuntu-sponsorships.cgi'
# Base URL handed to the templates as 'static'.
STATIC_URL = '../ubuntu-sponsorships'
# Directory the Genshi TemplateLoader searches for template files.
STATIC_PATH = '../htdocs/ubuntu-sponsorships'
# Connection string passed to psycopg2.connect().
DB = 'service=udd'
# Value substituted into the LIMIT clause of the upload-history query.
LIMIT = 1000

class AttrDict(dict):
    """Dictionary with attribute access

    Reading ``d.key`` is equivalent to reading ``d['key']``. The fallback to
    keys only happens when normal attribute lookup fails, so the regular
    ``dict`` methods (``keys``, ``items``, ...) are never hidden by keys of
    the same name.
    """

    def __init__(self, **kwargs):
        """Populate the dictionary from keyword arguments."""
        dict.__init__(self, **kwargs)

    def __getattr__(self, name):
        """Return ``self[name]``; raise AttributeError if there is no such key."""
        try:
            return self[name]
        except KeyError as e:
            # Translate so that hasattr()/getattr() behave as expected.
            raise AttributeError(e)


def get_template(name, type_='html'):
    """Load the template called `name` from STATIC_PATH.

    When `type_` is 'text' the template is loaded as a Genshi
    NewTextTemplate; for any other value the loader's default template
    class is used.
    """
    load_options = {}
    if type_ == 'text':
        load_options['cls'] = genshi.template.text.NewTextTemplate
    return genshi.template.TemplateLoader(STATIC_PATH).load(name,
                                                            **load_options)

def locate_sponsorships(sponsor, sponsor_search, sponsoree, sponsoree_search):
    """Query UDD for sponsorships. Yields rows

    `sponsor` is matched against the signed_by_<sponsor_search> column and
    `sponsoree` against the changed_by_<sponsoree_search> column, where each
    *_search argument is either 'name' or 'email'. Matching is
    case-insensitive; '*' in a pattern is translated to the SQL wildcard '%'
    and an empty pattern matches everything. Uploads whose signer name
    equals the changed-by name are excluded. Results are ordered by date and
    capped at LIMIT rows.

    Yields one AttrDict per row, keyed by the selected column names plus
    'fixed' (space-separated bugs closed by the upload).

    Raises ValueError if either *_search argument is not 'name' or 'email'.
    As this is a generator, the check (like the query itself) runs when
    iteration starts, not when the function is called.
    """

    # String-substituted into query as column-name suffixes, so they must be
    # validated unconditionally: a plain `assert` is stripped by `python -O`.
    for search in (sponsor_search, sponsoree_search):
        if search not in ('name', 'email'):
            raise ValueError("search field must be 'name' or 'email', not %r"
                             % (search,))

    sponsor = sponsor.replace('*', '%')
    if sponsor == '':
        sponsor = '%'
    sponsoree = sponsoree.replace('*', '%')
    if sponsoree == '':
        sponsoree = '%'

    # Only the validated column suffixes and the LIMIT constant are
    # interpolated here; the user-supplied patterns are passed separately as
    # bound parameters (the doubled %%s placeholders).
    query = """
        SELECT source, version, date, changed_by, changed_by_name,
               changed_by_email, signed_by, signed_by_name, signed_by_email,
               distribution,
               array_to_string(ARRAY(
                 SELECT bug
                 FROM ubuntu_upload_history_launchpad_closes
                 WHERE source = ubuntu_upload_history.source
                   AND version = ubuntu_upload_history.version
               ), ' ') AS fixed
        FROM ubuntu_upload_history
        WHERE lower(changed_by_%s) LIKE lower(%%s)
          AND lower(signed_by_%s) LIKE lower(%%s)
          AND signed_by_name <> changed_by_name
        ORDER BY date
        LIMIT %i;
    """ % (sponsoree_search, sponsor_search, LIMIT)
    keys = ('source version date changed_by changed_by_name changed_by_email '
            'signed_by signed_by_name signed_by_email distribution fixed'
           ).split()

    # Make sure we are dealing with Unicode:
    psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
    psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
    conn = psycopg2.connect(DB)
    try:
        conn.set_client_encoding('UTF-8')
        cur = conn.cursor()
        try:
            cur.execute(query, (sponsoree, sponsor))
            # All rows are fetched up front, so the connection can be
            # released before anything is handed to the consumer.
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    for row in rows:
        upload = AttrDict(**dict(zip(keys, row)))
        yield upload

def mine_sponsorships(sponsorships):
    """Iterate over sponsorships and guess what we can
    yields sponsorships

    Each upload is modified in place before being yielded:

    * 'fixed' is split from a whitespace-separated string into a list
      (a None value becomes an empty list).
    * 'release' is set to the part of 'distribution' before the first '-'.
    * 'info' is set to a sorted list of guessed tags:
      'sru' for a distribution ending in '-proposed',
      'security' for a distribution ending in '-security',
      'sync' for a version ending in a fakesyncN suffix,
      'rebuild' for a version ending in a buildN suffix,
      'upgrade' when ubuver_re captures '0' before 'ubuntu' and '1' after
      it (e.g. 1.0-0ubuntu1).
    """
    resyncable_re = re.compile(r'[-\d](fakesync|build)\d+$')
    ubuver_re = re.compile(r'[-\d](\d+)?ubuntu(\d+)$')
    # resyncable_re can only ever capture one of these two suffixes.
    suffix_tags = {'fakesync': 'sync', 'build': 'rebuild'}

    for upload in sponsorships:
        upload['fixed'] = (upload['fixed'] or '').split()
        upload['release'] = upload.distribution.split('-')[0]
        info = set()

        if upload.distribution.endswith('-proposed'):
            info.add('sru')
        if upload.distribution.endswith('-security'):
            info.add('security')

        m = resyncable_re.search(upload.version)
        if m is not None:
            info.add(suffix_tags[m.group(1)])

        m = ubuver_re.search(upload.version)
        if m is not None and m.group(1) == '0' and m.group(2) == '1':
            info.add('upgrade')

        upload['info'] = sorted(info)
        # Things we can't determine without changelog / lpapi:
        # sync, merge
        yield upload

def display_sponsorships(sponsor, sponsor_search, sponsoree, sponsoree_search,
                         type_):
    """Run the sponsorship search and return the rendered result page.

    `type_` selects both the template and the render method: 'html' uses
    sponsorships.xml, anything else uses the text template
    sponsorships.txt. The template receives the static and script URLs, a
    link to the plain-text rendering of the same search, and the iterator
    of mined sponsorships.
    """
    if type_ != 'html':
        template = get_template('sponsorships.txt', 'text')
    else:
        template = get_template('sponsorships.xml')

    # Query string reproducing this search with the text renderer selected.
    text_query = urllib.urlencode({
        'render': 'text',
        'sponsor': sponsor,
        'sponsor_search': sponsor_search,
        'sponsoree': sponsoree,
        'sponsoree_search': sponsoree_search,
    })
    uploads = mine_sponsorships(
        locate_sponsorships(sponsor, sponsor_search,
                            sponsoree, sponsoree_search))

    stream = template.generate(static=STATIC_URL,
                               url=URL,
                               text_url=URL + '?' + text_query,
                               sponsorships=uploads)
    return stream.render(type_)

def display_form():
    """Render the search form (form.xml) as HTML and return it."""
    form_template = get_template('form.xml')
    stream = form_template.generate(static=STATIC_URL, url=URL)
    return stream.render('html')

def main():
    """CGI entry point: show search results or the search form.

    Results are shown when a 'render' value and at least one of 'sponsor' /
    'sponsoree' were submitted; otherwise the HTML search form is shown.
    Unrecognised search fields fall back to 'name' and an unrecognised
    render type falls back to 'html'. The response (Content-Type header,
    blank line, body) is written to standard output.
    """
    cgitb.enable()
    form = cgi.FieldStorage()

    render = form.getfirst('render', '')
    sponsor = form.getfirst('sponsor', '')
    sponsoree = form.getfirst('sponsoree', '')

    if not render or not (sponsor or sponsoree):
        # Nothing to search for: fall back to the form.
        render = 'html'
        body = display_form()
    else:
        search_fields = ('name', 'email')
        sponsor_search = form.getfirst('sponsor_search')
        if sponsor_search not in search_fields:
            sponsor_search = 'name'
        sponsoree_search = form.getfirst('sponsoree_search')
        if sponsoree_search not in search_fields:
            sponsoree_search = 'name'
        if render not in ('html', 'text'):
            render = 'html'
        body = display_sponsorships(sponsor, sponsor_search, sponsoree,
                                    sponsoree_search, render)

    if render != 'text':
        header = "Content-Type: text/html; charset=utf-8"
    else:
        header = "Content-Type: text/plain; charset=utf-8"
    # Single parenthesised expressions, so these behave the same as the
    # Python 2 print statement.
    print(header)
    print("")
    print(body)

# Handle the CGI request only when run as a script, not when imported.
if __name__ == '__main__':
    main()