~work-items-tracker-hackers/launchpad-work-items-tracker/blueprints-api

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
#!/usr/bin/python
#
# Pull work items from various sources (blueprint whiteboards, linked blueprint
# bugs, wiki pages) and put them into a database.

import urllib, re, sys, optparse, smtplib, pwd, os, urlparse
from email.mime.text import MIMEText
import sqlite3 as dbapi2

from launchpadlib.launchpad import Launchpad

import report_tools

debug = False

# map a bug task status to a WI status; None == ignore
bug_wi_states = {
    'New': 'todo',
    'Incomplete': 'todo',
    'Opinion': None,
    'Invalid': None,
    'Expired': None,
    "Won't Fix": 'postponed',
    'Confirmed': 'todo',
    'Triaged': 'todo',
    'In Progress': 'inprogress',
    'Fix Committed': 'inprogress',
    'Fix Released': 'done',
}


# if this is None, data errors go to stderr; if this is a list of (spec_name,
# msg) tuples, data errors get collected here and mailed out at the end
data_errors = None

def dbg(msg):
    '''Print out debugging message if debugging is enabled.'''

    if debug:
        print >> sys.stderr, msg

def data_error(spec_url, msg, warning=False):
    '''Log an error in the input data for a particular spec.

    Depending on the program mode this gets printed to stderr, or mailed to the
    corresponding team contact.
    '''
    global data_errors

    if warning:
        severity = 'WARNING'
    else:
        severity = 'ERROR'
    s = '%s\n  [%s] %s' % (spec_url, severity, msg)
    if data_errors is None:
        print >> sys.stderr, s
    else:
        data_errors.append((spec_url.split('/')[-1], s))
        dbg('Queueing data error: ' + s)


def web_link(item):
    """Get a link to the Launchpad API object on the website."""
    api_link = item.self_link
    parts = urlparse.urlparse(api_link)
    return parts.scheme + "://" + parts.netloc.replace("api.", "") + "/" + parts.path.split("/", 2)[2]


########################################################################
#
# Functions for parsing Launchpad data
#
########################################################################

def lp_import_blueprint(db, bp):
    '''Import details of a blueprint into the specs table.

    If a blueprint does not have a status, use the description.

    NOTE: We do not fill in "team" right now; let's see whether we will need
    that really.

    Return True on success.
    '''
    cur = db.cursor()
    cur.execute('SELECT name FROM milestones')
    valid_milestones = [m[0] for m in cur] + [None]

    def get_whiteboard_section(heading):
        if bp.whiteboard is None:
            return None
        regex = '^' + heading + ':\s*\n(.*?)\n\n'
        m = re.search(regex, bp.whiteboard, re.S | re.I | re.M)
        if m is not None:
            section = m.group(1).strip()
        else:
            section = None
        return section

    data = {}
    data['priority'] = bp.priority
    data['definition'] = bp.definition_status
    data['implementation'] = bp.implementation_status
    data['approver'] = bp.approver and bp.approver.name or None
    data['drafter'] = bp.drafter and bp.drafter.name or None
    data['assignee'] = bp.assignee and bp.assignee.name or None
    data['milestone'] = bp.milestone and bp.milestone.name or None
    data['description'] = bp.summary or "(no description)"
    data['status'] = get_whiteboard_section('Status')
    data['details_url'] = bp.specification_url
    data['roadmap_notes'] = get_whiteboard_section('Roadmap\s+Notes')

    # ignore "later" milestone"
    if data['milestone'] == 'later':
        dbg('lp_import_blueprint(%s): spec has "later" milestone, ignoring it' % bp.name)
        return False

    if data['milestone'] not in valid_milestones:
        data_error(web_link(bp), 'milestone "%s" is unknown/invalid' % data['milestone'], True)

    # if we have an explicit status: in the whiteboard, use that; otherwise use
    # description as status text
    if data['status']:
        data['status'] = data['status'].strip()
    else:
        data['status'] = data['description'].strip()
    del data['description']

    dbg('lp_import_blueprint(%s): finished parsing; data: %s' % (bp.name, str(data)))

    db.cursor().execute('INSERT INTO specs VALUES (?, ?, ?, ?, ?, NULL, ?, ?, ?, ?, ?, ?,?)',
            (bp.name, web_link(bp), data['priority'], data['implementation'],
            data['assignee'], data['status'], data['milestone'],
            data['definition'], data['drafter'], data['approver'],
            data['details_url'], data['roadmap_notes']))

    return True

def parse_meta_item( lp, db, line, bp_name ):
    '''Parse a meta information line from a blueprint

    '''

    line = line.strip()
    if not line:
        return

    try:
        (key, value) = line.rsplit(':', 1)
        key   = key.strip()
        value = value.strip()
    except ValueError:
        dbg("\tMeta line '%s' can not be parsed" % line)
        return

    dbg( "\t\tMeta for %s: key='%s' value='%s'" % (bp_name, key, value) )

    cur = db.cursor()
    cur.execute('INSERT INTO meta VALUES(?,?,?,date(CURRENT_TIMESTAMP))',
                (key, value, bp_name))
    db.commit()

def parse_complexity_item(lp, db, line, bp_name, bp_url, def_milestone, def_assignee):
    line = line.strip()
    # remove special characters people tend to type
    line = re.sub('[^\w -.]', '', line)
    if not line:
        return

    dbg("\tParsing complexity line '%s'" % line)

    num         = None
    milestone   = None
    assignee    = None

    try:
        complexity_list = line.split()
        complexity_list.reverse()

        # we may not have any values in the list, so append our
        # default values in the right order so they can be mapped
        defs = [None, def_milestone, def_assignee]
        for i in range(len(complexity_list), len(defs)):
            complexity_list.append(defs[i])
        (num, milestone, assignee) = complexity_list
        if not num:
            data_error(bp_url, 'No complexity points defined for %s' % line)

        dbg('\tComplexity: %s MS: %s Who: %s' % (num, milestone, assignee))

        cur = db.cursor()
        cur.execute('INSERT INTO complexity VALUES(?,?,?,?,date(CURRENT_TIMESTAMP))',
                (assignee, num, milestone, bp_name))
    except ValueError:
        data_error(bp_url, "\tComplexity line '%s' could not be parsed %s" % (line, ValueError))


def parse_blueprint_workitem(line, default_assignee, milestone,
        blueprint_url, launchpad, result_list):
    '''Parse a work item line of a blueprint whiteboard.'''

    # FIXME: we lose bug linking etc. that is done by the tales
    # formatters in LP here.
    line = line.strip()
    if not line:
        return
    dbg("\tworkitem (clean): '%s'" % (line))

    try:
        (desc, state) = line.rsplit(':', 1)
    except ValueError:
        desc = line
        state = 'todo'

    state = state.strip().lower()
    if not state:
        state = 'todo'
    if state == 'completed':
        state = 'done'
    if state in ('postpone', 'dropped', 'drop'):
        state = 'postponed'
    if state not in report_tools.valid_states:
        data_error(blueprint_url, 'invalid state "%s" for work item "%s"' %
                (state, desc))
        return

    if desc.startswith('['):
        try:
            off = desc.index(']')
            assignee = desc[1:off]
            desc = desc[off+1:].strip()
        except ValueError:
            data_error(blueprint_url, 'missing closing "]" for assignee for work item "%s"' %
                    desc)
            return
        try:
            if launchpad:
                launchpad.people[assignee]
        except (ValueError, KeyError):
            data_error(blueprint_url, 'assignee "%s" is not a valid Launchpad account' %
                    assignee, True)
    else:
        assignee = default_assignee

    result_list.append((desc, state, assignee, milestone))

def follow_blueprint_buglink(bug, default_assignee, blueprint_name,
        launchpad, release, default_milestone, result_list):
    '''Query launchpad for the information on a linked bug'''

    for task in bug.bug_tasks:
        if task.status == 'Unknown':
            # this can happen for upstream tasks
            continue

        state = bug_wi_states[task.status]
        if state is None:
            continue

        # only look at tasks on Ubuntu or Ubuntu packages; we don't want
        # upstream tasks pulled in here.
        target = launchpad.load(task.target.self_link)
        rtype = urllib.splittag(target.resource_type_link)[1]
        if rtype not in ('distribution_source_package','source_package',
                         'distribution'):
            continue
        # no such field for some distributions (e.g., fedora)?
        try:
            if target.distribution.name != 'ubuntu':
                continue
        except:
            continue

        # if it's not for the current development release, it doesn't matter
        if rtype == 'source_package' and target.distroseries.name != release:
            continue
        if rtype == 'distribution_source_package':
            better_task = False
            for subtask in task.related_tasks:
                subtarget = launchpad.load(subtask.target.self_link)
                try:
                    if subtarget.distribution.name == 'ubuntu' \
                       and subtarget.distroseries.name == release \
                       and subtarget.name == target.name:
                        better_task = True
                        break
                except: pass
            if better_task:
                continue

        desc = '<a href="https://launchpad.net/bugs/%d">LP: #%d</a>: ' % (bug.id, bug.id) + bug.title
        if rtype != 'distribution':
            desc += ' (%s)' % target.name

        if task.assignee:
            assignee = task.assignee.name
        else:
            assignee = default_assignee
        if task.milestone:
            milestone = task.milestone.name
        else:
            milestone = default_milestone
        dbg("\tbug (cleaned): '%s' (%s, %s, %s)" % (desc, state, assignee, milestone))

        result_list.append((desc, state, assignee, milestone))

def milestone_extract(text, valid_milestones):
    words = text.replace('(', ' ').replace(')', ' ').replace('[', ' ').replace(
            ']', ' ').replace('<wbr></wbr>', '').split()

    for word in words:
        if word in valid_milestones:
            return word
    return None

def lp_import_blueprint_workitems(lp, db, bp, release):
    '''Collect work items from a Launchpad blueprint.

    This includes work items from the whiteboard as well as linked bugs.
    '''
    work_items_re = re.compile('^work items(.*)\s*:\s*$', re.I)
    meta_re = re.compile('^Meta.*?:$', re.I)
    complexity_re = re.compile('^Complexity.*?:$', re.I)

    in_workitems_block = False
    in_meta_block = False
    in_complexity_block = False
    work_items = []
    milestone = None

    cur = db.cursor()
    cur.execute('SELECT assignee, milestone, implementation FROM specs WHERE name = ?', (bp.name,))
    (spec_assignee, spec_milestone, spec_implementation) = cur.fetchone()

    dbg('lp_import_blueprint_workitems(): processing %s (spec milestone: %s, spec assignee: %s, spec implementation: %s)' % (
        bp.name, spec_milestone, spec_assignee, spec_implementation))

    cur.execute('SELECT team FROM teams WHERE name = ?', (spec_assignee,))
    assignee_teams = [t[0] for t in cur]

    cur.execute('SELECT name FROM milestones')
    valid_milestones = [m[0] for m in cur]

    if bp.whiteboard:
        for l in bp.whiteboard.splitlines():

            if (not in_workitems_block
                and not in_meta_block and not in_complexity_block):
                m = work_items_re.search(l)
                if m:
                    in_workitems_block = True
                    dbg('lp_import_blueprint_workitems(): starting work items block at ' + l)
                    if not milestone:
                        milestone = milestone_extract(m.group(1), valid_milestones)
                        dbg('  ... setting milestone to ' + str(milestone))
                if meta_re.search(l):
                    in_meta_block = True
                if complexity_re.search(l):
                    in_complexity_block = True
                continue

            if in_workitems_block:
                dbg("\tworkitem (raw): '%s'" % (l.strip()))
                if not l.strip():
                    dbg('lp_import_blueprint_workitems(): closing work items block with line: ' + l)
                    in_workitems_block = False
                    milestone = None
                parse_blueprint_workitem(l, spec_assignee, milestone or
                        spec_milestone, web_link(bp), lp, work_items)

            if in_meta_block:
                dbg("\tmeta line (raw): '%s'" % (l.strip()))
                if not l.strip():
                    in_meta_block = False
                    continue
                parse_meta_item(lp, db, l, bp.name)

            if in_complexity_block:
                dbg("\tcomplexity block line (raw): '%s'" % (l.strip()))
                if not l.strip():
                    in_complexity_block = False
                    continue
                parse_complexity_item(lp, db, l, bp.name, web_link(bp), spec_milestone, spec_assignee)

    if lp:
        for bug in bp.bugs:
            follow_blueprint_buglink(bug, spec_assignee, bp.name, lp, release,
                    spec_milestone, work_items)

    if not work_items:
        #data_error(bp_url, 'no work items defined', True)
        pass

    if spec_implementation == 'Deferred':
        new_work_items = []
        for (desc, status, assignee, milestone) in work_items:
            if status == 'todo':
                status = 'postponed'
            new_work_items.append((desc, status, assignee, milestone))
        work_items = new_work_items

    for (desc, status, assignee, milestone) in work_items:
        db.cursor().execute('INSERT INTO work_items VALUES (?, ?, ?, ?, ?, date(CURRENT_TIMESTAMP))',
                (desc, bp.name, status, assignee, milestone))


def lp_import_milestones(lp_project, db):
    '''Import milestones from Launchpad into DB.

    lp_project must be a Launchpad project or distro_series object.
    '''
    cur = db.cursor()
    cur.execute('SELECT count(*) FROM milestones')
    if cur.fetchone()[0] > 0:
        dbg('lp_import_milestones(): milestone table already filled')
        return

    dbg('lp_import_milestones(): importing from Launchpad')
    for ms in lp_project.all_milestones:
        if ms.date_targeted:
            cur.execute('INSERT INTO milestones VALUES (?, ?)', (ms.name,
                    ms.date_targeted.strftime('%Y-%m-%d')))
    db.commit()

def lp_import_teams(lp, db, cfg):
    '''Import configured teams from Launchpad into DB.

    This needs the "teams" map from configuration.
    '''
    cur = db.cursor()
    cur.execute('SELECT count(*) FROM teams')
    if cur.fetchone()[0] > 0:
        dbg('lp_import_teams(): teams table already filled')
        return

    for team in cfg.get('teams', {}).keys():
        dbg('lp_import_teams(): Importing members for team ' + team)
        for member in lp.people[team].members:
            if not member.is_team:
                cur.execute('INSERT INTO teams VALUES (?, ?)',
                        (member.name, team))
            else:
                if team in cfg.get('recursive_teams', []):
                    for team_member in lp.people[member].members:
                        cur.execute('INSERT INTO teams VALUES (?, ?)',
                                (team_member.name, team))
    db.commit()

def lp_import_bug_workitems(lp_project, db, cfg):
    '''Collect work items from a bug list query'''

    if 'work_item_bugs' not in cfg:
        return

    # synthesize a spec for this project
    name = lp_project.title
    db.cursor().execute("INSERT INTO specs VALUES (?, ?, '', 'Unknown', NULL, NULL, '', NULL, NULL, NULL, NULL, NULL, NULL)",
            (name, 'https://launchpad.net/' + lp_project.name))

    for task in lp_project.searchTasks(status=bug_wi_states.keys(), **cfg['work_item_bugs']):
        id = task.self_link.split('/')[-1]
        title = task.title.split('"', 1)[1].rstrip('"')
        state = bug_wi_states[task.status]
        if state is None:
            dbg('lp_import_bug_workitems: ignoring #%s: %s (status: %s)' % (id, title, task.status))
            continue
        dbg('lp_import_bug_workitems: #%s: %s (%s)' % (id, title, state))

        if task.milestone:
            milestone = task.milestone.name
        else:
            milestone = None
        if task.assignee:
            assignee = task.assignee.name
        else:
            assignee = None
        
        db.cursor().execute('INSERT INTO work_items VALUES (?, ?, ?, ?, ?, date(CURRENT_TIMESTAMP))',
                (title, name, state, assignee, milestone))

def lp_import(db, cfg, name_pattern=None):
    '''Collect blueprint work items and status from Launchpad into DB.'''

    lp = Launchpad.login_with('ubuntu-work-items', service_root="production", version="devel")
    projects = []

    if 'release' in cfg:
        lp_project = lp.distributions['ubuntu'].getSeries(name_or_version=cfg['release'])
        projects.append(lp_project)
    else:
        assert 'project' in cfg, 'Configuration needs to specify project or release'
        lp_project = lp.projects[cfg['project']]
        projects.append(lp_project)

    lp_import_milestones(lp_project, db)
    lp_import_teams(lp, db, cfg)

    extra_projects = cfg.get('extra_projects', [])
    for extra_project_name in extra_projects:
        extra_project = lp.projects[extra_project_name]
        lp_import_milestones(extra_project, db)
        projects.append(extra_project)

    if name_pattern:
        name_filter = re.compile(name_pattern)

    for project in projects:
        # XXX: should this be valid_ or all_specifications?
        for bp in project.valid_specifications:
            if name_pattern:
                if not name_filter.search(bp.name):
                    continue
            dbg('lp_import(): downloading %s from %s' % (bp.name, bp.self_link))
            if lp_import_blueprint(db, bp):
                lp_import_blueprint_workitems(lp, db, bp, cfg.get('release'))

    lp_import_bug_workitems(lp_project, db, cfg)

########################################################################
#
# Functions for parsing Moin wiki data
#
########################################################################

def escape_url(url):
    '''Correctly escape data intended to be included in a URL'''
    return urllib.quote(url)

def get_moin_workitems_group(url, default_assignee):
    '''Collect work items from a moin wiki URL.

    Sections at == are treated as groups, first paragraph is status
    Every line starting with "|| " is treated as a work item.

    Return a list of ('name', 'status', [items...]), where each item
    is ('description', 'state', 'assignee', 'milestone').
    '''
    field_off = {}
    field_re = re.compile('^## WORKITEMS\s+(.*\S)\s*$')
    inprogress = re.compile('INPROGRESS\s*(\S+)')
    result = []
    section = []
    status = ''
    collect = 0
    name = None
    in_section = False
    for line in urllib.urlopen(url):
        # Find the field designators and record them.
        m = field_re.match(line)
        if m:
            field_off = {}
            for pair in m.group(1).split(' '):
                if pair.index('='):
                    (field, offset) = pair.split('=')
                    field_off[field] = int(offset)
            continue

        if line.startswith('== '):
            if in_section:
                result.append([name, status, section])
            fields = line.strip().split('==')
            assert not fields[0] # should be empty
            name = fields[1].strip()
            section = []
            collect = 1
            status = ''
        elif line.startswith('|| '):
            if not in_section:
                in_section = True
            collect = 0
            fields = line.strip().split('||')
            assert not fields[0] # should be empty
            assignee = default_assignee
            istatus = 'todo'
            milestone = None
            # Find the description
            if 'description' in field_off:
                which = field_off['description']
            else:
                which = 1
            desc = fields[which].strip()
            if 'status' in field_off:
                which = field_off['status']
                status_search = [ fields[which] ]
            else:
                status_search = fields[2:]
            for f in status_search:
                if 'DONE' in f or 'POSTPONED' in f or 'TODO' in f or 'INPROGRESS' in f or 'BLOCKED' in f:
                    ff = f.split()
                    if len(ff) == 2:
                        assignee = ff[1]
                    if 'DONE' in f:
                        istatus = 'done'
                        break
                    elif 'POSTPONED' in f:
                        istatus = 'postponed'
                        break
                    elif 'BLOCKED' in f:
                        istatus = 'blocked'
                        break
                    elif 'INPROGRESS' in f:
                        istatus = 'inprogress'
                        break
                    else:
                        istatus = 'todo'
                        break
            if 'assignee' in field_off:
                which = field_off['assignee']
                new_assignee = fields[which].strip()
                if new_assignee:
                    assignee = new_assignee
            if 'milestone' in field_off:
                which = field_off['milestone']
                new_milestone = fields[which].strip()
                if new_milestone:
                    milestone = new_milestone
            section.append((desc, istatus, assignee, milestone))
        elif line.startswith('||'):
            pass
        elif line != '':
            if collect:
                status += " " + line.strip();

    status = status.replace('<wbr>', '').replace('</wbr>', '').strip()
    if in_section:
        result.append([name, status, section])

    return result

def moin_import(db, cfg):
    '''Collect blueprint work items from a moin wiki.'''

    for url, default_assignee in cfg.get('moin_pages', {}).iteritems():
        dbg('moin_import(): processing %s (default assignee: %s)' % (url, default_assignee))
        for group, status, items in get_moin_workitems_group(url, default_assignee):
            url_clean = url.replace('?action=raw', '')
            name = url_clean.split('://', 1)[1].split('/', 1)[1]
            if group:
                name += ' ' + group
                spec_url = '%s#%s' % (url_clean, escape_url(group))
                dbg('  got group %s: name="%s", url="%s"' % (group, name, spec_url))
            else:
                spec_url = url_clean
                dbg('  no group: name="%s", url="%s"' % (name, spec_url))

            for (desc, state, assignee, milestone) in items:
                db.cursor().execute('INSERT INTO work_items VALUES (?, ?, ?, ?, ?, date(CURRENT_TIMESTAMP))',
                        (desc, name, state, assignee, milestone))

            db.cursor().execute("INSERT INTO specs VALUES (?, ?, 'Medium', 'Unknown', NULL, NULL, ?, NULL, NULL, NULL, NULL, NULL, NULL)",
                    (name, spec_url, status))

########################################################################
#
# Database handling
#
########################################################################

def get_db(dbpath):
    '''Open/initialize database.

    This creates the database if it does not exist.
    '''
    init = not os.path.exists(dbpath)

    db = dbapi2.connect(dbpath)

    cur = db.cursor()
    if init:
        cur.execute('''CREATE TABLE version (
            db_layout_ref INT NOT NULL
            )''')
        cur.execute('''INSERT INTO version VALUES (6)''')

        cur.execute('''CREATE TABLE specs (
            name VARCHAR(255) PRIMARY KEY,
            url VARCHAR(1000) NOT NULL,
            priority CHAR(20),
            implementation CHAR(30),
            assignee CHAR(50),
            team CHAR(50),
            status VARCHAR(5000) NOT NULL,
            milestone CHAR(50) REFERENCES milestones(name),
            definition CHAR(30),
            drafter CHAR(50),
            approver CHAR(50),
            details_url VARCHAR(1000),
            roadmap_notes VARCHAR(5000)
            )''')

        cur.execute('''CREATE TABLE work_items (
            description VARCHAR(1000) NOT NULL,
            spec VARCHAR(255) REFERENCES specs(name),
            status VARCHAR(20) NOT NULL,
            assignee VARCHAR(200),
            milestone CHAR(50) REFERENCES milestones(name),
            date TIMESTAMP NOT NULL
            )''')

        cur.execute('''CREATE TABLE teams (
            name VARCHAR(200) NOT NULL,
            team CHAR(50)
            )''')

        cur.execute('''CREATE TABLE milestones (
            name VARCHAR(50) PRIMARY KEY,
            due_date TIMESTAMP NOT NULL
            )''')

        cur.execute('''CREATE TABLE meta (
            key VARCHAR(255),
            value VARCHAR(255),
            spec VARCHAR(255) REFERENCES specs(name),
            date TIMESTAMP NOT NULL
            )''')
            
        cur.execute('''CREATE TABLE complexity (
            assignee VARCHAR(255),
            points VARCHAR(255),
            milestone CHAR(50) REFERENCES milestones(name),
            spec VARCHAR(255) REFERENCES specs(name),
            date TIMESTAMP NOT NULL
            )''')            

        create_v5_indexes(cur)
        create_v6_indexes(cur)

        db.commit()
    else:
        # upgrade DB layout
        cur.execute('SELECT db_layout_ref FROM version')
        ver = cur.fetchone()[0]

        if ver == 1:
            dbg('Upgrading DB to layout version 2')
            cur.execute('ALTER TABLE specs ADD COLUMN milestone CHAR(50) REFERENCES milestones(name)')
            cur.execute('ALTER TABLE specs ADD COLUMN definition CHAR(30)')
            cur.execute('ALTER TABLE specs ADD COLUMN drafter CHAR(50)')
            cur.execute('ALTER TABLE specs ADD COLUMN approver CHAR(50)')
            cur.execute('ALTER TABLE specs ADD COLUMN details_url VARCHAR(1000)')
            cur.execute('UPDATE version SET db_layout_ref = 2')
            db.commit()
            dbg('DB upgrade finished')
            ver = 2

        if ver == 2:
            dbg('Upgrading DB to layout version 3')
            cur.execute('''CREATE TABLE meta (
                key VARCHAR(255),
                value VARCHAR(255),
                spec VARCHAR(255) REFERENCES specs(name),
                date TIMESTAMP NOT NULL
                )''')
            cur.execute('UPDATE version SET db_layout_ref = 3')
            db.commit()
            dbg('DB upgrade finished')
            ver = 3
        
        if ver == 3:
            dbg('Upgrading DB to layout version 4')
            cur.execute('''CREATE TABLE complexity (
                assignee VARCHAR(255),
                points VARCHAR(255),
                milestone CHAR(50) REFERENCES milestones(name),
                spec VARCHAR(255) REFERENCES specs(name),
                date TIMESTAMP NOT NULL
                )''')     
            cur.execute('ALTER TABLE specs ADD COLUMN roadmap_notes VARCHAR(5000)')
            cur.execute('UPDATE version SET db_layout_ref = 4')
            db.commit()
            dbg('DB upgrade finished')
            ver = 4

        if ver == 4:
            dbg('Upgrading DB to layout version 5')
            create_v5_indexes(cur)
            cur.execute('UPDATE version SET db_layout_ref = 5')
            db.commit()
            ver = 5

        if ver == 5:
            dbg('Upgrading DB to layout version 6')
            create_v6_indexes(cur)
            cur.execute('UPDATE version SET db_layout_ref = 6')
            db.commit()
            ver = 6

    return db

def create_v5_indexes(cur):
    cur.execute('''CREATE INDEX teams_name_idx on teams(name)''')
    cur.execute('''CREATE INDEX work_items_date_idx ON work_items (date)''')
    cur.execute('''CREATE INDEX work_items_status_idx ON work_items (status)''')

def create_v6_indexes(cur):
    cur.execute('''CREATE INDEX work_items_assignee_milestone_idx on work_items(assignee,milestone)''')

########################################################################
#
# Program operations and main
#
########################################################################

def parse_argv():
    '''Parse CLI arguments.

    Return (options, args) tuple.
    '''
    optparser = optparse.OptionParser()
    optparser.add_option('-d', '--database',
        help='Path to database', dest='database', metavar='PATH')
    optparser.add_option('-c', '--config',
        help='Path to configuration file', dest='config', metavar='PATH')
    optparser.add_option('-p', '--pattern', metavar='REGEX',
        help='Regex pattern for blueprint name (optional, mainly for testing)', dest='pattern')
    optparser.add_option('--debug', action='store_true', default=False,
        help='Enable debugging output in parsing routines')
    optparser.add_option('--mail', action='store_true', default=False,
        help='Send data errors as email (according to "error_config" map in '
            'config file) instead of printing to stderr', dest='mail')
    optparser.add_option('--refresh', action='store_true', default=False,
        help='Regenerate teams and milestones tables.', dest='refresh')

    (opts, args) = optparser.parse_args()

    if not opts.database:
        optparser.error('No database given')
    if not opts.config:
        optparser.error('No config given')

    return (opts, args)

def send_error_mails(cfg):
    '''Send data_errors to contacts.

    Data error contacts are defined in the configuration in the "error_contact"
    map (which assigns a regexp over spec names to a list of email addresses).
    If no match is found, the error goes to stderr.
    '''
    if data_errors is None:
        return

    # sort errors into address buckets
    emails = {} # email address -> contents

    dbg('mailing %i data errors' % len(data_errors))
    for (spec_name, msg) in data_errors:
        for pattern, addresses in cfg['error_contact'].iteritems():
            if re.search(pattern, spec_name):
                dbg('spec %s matches error_contact pattern "%s", mailing to %s' % (spec_name,
                    pattern, ', '.join(addresses)))
                for a in addresses:
                    emails.setdefault(a, '')
                    emails[a] += msg + '\n'
                break
        else:
            print >> sys.stderr, msg, '(no error_contact pattern)'

    # send mails
    for addr, contents in emails.iteritems():
        msg = MIMEText(contents.encode('ascii', 'replace'))
        msg['Subject'] = 'Errors in work item definitions'
        msg['From'] = 'Launchpad work item tracker <work-items-tracker-hackers@lists.launchpad.net>'
        msg['To'] = addr
        s = smtplib.SMTP()
        s.connect()
        s.sendmail(os.environ.get('EMAIL', pwd.getpwuid(os.geteuid()).pw_name + '@localhost'),
                addr, msg.as_string())
        s.quit()


if __name__ == '__main__':
    report_tools.fix_stdouterr()

    (opts, args) = parse_argv()

    if opts.debug:
        debug = True
    if opts.mail:
        data_errors = []

    cfg = report_tools.load_config(opts.config)

    if 'bug_status_map' in cfg:
        bug_wi_states.update(cfg['bug_status_map'])

    lock_path = opts.database + ".collect_lock"
    lock_f = open(lock_path, "wb")
    if report_tools.lock_file(lock_f) is None:
        print "Another instance is already running"
        sys.exit(0)

    db = get_db(opts.database)

    # reset status for current day
    cur = db.cursor()
    cur.execute('DELETE FROM work_items WHERE date = date(CURRENT_TIMESTAMP)')
    cur.execute('DELETE FROM specs')
    cur.execute('DELETE FROM meta')
    cur.execute('DELETE FROM complexity')    
    if opts.refresh:
        cur.execute('DELETE FROM milestones')
        cur.execute('DELETE FROM teams')

    lp_import(db, cfg, opts.pattern)
    moin_import(db, cfg)

    db.commit()
    send_error_mails(cfg)

    os.unlink(lock_path)