'''Crash database interface for the XML-RPC interface to bugzilla.redhat.com.

Copyright (C) 2007 Red Hat Inc.
Author: Will Woods <wwoods@redhat.com>

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version. See http://www.gnu.org/copyleft/gpl.html for
the full text of the license.
'''
import cookielib
import datetime
import gzip
import os
import tempfile
import urllib2
import xmlrpclib

import rpmUtils

from apport.report import Report
def __init__(self, auth_file, bugpattern_baseurl, options):
    '''Initialize crash database connection.

    You need to specify an implementation specific file with the
    authentication credentials for retracing access for download() and
    update(). For upload() and get_comment_url() you can use None.

    options is a dictionary with additional settings from crashdb.conf; see
    get_crashdb() for details'''
    self.auth_file = auth_file
    self.options = options
    baseurl = options.get('baseurl')
    # normalize: drop any trailing '/' so URL joins below stay well-formed
    self.baseurl = baseurl.strip('/')
    self.bugpattern_baseurl = bugpattern_baseurl
    self.duplicate_db = None

    # Set up our urllib/xmlrpclib transports to use the same cookiejar, so
    # the XML-RPC login cookie also authenticates the urllib2 downloads.
    self.cookiejar = cookielib.CookieJar() # TODO use firefox cookies?
    if self.baseurl.startswith('https'):
        transport = SafeCookieTransport()
    else:
        # NOTE(review): this else branch was missing in the mangled source;
        # reconstructed as the plain-HTTP counterpart.
        transport = CookieTransport()
    transport.cookiejar = self.cookiejar
    self.server = xmlrpclib.ServerProxy(self.baseurl + '/xmlrpc.cgi', transport=transport)
    self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar))

    # FIXME AUGH GET A REAL L/P FOR THIS
    # SECURITY: hardcoded fallback credentials should be removed; they
    # belong in auth_file/options only.
    self.user = options.get('user') or 'wwoods@willrad.com'
    self.password = options.get('password') or 'Yd6OhLCYgZ'
def get_bugpattern_baseurl(self):
    '''Return the base URL used for bug pattern lookup.

    A None return value disables bug patterns; see
    apport.report.Report.search_bug_patterns() for how this is consumed.'''
    return self.bugpattern_baseurl
# helper functions specific to Red Hat / Fedora + Bugzilla
59
def _get_component(self, pkgname):
    '''Return the Bugzilla component for the given package name.

    The pkg can be a SRPM or RPM or whatever your convention is.'''
    # Fedora convention: the component is the name part of the package
    # filename as split by rpmUtils.
    parts = rpmUtils.miscutils.splitFilename(pkgname)
    return parts[0]
def _map_distro(self, distrorelease):
66
'''Returns the proper product and version for the given DistroRelease.'''
67
(distro, release) = distrorelease.split()
68
if 'Fedora' in distro:
72
elif int(release) >= 7:
74
elif int(release) <= 6:
75
version = 'fc'+release
76
if distro.startswith('RedHatEnterprise'):
77
product = 'Red Hat Enterprise Linux' # XXX check this
78
version = release # XXX check update number
79
return (product,version)
81
def _attach_file(self, bugid, fileobj, filename='data', desc='uploaded file'):
    '''Attach the contents of fileobj to the given bug.

    Returns the attachment ID of the new attachment.'''
    # NOTE(review): most of this dict was missing in the mangled source;
    # fields reconstructed from the bugzilla addAttachment API — verify
    # against the server's expected form.
    attach = {
        'data': fileobj.read().encode('base64'),
        'description': desc,
        'filename': filename,
        'contenttype': 'text/plain',
        'ispatch': 0,
        'isprivate': 1,
    }
    response = self.server.bugzilla.addAttachment(bugid, attach, self.user, self.password)
    # FIXME fault checking
    return response[0] # attachment id
def _get_attachid(self,bugid):
96
'''Finds the most current crash report attached to the given bug, and
97
returns its attachment ID.'''
98
bug = self.server.bugzilla.getBug(bugid,self.user,self.password)
100
# Find the ID of the newest, non-obsolete crash report
101
for attach in bug['attachments']:
102
if attach['description'] == 'Full crash report' and not attach['isobsolete']:
103
attachid = max(attachid,attach['attachid'])
106
# Concrete implementations
108
def upload(self, report):
    '''Upload given problem report and return a handle for it.

    This should happen noninteractively. Returns the new bug ID.'''
    (product, version) = self._map_distro(report['DistroRelease'])
    # NOTE(review): several form fields were missing in the mangled source;
    # reconstructed from the Bugzilla createBug form — verify field names.
    form = {
        'product': product, # FIXME allow override from self.options?
        'version': version,
        'component': self._get_component(report['SourcePackage']),
        'rep_platform': report['Architecture'], # XXX may need massaging
        'op_sys': 'Linux',
        'bug_severity': 'medium',
        'priority': 'medium',
        'short_desc': report['Title'],
        'comment': report['Stacktrace'] + "\n\n" + report['Disassembly'],
        #'bit-71':1, # Security bug - XXX get a real giveaway group?
    }
    response = self.server.bugzilla.createBug(form, self.user, self.password)
    # FIXME fault checking
    bug_id = response[0]

    # Write crash data to a temporary file
    tmpf = tempfile.TemporaryFile()
    report.write(tmpf)
    tmpf.seek(0)

    # Attach that file to the bug report with a reasonable filename.
    crashname = report['ExecutablePath'].lstrip('/').replace('/', '_') + '.crash'
    # Bug fix: use the 'Full crash report' description so that
    # _get_attachid() (which searches for exactly that string) can find
    # the attachment again later.
    self._attach_file(bug_id, tmpf, crashname, 'Full crash report')
    # FIXME fault checking
    return bug_id
def get_comment_url(self, report, handle):
    '''Return an URL that should be opened after report has been uploaded
    and upload() returned handle.

    Should return None if no URL should be opened (anonymous filing without
    user comments); in that case this function should do whichever
    interactive steps it wants to perform.'''
    # point the user at the newly filed bug
    return '%s/show_bug.cgi?id=%s' % (self.baseurl, handle)
def download(self, id):
    '''Download the problem report from given ID and return a Report.

    Note that this will require an account with sufficient privilege
    to read private attachments.'''
    # Note: this sets our login cookie so the opener request will work
    attachid = self._get_attachid(id)
    # Download the attachment to a temporary file
    url = self.baseurl + '/attachment.cgi?id=' + str(attachid)
    download = self.opener.open(url)
    tmpf = tempfile.TemporaryFile()
    for data in download.xreadlines():
        tmpf.write(data.decode('base64'))
    # rewind so the Report parser reads from the start
    tmpf.seek(0)

    # Read the attachment into a Report object
    # NOTE(review): tail of this method was missing; reconstructed as the
    # obvious load-and-return — verify Report.load() is the right call.
    report = Report()
    report.load(tmpf)
    return report
def update(self, id, report, comment):
    '''Update the given report ID with the retraced results from the report
    (Stacktrace, ThreadStacktrace, StacktraceTop; also Disassembly if
    desired) and an optional comment.'''
    attachid = self._get_attachid(id)
    # TODO: mark attachid obsolete, then upload the new report with comment
    # use the raise-as-call form; the py2-only statement form breaks py3
    raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_distro_release(self, id):
    '''Get 'DistroRelease: <release>' from the given report ID and return
    it.'''
    # py2-only "raise E, 'msg'" syntax replaced with the portable call form
    raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_unretraced(self):
    '''Return an ID set of all crashes which have not been retraced yet and
    which happened on the current host architecture.'''
    # py2-only "raise E, 'msg'" syntax replaced with the portable call form
    raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_dup_unchecked(self):
    '''Return an ID set of all crashes which have not been checked for
    being a duplicate.

    This is mainly useful for crashes of scripting languages such as
    Python, since they do not need to be retraced. It should not return
    bugs that are covered by get_unretraced().'''
    # py2-only "raise E, 'msg'" syntax replaced with the portable call form
    raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_unfixed(self):
    '''Return an ID set of all crashes which are not yet fixed.

    The list must not contain bugs which were rejected or duplicate.

    This function should make sure that the returned list is correct. If
    there are any errors with connecting to the crash database, it should
    raise an exception (preferably IOError).'''
    # py2-only "raise E, 'msg'" syntax replaced with the portable call form
    raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_fixed_version(self, id):
    '''Return the package version that fixes a given crash.

    Return None if the crash is not yet fixed, or an empty string if the
    crash is fixed, but it cannot be determined by which version. Return
    'invalid' if the crash report got invalidated, such as closed a
    duplicate or rejected.

    This function should make sure that the returned result is correct. If
    there are any errors with connecting to the crash database, it should
    raise an exception (preferably IOError).'''
    # py2-only "raise E, 'msg'" syntax replaced with the portable call form
    raise NotImplementedError('this method must be implemented by a concrete subclass')
def close_duplicate(self, id, master):
    '''Mark a crash id as duplicate of given master ID.'''
    # py2-only "raise E, 'msg'" syntax replaced with the portable call form
    raise NotImplementedError('this method must be implemented by a concrete subclass')
def mark_regression(self, id, master):
    '''Mark a crash id as reintroducing an earlier crash which is
    already marked as fixed (having ID 'master').'''
    # py2-only "raise E, 'msg'" syntax replaced with the portable call form
    raise NotImplementedError('this method must be implemented by a concrete subclass')
def mark_retraced(self, id):
    '''Mark crash id as retraced.'''
    # py2-only "raise E, 'msg'" syntax replaced with the portable call form
    raise NotImplementedError('this method must be implemented by a concrete subclass')
def mark_retrace_failed(self, id):
    '''Mark crash id as 'failed to retrace'.

    This can be a no-op if you are not interested in this, of course.'''
    # py2-only "raise E, 'msg'" syntax replaced with the portable call form
    raise NotImplementedError('this method must be implemented by a concrete subclass')
def _mark_dup_checked(self, id, report):
253
'''Mark crash id as checked for being a duplicate
255
This is an internal method that should not be called from outside.'''
257
raise NotImplementedError, 'this method must be implemented by a concrete subclass'
259
# API for duplicate detection
261
# Tests are in apport/crashdb_impl/memory.py.
263
def init_duplicate_db(self, path):
    '''Initialize duplicate database.

    path specifies an SQLite database. It will be created if it does not
    exist yet.'''
    import sqlite3 as dbapi2

    assert dbapi2.paramstyle == 'qmark', \
        'this module assumes qmark dbapi parameter style'

    # a fresh file (or an in-memory db) needs the schema created below
    init = not os.path.exists(path) or path == ':memory:'
    self.duplicate_db = dbapi2.connect(path)

    if init:
        cur = self.duplicate_db.cursor()
        cur.execute('''CREATE TABLE crashes (
            signature VARCHAR(255) NOT NULL,
            crash_id INTEGER NOT NULL,
            fixed_version VARCHAR(50),
            last_change TIMESTAMP)''')

        cur.execute('''CREATE TABLE consolidation (
            last_update TIMESTAMP)''')
        cur.execute('''INSERT INTO consolidation VALUES (CURRENT_TIMESTAMP)''')
        self.duplicate_db.commit()
def check_duplicate(self, id, report=None):
    '''Check whether a crash is already known.

    If the crash is new, it will be added to the duplicate database and the
    function returns None. If the crash is already known, the function
    returns a pair (crash_id, fixed_version), where fixed_version might be
    None if the crash is not fixed in the latest version yet. Depending on
    whether the version in report is smaller than/equal to the fixed
    version or larger, this calls close_duplicate() or mark_regression().

    If the report does not have a valid crash signature, this function does
    nothing and just returns None.

    By default, the report gets download()ed, but for performance reasons
    it can be explicitly passed to this function if it is already available.'''
    # NOTE(review): several spans of this method were missing in the
    # mangled source and were reconstructed from the surrounding logic;
    # verify against the original before relying on edge-case behavior.
    assert self.duplicate_db, 'init_duplicate_db() needs to be called before'

    if not report:
        report = self.download(id)

    self._mark_dup_checked(id, report)

    sig = report.crash_signature()
    if not sig:
        return None

    existing = self._duplicate_search_signature(sig)

    # sort existing in ascending order, with unfixed last, so that
    # version comparisons find the closest fix first
    def cmp(x, y):
        # '' = fixed in unknown version, None = unfixed; both sort last
        if x == y:
            return 0
        if x == '':
            if y == None:
                return -1
            return 1
        if y == '':
            if x == None:
                return 1
            return -1
        if x == None:
            return 1
        if y == None:
            return -1
        return packaging.compare_versions(x, y)
    existing.sort(cmp, lambda k: k[1])

    if not existing:
        # new crash signature: remember it and report "not a duplicate"
        cur = self.duplicate_db.cursor()
        cur.execute('INSERT INTO crashes VALUES (?, ?, ?, CURRENT_TIMESTAMP)', (sig, id, None))
        self.duplicate_db.commit()
        return None

    try:
        report_package_version = report['Package'].split()[1]
    except (KeyError, IndexError):
        report_package_version = None

    # search the newest fixed id or an unfixed id to check whether there is
    # a regression (crash happening on a later version than the latest
    # fixed one)
    for (ex_id, ex_ver) in existing:
        if not ex_ver or \
            not report_package_version or \
            packaging.compare_versions(report_package_version, ex_ver) < 0:
            self.close_duplicate(id, ex_id)
            break
    else:
        # regression, mark it as such in the crash db
        self.mark_regression(id, ex_id)

        # create a new record
        cur = self.duplicate_db.cursor()
        cur.execute('INSERT INTO crashes VALUES (?, ?, ?, CURRENT_TIMESTAMP)', (sig, id, None))
        self.duplicate_db.commit()

    return (ex_id, ex_ver)
def duplicate_db_fixed(self, id, version):
    '''Mark given crash ID as fixed in the duplicate database.

    version specifies the package version the crash was fixed in (None for
    'still unfixed').'''
    assert self.duplicate_db, 'init_duplicate_db() needs to be called before'

    cur = self.duplicate_db.cursor()
    # reconstructed parameter tuple (was missing in the mangled source)
    n = cur.execute('UPDATE crashes SET fixed_version = ?, last_change = CURRENT_TIMESTAMP WHERE crash_id = ?',
        (version, id))
    assert n.rowcount == 1
    self.duplicate_db.commit()
def duplicate_db_remove(self, id):
    '''Drop the given crash ID from the duplicate database.

    Used when a report got rejected or manually duplicated.'''
    assert self.duplicate_db, 'init_duplicate_db() needs to be called before'

    c = self.duplicate_db.cursor()
    c.execute('DELETE FROM crashes WHERE crash_id = ?', [id])
    self.duplicate_db.commit()
def duplicate_db_consolidate(self):
    '''Update the duplicate database status to the reality of the crash
    database.

    This uses get_unfixed() and get_fixed_version() to get the status of
    particular crashes. Invalid IDs get removed from the duplicate db, and
    crashes which got fixed since the last run are marked as such in the
    database.

    This is a very expensive operation and should not be used too often.'''
    # NOTE(review): branch structure partially reconstructed from the
    # surviving lines — verify the reopened/fixed transitions.
    assert self.duplicate_db, 'init_duplicate_db() needs to be called before'

    unfixed = self.get_unfixed()

    cur = self.duplicate_db.cursor()
    cur.execute('SELECT crash_id, fixed_version FROM crashes')

    cur2 = self.duplicate_db.cursor()
    for (id, ver) in cur:
        if id in unfixed:
            # crash got reopened: clear any stale fixed version
            if ver != None:
                cur2.execute('UPDATE crashes SET fixed_version = NULL, last_change = CURRENT_TIMESTAMP WHERE crash_id = ?', [id])
            continue

        if ver != None:
            continue # skip get_fixed_version(), we already know its fixed

        # crash got fixed/rejected
        fixed_ver = self.get_fixed_version(id)
        if fixed_ver == 'invalid':
            cur2.execute('DELETE FROM crashes WHERE crash_id = ?', [id])
        else:
            cur2.execute('UPDATE crashes SET fixed_version = ?, last_change = CURRENT_TIMESTAMP WHERE crash_id = ?',
                (fixed_ver, id))

    # poke consolidation.last_update
    cur.execute('UPDATE consolidation SET last_update = CURRENT_TIMESTAMP')
    self.duplicate_db.commit()
def duplicate_db_needs_consolidation(self, interval=86400):
    '''Check whether the last duplicate_db_consolidate() happened more than
    'interval' seconds ago (default: one day).'''
    assert self.duplicate_db, 'init_duplicate_db() needs to be called before'

    cur = self.duplicate_db.cursor()
    cur.execute('SELECT last_update FROM consolidation')
    last_run = datetime.datetime.strptime(cur.fetchone()[0],
        '%Y-%m-%d %H:%M:%S')
    # Bug fix: timedelta.seconds is only the sub-day remainder (0..86399),
    # so using it alone silently ignores whole elapsed days; compute the
    # full elapsed seconds instead.
    delta = datetime.datetime.utcnow() - last_run
    return delta.days * 86400 + delta.seconds >= interval
def _duplicate_search_signature(self, sig):
453
'''Look up signature in the duplicate db and return an [(id,
454
fixed_version)] tuple list.
456
There might be several matches if a crash has been reintroduced in a
459
cur = self.duplicate_db.cursor()
460
cur.execute('SELECT crash_id, fixed_version FROM crashes WHERE signature = ?', [sig])
461
return cur.fetchall()
463
def _duplicate_db_dump(self, with_timestamps=False):
464
'''Return the entire duplicate database as a dictionary signature ->
465
(crash_id, fixed_version).
467
If with_timestamps is True, then the map will contain triples
468
(crash_id, fixed_version, last_change) instead.
470
This is mainly useful for debugging and test suites.'''
472
assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
475
cur = self.duplicate_db.cursor()
476
cur.execute('SELECT * FROM crashes')
477
for (sig, id, ver, last_change) in cur:
479
dump[sig] = (id, ver, last_change)
481
dump[sig] = (id, ver)
484
class CookieTransport(xmlrpclib.Transport):
    '''A subclass of xmlrpclib.Transport that supports cookies.'''
    # lazily created in send_cookies(); shared with the crash database so
    # the XML-RPC login cookie also authenticates urllib2 downloads
    cookiejar = None

    def send_cookies(self, connection):
        '''Send every cookie in our jar as a Cookie header.'''
        if self.cookiejar is None:
            self.cookiejar = cookielib.CookieJar()

        for cookie in self.cookiejar:
            connection.putheader("Cookie", "%s=%s" % (cookie.name, cookie.value))

    def request(self, host, handler, request_body, verbose=0):
        '''Issue an XML-RPC request, harvesting any cookies the server sets.

        NOTE(review): the gaps in this method were reconstructed from the
        standard xmlrpclib.Transport.request() flow — verify against the
        original implementation.'''
        h = self.make_connection(host)
        if verbose:
            h.set_debuglevel(1)

        self.send_request(h, handler, request_body)
        self.send_host(h, host)
        self.send_cookies(h) # side-effect: creates cookiejar
        self.send_user_agent(h)
        self.send_content(h, request_body)

        errcode, errmsg, headers = h.getreply()

        # parse headers and get cookies here
        # use fake URL to satisfy Request constructor
        cookie_request = urllib2.Request('http://' + host + '/')
        # fake a response object that we can fill with the headers above
        class CookieResponse:
            def __init__(self, headers): self.headers = headers
            def info(self): return self.headers
        cookie_response = CookieResponse(headers)
        # Okay, extract the cookies from the headers
        self.cookiejar.extract_cookies(cookie_response, cookie_request)
        # done. back to our normal request() method.

        if errcode != 200:
            raise xmlrpclib.ProtocolError(
                host + handler,
                errcode, errmsg,
                headers
                )

        self.verbose = verbose

        try:
            sock = h._conn.sock
        except AttributeError:
            sock = None

        return self._parse_response(h.getfile(), sock)
class SafeCookieTransport(xmlrpclib.SafeTransport, CookieTransport):
    '''SafeTransport subclass that supports cookies.'''
    # reuse the cookie-aware request() implementation over HTTPS
    request = CookieTransport.request
def get_crashdb(auth_file, name=None, conf=None):
    '''Return a CrashDatabase object for the given crash db name, as specified
    in the configuration file 'conf'.

    If name is None, it defaults to the 'default' value in conf.

    If conf is None, it defaults to the environment variable
    APPORT_CRASHDB_CONF; if that does not exist, the hardcoded default is
    /etc/apport/crashdb.conf. This Python syntax file needs to specify:

    - A string variable 'default', giving a default value for 'name' if that is
      None.

    - A dictionary 'databases' which maps names to crash db configuration
      dictionaries. These need to have at least the keys 'impl' (Python module
      in apport.crashdb_impl which contains a concrete 'CrashDatabase' class
      implementation for that crash db type) and 'bug_pattern_base', which
      specifies an URL for bug patterns (or None if those are not used for that
      crash db).'''
    if not conf:
        conf = os.environ.get('APPORT_CRASHDB_CONF', '/etc/apport/crashdb.conf')
    # evaluate the configuration file as Python into a fresh namespace
    settings = {}
    execfile(conf, settings)

    if not name:
        name = settings['default']

    db = settings['databases'][name]

    # import the chosen implementation module and instantiate its class
    m = __import__('apport.crashdb_impl.' + db['impl'], globals(), locals(), ['CrashDatabase'], -1)
    return m.CrashDatabase(auth_file, db['bug_pattern_base'], db)