21
21
# Contributor(s): Terry Weissman <terry@mozilla.org>,
22
22
# Harrison Page <harrison@netscape.com>
23
# Gervase Markham <gerv@gerv.net>
23
# Gervase Markham <gerv@gerv.net>
24
# Richard Walters <rwalters@qualcomm.com>
25
# Jean-Sebastien Guay <jean_seb@hybride.com>
25
27
# Run me out of cron at midnight to collect Bugzilla statistics.
29
# To run new charts for a specific date, pass it in on the command line in
30
# ISO (2004-08-14) format.
31
35
use vars @::legal_product;
33
use lib '/usr/share/bugzilla/lib';
35
38
require "globals.pl";
37
my $var_dir="/var/lib/bugzilla/";
39
# tidy up after graphing module
40
if (chdir("$var_dir/graphs")) {
43
use Bugzilla::Config qw(:DEFAULT $datadir);
45
# Turn off output buffering (probably needed when displaying output feedback
46
# in the regenerate mode.)
49
# Tidy up after graphing module
50
if (chdir("graphs")) {
58
Bugzilla->switch_to_shadow_db();
60
# To recreate the daily statistics, run "collectstats.pl --regenerate" .
62
if ($#ARGV >= 0 && $ARGV[0] eq "--regenerate") {
50
68
push( @myproducts, "-All-", @::legal_product );
52
71
foreach (@myproducts) {
53
my $dir = "$var_dir/data/mining";
72
my $dir = "$datadir/mining";
55
74
&check_data_dir ($dir);
56
&collect_stats ($dir, $_);
77
®enerate_stats($dir, $_);
79
&collect_stats($dir, $_);
83
# Uncomment the following line for performance testing.
84
#print "Total time taken " . delta_time($tstart, $tend) . "\n";
59
86
&calculate_dupes();
91
local $ENV{'GATEWAY_INTERFACE'} = 'cmdline';
92
local $ENV{'REQUEST_METHOD'} = 'GET';
93
local $ENV{'QUERY_STRING'} = 'ctype=rdf';
98
# Generate a static RDF file containing the default view of the duplicates data.
99
open(CGI, "$perl -T duplicates.cgi |")
100
|| die "can't fork duplicates.cgi: $!";
101
open(RDF, ">$datadir/duplicates.tmp")
102
|| die "can't write to $datadir/duplicates.tmp: $!";
103
my $headers_done = 0;
105
print RDF if $headers_done;
106
$headers_done = 1 if $_ eq "\r\n";
111
if (-s "$datadir/duplicates.tmp") {
112
rename("$datadir/duplicates.rdf", "$datadir/duplicates-old.rdf");
113
rename("$datadir/duplicates.tmp", "$datadir/duplicates.rdf");
61
116
sub check_data_dir {
135
194
# Save % count here in a date-named file
136
195
# so we can read it back in to do changed counters
137
196
# First, delete it if it exists, so we don't add to the contents of an old file
138
if (my @files = <$var_dir/data/duplicates/dupes$today*>) {
197
if (my @files = <$datadir/duplicates/dupes$today*>) {
198
map { trick_taint($_) } @files;
142
dbmopen(%count, "$var_dir/data/duplicates/dupes$today", 0644) || die "Can't open DBM dupes file: $!";
202
dbmopen(%count, "$datadir/duplicates/dupes$today", 0644) || die "Can't open DBM dupes file: $!";
144
204
# Create a hash with key "a bug number", value "bug which that bug is a
145
205
# direct dupe of" - straight from the duplicates table.
146
while (@row = FetchSQLData()) {
147
my $dupe_of = shift @row;
148
my $dupe = shift @row;
206
foreach my $row (@$rows) {
207
my ($dupe_of, $dupe) = @$row;
149
208
$dupes{$dupe} = $dupe_of;
194
253
dbmclose(%count);
256
# This regenerates all statistics from the database.
257
sub regenerate_stats {
261
my $dbh = Bugzilla->dbh;
262
my $when = localtime(time());
265
# NB: Need to mangle the product for the filename, but use the real
266
# product name in the query
267
my $file_product = $product;
268
$file_product =~ s/\//-/gs;
269
my $file = join '/', $dir, $file_product;
273
my $and_product = "";
274
my $from_product = "";
276
if ($product ne '-All-') {
277
$and_product = " AND products.name = " . SqlQuote($product);
278
$from_product = "INNER JOIN products " .
279
"ON bugs.product_id = products.id";
282
# Determine the start date from the date the first bug in the
283
# database was created, and the end date from the current day.
284
# If there were no bugs in the search, return early.
285
SendSQL("SELECT " . $dbh->sql_to_days('creation_ts') . " AS start, " .
286
$dbh->sql_to_days('current_date') . " AS end, " .
287
$dbh->sql_to_days("'1970-01-01'") .
288
" FROM bugs $from_product WHERE " .
289
$dbh->sql_to_days('creation_ts') . " IS NOT NULL " .
291
" ORDER BY start " . $dbh->sql_limit(1));
293
my ($start, $end, $base) = FetchSQLData();
294
if (!defined $start) {
298
if (open DATA, ">$file") {
301
# Bugzilla Daily Bug Stats
303
# Do not edit me! This file is generated.
305
# fields: DATE|NEW|ASSIGNED|REOPENED|UNCONFIRMED|RESOLVED|VERIFIED|CLOSED|FIXED|INVALID|WONTFIX|LATER|REMIND|DUPLICATE|WORKSFORME|MOVED
309
# For each day, generate a line of statistics.
310
my $total_days = $end - $start;
311
for (my $day = $start + 1; $day <= $end; $day++) {
312
# Some output feedback
313
my $percent_done = ($day - $start - 1) * 100 / $total_days;
314
printf "\rRegenerating $product \[\%.1f\%\%]", $percent_done;
316
# Get a list of bugs that were created the previous day, and
317
# add those bugs to the list of bugs for this product.
318
SendSQL("SELECT bug_id FROM bugs $from_product " .
319
" WHERE bugs.creation_ts < " . $dbh->sql_from_days($day - 1) .
320
" AND bugs.creation_ts >= " . $dbh->sql_from_days($day - 2) .
325
while (@row = FetchSQLData()) {
329
# For each bug that existed on that day, determine its status
330
# at the beginning of the day. If there were no status
331
# changes on or after that day, the status was the same as it
332
# is today, which can be found in the bugs table. Otherwise,
333
# the status was equal to the first "previous value" entry in
334
# the bugs_activity table for that bug made on or after that
337
my @logstates = qw(NEW ASSIGNED REOPENED UNCONFIRMED RESOLVED
339
my @logresolutions = qw(FIXED INVALID WONTFIX LATER REMIND
340
DUPLICATE WORKSFORME MOVED);
341
foreach (@logstates) {
345
foreach (@logresolutions) {
349
for my $bug (@bugs) {
350
# First, get information on various bug states.
351
SendSQL("SELECT bugs_activity.removed " .
352
" FROM bugs_activity " .
353
"INNER JOIN fielddefs " .
354
" ON bugs_activity.fieldid = fielddefs.fieldid " .
355
" WHERE fielddefs.name = 'bug_status' " .
356
" AND bugs_activity.bug_id = $bug " .
357
" AND bugs_activity.bug_when >= " . $dbh->sql_from_days($day) .
358
" ORDER BY bugs_activity.bug_when " .
362
if (@row = FetchSQLData()) {
365
SendSQL("SELECT bug_status FROM bugs WHERE bug_id = $bug");
366
$status = FetchOneColumn();
369
if (defined $bugcount{$status}) {
370
$bugcount{$status}++;
373
# Next, get information on various bug resolutions.
374
SendSQL("SELECT bugs_activity.removed " .
375
" FROM bugs_activity " .
376
"INNER JOIN fielddefs " .
377
" ON bugs_activity.fieldid = fielddefs.fieldid " .
378
" WHERE fielddefs.name = 'resolution' " .
379
" AND bugs_activity.bug_id = $bug " .
380
" AND bugs_activity.bug_when >= " . $dbh->sql_from_days($day) .
381
" ORDER BY bugs_activity.bug_when " .
384
if (@row = FetchSQLData()) {
387
SendSQL("SELECT resolution FROM bugs WHERE bug_id = $bug");
388
$status = FetchOneColumn();
391
if (defined $bugcount{$status}) {
392
$bugcount{$status}++;
396
# Generate a line of output containing the date and counts
397
# of bugs in each state.
398
my $date = sqlday($day, $base);
400
foreach (@logstates) {
401
print DATA "|$bugcount{$_}";
404
foreach (@logresolutions) {
405
print DATA "|$bugcount{$_}";
411
# Finish up output feedback for this product.
413
print "\rRegenerating $product \[100.0\%] - " .
414
delta_time($tstart, $tend) . "\n";
198
422
my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
199
423
return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
204
428
return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
432
my ($day, $base) = @_;
433
$day = ($day - $base) * 86400;
434
my ($dom, $mon, $year) = (gmtime($day))[3, 4, 5];
435
return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
441
my $delta = $tend - $tstart;
442
my $hours = int($delta/3600);
443
my $minutes = int($delta/60) - ($hours * 60);
444
my $seconds = $delta - ($minutes * 60) - ($hours * 3600);
445
return sprintf("%02d:%02d:%02d" , $hours, $minutes, $seconds);
448
sub CollectSeriesData {
449
# We need some way of randomising the distribution of series, such that
450
# all of the series which are to be run every 7 days don't run on the same
451
# day. This is because this might put the server under severe load if a
452
# particular frequency, such as once a week, is very common. We achieve
453
# this by only running queries when:
454
# (days_since_epoch + series_id) % frequency = 0. So they'll run every
455
# <frequency> days, but the start date depends on the series_id.
456
my $days_since_epoch = int(time() / (60 * 60 * 24));
457
my $today = $ARGV[0] || today_dash();
459
# We save a copy of the main $dbh and then switch to the shadow and get
460
# that one too. Remember, these may be the same.
461
my $dbh = Bugzilla->switch_to_main_db();
462
my $shadow_dbh = Bugzilla->switch_to_shadow_db();
464
my $serieses = $dbh->selectall_hashref("SELECT series_id, query, creator " .
466
"WHERE frequency != 0 AND " .
467
"($days_since_epoch + series_id) % frequency = 0",
470
# We prepare the insertion into the data table, for efficiency.
471
my $sth = $dbh->prepare("INSERT INTO series_data " .
472
"(series_id, series_date, series_value) " .
473
"VALUES (?, " . $dbh->quote($today) . ", ?)");
475
# We delete from the table beforehand, to avoid SQL errors if people run
476
# collectstats.pl twice on the same day.
477
my $deletesth = $dbh->prepare("DELETE FROM series_data
478
WHERE series_id = ? AND series_date = " .
479
$dbh->quote($today));
481
foreach my $series_id (keys %$serieses) {
482
# We set up the user for Search.pm's permission checking - each series
483
# runs with the permissions of its creator.
484
my $user = new Bugzilla::User($serieses->{$series_id}->{'creator'});
486
my $cgi = new Bugzilla::CGI($serieses->{$series_id}->{'query'});
487
my $search = new Bugzilla::Search('params' => $cgi,
488
'fields' => ["bugs.bug_id"],
490
my $sql = $search->getSQL();
494
# We can't die if we get dodgy SQL back for whatever reason, so we
495
# eval() this and, if it fails, just ignore it and carry on.
496
# One day we might even log an error.
498
$data = $shadow_dbh->selectall_arrayref($sql);
502
# We need to count the returned rows. Without subselects, we can't
503
# do this directly in the SQL for all queries. So we do it by hand.
504
my $count = scalar(@$data) || 0;
506
$deletesth->execute($series_id);
507
$sth->execute($series_id, $count);