1.1.4
by Tobias Frost
Import upstream version 7.1.32-rc |
1 |
# Copyright (C) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
2 |
# Use is subject to license terms.
|
|
3 |
#
|
|
4 |
# This program is free software; you can redistribute it and/or modify
|
|
5 |
# it under the terms of the GNU General Public License as published by
|
|
6 |
# the Free Software Foundation; version 2 of the License.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful, but
|
|
9 |
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
11 |
# General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
|
16 |
# USA
|
|
17 |
||
18 |
package GenTest::Reporter::Deadlock; |
|
19 |
||
20 |
require Exporter; |
|
21 |
@ISA = qw(GenTest::Reporter); |
|
22 |
||
23 |
use strict; |
|
24 |
use GenTest; |
|
25 |
use GenTest::Constants; |
|
26 |
use GenTest::Result; |
|
27 |
use GenTest::Reporter; |
|
28 |
use GenTest::Executor::MySQL; |
|
29 |
||
30 |
use DBI; |
|
31 |
use Data::Dumper; |
|
32 |
use POSIX; |
|
33 |
||
34 |
# Column positions read from each row returned by SHOW FULL PROCESSLIST.
use constant {
    PROCESSLIST_PROCESS_TIME => 5,
    PROCESSLIST_PROCESS_INFO => 7,
};

# Thresholds that drive the deadlock detection below.
use constant {
    # Time, in seconds, we wait for a connection before declaring the server hung.
    CONNECT_TIMEOUT_THRESHOLD       => 20,

    # Minimum lifetime of a query, in seconds, before it is considered suspicious.
    QUERY_LIFETIME_THRESHOLD        => 600,

    # Number of suspicious queries required before a deadlock is declared.
    STALLED_QUERY_COUNT_THRESHOLD   => 5,

    # Number of times the actual test duration may exceed the desired one.
    ACTUAL_TEST_DURATION_MULTIPLIER => 2,
};
|
48 |
||
49 |
# Periodic entry point of the reporter.
#
# Declares a deadlock outright when the test has already run for more than
# ACTUAL_TEST_DURATION_MULTIPLIER times its desired duration; otherwise hands
# over to the platform-specific check (threaded on Windows, alarm-based
# everywhere else) and returns whatever status that check produces.
sub monitor {
    my $reporter = shift;

    my $elapsed_seconds = time() - $reporter->testStart();
    my $overrun_limit   = ACTUAL_TEST_DURATION_MULTIPLIER * $reporter->testDuration();

    if ($elapsed_seconds > $overrun_limit) {
        say(
            "Actual test duration ($elapsed_seconds seconds) is more than "
            . (ACTUAL_TEST_DURATION_MULTIPLIER)
            . " times the desired duration ("
            . $reporter->testDuration()
            . " seconds)"
        );
        return STATUS_SERVER_DEADLOCKED;
    }

    return osWindows()
        ? $reporter->monitor_threaded()
        : $reporter->monitor_nonthreaded();
}
|
|
65 |
||
66 |
# Alarm-based deadlock check used on non-Windows platforms.
#
# Opens a fresh connection to the reporter's DSN, reads SHOW FULL PROCESSLIST
# and counts the queries that have been running for longer than
# QUERY_LIFETIME_THRESHOLD seconds.
#
# Returns:
#   * the status mapped by GenTest::Executor::MySQL::errorType() when the
#     connect or the processlist query fails with a mapped error;
#   * STATUS_UNKNOWN_ERROR when either step fails without a mapped error;
#   * STATUS_SERVER_DEADLOCKED when at least STALLED_QUERY_COUNT_THRESHOLD
#     stalled queries are found (diagnostics are printed first);
#   * STATUS_OK otherwise.
#
# If the connect or the processlist query takes longer than
# CONNECT_TIMEOUT_THRESHOLD seconds, the SIGALRM handler terminates the whole
# process with exit code STATUS_SERVER_DEADLOCKED.
sub monitor_nonthreaded {
    my $reporter = shift;
    my $dsn = $reporter->dsn();

    # We connect on every run in order to be able to use the
    # mysql_connect_timeout to detect very debilitating deadlocks.

    # We directly call exit() in the handler because attempting to catch and
    # handle the signal in a more civilized manner does not work for some
    # reason -- the read() call from the server gets restarted instead.
    sigaction(
        SIGALRM,
        POSIX::SigAction->new(sub { exit(STATUS_SERVER_DEADLOCKED); })
    ) or die "Error setting SIGALRM handler: $!\n";

    alarm(CONNECT_TIMEOUT_THRESHOLD);
    my $dbh = DBI->connect($dsn, undef, undef, { mysql_connect_timeout => CONNECT_TIMEOUT_THRESHOLD * 2 });

    my $connect_status = GenTest::Executor::MySQL::errorType($DBI::err);
    if (defined $connect_status) {
        alarm(0);
        return $connect_status;
    } elsif (not defined $dbh) {
        alarm(0);
        return STATUS_UNKNOWN_ERROR;
    }

    my $processlist = $dbh->selectall_arrayref("SHOW FULL PROCESSLIST");
    alarm(0);

    # A failed query yields undef. Iterating over it would silently behave
    # like an empty processlist and report STATUS_OK, so surface the error
    # instead.
    if (not defined $processlist) {
        my $query_status = GenTest::Executor::MySQL::errorType($DBI::err);
        return defined $query_status ? $query_status : STATUS_UNKNOWN_ERROR;
    }

    my $stalled_queries = 0;

    foreach my $process (@$processlist) {
        my $info = $process->[PROCESSLIST_PROCESS_INFO];

        # Only rows that carry query text count as candidates.
        next if !defined $info || $info eq '';

        if ($process->[PROCESSLIST_PROCESS_TIME] > QUERY_LIFETIME_THRESHOLD) {
            $stalled_queries++;
        }
    }

    return STATUS_OK if $stalled_queries < STALLED_QUERY_COUNT_THRESHOLD;

    say("$stalled_queries stalled queries detected, declaring deadlock at DSN $dsn.");

    # "SHOW OPEN TABLES" is left out on purpose - disabled due to bug #46433
    foreach my $status_query (
        "SHOW PROCESSLIST",
        "SHOW ENGINE INNODB STATUS"
    ) {
        say("Executing $status_query:");
        my $status_result = $dbh->selectall_arrayref($status_query);
        print Dumper($status_result);
    }

    return STATUS_SERVER_DEADLOCKED;
}
|
|
125 |
||
126 |
# Thread-based deadlock check (selected by monitor() on Windows).
#
# Two threads are started:
#   * alarm_thread keeps a timeout so that we do not hang forever;
#   * dbh_thread attempts to connect to the database and thus can hang
#     forever because there are no network-level timeouts in DBD::mysql.
#
# The status returned by whichever thread finishes with a defined value is
# handed back to the caller; any thread still running is then killed.
sub monitor_threaded {
    my $reporter = shift;

    require threads;

    # Both threads are created in scalar context, which fixes the context of
    # the value later obtained from join().
    my $alarm_thread = threads->create(\&alarm_thread);
    my $dbh_thread   = threads->create(\&dbh_thread, $reporter);

    my $status;

    # Poll once per second until some thread has been reaped with a defined
    # exit status.
    until (defined $status) {
        for my $worker ($alarm_thread, $dbh_thread) {
            next unless defined $worker && $worker->is_joinable();
            $status = $worker->join();
        }
        sleep(1) unless defined $status;
    }

    # Kill whatever is still running.
    for my $survivor ($alarm_thread, $dbh_thread) {
        next unless $survivor->is_running();

        unless (osWindows()) {
            $survivor->kill('SIGKILL')->join();
            next;
        }

        # Windows hangs when joining killed threads, so only signal it.
        $survivor->kill('SIGKILL');
    }

    return $status;
}
|
|
167 |
||
168 |
# Watchdog thread body: waits CONNECT_TIMEOUT_THRESHOLD seconds and, if it is
# still alive by then, reports an entire-server deadlock by returning
# STATUS_SERVER_DEADLOCKED. A KILL signal makes the thread exit early.
sub alarm_thread {
    local $SIG{KILL} = sub { threads->exit() };

    # Sleep in one-second slices so that signals can get delivered in the
    # meantime.
    my $seconds_left = CONNECT_TIMEOUT_THRESHOLD;
    while ($seconds_left-- > 0) {
        sleep(1);
    }

    say("Entire-server deadlock detected.");
    return STATUS_SERVER_DEADLOCKED;
}
|
|
180 |
||
181 |
# Worker thread body: connects to the reporter's DSN, reads
# SHOW FULL PROCESSLIST and counts the queries that have been running for
# longer than QUERY_LIFETIME_THRESHOLD seconds. A KILL signal makes the thread
# exit early.
#
# Returns (always a defined value):
#   * the status mapped by GenTest::Executor::MySQL::errorType() when the
#     connect or the processlist query fails with a mapped error;
#   * STATUS_UNKNOWN_ERROR when either step fails without a mapped error;
#   * STATUS_SERVER_DEADLOCKED when at least STALLED_QUERY_COUNT_THRESHOLD
#     stalled queries are found (the processlist is dumped first);
#   * STATUS_OK otherwise.
sub dbh_thread {
    local $SIG{KILL} = sub { threads->exit() };
    my $reporter = shift;
    my $dsn = $reporter->dsn();

    # We connect on every run in order to be able to use a timeout to detect
    # very debilitating deadlocks.
    my $dbh = DBI->connect($dsn, undef, undef, { mysql_connect_timeout => CONNECT_TIMEOUT_THRESHOLD * 2, PrintError => 1, RaiseError => 0 });

    my $connect_status = GenTest::Executor::MySQL::errorType($DBI::err);
    if (defined $connect_status) {
        return $connect_status;
    } elsif (not defined $dbh) {
        return STATUS_UNKNOWN_ERROR;
    }

    my $processlist = $dbh->selectall_arrayref("SHOW FULL PROCESSLIST");

    if (not defined $processlist) {
        # monitor_threaded() keeps polling until it reaps a *defined* status,
        # so an unmapped error must not be returned as undef: it would only
        # be noticed once alarm_thread reports a deadlock.
        my $query_status = GenTest::Executor::MySQL::errorType($DBI::err);
        return defined $query_status ? $query_status : STATUS_UNKNOWN_ERROR;
    }

    my $stalled_queries = 0;

    foreach my $process (@$processlist) {
        my $info = $process->[PROCESSLIST_PROCESS_INFO];

        # Only rows that carry query text count as candidates.
        next if !defined $info || $info eq '';

        if ($process->[PROCESSLIST_PROCESS_TIME] > QUERY_LIFETIME_THRESHOLD) {
            $stalled_queries++;
        }
    }

    if ($stalled_queries >= STALLED_QUERY_COUNT_THRESHOLD) {
        say("$stalled_queries stalled queries detected, declaring deadlock at DSN $dsn.");
        print Dumper($processlist);
        return STATUS_SERVER_DEADLOCKED;
    }

    return STATUS_OK;
}
|
|
218 |
||
219 |
# Collects post-mortem information from the server once a deadlock has been
# declared.
#
# On Windows ($^O is MSWin32/MSWin64) a minidump named mysqld.dmp is written
# into the server's datadir by attaching cdb to the server process. Elsewhere
# the server is sent SIGHUP to force debug output and then SIGSEGV to capture
# a core, with a pause after each signal.
#
# Always returns STATUS_OK.
sub report {

    my $reporter = shift;
    my $server_pid = $reporter->serverInfo('pid');
    my $datadir = $reporter->serverVariable('datadir');

    # kill() with an undefined or zero pid would signal our own process group
    # instead of the server, so refuse to continue without a usable pid.
    unless (defined $server_pid && $server_pid > 0) {
        say("Unable to determine the pid of mysqld, skipping debug output and core capture.");
        return STATUS_OK;
    }

    if (
        ($^O eq 'MSWin32') ||
        ($^O eq 'MSWin64')
    ) {
        # Build the dump path with File::Spec: a bare "\m" inside a
        # double-quoted string loses its backslash, which glued the file name
        # straight onto $datadir.
        require File::Spec;
        my $dump_file = (defined $datadir && $datadir ne '')
            ? File::Spec->catfile($datadir, 'mysqld.dmp')
            : 'mysqld.dmp';

        my $cdb_command = "cdb -p $server_pid -c \".dump /m $dump_file;q\"";
        say("Executing $cdb_command");
        system($cdb_command);
    } else {
        say("Killing mysqld with pid $server_pid with SIGHUP in order to force debug output.");
        kill('HUP', $server_pid);
        sleep(2);    # pause before the server is taken down

        say("Killing mysqld with pid $server_pid with SIGSEGV in order to capture core.");
        kill('SEGV', $server_pid);
        sleep(20);   # pause after the fatal signal before returning
    }

    return STATUS_OK;
}
|
|
244 |
||
245 |
# Reporter classification: the bitwise OR of REPORTER_TYPE_PERIODIC and
# REPORTER_TYPE_DEADLOCK.
sub type {
    my $type_mask = REPORTER_TYPE_PERIODIC;
    $type_mask |= REPORTER_TYPE_DEADLOCK;
    return $type_mask;
}
|
|
248 |
||
249 |
1; |
|
250 |