1
/* Copyright (C) 2003 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
17
#include <ndb_global.h>
18
#include <my_pthread.h>
19
#include <sys/times.h>
21
#include "WatchDog.hpp"
22
#include "GlobalData.hpp"
25
#include <ErrorHandlingMacros.hpp>
26
#include <EventLogger.hpp>
30
extern EventLogger g_eventLogger;
35
// Treat the opaque argument w as a WatchDog* and invoke its run() method.
// NOTE(review): the enclosing function signature is not visible in this
// excerpt; presumably this is the runWatchDog routine handed to
// NdbThread_Create elsewhere in this file - confirm.
((WatchDog*)w)->run();
39
// Constructor.
//   interval - requested check interval, forwarded to setCheckInterval().
// theIPValue is initialised with the pointer returned by
// globalData.getWatchDogPtr(); the run loop later reads through it.
WatchDog::WatchDog(Uint32 interval) :
40
theIPValue(globalData.getWatchDogPtr())
42
setCheckInterval(interval);
47
WatchDog::~WatchDog(){
52
// Stores the check interval in theInterval and returns the value actually
// stored.
//   interval - requested interval; any value below 70 is replaced by 70.
WatchDog::setCheckInterval(Uint32 interval){
53
// An interval of less than 70ms is not acceptable
54
return theInterval = (interval < 70 ? 70 : interval);
60
// Start a thread through NdbThread_Create, passing runWatchDog and
// NDB_THREAD_PRIO_HIGH, and keep the returned handle in theThreadPtr.
// NOTE(review): the arguments between runWatchDog and the priority are not
// visible in this excerpt.
theThreadPtr = NdbThread_Create(runWatchDog,
64
NDB_THREAD_PRIO_HIGH);
72
// Wait on the thread held in theThreadPtr (passing &status to
// NdbThread_WaitFor), then hand the handle to NdbThread_Destroy.
// NOTE(review): the declaration of status and whatever precedes this wait
// are not visible in this excerpt.
NdbThread_WaitFor(theThreadPtr, &status);
73
NdbThread_Destroy(&theThreadPtr);
77
// Produces a descriptive text for an IPValue; the caller in this file logs
// the result as the place where the kernel is stuck.
//   IPValue - the value to describe.
// Each assignment below puts one fixed description into `action`.
// NOTE(review): the branching that maps IPValue to a particular string and
// the return statement are not visible in this excerpt; presumably `action`
// is what gets returned - confirm.
const char *get_action(Uint32 IPValue)
82
action = "Job Handling";
85
action = "Scanning Timers";
88
action = "External I/O";
91
action = "Print Job Buffers at crash";
94
action = "Checking connections";
97
action = "Performing Send";
100
action = "Polling for Receive";
103
action = "Performing Receive";
106
action = "Allocating memory";
109
// Presumably the fallback for values with no dedicated description - confirm.
action = "Unknown place";
118
// Watchdog monitoring logic.
// NOTE(review): the enclosing function header, the loop construct, several
// branch conditions and the assignment of sleep_time are not visible in this
// excerpt; the comments below describe only the statements that are shown.
//
// anIPValue    - value most recently read through theIPValue.
// oldIPValue   - last value copied from anIPValue; later passed to get_action().
// sleep_time   - duration handed to NdbSleep_MilliSleep().
unsigned int anIPValue, sleep_time;
119
unsigned int oldIPValue = 0;
120
// Threshold that elapsed is compared against before warning; starts at
// theInterval and is raised by theInterval further down.
unsigned int theIntervalCheck = theInterval;
121
// start_time - reference point for the "stuck" measurement (elapsed).
// last_time  - reference point for the oversleep measurement.
// now        - timestamp taken after each sleep.
struct MicroSecondTimer start_time, last_time, now;
122
NdbTick_getMicroTimer(&start_time);
123
last_time = start_time;
125
// WatchDog for the single threaded NDB
130
NdbSleep_MilliSleep(sleep_time);
134
NdbTick_getMicroTimer(&now);
135
// Oversleep check: the time passed since last_time, divided by 1000 (ms,
// per the log text below), exceeds twice the requested sleep.
if (NdbTick_getMicrosPassed(last_time, now)/1000 > sleep_time*2)
139
// Log the user/system time fields of my_tms.
// NOTE(review): the code that fills my_tms is not visible in this excerpt.
g_eventLogger.info("Watchdog: User time: %llu System time: %llu",
140
(Uint64)my_tms.tms_utime,
141
(Uint64)my_tms.tms_stime);
142
// NOTE(review): the first %u receives NdbTick_getMicrosPassed(...)/1000;
// confirm that this expression's type is a 32-bit unsigned, otherwise the
// format and argument widths disagree. The remaining argument(s) of this
// call are not visible in this excerpt.
g_eventLogger.warning("Watchdog: Warning overslept %u ms, expected %u ms.",
143
NdbTick_getMicrosPassed(last_time, now)/1000,
148
// Verify that the IP thread is not stuck in a loop
149
anIPValue = *theIPValue;
152
// The next four statements remember the value just read, call
// globalData.incrementWatchDogCounter(0), restart the start_time reference
// and put the warning threshold back to theInterval.
// NOTE(review): the condition guarding them is not visible in this excerpt;
// presumably they run when the value read indicates progress - confirm.
oldIPValue = anIPValue;
153
globalData.incrementWatchDogCounter(0);
154
NdbTick_getMicroTimer(&start_time);
155
theIntervalCheck = theInterval;
160
// Time since start_time, divided by 1000 (presumably milliseconds, given the
// "Micros" in the helper's name - confirm).
Uint32 elapsed = NdbTick_getMicrosPassed(start_time, now)/1000;
162
oldIPValue == 9 indicates malloc going on, this can take some time
163
so only warn if we pass the watchdog interval
166
// NOTE(review): the statement controlled by this test is not visible in this
// excerpt; per the text above, presumably no warning is issued while elapsed
// is still below the threshold - confirm.
if (elapsed < theIntervalCheck)
169
// Raise the threshold by one more interval.
theIntervalCheck += theInterval;
173
// Describe the last remembered value and warn that the kernel is stuck there.
const char *last_stuck_action = get_action(oldIPValue);
174
g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action);
178
g_eventLogger.info("Watchdog: User time: %llu System time: %llu",
179
(Uint64)my_tms.tms_utime,
180
(Uint64)my_tms.tms_stime);
182
// Once elapsed exceeds three times theInterval, shut the system down,
// passing along the description of where it was stuck.
if (elapsed > 3 * theInterval)
184
shutdownSystem(last_stuck_action);
193
// Reports a watchdog termination through ErrorReporter::handleError using
// the NDBD_EXIT_WATCHDOG_TERMINATE code.
//   last_stuck_action - description of where the kernel was stuck, as
//                       supplied by the caller.
// NOTE(review): the remaining arguments of the handleError call are not
// visible in this excerpt, so how last_stuck_action is used cannot be
// confirmed from here.
WatchDog::shutdownSystem(const char *last_stuck_action){
195
ErrorReporter::handleError(NDBD_EXIT_WATCHDOG_TERMINATE,