1
/* Copyright (C) 2003 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
#include <ndb_global.h>
17
#include <my_pthread.h>
19
#include <ndb_version.h>
20
#include "Configuration.hpp"
21
#include <ConfigRetriever.hpp>
22
#include <TransporterRegistry.hpp>
24
#include "vm/SimBlockList.hpp"
25
#include "ThreadConfig.hpp"
26
#include <SignalLoggerManager.hpp>
29
#include <NdbDaemon.h>
31
#include <NdbConfig.h>
32
#include <WatchDog.hpp>
34
#include <LogLevel.hpp>
35
#include <EventLogger.hpp>
37
#include <NdbAutoPtr.hpp>
39
#include <Properties.hpp>
41
#include <mgmapi_debug.h>
43
#if defined NDB_SOLARIS // ok
44
#include <sys/processor.h> // For system information
47
// Event logger and shutdown mutex are defined elsewhere; this file only uses them.
extern EventLogger g_eventLogger;
48
extern NdbMutex * theShutdownMutex;
50
void catchsigs(bool ignore); // for process signal handling
52
// Threshold that main() compares failed_startups against before deciding
// whether the child should be restarted once more.
#define MAX_FAILED_STARTUPS 3
53
// Flag set by child through SIGUSR1 to signal a failed startup
// NOTE(review): written in handler_sigusr1() (a signal handler) and read in
// main(); consider volatile sig_atomic_t for this flag - confirm with owners.
54
static bool failed_startup_flag = false;
55
// Counter for consecutive failed startups
56
static Uint32 failed_startups = 0;
57
extern "C" void handler_shutdown(int signum); // for process signal handling
58
extern "C" void handler_error(int signum); // for process signal handling
59
extern "C" void handler_sigusr1(int signum); // child signalling failed restart
61
// Shows system information
62
void systemInfo(const Configuration & conf,
65
// These are used already before fork if fetch_configuration() fails
// (e.g. Unable to alloc node id). Set them to something reasonable.
// main() later re-points both at the two ends of a pipe (via fdopen) so that
// the child can report back to the parent.
66
// (e.g. Unable to alloc node id). Set them to something reasonable.
67
static FILE *child_info_file_r= stdin;
68
static FILE *child_info_file_w= stdout;
70
static void writeChildInfo(const char *token, int val)
72
fprintf(child_info_file_w, "%s=%d\n", token, val);
73
fflush(child_info_file_w);
76
void childReportSignal(int signum)
78
writeChildInfo("signal", signum);
81
void childReportError(int error)
83
writeChildInfo("error", error);
86
// Final report of a child that is about to exit.
// Writes the current start phase ("sphase") and the exit code ("exit") to the
// child-info stream, terminates the report with an empty line and closes both
// child-info streams.
// NOTE(review): presumably the process is then terminated with `code` - confirm.
void childExit(int code, Uint32 currentStartPhase)
88
writeChildInfo("sphase", currentStartPhase);
89
writeChildInfo("exit", code);
90
// An empty line marks the end of the report.
fprintf(child_info_file_w, "\n");
91
fclose(child_info_file_r);
92
fclose(child_info_file_w);
96
// Final report of a child that is about to abort.
// Performs the same reporting sequence as childExit(): "sphase" and "exit"
// entries, an empty terminating line, then both child-info streams are closed.
// NOTE(review): presumably followed by an abort of the process - confirm.
void childAbort(int code, Uint32 currentStartPhase)
98
writeChildInfo("sphase", currentStartPhase);
99
writeChildInfo("exit", code);
100
// An empty line marks the end of the report.
fprintf(child_info_file_w, "\n");
101
fclose(child_info_file_r);
102
fclose(child_info_file_w);
107
// Parse one "key<sep>value" text line and store it in the property set.
//   pair - text to parse
//   p    - property set that receives the trimmed key and trimmed value
// The text is split with the separator set ":=" and a limit of 2; the entry is
// only usable when exactly two parts come out of the split.
// NOTE(review): return value convention is not annotated here - confirm
// (presumably non-zero when the line is not a key/value pair).
static int insert(const char * pair, Properties & p)
109
BaseString tmp(pair);
112
Vector<BaseString> split;
113
tmp.split(split, ":=", 2);
114
// Anything that is not exactly key + value is rejected.
if(split.size() != 2)
116
p.put(split[0].trim().c_str(), split[1].trim().c_str());
120
// Parent-side reader of the child's report.
// Closes the write end of the child-info channel, reads the read end line by
// line with fgets() until it returns NULL, then closes the read end as well.
// NOTE(review): each line is presumably fed to insert() to fill `info` - confirm.
static int readChildInfo(Properties &info)
122
// The reader does not write; close our copy of the write end first.
fclose(child_info_file_w);
124
while (fgets(buf,sizeof(buf),child_info_file_r))
126
fclose(child_info_file_r);
130
// Look up `token` in `info` as a string and convert it with strtol() (base 10).
//   info    - property set to read from
//   token   - property name
//   int_val - destination for the converted number
// Two failure cases are detected: the property is absent, or strtol() consumed
// no characters (str_val == endptr).
// NOTE(review): presumably returns false in both failure cases and stores the
// value into *int_val on success - confirm.
static bool get_int_property(Properties &info,
131
const char *token, Uint32 *int_val)
133
const char *str_val= 0;
134
if (!info.get(token, &str_val))
137
long int tmp= strtol(str_val, &endptr, 10);
138
// No digits were parsed at all.
if (str_val == endptr)
144
// Build an event report describing how the data node stopped, write it to the
// local event log and try to deliver it to every management server listed in
// config->m_mgmds.
//   config     - configuration (initial-start setting and the list of mgmds)
//   error_exit - passed by main() as 0 on the normal shutdown path and as the
//                error_exit local on the error paths
//   restart    - passed by main() as 1 when the node is about to be restarted,
//                0 otherwise
// NOTE(review): return value convention is not annotated here - confirm.
int reportShutdown(class Configuration *config, int error_exit, int restart)
146
// Defaults used when the child did not report a value.
Uint32 error= 0, signum= 0, sphase= 256;
150
// Pick up what the child reported; the results of the lookups are not checked.
get_int_property(info, "signal", &signum);
151
get_int_property(info, "error", &error);
152
get_int_property(info, "sphase", &sphase);
154
// The event report is constructed in place inside a raw word buffer.
Uint32 length, theData[25];
155
EventReport *rep = (EventReport *)theData;
157
rep->setNodeId(globalData.ownId);
160
// contributes 2 when the restart flag is initial_state, 4 when an initial
// start is configured
(globalData.theRestartFlag == initial_state ? 2 : 0) |
161
(config->getInitialStart() ? 4 : 0);
167
// NOTE(review): StopCompleted vs. StopForced is presumably selected by
// error_exit - confirm.
rep->setEventType(NDB_LE_NDBStopCompleted);
173
rep->setEventType(NDB_LE_NDBStopForced);
177
theData[5] = 0; // extra
182
// Log locally first.
const EventReport * const eventReport = (EventReport *)&theData[0];
183
g_eventLogger.log(eventReport->getEventType(), theData,
184
eventReport->getNodeId(), 0);
187
// Then try each management server in turn with a fresh handle.
for (unsigned n = 0; n < config->m_mgmds.size(); n++)
189
NdbMgmHandle h = ndb_mgm_create_handle();
191
ndb_mgm_set_connectstring(h, config->m_mgmds[n].c_str()) ||
194
0, //retry_delay_in_seconds
200
if (ndb_mgm_report_event(h, theData, length))
208
// Combine the latest error message and description of the handle for the log.
BaseString tmp(ndb_mgm_get_latest_error_msg(h));
210
tmp.append(ndb_mgm_get_latest_error_desc(h));
211
g_eventLogger.warning("Unable to report shutdown reason to %s: %s",
212
config->m_mgmds[n].c_str(), tmp.c_str());
216
g_eventLogger.error("Unable to report shutdown reason to %s",
217
config->m_mgmds[n].c_str());
222
// Release the handle again.
ndb_mgm_disconnect(h);
223
ndb_mgm_destroy_handle(&h);
229
// Entry point of the data node process.
// Outline of what is visible here:
//   1. set up console logging and the global emulator data,
//   2. parse the command line and fetch the configuration,
//   3. optionally become a daemon,
//   4. unless running in foreground mode, act as the "angel": fork a child,
//      wait for it, interpret its exit status and either stop or restart it,
//   5. in the child (or in foreground mode): load the blocks, start the
//      transporters, the watchdog and the socket server, and run the control loop.
int main(int argc, char** argv)
232
// Print to stdout/console
233
g_eventLogger.createConsoleHandler();
234
g_eventLogger.setCategory("ndbd");
235
g_eventLogger.enable(Logger::LL_ON, Logger::LL_INFO);
236
g_eventLogger.enable(Logger::LL_ON, Logger::LL_CRITICAL);
237
g_eventLogger.enable(Logger::LL_ON, Logger::LL_ERROR);
238
g_eventLogger.enable(Logger::LL_ON, Logger::LL_WARNING);
240
g_eventLogger.m_logLevel.setLogLevel(LogLevel::llStartUp, 15);
242
globalEmulatorData.create();
244
// Parse command line options
245
Configuration* theConfig = globalEmulatorData.theConfiguration;
246
if(!theConfig->init(argc, argv)){
250
{ // Do configuration
252
// SIGPIPE is ignored for the rest of the process lifetime.
signal(SIGPIPE, SIG_IGN);
254
theConfig->fetch_configuration();
257
// Change the working directory to the path reported by NdbConfig_get_path(0).
my_setwd(NdbConfig_get_path(0), MYF(0));
259
if (theConfig->getDaemonMode()) {
261
// Pid file and stdout file names are derived from the own node id; the
// NdbAutoPtr objects take care of releasing the two strings.
char *lockfile= NdbConfig_PidFileName(globalData.ownId);
262
char *logfile= NdbConfig_StdoutFileName(globalData.ownId);
263
NdbAutoPtr<char> tmp_aptr1(lockfile), tmp_aptr2(logfile);
265
if (NdbDaemon_Make(lockfile, logfile, 0) == -1) {
266
ndbout << "Cannot become daemon: " << NdbDaemon_ErrorText << endl;
272
// SIGUSR1 is how a child tells this process that its startup failed.
signal(SIGUSR1, handler_sigusr1);
275
// Angel loop: one iteration per child that gets started.
while (! theConfig->getForegroundMode()) // the cond is const
277
// setup reporting between child and parent
281
g_eventLogger.error("pipe() failed with errno=%d (%s)",
282
errno, strerror(errno));
287
// Wrap the pipe ends in stdio streams: index 1 for writing, index 0 for reading.
if (!(child_info_file_w= fdopen(filedes[1],"w")))
289
g_eventLogger.error("fdopen() failed with errno=%d (%s)",
290
errno, strerror(errno));
292
if (!(child_info_file_r= fdopen(filedes[0],"r")))
294
g_eventLogger.error("fdopen() failed with errno=%d (%s)",
295
errno, strerror(errno));
299
// fork() returns 0 in the child and a negative value on failure; both leave
// the angel loop. Only the parent (positive pid) continues below.
if ((child = fork()) <= 0)
300
break; // child or error
309
* We no longer need the mgm connection in this process
310
* (as we are the angel, not ndb)
312
* We don't want to purge any allocated resources (nodeid), so
313
* we set that option to false
315
theConfig->closeConfiguration(false);
317
int status = 0, error_exit = 0, signum = 0;
318
// Block until exactly this child has changed state.
while(waitpid(child, &status, 0) != child);
319
if(WIFEXITED(status)){
320
// The child encodes the requested restart type in its exit status.
switch(WEXITSTATUS(status)){
322
g_eventLogger.info("Angel shutting down");
323
reportShutdown(theConfig, 0, 0);
326
case NRT_NoStart_Restart:
327
theConfig->setInitialStart(false);
328
globalData.theRestartFlag = initial_state;
330
case NRT_NoStart_InitialStart:
331
theConfig->setInitialStart(true);
332
globalData.theRestartFlag = initial_state;
334
case NRT_DoStart_InitialStart:
335
theConfig->setInitialStart(true);
336
globalData.theRestartFlag = perform_start;
340
if(theConfig->stopOnError()){
342
* Error shutdown && stopOnError()
344
reportShutdown(theConfig, error_exit, 0);
348
case NRT_DoStart_Restart:
349
theConfig->setInitialStart(false);
350
globalData.theRestartFlag = perform_start;
355
// The child was killed by a signal: record which one on the child-info stream.
if (WIFSIGNALED(status))
357
signum = WTERMSIG(status);
358
childReportSignal(signum);
363
g_eventLogger.info("Unknown exit reason. Stopped.");
365
if(theConfig->stopOnError()){
367
* Error shutdown && stopOnError()
369
reportShutdown(theConfig, error_exit, 0);
374
// failed_startup_flag is raised by handler_sigusr1() when the child signalled
// a failed startup.
if (!failed_startup_flag)
376
// Reset the counter for consecutive failed startups
379
else if (failed_startups >= MAX_FAILED_STARTUPS && !theConfig->stopOnError())
382
* Too many consecutive failed startups && !stopOnError()
384
g_eventLogger.alert("Ndbd has failed %u consecutive startups. "
385
"Not restarting", failed_startups);
386
reportShutdown(theConfig, error_exit, 0);
389
// Restart path: clear the flag, report the shutdown as a restart and fetch the
// configuration again before the next loop iteration.
failed_startup_flag = false;
390
reportShutdown(theConfig, error_exit, 1);
391
g_eventLogger.info("Ndb has terminated (pid %d) restarting", child);
392
theConfig->fetch_configuration();
396
g_eventLogger.info("Angel pid: %d ndb pid: %d", getppid(), getpid());
398
g_eventLogger.info("Ndb pid: %d", getpid());
400
g_eventLogger.info("Ndb started in foreground");
402
g_eventLogger.info("Ndb started");
404
theConfig->setupConfiguration();
405
systemInfo(* theConfig, * theConfig->m_logLevel);
408
globalEmulatorData.theSimBlockList->load(globalEmulatorData);
410
// Set thread concurrency for Solaris' light weight processes
412
status = NdbThread_SetConcurrencyLevel(30);
416
// Create a signal logger
417
char *buf= NdbConfig_SignalLogFileName(globalData.ownId);
418
NdbAutoPtr<char> tmp_aptr(buf);
419
// Opened in append mode.
// NOTE(review): no check of the fopen() result is visible before the stream
// is handed to the signal logger - confirm.
FILE * signalLog = fopen(buf, "a");
420
globalSignalLoggers.setOwnNodeId(globalData.ownId);
421
globalSignalLoggers.setOutputStream(signalLog);
422
#if 0 // to log startup
423
globalSignalLoggers.log(SignalLoggerManager::LogInOut, "BLOCK=DBDICT,DBDIH");
424
globalData.testOn = 1;
434
ErrorReporter::setErrorHandlerShutdownType(NST_ErrorHandlerStartup);
436
// The restart flag selects which start levels the thread config goes through.
switch(globalData.theRestartFlag){
438
globalEmulatorData.theThreadConfig->doStart(NodeState::SL_CMVMI);
441
globalEmulatorData.theThreadConfig->doStart(NodeState::SL_CMVMI);
442
globalEmulatorData.theThreadConfig->doStart(NodeState::SL_STARTING);
445
// The string literal is never a null pointer, so this assert always fires
// when reached.
assert("Illegal state globalData.theRestartFlag" == 0);
448
globalTransporterRegistry.startSending();
449
globalTransporterRegistry.startReceiving();
450
if (!globalTransporterRegistry.start_service(*globalEmulatorData.m_socket_server)){
451
ndbout_c("globalTransporterRegistry.start_service() failed");
455
// Re-use the mgm handle as a transporter
456
if(!globalTransporterRegistry.connect_client(
457
theConfig->get_config_retriever()->get_mgmHandlePtr()))
458
ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG,
459
"Connection to mgmd terminated before setup was complete",
460
"StopOnError missing");
462
if (!globalTransporterRegistry.start_clients()){
463
ndbout_c("globalTransporterRegistry.start_clients() failed");
467
globalEmulatorData.theWatchDog->doStart();
469
globalEmulatorData.m_socket_server->startServer();
471
// theConfig->closeConfiguration();
473
// Runs the control loop; the normal shutdown below follows once it returns.
globalEmulatorData.theThreadConfig->ipControlLoop();
475
NdbShutdown(NST_Normal);
482
// Log basic information about the node and, where the platform code below
// supports it, about the machine it runs on.
//   config   - source of the mgmd host/port and the watchdog interval
//   logLevel - start-up log level; output is produced above level 0, the
//              watchdog interval only above level 3
systemInfo(const Configuration & config, const LogLevel & logLevel){
487
// Windows variant: processor count from GetSystemInfo(), clock speed from the
// "~MHz" registry value of processor 0.
GetSystemInfo(&sinfo);
488
processors = sinfo.dwNumberOfProcessors;
490
if(ERROR_SUCCESS==RegOpenKeyEx
492
TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
493
0, KEY_READ, &hKey)) {
495
DWORD cbData = sizeof(dwMHz);
496
if(ERROR_SUCCESS==RegQueryValueEx(hKey,
497
"~MHz", 0, 0, (LPBYTE)&dwMHz, &cbData)) {
502
#elif defined NDB_SOLARIS // ok
503
// Search for at max 16 processors among the first 256 processor ids
504
processor_info_t pinfo; memset(&pinfo, 0, sizeof(pinfo));
506
while(processors < 16 && pid < 256){
507
// processor_info() returning 0 means the id exists.
if(!processor_info(pid++, &pinfo))
510
// Clock speed as reported in pinfo for the last probed id.
speed = pinfo.pi_clock;
513
if(logLevel.getLogLevel(LogLevel::llStartUp) > 0){
514
g_eventLogger.info("NDB Cluster -- DB node %d", globalData.ownId);
515
g_eventLogger.info("%s --", NDB_VERSION_STRING);
516
// The mgmd host is only logged when one is known.
if (config.get_mgmd_host())
517
g_eventLogger.info("Configuration fetched at %s port %d",
518
config.get_mgmd_host(), config.get_mgmd_port());
519
#ifdef NDB_SOLARIS // ok
520
g_eventLogger.info("NDB is running on a machine with %d processor(s) at %d MHz",
524
if(logLevel.getLogLevel(LogLevel::llStartUp) > 3){
525
Uint32 t = config.timeBetweenWatchDogCheck();
526
g_eventLogger.info("WatchDog timer is set to %d ms", t);
531
/* Helper used by catchsigs() to set the disposition of one signal.
 * Two outcomes are visible: the signal is set to SIG_IGN (never for SIGCHLD),
 * or `handler` is installed for it.
 * NOTE(review): presumably the `ignore` argument selects between the two -
 * confirm. */
#define handler_register(signum, handler, ignore)\
534
if(signum != SIGCHLD)\
535
signal(signum, SIG_IGN);\
537
signal(signum, handler);\
541
// Set the dispositions of three groups of signals via handler_register():
//   signals_shutdown -> handler_shutdown
//   signals_error    -> handler_error
//   signals_ignore   -> SIG_IGN
// and, when not in foreground mode, SIGTRAP -> handler_error.
//   ignore - forwarded unchanged to every handler_register() invocation
// Only compiled in when NDB_WIN32 is not defined.
catchsigs(bool ignore){
542
#if !defined NDB_WIN32
544
static const int signals_shutdown[] = {
552
#elif defined SIGINFO
564
static const int signals_error[] = {
582
static const int signals_ignore[] = {
587
// Element counts are derived from the array sizes, so the tables can be
// changed without touching the loops.
for(i = 0; i < sizeof(signals_shutdown)/sizeof(signals_shutdown[0]); i++)
588
handler_register(signals_shutdown[i], handler_shutdown, ignore);
589
for(i = 0; i < sizeof(signals_error)/sizeof(signals_error[0]); i++)
590
handler_register(signals_error[i], handler_error, ignore);
591
for(i = 0; i < sizeof(signals_ignore)/sizeof(signals_ignore[0]); i++)
592
handler_register(signals_ignore[i], SIG_IGN, ignore);
594
Configuration* theConfig = globalEmulatorData.theConfiguration;
595
// SIGTRAP is only routed to the error handler when not running in foreground mode.
if (! theConfig->getForegroundMode())
596
handler_register(SIGTRAP, handler_error, ignore);
603
// Signal handler for the shutdown signal group registered in catchsigs().
// Logs the signal, records it on the child-info stream and sets the global
// restart flag to perform_stop.
// NOTE(review): logging and stdio from a signal handler is not
// async-signal-safe by POSIX rules - confirm this is acceptable here.
handler_shutdown(int signum){
604
g_eventLogger.info("Received signal %d. Performing stop.", signum);
606
childReportSignal(signum);
607
globalData.theRestartFlag = perform_stop;
612
// Signal handler for the error signal group registered in catchsigs().
// Logs the signal, records it on the child-info stream and raises a fatal
// NDBD_EXIT_OS_SIGNAL_RECEIVED error whose text contains the signal number and,
// when available, the strsignal() description.
// NOTE(review): uses logging, stdio and a mutex from a signal handler, which is
// not async-signal-safe by POSIX rules - confirm this is acceptable here.
handler_error(int signum){
613
// only let one thread run shutdown
614
static long thread_id= 0;
616
// thread_id is set further down by the thread that runs the error handling;
// a match here means that same thread received a signal again.
if (thread_id != 0 && thread_id == my_thread_id())
618
// Shutdown thread received signal
620
// Restore the default disposition and send the signal to this process again.
signal(signum, SIG_DFL);
621
kill(getpid(), signum);
624
NdbSleep_MilliSleep(10);
626
// A non-zero Trylock result means the shutdown mutex could not be taken.
if(theShutdownMutex && NdbMutex_Trylock(theShutdownMutex) != 0)
628
NdbSleep_MilliSleep(10);
629
thread_id= my_thread_id();
630
g_eventLogger.info("Received signal %d. Running error handler.", signum);
631
childReportSignal(signum);
632
// restart the system
633
char errorData[64], *info= 0;
634
#ifdef HAVE_STRSIGNAL
635
info= strsignal(signum);
637
// Falls back to a fixed text when no signal description is available.
BaseString::snprintf(errorData, sizeof(errorData), "Signal %d received; %s", signum,
638
info ? info : "No text for signal available");
639
ERROR_SET_SIGNAL(fatal, NDBD_EXIT_OS_SIGNAL_RECEIVED, errorData, __FILE__);
644
// SIGUSR1 handler, installed by main() before the angel loop.
// Raises failed_startup_flag if it is not already set and logs the current
// value of failed_startups.
// NOTE(review): the failed_startups counter is presumably incremented here as
// well - confirm.
handler_sigusr1(int signum)
646
// Only the first signal since the flag was last cleared has an effect.
if (!failed_startup_flag)
649
failed_startup_flag = true;
651
g_eventLogger.info("Angel received ndbd startup failure count %u.", failed_startups);