1
/*****************************************************************************\
2
* backup.c - backup slurm dbd
3
*****************************************************************************
4
* Copyright (C) 2009 Lawrence Livermore National Security.
5
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
6
* Written by Danny Auble <da@llnl.gov>
7
* CODE-OCEC-09-009. All rights reserved.
9
* This file is part of SLURM, a resource management program.
10
* For details, see <https://computing.llnl.gov/linux/slurm/>.
11
* Please also read the included file: DISCLAIMER.
13
* SLURM is free software; you can redistribute it and/or modify it under
14
* the terms of the GNU General Public License as published by the Free
15
* Software Foundation; either version 2 of the License, or (at your option)
18
* In addition, as a special exception, the copyright holders give permission
19
* to link the code of portions of this program with the OpenSSL library under
20
* certain conditions as described in each individual source file, and
21
* distribute linked combinations including the two. You must obey the GNU
22
* General Public License in all respects for all of the code used other than
23
* OpenSSL. If you modify file(s) with this exception, you may extend this
24
* exception to your version of the file(s), but you are not obligated to do
25
* so. If you do not wish to do so, delete this exception statement from your
26
* version. If you delete this exception statement from all source files in
27
* the program, then also delete it here.
29
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
30
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
31
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
34
* You should have received a copy of the GNU General Public License along
35
* with SLURM; if not, write to the Free Software Foundation, Inc.,
36
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
37
\*****************************************************************************/
41
#include "src/common/xmalloc.h"
42
#include "src/common/slurm_protocol_defs.h"
43
#include "src/common/fd.h"
44
#include "src/common/log.h"
45
#include "src/common/slurmdbd_defs.h"
47
#include "src/slurmdbd/backup.h"
49
/* Cleared at the start of run_backup() and set to true there once the
 * primary is seen again while this daemon holds control. */
bool primary_resumed = false;
51
/* Tested by the run_backup() polling loop to tell "primary has come back"
 * apart from "taking control"; reset to false when the primary returns. */
bool have_control = false;
53
/* Descriptor for the connection to the primary SlurmDBD; -1 means no
 * connection is open (the close helper only acts on values >= 0). */
static slurm_fd slurmdbd_fd = -1;
55
/*
 * Open a connection to the Slurm DBD and set slurmdbd_fd.
 *
 * dbd_addr - address of the SlurmDBD to connect to. A zero sin_port is
 *            reported through error().
 *
 * The descriptor returned by slurm_open_msg_conn() is stored in the
 * file-scope slurmdbd_fd and handed to fd_set_nonblocking().
 *
 * NOTE(review): the braces and any statements that sit between the lines
 * shown below are not visible in this copy of the file; confirm the exact
 * control flow (early return, descriptor validity check) against the
 * complete source.
 */
56
static void _open_slurmdbd_fd(slurm_addr dbd_addr)
58
/* An address with port 0 is treated as unusable. */
if(dbd_addr.sin_port == 0) {
59
error("sin_port == 0 in the slurmdbd backup");
63
/* Keep the descriptor in slurmdbd_fd; the close helper and the polling
 * loop in run_backup() both read it from there. */
slurmdbd_fd = slurm_open_msg_conn(&dbd_addr);
66
fd_set_nonblocking(slurmdbd_fd);
69
/*
 * Close the SlurmDBD connection.
 *
 * Only acts when slurmdbd_fd holds a non-negative descriptor.
 *
 * NOTE(review): the body of the guarded branch is not visible in this copy
 * of the file; presumably it closes the descriptor and resets slurmdbd_fd
 * to -1 -- confirm against the complete source.
 */
70
static void _close_slurmdbd_fd(void)
72
if (slurmdbd_fd >= 0) {
78
/*
 * Reopen the Slurm DBD connection due to some error.
 *
 * dbd_addr - address passed straight through to _open_slurmdbd_fd().
 *
 * NOTE(review): only the open call is visible in this copy of the file;
 * whether the previous descriptor is closed first must be confirmed
 * against the complete source.
 */
79
static void _reopen_slurmdbd_fd(slurm_addr dbd_addr)
82
_open_slurmdbd_fd(dbd_addr);
85
/*
 * run_backup - this is the backup controller, it should run in standby
 * mode, assuming control when the primary controller stops responding.
 *
 * Takes no arguments and returns nothing. It works on the file-scope
 * state: primary_resumed, have_control and (through the helpers)
 * slurmdbd_fd. The primary's address comes from slurmdbd_conf->dbd_host
 * and slurmdbd_conf->dbd_port.
 *
 * NOTE(review): the braces, the declaration of dbd_addr and several
 * statements inside the polling loop are not visible in this copy of the
 * file, so the exact loop exit conditions and the guard around the reopen
 * call must be confirmed against the complete source.
 */
87
extern void run_backup(void)
91
/* Start every run with the "primary came back" flag cleared. */
primary_resumed = false;
93
/* get a connection */
94
/* Fill dbd_addr from the configured primary host and port. */
slurm_set_addr(&dbd_addr, slurmdbd_conf->dbd_port,
95
slurmdbd_conf->dbd_host);
97
/* A zero port after slurm_set_addr() is reported as "unable to locate". */
if (dbd_addr.sin_port == 0)
98
error("Unable to locate SlurmDBD host %s:%u",
99
slurmdbd_conf->dbd_host, slurmdbd_conf->dbd_port);
101
_open_slurmdbd_fd(dbd_addr);
104
/* repeatedly ping Primary */
105
/* Keep polling for as long as shutdown_time is unset. */
while (!shutdown_time) {
106
/* fd_writeable() on the primary connection is used below as the
 * "primary is reachable" signal. */
bool writeable = fd_writeable(slurmdbd_fd);
107
//info("%d %d", have_control, writeable);
109
/* We hold control and the primary answers again: record that it resumed
 * and give control back. */
if (have_control && writeable) {
110
info("Primary has come back");
111
primary_resumed = true;
113
have_control = false;
115
/* We do not hold control and the primary is unreachable. */
} else if(!have_control && !writeable) {
117
info("Taking Control");
123
/* Re-establish the connection to the primary for the next check. */
_reopen_slurmdbd_fd(dbd_addr);
126
/* Release the connection to the primary. */
_close_slurmdbd_fd();