10
#include <sys/types.h>
18
/* Index of the lock slot this daemon works on. It is compared against
 * cdata.numlocks in lock_index_check() and passed to read_lockdata() /
 * write_lockdata() as the slot selector. */
static int lock_index = 1; /* default 1st lock */
19
/* Time budget for collision detection in acquire_lock(); set from the
 * -c option in main(). */
static time_t collision_timeout = 1; /* default 1 sec */
20
/* Time budget used in acquire_lock() when the lock is currently held by
 * another node; advertised as the -t option in usage(). */
static time_t lock_timeout = 60; /* default 60 sec */
21
/* NOTE(review): has external linkage (no `static`) and is not referenced
 * by the functions in this file section - presumably consumed by another
 * translation unit; confirm before changing its linkage. */
time_t unlock_timeout = 60;
22
/* Argument to sleep() in main(); set from the -m option. */
static time_t monitor_interval = 10;
24
/* Control data filled in by read_controldata() and handed to every
 * read_lockdata() / write_lockdata() call. */
static sfex_controldata cdata;
25
/* Working copy of the lock record: read, modified and written back. */
static sfex_lockdata ldata;
26
/* Second copy of the lock record, re-read only to be compared with ldata
 * (counter and nodename) in acquire_lock(). */
static sfex_lockdata ldata_new;
28
/* The single non-option command line argument; passed to prepare_lock(). */
static const char *device;
31
/* Resource id handed to crm_resource via "-r" in error_todo(); may be
 * replaced by a strdup()'ed option argument in main(). */
static const char *rsc_id = "sfex";
32
/* Path handed to cl_lock_pidfile(); main() reports an error while it is
 * still NULL after option parsing. */
static const char *rscpidfile;
34
/*
 * Print the command line synopsis for this program to the stream `dist`.
 *
 * NOTE(review): the synopsis mentions only -i, -c and -t, while the getopt()
 * string in main() also accepts -h, -m, -n, -r and -d.
 */
static void usage(FILE *dist) {
35
fprintf(dist, "usage: %s [-i <index>] [-c <collision_timeout>] [-t <lock_timeout>] <device>\n", progname);
38
/*
 * Validate the configured lock_index and the device geometry against the
 * control data stored on the device.
 *
 * Reads the control data into cdata, logs its version, revision, blocksize
 * and numlocks, then reports an error when lock_index exceeds numlocks or
 * when blocksize differs from sector_size.
 *
 * Returns an int that main() stores in `ret`.
 * NOTE(review): the concrete return values for success and failure should be
 * documented here once confirmed.
 */
static int lock_index_check(void)
40
/* Load the on-disk control data; -1 signals a read failure. */
if (read_controldata(&cdata) == -1) {
41
SFEX_LOG_ERR("%s\n", "read_controldata failed in lock_index_check");
45
/* Dump the control data header for diagnostics. */
SFEX_LOG_INFO("version: %d\n", cdata.version);
46
SFEX_LOG_INFO("revision: %d\n", cdata.revision);
47
SFEX_LOG_INFO("blocksize: %d\n", cdata.blocksize);
48
SFEX_LOG_INFO("numlocks: %d\n", cdata.numlocks);
51
/* The requested slot must not exceed the number of locks on the device. */
if (lock_index > cdata.numlocks) {
52
SFEX_LOG_ERR("%s: ERROR: index %d is too large. %d locks are stored.\n",
53
progname, lock_index, cdata.numlocks);
55
/* The process is deliberately not terminated here. */
/*exit(EXIT_FAILURE);*/
58
/* The recorded block size has to match the sector size in use. */
if (cdata.blocksize != sector_size) {
59
SFEX_LOG_ERR("%s: ERROR: sector_size is not the same as the blocksize.\n", progname);
65
/*
 * Try to take the lock in slot lock_index for this node.
 *
 * Sequence:
 *   1. Read the current lock record into ldata.
 *   2. If the record is in SFEX_STATUS_LOCK state under a different nodename,
 *      re-read it into ldata_new and compare the counters; a changed counter
 *      means the other node is still updating the lock, so it cannot be taken.
 *   3. Write a record carrying SFEX_STATUS_LOCK, the next counter value and
 *      this node's name.
 *   4. Re-read the record and compare nodenames to detect a collision with
 *      another node that reserved the lock at the same time.
 *   5. Advance the counter once more and write the record again.
 */
static void acquire_lock(void)
67
if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
68
SFEX_LOG_ERR("%s\n", "read_lockdata failed in acquire_lock");
72
/* Locked, and the recorded holder is not this node (strncmp() != 0). */
if ((ldata.status == SFEX_STATUS_LOCK) && (strncmp(nodename, (const char*)(ldata.nodename), sizeof(ldata.nodename)))) {
73
/* NOTE(review): time_t is narrowed to unsigned int here; main() limits the
 * option values to INT_MAX, so the value fits. */
unsigned int t = lock_timeout;
76
/* NOTE(review): unlike every other read_lockdata() call in this function,
 * the return value is ignored here; on failure ldata_new may hold stale
 * data. */
read_lockdata(&cdata, &ldata_new, lock_index);
77
/* A counter that moved between the two reads means the holder is alive. */
if (ldata.count != ldata_new.count) {
78
SFEX_LOG_ERR("%s", "can\'t acquire lock: the lock's already hold by some other node.\n");
83
/* The lock acquisition is possible because it was not updated. */
84
ldata.status = SFEX_STATUS_LOCK;
85
ldata.count = SFEX_NEXT_COUNT(ldata.count);
86
/* NOTE(review): strncpy() does not NUL-terminate when nodename fills the
 * whole field; all comparisons in this file use strncmp() with the same
 * size, which tolerates that. */
strncpy((char*)(ldata.nodename), nodename, sizeof(ldata.nodename));
87
if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
88
SFEX_LOG_ERR("%s", "write_lockdata failed\n");
92
/* detect the collision of lock */
93
/* A collision occurs when two or more nodes try to reserve the lock
94
at the same time. To detect this, wait for collision_timeout
95
seconds and then check whether the lock data has been overwritten by
96
another node. If it has been overwritten by
97
another node, this node gives up acquiring the lock.
100
unsigned int t = collision_timeout;
103
if (read_lockdata(&cdata, &ldata_new, lock_index) == -1) {
104
SFEX_LOG_ERR("%s", "read_lockdata failed\n");
106
if (strncmp((char*)(ldata.nodename), (const char*)(ldata_new.nodename), sizeof(ldata.nodename))) {
107
SFEX_LOG_ERR("%s", "can\'t acquire lock: collision detected in the air.\n");
112
/* extension of lock */
113
/* The validity period of the lock is extended here, because up to
114
collision_timeout seconds were spent detecting the collision. */
115
ldata.count = SFEX_NEXT_COUNT(ldata.count);
116
if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
117
SFEX_LOG_ERR("%s\n", "write_lockdata failed");
120
/* NOTE(review): a successful acquisition is reported through the error-level
 * macro; release_lock() uses SFEX_LOG_INFO for its success message. */
SFEX_LOG_ERR("%s", "lock acquired\n");
123
/*
 * Error reaction: replace the current process image with
 *   /usr/sbin/crm_resource -F -r <rsc_id> -H <nodename>
 *
 * execl() only returns if the exec itself fails.
 *
 * NOTE(review): exec(3) asks for the argument list to be terminated with
 * `(char *)NULL`; a bare NULL in a variadic call is not guaranteed to be
 * passed as a pointer on every platform.
 */
static void error_todo (void)
126
execl("/usr/sbin/crm_resource", "crm_resource", "-F", "-r", rsc_id, "-H", nodename, NULL);
132
/*
 * Failure reaction: write the two bytes "b\n" to sysrq_fd, the descriptor
 * main() opens on /proc/sysrq-trigger.
 *
 * NOTE(review): per the Linux sysrq documentation the 'b' command reboots
 * the machine immediately without syncing or unmounting disks - confirm this
 * is the intended fencing action.
 */
static void failure_todo(void)
137
/* Earlier reaction (same command as error_todo()), kept disabled: */
/*execl("/usr/sbin/crm_resource", "crm_resource", "-F", "-r", rsc_id, "-H", nodename, NULL); */
139
ret = write(sysrq_fd, "b\n", 2);
141
/* Reports errno as text; presumably reached only when write() failed. */
SFEX_LOG_ERR("%s\n", strerror(errno));
148
/*
 * Refresh the lock held by this node.
 *
 * Re-reads the lock record, verifies that it is still in SFEX_STATUS_LOCK
 * state under this node's name, then advances the counter and writes the
 * record back. The moving counter is what acquire_lock() on another node
 * compares to decide whether the holder is still updating the lock.
 */
static void update_lock(void)
151
if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
156
/* check current lock status */
157
/* if this node does not hold the lock, the update fails */
158
if (ldata.status != SFEX_STATUS_LOCK || strncmp((const char*)(ldata.nodename), nodename, sizeof(ldata.nodename))) {
159
SFEX_LOG_ERR("can't update lock.\n");
165
/* Advance the counter and persist it. */
ldata.count = SFEX_NEXT_COUNT(ldata.count);
166
if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
172
/*
 * Release the lock held by this node.
 *
 * Re-reads the lock record; if it is not in SFEX_STATUS_LOCK state under
 * this node's name the lock is treated as already released. Otherwise the
 * status is set to SFEX_STATUS_UNLOCK and the record is written back.
 */
static void release_lock(void)
174
/* The only purpose of release_lock() is to get the process terminated. */
177
if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
181
/* check current lock status */
182
/* if this node does not hold the lock, it is considered already released */
183
if (ldata.status != SFEX_STATUS_LOCK || strncmp((const char*)(ldata.nodename), nodename, sizeof(ldata.nodename))) {
184
/* NOTE(review): this benign condition is logged at error level. */
SFEX_LOG_ERR("lock was already released.\n");
189
/* Mark the slot as unlocked and persist it. */
ldata.status = SFEX_STATUS_UNLOCK;
190
if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
191
/*FIXME: We are going to self-stop */
194
SFEX_LOG_INFO("lock released\n");
197
/*
 * SA_SIGINFO-style signal handler; main() installs it for SIGTERM.
 *
 * signo   - number of the delivered signal
 * info    - additional signal information supplied by the kernel
 * context - interrupted execution context
 *
 * NOTE(review): logging from a signal handler is only safe if
 * SFEX_LOG_INFO restricts itself to async-signal-safe calls; syslog() and
 * stdio are not on that list - confirm what the macro expands to.
 */
static void quit_handler(int signo, siginfo_t *info, void *context)
199
SFEX_LOG_INFO("quit_handler\n");
204
/*
 * Entry point of the SFEX daemon.
 *
 * Order of operations:
 *   - record the program name and node name, open syslog;
 *   - parse the options accepted by the getopt() string "hi:c:t:m:n:r:d:";
 *   - require exactly one non-option argument (the device) and a pid file
 *     path;
 *   - prepare the device, open /proc/sysrq-trigger, validate the lock index;
 *   - install quit_handler for SIGTERM;
 *   - daemonize, lock the pid file, request realtime scheduling;
 *   - sleep monitor_interval seconds between monitoring steps.
 */
int main(int argc, char *argv[])
209
progname = get_progname(argv[0]);
210
nodename = get_nodename();
214
openlog("SFex Daemon", LOG_PID|LOG_CONS|LOG_NDELAY, LOG_USER);
217
/* read command line option */
220
int c = getopt(argc, argv, "hi:c:t:m:n:r:d:");
227
case 'i': /* -i <index> */
229
/* NOTE(review): strtoul() is called without an end pointer in every numeric
 * option below, so trailing garbage ("5abc") is accepted and non-numeric
 * input is only caught because 0 falls outside the permitted range. */
unsigned long l = strtoul(optarg, NULL, 10);
230
if (l < SFEX_MIN_NUMLOCKS || l > SFEX_MAX_NUMLOCKS) {
232
"%s: ERROR: index %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
234
(unsigned long)SFEX_MIN_NUMLOCKS,
235
(unsigned long)SFEX_MAX_NUMLOCKS);
241
case 'c': /* -c <collision_timeout> */
243
unsigned long l = strtoul(optarg, NULL, 10);
244
/* Accept 1..INT_MAX only. */
if (l < 1 || l > INT_MAX) {
246
"%s: ERROR: collision_timeout %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
249
(unsigned long)INT_MAX);
252
collision_timeout = l;
255
case 'm': /* -m <monitor_interval> */
257
unsigned long l = strtoul(optarg, NULL, 10);
258
/* Accept 1..INT_MAX only. */
if (l < 1 || l > INT_MAX) {
260
"%s: ERROR: monitor_interval %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
263
(unsigned long)INT_MAX);
266
monitor_interval = l;
269
case 't': /* -t <lock_timeout> */
271
unsigned long l = strtoul(optarg, NULL, 10);
272
/* Accept 1..INT_MAX only. */
if (l < 1 || l > INT_MAX) {
274
"%s: ERROR: lock_timeout %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
277
(unsigned long)INT_MAX);
286
/* NOTE(review): the check rejects only lengths greater than
 * SFEX_MAX_NODENAME, while the message says "less than"; the limit is also
 * cast to unsigned int but printed with %d. */
if (strlen(optarg) > SFEX_MAX_NODENAME) {
287
SFEX_LOG_ERR("%s: ERROR: nodename %s is too long. must be less than %d byte.\n",
289
(unsigned int)SFEX_MAX_NODENAME);
292
/* NOTE(review): the strdup() results below are stored without a NULL check;
 * presumably these belong to the -n, -r and -d options respectively -
 * confirm against the case labels. */
nodename = strdup(optarg);
297
rsc_id = strdup(optarg);
302
rscpidfile = strdup(optarg);
305
case '?': /* error */
310
/* check the remaining non-option arguments: exactly one device is expected */
311
if (optind >= argc) {
312
SFEX_LOG_ERR("%s: ERROR: no device specified.\n", progname);
315
} else if (optind + 1 < argc) {
316
SFEX_LOG_ERR("%s: ERROR: too many arguments.\n", progname);
320
device = argv[optind];
322
/* NOTE(review): the message speaks of a directory, but the value is passed
 * to cl_lock_pidfile() as the pid file itself. */
if (rscpidfile == NULL) {
323
SFEX_LOG_ERR("%s: ERROR: Directory for saving pid file is not specified.\n", progname);
327
prepare_lock(device);
329
/* Descriptor later written to by failure_todo(). */
sysrq_fd = open("/proc/sysrq-trigger", O_WRONLY);
330
if (sysrq_fd == -1) {
331
SFEX_LOG_ERR("failed to open /proc/sysrq-trigger due to %s\n", strerror(errno));
336
ret = lock_index_check();
341
/* Install quit_handler for SIGTERM using the three-argument handler form.
 * NOTE(review): sig_act is not zero-initialised; only sa_mask, sa_flags and
 * sa_sigaction are set before the sigaction() call. */
struct sigaction sig_act;
342
sigemptyset (&sig_act.sa_mask);
343
sig_act.sa_flags = SA_SIGINFO;
345
sig_act.sa_sigaction = quit_handler;
346
ret = sigaction(SIGTERM, &sig_act, NULL);
348
SFEX_LOG_ERR("sigaction failed\n");
353
SFEX_LOG_INFO("Starting SFeX Daemon...\n");
355
/* acquire lock first.*/
358
/* Detach from the controlling terminal. Per daemon(3), the arguments (0, 1)
 * request a chdir to "/" while leaving stdin/stdout/stderr open. */
if (daemon(0, 1) != 0) {
359
cl_perror("%s::%d: daemon() failed.", __FUNCTION__, __LINE__);
363
if (cl_lock_pidfile(rscpidfile) < 0) {
364
/* NOTE(review): unlike the other log messages in this file, this one has
 * no trailing "\n". */
SFEX_LOG_ERR("Creating pidfile failed.");
369
/* NOTE(review): meaning of the arguments (-1, -1, 128, 128) depends on the
 * cl_make_realtime() helper - document once confirmed. */
cl_make_realtime(-1, -1, 128, 128);
371
SFEX_LOG_INFO("SFeX Daemon started.\n");
373
/* Pause between monitoring steps. */
sleep (monitor_interval);