10
#include <sys/types.h>
18
/* File-scope state of the daemon. */

/* Index of the lock slot handed to read_lockdata and write_lockdata;
   lock_index_check rejects it when it exceeds cdata.numlocks. */
static int lock_index = 1; /* default 1st lock */
19
/* Seconds to wait, after writing the lock, before re-reading it to
   detect a collision with another node. Assigned from the -c option. */
static time_t collision_timeout = 1; /* default 1 sec */
20
/* Copied into a local in acquire_lock when another node holds the lock;
   presumably the wait before re-reading the counter -- TODO confirm,
   the wait itself is not visible here. */
static time_t lock_timeout = 60; /* default 60 sec */
21
/* Argument of the sleep call in main; assigned from the -m option. */
static time_t monitor_interval = 10;
23
/* Control data filled in by read_controldata. */
static sfex_controldata cdata;
24
/* Lock data read from, modified in, and written back to the lock slot. */
static sfex_lockdata ldata;
25
/* Second copy of the lock data, re-read to compare against ldata. */
static sfex_lockdata ldata_new;
27
/* Device argument taken from argv[optind] and passed to prepare_lock. */
static const char *device;
30
/* Resource id passed to crm_resource with -r; an option in main may
   replace the default with a strdup copy of optarg. */
static const char *rsc_id = "sfex";
31
/* Pid file path passed to cl_lock_pidfile; main refuses to start while
   it is still NULL. */
static const char *rscpidfile;
33
/*
 * Print the one-line command synopsis to the stream dist, with progname
 * substituted for the program name.
 *
 * NOTE(review): the synopsis mentions only -i, -c and -t, while the getopt
 * option string in main also accepts h, m, n, r and d. Consider listing
 * them here as well.
 */
static void usage(FILE *dist) {
34
fprintf(dist, "usage: %s [-i <index>] [-c <collision_timeout>] [-t <lock_timeout>] <device>\n", progname);
37
/*
 * Validate the configured lock index against the control data.
 *
 * Reads the control data into cdata, logs its version, revision, blocksize
 * and numlocks fields, and reports an error when lock_index exceeds
 * cdata.numlocks or when cdata.blocksize differs from sector_size.
 *
 * Returns an int that main stores in ret; the concrete return values are
 * not visible in this view.
 */
static int lock_index_check(void)
39
if (read_controldata(&cdata) == -1) {
40
SFEX_LOG_ERR("%s\n", "read_controldata failed in lock_index_check");
44
/* NOTE(review): the four fields below are printed with %d; confirm that
   their declared types in sfex_controldata match that specifier. */
SFEX_LOG_INFO("version: %d\n", cdata.version);
45
SFEX_LOG_INFO("revision: %d\n", cdata.revision);
46
SFEX_LOG_INFO("blocksize: %d\n", cdata.blocksize);
47
SFEX_LOG_INFO("numlocks: %d\n", cdata.numlocks);
50
/* The requested slot must exist among the stored locks. */
if (lock_index > cdata.numlocks) {
51
SFEX_LOG_ERR("%s: ERROR: index %d is too large. %d locks are stored.\n",
52
progname, lock_index, cdata.numlocks);
57
/* The stored block size must equal the sector size in use. */
if (cdata.blocksize != sector_size) {
58
SFEX_LOG_ERR("%s: ERROR: sector_size is not the same as the blocksize.\n", progname);
64
/*
 * Try to take the lock slot lock_index for this node.
 *
 * Visible steps: read the current lock data; if another node holds the lock,
 * re-read it and give up when its counter has changed; otherwise mark the
 * lock as held by this node and write it; re-read it to detect a concurrent
 * writer; finally bump the counter once more and write it back.
 * The error and exit paths are not visible in this view.
 */
static void acquire_lock(void)
66
if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
67
SFEX_LOG_ERR("%s\n", "read_lockdata failed in acquire_lock");
71
/* strncmp is nonzero when the stored node name differs from ours, so this
   branch handles a lock currently held by another node. */
if ((ldata.status == SFEX_STATUS_LOCK) && (strncmp(nodename, (const char*)(ldata.nodename), sizeof(ldata.nodename)))) {
72
unsigned int t = lock_timeout;
75
/* NOTE(review): the return value of this read is ignored, unlike the
   other read_lockdata calls in this function. */
read_lockdata(&cdata, &ldata_new, lock_index);
76
/* A changed counter means the holder is still updating the lock. */
if (ldata.count != ldata_new.count) {
77
SFEX_LOG_ERR("%s", "can\'t acquire lock: the lock's already hold by some other node.\n");
82
/* The lock can be taken because its counter was not updated. */
83
ldata.status = SFEX_STATUS_LOCK;
84
ldata.count = SFEX_NEXT_COUNT(ldata.count);
85
/* NOTE(review): strncpy leaves the field without a terminating NUL when
   nodename fills it completely; the comparisons in this file use strncmp
   with the same bound. Confirm that this is the intended on-disk format. */
strncpy((char*)(ldata.nodename), nodename, sizeof(ldata.nodename));
86
if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
87
SFEX_LOG_ERR("%s", "write_lockdata failed\n");
91
/* detect the collision of lock */
92
/* A collision occurs when two or more nodes try to reserve the lock
93
at the same time. To detect this, wait collision_timeout seconds and
94
then check whether another node has overwritten the lock data. If the
95
lock data was overwritten by another node, this node gives up
96
acquiring the lock.
99
unsigned int t = collision_timeout;
102
if (read_lockdata(&cdata, &ldata_new, lock_index) == -1) {
103
SFEX_LOG_ERR("%s", "read_lockdata failed\n");
105
if (strncmp((char*)(ldata.nodename), (const char*)(ldata_new.nodename), sizeof(ldata.nodename))) {
106
SFEX_LOG_ERR("%s", "can\'t acquire lock: collision detected in the air.\n");
111
/* extension of lock */
112
/* The validity period of the lock is extended here, because
113
collision_timeout seconds were spent detecting the collision. */
114
ldata.count = SFEX_NEXT_COUNT(ldata.count);
115
if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
116
SFEX_LOG_ERR("%s\n", "write_lockdata failed");
119
/* NOTE(review): this success message is emitted through the error-level
   macro; confirm whether SFEX_LOG_INFO was intended. */
SFEX_LOG_ERR("%s", "lock acquired\n");
122
/*
 * Action taken on an error: exec /usr/sbin/crm_resource with the arguments
 * -F -r rsc_id -H nodename. When execl succeeds it does not return.
 *
 * NOTE(review): the argument list is terminated with a bare NULL. POSIX
 * documents the terminator of execl as a null pointer of type char *, so
 * an explicit cast is the portable form for a variadic call.
 */
static void error_todo (void)
125
execl("/usr/sbin/crm_resource", "crm_resource", "-F", "-r", rsc_id, "-H", nodename, NULL);
131
/*
 * Action taken on a failure: write the two bytes b and newline to sysrq_fd,
 * the descriptor that main opens on /proc/sysrq-trigger, and log
 * strerror(errno).
 *
 * NOTE(review): on Linux the b sysrq command presumably requests an
 * immediate reboot -- confirm against the kernel sysrq documentation.
 * The condition guarding the log call is not visible in this view.
 */
static void failure_todo(void)
136
/* Earlier alternative, left disabled: hand over to crm_resource. */
//execl("/usr/sbin/crm_resource", "crm_resource", "-F", "-r", rsc_id, "-H", nodename, NULL);
138
ret = write(sysrq_fd, "b\n", 2);
140
SFEX_LOG_ERR("%s\n", strerror(errno));
147
/*
 * Refresh the lock held by this node.
 *
 * Reads the lock slot lock_index, checks that it is in the locked state and
 * carries this node name, then advances the counter and writes the slot
 * back. The handling of each failure is not visible in this view.
 */
static void update_lock(void)
150
if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
155
/* check current lock status */
156
/* if this node does not hold the lock, the update fails */
157
if (ldata.status != SFEX_STATUS_LOCK || strncmp((const char*)(ldata.nodename), nodename, sizeof(ldata.nodename))) {
158
SFEX_LOG_ERR("can't update lock.\n");
164
/* Advance the counter so that other nodes can see the lock is alive. */
ldata.count = SFEX_NEXT_COUNT(ldata.count);
165
if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
171
/*
 * Release the lock held by this node.
 *
 * Reads the lock slot lock_index; when the slot is not locked or carries a
 * different node name the lock is treated as already released. Otherwise
 * the status is set to SFEX_STATUS_UNLOCK and the slot is written back.
 * The handling of read and write failures is not visible in this view.
 */
static void release_lock(void)
173
/* The only thing that matters in release_lock() is terminating the process */
176
if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
180
/* check current lock status */
181
/* if this node does not hold the lock, it is considered already released */
182
if (ldata.status != SFEX_STATUS_LOCK || strncmp((const char*)(ldata.nodename), nodename, sizeof(ldata.nodename))) {
183
/* NOTE(review): logged at error level although this case is treated as
   an already completed release; confirm the intended severity. */
SFEX_LOG_ERR("lock was already released.\n");
188
ldata.status = SFEX_STATUS_UNLOCK;
189
if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
190
//FIXME: We are going to self-stop
193
SFEX_LOG_INFO("lock released\n");
196
/*
 * Signal handler with the three-argument SA_SIGINFO signature; main
 * installs it for SIGTERM through sigaction. The visible part only logs
 * that the handler ran; the rest of the body is not visible in this view.
 *
 * NOTE(review): if SFEX_LOG_INFO reaches syslog or stdio, calling it from a
 * signal handler is not async-signal-safe -- confirm what the macro expands
 * to.
 */
static void quit_handler(int signo, siginfo_t *info, void *context)
198
SFEX_LOG_INFO("quit_handler\n");
203
int main(int argc, char *argv[])
208
progname = get_progname(argv[0]);
209
nodename = get_nodename();
213
openlog("SFex Daemon", LOG_PID|LOG_CONS|LOG_NDELAY, LOG_USER);
216
/* read command line option */
219
int c = getopt(argc, argv, "hi:c:t:m:n:r:d:");
226
case 'i': /* -i <index> */
228
unsigned long l = strtoul(optarg, NULL, 10);
229
if (l < SFEX_MIN_NUMLOCKS || l > SFEX_MAX_NUMLOCKS) {
231
"%s: ERROR: index %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
233
(unsigned long)SFEX_MIN_NUMLOCKS,
234
(unsigned long)SFEX_MAX_NUMLOCKS);
240
case 'c': /* -c <collision_timeout> */
242
unsigned long l = strtoul(optarg, NULL, 10);
243
if (l < 1 || l > INT_MAX) {
245
"%s: ERROR: collision_timeout %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
248
(unsigned long)INT_MAX);
251
collision_timeout = l;
254
case 'm': /* -m <monitor_interval> */
256
unsigned long l = strtoul(optarg, NULL, 10);
257
if (l < 1 || l > INT_MAX) {
259
"%s: ERROR: monitor_interval %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
262
(unsigned long)INT_MAX);
265
monitor_interval = l;
268
case 't': /* -t <lock_timeout> */
270
unsigned long l = strtoul(optarg, NULL, 10);
271
if (l < 1 || l > INT_MAX) {
273
"%s: ERROR: lock_timeout %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
276
(unsigned long)INT_MAX);
285
if (strlen(optarg) > SFEX_MAX_NODENAME) {
286
SFEX_LOG_ERR("%s: ERROR: nodename %s is too long. must be less than %d byte.\n",
288
(unsigned int)SFEX_MAX_NODENAME);
291
nodename = strdup(optarg);
296
rsc_id = strdup(optarg);
301
rscpidfile = strdup(optarg);
304
case '?': /* error */
309
/* check parameter except the option */
310
if (optind >= argc) {
311
SFEX_LOG_ERR("%s: ERROR: no device specified.\n", progname);
314
} else if (optind + 1 < argc) {
315
SFEX_LOG_ERR("%s: ERROR: too many arguments.\n", progname);
319
device = argv[optind];
321
if (rscpidfile == NULL) {
322
SFEX_LOG_ERR("%s: ERROR: Directory for saving pid file is not specified.\n", progname);
326
prepare_lock(device);
328
sysrq_fd = open("/proc/sysrq-trigger", O_WRONLY);
329
if (sysrq_fd == -1) {
330
SFEX_LOG_ERR("failed to open /proc/sysrq-trigger due to %s\n", strerror(errno));
335
ret = lock_index_check();
340
struct sigaction sig_act;
341
sigemptyset (&sig_act.sa_mask);
342
sig_act.sa_flags = SA_SIGINFO;
344
sig_act.sa_sigaction = quit_handler;
345
ret = sigaction(SIGTERM, &sig_act, NULL);
347
SFEX_LOG_ERR("sigaction failed\n");
352
SFEX_LOG_INFO("Starting SFeX Daemon...\n");
354
/* acquire lock first.*/
357
if (daemon(0, 1) != 0) {
358
cl_perror("%s::%d: daemon() failed.", __FUNCTION__, __LINE__);
362
if (cl_lock_pidfile(rscpidfile) < 0) {
363
SFEX_LOG_ERR("Creating pidfile failed.");
368
cl_make_realtime(-1, -1, 128, 128);
370
SFEX_LOG_INFO("SFeX Daemon started.\n");
372
sleep (monitor_interval);