~ubuntu-branches/ubuntu/maverick/cluster-agents/maverick-proposed

« back to all changes in this revision

Viewing changes to tools/sfex_daemon.c

  • Committer: Bazaar Package Importer
  • Author(s): Ante Karamatic
  • Date: 2010-02-17 21:46:00 UTC
  • Revision ID: james.westby@ubuntu.com-20100217214600-g44grvtkw7jbpciz
Tags: upstream-1.0.2
Import upstream version 1.0.2

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#include <stdio.h>
 
2
#include <unistd.h>
 
3
#include <string.h>
 
4
#include <stdlib.h>
 
5
#include <signal.h>
 
6
#include <limits.h>
 
7
#include <sys/mman.h>
 
8
#include <string.h>
 
9
#include <errno.h>
 
10
#include <sys/types.h>
 
11
#include <sys/stat.h>
 
12
#include <fcntl.h>
 
13
#include <syslog.h>
 
14
#include "sfex.h"
 
15
#include "sfex_lib.h"
 
16
 
 
17
/* File descriptor main() opens on /proc/sysrq-trigger (unless SFEX_TESTING);
 * failure_todo() writes "b\n" to it. */
static int sysrq_fd;
 
18
/* Lock slot passed to read_lockdata()/write_lockdata(); set by -i. */
static int lock_index = 1;        /* default: first lock */
 
19
/* Seconds acquire_lock() sleeps after writing the lock before re-reading it
 * to detect a collision; set by -c. */
static time_t collision_timeout = 1; /* default: 1 second */
 
20
/* Seconds acquire_lock() sleeps when another node holds the lock, before
 * checking whether the lock counter changed; set by -t. */
static time_t lock_timeout = 60; /* default: 60 seconds */
 
21
/* Not referenced in this file; has external linkage, so presumably it is
 * used by another translation unit -- TODO confirm. */
time_t unlock_timeout = 60;
 
22
/* Seconds main() sleeps between update_lock() calls; set by -m. */
static time_t monitor_interval = 10;
 
23
 
 
24
/* Control data filled in by read_controldata() in lock_index_check(). */
static sfex_controldata cdata;
 
25
/* Working copy of the lock record that is read, modified and written back. */
static sfex_lockdata ldata;
 
26
/* Second copy of the lock record, re-read in acquire_lock() for comparison. */
static sfex_lockdata ldata_new;
 
27
 
 
28
/* Device argument (argv[optind]) handed to prepare_lock(). */
static const char *device;
 
29
/* Program name from get_progname(argv[0]); used as a prefix in messages. */
const char *progname;
 
30
/* Node name from get_nodename(), replaced by the -n argument when given. */
char *nodename;
 
31
/* Resource id passed to "crm_resource -r" by error_todo(); set by -r. */
static const char *rsc_id = "sfex";
 
32
/* Pid file path passed to cl_lock_pidfile(); set by -d and required. */
static const char *rscpidfile;
 
33
 
 
34
static void usage(FILE *dist) {
 
35
          fprintf(dist, "usage: %s [-i <index>] [-c <collision_timeout>] [-t <lock_timeout>] <device>\n", progname);
 
36
}
 
37
 
 
38
static int lock_index_check(void)
 
39
{
 
40
        if (read_controldata(&cdata) == -1) {
 
41
                SFEX_LOG_ERR("%s\n", "read_controldata failed in lock_index_check");
 
42
                return -1;
 
43
        }
 
44
#ifdef SFEX_DEBUG
 
45
        SFEX_LOG_INFO("version: %d\n", cdata.version);
 
46
        SFEX_LOG_INFO("revision: %d\n", cdata.revision);
 
47
        SFEX_LOG_INFO("blocksize: %d\n", cdata.blocksize);
 
48
        SFEX_LOG_INFO("numlocks: %d\n", cdata.numlocks);
 
49
#endif
 
50
 
 
51
        if (lock_index > cdata.numlocks) {
 
52
                SFEX_LOG_ERR("%s: ERROR: index %d is too large. %d locks are stored.\n",
 
53
                                progname, lock_index, cdata.numlocks);
 
54
                return -1;
 
55
                /*exit(EXIT_FAILURE);*/
 
56
        }
 
57
 
 
58
        if (cdata.blocksize != sector_size) {
 
59
                SFEX_LOG_ERR("%s: ERROR: sector_size is not the same as the blocksize.\n", progname);
 
60
                return -1;
 
61
        }
 
62
        return 0;
 
63
}
 
64
 
 
65
static void acquire_lock(void)
 
66
{
 
67
        if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
 
68
                SFEX_LOG_ERR("%s\n", "read_lockdata failed in acquire_lock");
 
69
                exit(EXIT_FAILURE);
 
70
        }
 
71
 
 
72
        if ((ldata.status == SFEX_STATUS_LOCK) && (strncmp(nodename, (const char*)(ldata.nodename), sizeof(ldata.nodename)))) {
 
73
                unsigned int t = lock_timeout;
 
74
                while (t > 0)
 
75
                        t = sleep(t);
 
76
                read_lockdata(&cdata, &ldata_new, lock_index);
 
77
                if (ldata.count != ldata_new.count) {
 
78
                        SFEX_LOG_ERR("%s", "can\'t acquire lock: the lock's already hold by some other node.\n");
 
79
                        exit(2);
 
80
                }
 
81
        }
 
82
 
 
83
        /* The lock acquisition is possible because it was not updated. */
 
84
        ldata.status = SFEX_STATUS_LOCK;
 
85
        ldata.count = SFEX_NEXT_COUNT(ldata.count);
 
86
        strncpy((char*)(ldata.nodename), nodename, sizeof(ldata.nodename));
 
87
        if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
 
88
                SFEX_LOG_ERR("%s", "write_lockdata failed\n");
 
89
                exit(EXIT_FAILURE);
 
90
        }
 
91
 
 
92
        /* detect the collision of lock */
 
93
        /* The collision occurs when two or more nodes do the reservation 
 
94
           processing of the lock at the same time. It waits for collision_timeout 
 
95
           seconds to detect this,and whether the superscription of lock data by 
 
96
           another node is done is checked. If the superscription was done by 
 
97
           another node, the lock acquisition with the own node is given up.  
 
98
         */
 
99
        {
 
100
                unsigned int t = collision_timeout;
 
101
                while (t > 0)
 
102
                        t = sleep(t);
 
103
                if (read_lockdata(&cdata, &ldata_new, lock_index) == -1) {
 
104
                        SFEX_LOG_ERR("%s", "read_lockdata failed\n");
 
105
                }
 
106
                if (strncmp((char*)(ldata.nodename), (const char*)(ldata_new.nodename), sizeof(ldata.nodename))) {
 
107
                        SFEX_LOG_ERR("%s", "can\'t acquire lock: collision detected in the air.\n");
 
108
                        exit(2);
 
109
                }
 
110
        }
 
111
 
 
112
        /* extension of lock */
 
113
        /* Validly time of the lock is extended. It is because of spending at 
 
114
           the collision_timeout seconds to detect the collision. */
 
115
        ldata.count = SFEX_NEXT_COUNT(ldata.count);
 
116
        if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
 
117
                SFEX_LOG_ERR("%s\n", "write_lockdata failed");
 
118
                exit(EXIT_FAILURE);
 
119
        }
 
120
        SFEX_LOG_ERR("%s", "lock acquired\n");
 
121
}
 
122
 
 
123
static void error_todo (void)
 
124
{
 
125
        if (fork() == 0) {
 
126
                execl("/usr/sbin/crm_resource", "crm_resource", "-F", "-r", rsc_id, "-H", nodename, NULL);
 
127
        } else {
 
128
                exit(EXIT_FAILURE);
 
129
        }
 
130
}
 
131
 
 
132
static void failure_todo(void)
 
133
{
 
134
#ifdef SFEX_TESTING     
 
135
        exit(EXIT_FAILURE);
 
136
#else
 
137
        /*execl("/usr/sbin/crm_resource", "crm_resource", "-F", "-r", rsc_id, "-H", nodename, NULL); */
 
138
        int ret;
 
139
        ret = write(sysrq_fd, "b\n", 2);
 
140
        if (ret == -1) {
 
141
                SFEX_LOG_ERR("%s\n", strerror(errno));
 
142
        }
 
143
        close(sysrq_fd);
 
144
        exit(EXIT_FAILURE);
 
145
#endif
 
146
}
 
147
 
 
148
static void update_lock(void)
 
149
{
 
150
        /* read lock data */
 
151
        if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
 
152
                error_todo();
 
153
                exit(EXIT_FAILURE);
 
154
        }
 
155
 
 
156
        /* check current lock status */
 
157
        /* if own node is not locking, lock update is failed */
 
158
        if (ldata.status != SFEX_STATUS_LOCK || strncmp((const char*)(ldata.nodename), nodename, sizeof(ldata.nodename))) {
 
159
                SFEX_LOG_ERR("can't update lock.\n");
 
160
                failure_todo();
 
161
                exit(EXIT_FAILURE); 
 
162
        }
 
163
 
 
164
        /* lock update */
 
165
        ldata.count = SFEX_NEXT_COUNT(ldata.count);
 
166
        if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
 
167
                error_todo();
 
168
                exit(EXIT_FAILURE);
 
169
        }
 
170
}
 
171
 
 
172
static void release_lock(void)
 
173
{
 
174
        /* The only thing I care about in release_lock(), is to terminate the process */
 
175
           
 
176
        /* read lock data */
 
177
        if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
 
178
                exit(EXIT_FAILURE);
 
179
        }
 
180
 
 
181
        /* check current lock status */
 
182
        /* if own node is not locking, we judge that lock has been released already */
 
183
        if (ldata.status != SFEX_STATUS_LOCK || strncmp((const char*)(ldata.nodename), nodename, sizeof(ldata.nodename))) {
 
184
                SFEX_LOG_ERR("lock was already released.\n");
 
185
                exit(1);
 
186
        }
 
187
 
 
188
        /* lock release */
 
189
        ldata.status = SFEX_STATUS_UNLOCK;
 
190
        if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
 
191
            /*FIXME: We are going to self-stop */
 
192
                exit(EXIT_FAILURE);
 
193
        }
 
194
        SFEX_LOG_INFO("lock released\n");
 
195
}
 
196
 
 
197
/*
 * Signal handler (SA_SIGINFO form): log, release the lock and exit with
 * EXIT_SUCCESS. None of the three handler arguments is used.
 *
 * NOTE(review): logging, release_lock() and exit() are not async-signal-safe;
 * setting a flag here and acting on it from the main loop would be safer,
 * but that changes when the lock is released -- confirm before restructuring.
 */
static void quit_handler(int signo, siginfo_t *info, void *context)
{
	(void)signo;
	(void)info;
	(void)context;

	SFEX_LOG_INFO("quit_handler\n");
	release_lock();
	exit(EXIT_SUCCESS);
}
 
203
 
 
204
int main(int argc, char *argv[])
 
205
{       
 
206
 
 
207
        int ret;
 
208
 
 
209
        progname = get_progname(argv[0]);
 
210
        nodename = get_nodename();
 
211
 
 
212
 
 
213
#if 0
 
214
        openlog("SFex Daemon", LOG_PID|LOG_CONS|LOG_NDELAY, LOG_USER);
 
215
#endif
 
216
 
 
217
        /* read command line option */
 
218
        opterr = 0;
 
219
        while (1) {
 
220
                int c = getopt(argc, argv, "hi:c:t:m:n:r:d:");
 
221
                if (c == -1)
 
222
                        break;
 
223
                switch (c) {
 
224
                        case 'h':           /* help*/
 
225
                                usage(stdout);
 
226
                                exit(0);
 
227
                        case 'i':           /* -i <index> */
 
228
                                {
 
229
                                        unsigned long l = strtoul(optarg, NULL, 10);
 
230
                                        if (l < SFEX_MIN_NUMLOCKS || l > SFEX_MAX_NUMLOCKS) {
 
231
                                                SFEX_LOG_ERR(
 
232
                                                                "%s: ERROR: index %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
 
233
                                                                progname, optarg,
 
234
                                                                (unsigned long)SFEX_MIN_NUMLOCKS,
 
235
                                                                (unsigned long)SFEX_MAX_NUMLOCKS);
 
236
                                                exit(4);
 
237
                                        }
 
238
                                        lock_index = l;
 
239
                                }
 
240
                                break;
 
241
                        case 'c':           /* -c <collision_timeout> */
 
242
                                {
 
243
                                        unsigned long l = strtoul(optarg, NULL, 10);
 
244
                                        if (l < 1 || l > INT_MAX) {
 
245
                                                SFEX_LOG_ERR(
 
246
                                                                "%s: ERROR: collision_timeout %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
 
247
                                                                progname, optarg,
 
248
                                                                (unsigned long)1,
 
249
                                                                (unsigned long)INT_MAX);
 
250
                                                exit(4);
 
251
                                        }
 
252
                                        collision_timeout = l;
 
253
                                }
 
254
                                break;
 
255
                        case 'm':                       /* -m <monitor_interval> */
 
256
                                {
 
257
                                        unsigned long l = strtoul(optarg, NULL, 10);
 
258
                                        if (l < 1 || l > INT_MAX) {
 
259
                                                SFEX_LOG_ERR(
 
260
                                                                "%s: ERROR: monitor_interval %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
 
261
                                                                progname, optarg,
 
262
                                                                (unsigned long)1,
 
263
                                                                (unsigned long)INT_MAX);
 
264
                                                exit(4);
 
265
                                        }
 
266
                                        monitor_interval = l;
 
267
                                }
 
268
                                break;  
 
269
                        case 't':           /* -t <lock_timeout> */
 
270
                                {
 
271
                                        unsigned long l = strtoul(optarg, NULL, 10);
 
272
                                        if (l < 1 || l > INT_MAX) {
 
273
                                                SFEX_LOG_ERR(
 
274
                                                                "%s: ERROR: lock_timeout %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
 
275
                                                                progname, optarg,
 
276
                                                                (unsigned long)1,
 
277
                                                                (unsigned long)INT_MAX);
 
278
                                                exit(4);
 
279
                                        }
 
280
                                        lock_timeout = l;
 
281
                                }
 
282
                                break;
 
283
                        case 'n':
 
284
                                {
 
285
                                        free(nodename);
 
286
                                        if (strlen(optarg) > SFEX_MAX_NODENAME) {
 
287
                                                SFEX_LOG_ERR("%s: ERROR: nodename %s is too long. must be less than %d byte.\n",
 
288
                                                                progname, optarg,
 
289
                                                                (unsigned int)SFEX_MAX_NODENAME);
 
290
                                                exit(EXIT_FAILURE);
 
291
                                        }
 
292
                                        nodename = strdup(optarg);
 
293
                                }       
 
294
                                break;
 
295
                        case 'r':
 
296
                                {
 
297
                                        rsc_id = strdup(optarg);
 
298
                                }
 
299
                                break;
 
300
                        case 'd':
 
301
                                {
 
302
                                        rscpidfile = strdup(optarg);
 
303
                                }       
 
304
                                break;
 
305
                        case '?':           /* error */
 
306
                                usage(stderr);
 
307
                                exit(4);
 
308
                }
 
309
        }
 
310
        /* check parameter except the option */
 
311
        if (optind >= argc) {
 
312
                SFEX_LOG_ERR("%s: ERROR: no device specified.\n", progname);
 
313
                usage(stderr);
 
314
                exit(EXIT_FAILURE);
 
315
        } else if (optind + 1 < argc) {
 
316
                SFEX_LOG_ERR("%s: ERROR: too many arguments.\n", progname);
 
317
                usage(stderr);
 
318
                exit(EXIT_FAILURE);
 
319
        }
 
320
        device = argv[optind];
 
321
 
 
322
        if (rscpidfile == NULL) {
 
323
                SFEX_LOG_ERR("%s: ERROR: Directory for saving pid file is not specified.\n", progname);
 
324
                exit(EXIT_FAILURE);
 
325
        }
 
326
 
 
327
        prepare_lock(device);
 
328
#if !SFEX_TESTING
 
329
        sysrq_fd = open("/proc/sysrq-trigger", O_WRONLY);
 
330
        if (sysrq_fd == -1) {
 
331
                SFEX_LOG_ERR("failed to open /proc/sysrq-trigger due to %s\n", strerror(errno));
 
332
                exit(EXIT_FAILURE);
 
333
        }
 
334
#endif
 
335
 
 
336
        ret = lock_index_check();
 
337
        if (ret == -1)
 
338
                exit(EXIT_FAILURE);
 
339
 
 
340
        {
 
341
                struct sigaction sig_act;
 
342
                sigemptyset (&sig_act.sa_mask);
 
343
                sig_act.sa_flags = SA_SIGINFO;
 
344
 
 
345
                sig_act.sa_sigaction = quit_handler;
 
346
                ret = sigaction(SIGTERM, &sig_act, NULL);
 
347
                if (ret == -1) {
 
348
                        SFEX_LOG_ERR("sigaction failed\n");
 
349
                        exit(EXIT_FAILURE);
 
350
                }
 
351
        }
 
352
 
 
353
        SFEX_LOG_INFO("Starting SFeX Daemon...\n");
 
354
        
 
355
        /* acquire lock first.*/
 
356
        acquire_lock();
 
357
 
 
358
        if (daemon(0, 1) != 0) {
 
359
                cl_perror("%s::%d: daemon() failed.", __FUNCTION__, __LINE__);
 
360
                release_lock();
 
361
                exit(EXIT_FAILURE);
 
362
        }
 
363
        if (cl_lock_pidfile(rscpidfile) < 0) {
 
364
                SFEX_LOG_ERR("Creating pidfile failed.");
 
365
                release_lock();
 
366
                exit(EXIT_FAILURE);
 
367
        }
 
368
 
 
369
        cl_make_realtime(-1, -1, 128, 128);
 
370
        
 
371
        SFEX_LOG_INFO("SFeX Daemon started.\n");
 
372
        while (1) {
 
373
                sleep (monitor_interval);
 
374
                update_lock();
 
375
        }
 
376
}