~ampelbein/ubuntu/oneiric/heartbeat/lp-770743

« back to all changes in this revision

Viewing changes to tools/sfex_daemon.c

  • Committer: Bazaar Package Importer
  • Author(s): Ante Karamatic
  • Date: 2010-02-17 21:59:18 UTC
  • mfrom: (1.1.11 upstream)
  • Revision ID: james.westby@ubuntu.com-20100217215918-06paxph5do4saw8v
Tags: 3.0.2-0ubuntu1
* New upstream release
* Drop hard dep on pacemaker for heartbeat; moved to Recommends
* debian/heartbeat.install:
  - follow upstream changes
* debian/control:
  - added docbook-xsl and xsltproc to build depends

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
#include <stdio.h>
2
 
#include <unistd.h>
3
 
#include <string.h>
4
 
#include <stdlib.h>
5
 
#include <signal.h>
6
 
#include <limits.h>
7
 
#include <sys/mman.h>
8
 
#include <string.h>
9
 
#include <errno.h>
10
 
#include <sys/types.h>
11
 
#include <sys/stat.h>
12
 
#include <fcntl.h>
13
 
#include <syslog.h>
14
 
#include "sfex.h"
15
 
#include "sfex_lib.h"
16
 
 
17
 
static int sysrq_fd;
18
 
static int lock_index = 1;        /* default 1st lock */
19
 
static time_t collision_timeout = 1; /* default 1 sec */
20
 
static time_t lock_timeout = 60; /* default 60 sec */
21
 
static time_t monitor_interval = 10;
22
 
 
23
 
static sfex_controldata cdata;
24
 
static sfex_lockdata ldata;
25
 
static sfex_lockdata ldata_new;
26
 
 
27
 
static const char *device;
28
 
const char *progname;
29
 
char *nodename;
30
 
static const char *rsc_id = "sfex";
31
 
static const char *rscpidfile;
32
 
 
33
 
static void usage(FILE *dist) {
34
 
          fprintf(dist, "usage: %s [-i <index>] [-c <collision_timeout>] [-t <lock_timeout>] <device>\n", progname);
35
 
}
36
 
 
37
 
static int lock_index_check(void)
38
 
{
39
 
        if (read_controldata(&cdata) == -1) {
40
 
                SFEX_LOG_ERR("%s\n", "read_controldata failed in lock_index_check");
41
 
                return -1;
42
 
        }
43
 
#ifdef SFEX_DEBUG
44
 
        SFEX_LOG_INFO("version: %d\n", cdata.version);
45
 
        SFEX_LOG_INFO("revision: %d\n", cdata.revision);
46
 
        SFEX_LOG_INFO("blocksize: %d\n", cdata.blocksize);
47
 
        SFEX_LOG_INFO("numlocks: %d\n", cdata.numlocks);
48
 
#endif
49
 
 
50
 
        if (lock_index > cdata.numlocks) {
51
 
                SFEX_LOG_ERR("%s: ERROR: index %d is too large. %d locks are stored.\n",
52
 
                                progname, lock_index, cdata.numlocks);
53
 
                return -1;
54
 
                //exit(EXIT_FAILURE);
55
 
        }
56
 
 
57
 
        if (cdata.blocksize != sector_size) {
58
 
                SFEX_LOG_ERR("%s: ERROR: sector_size is not the same as the blocksize.\n", progname);
59
 
                return -1;
60
 
        }
61
 
        return 0;
62
 
}
63
 
 
64
 
static void acquire_lock(void)
65
 
{
66
 
        if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
67
 
                SFEX_LOG_ERR("%s\n", "read_lockdata failed in acquire_lock");
68
 
                exit(EXIT_FAILURE);
69
 
        }
70
 
 
71
 
        if ((ldata.status == SFEX_STATUS_LOCK) && (strncmp(nodename, (const char*)(ldata.nodename), sizeof(ldata.nodename)))) {
72
 
                unsigned int t = lock_timeout;
73
 
                while (t > 0)
74
 
                        t = sleep(t);
75
 
                read_lockdata(&cdata, &ldata_new, lock_index);
76
 
                if (ldata.count != ldata_new.count) {
77
 
                        SFEX_LOG_ERR("%s", "can\'t acquire lock: the lock's already hold by some other node.\n");
78
 
                        exit(2);
79
 
                }
80
 
        }
81
 
 
82
 
        /* The lock acquisition is possible because it was not updated. */
83
 
        ldata.status = SFEX_STATUS_LOCK;
84
 
        ldata.count = SFEX_NEXT_COUNT(ldata.count);
85
 
        strncpy((char*)(ldata.nodename), nodename, sizeof(ldata.nodename));
86
 
        if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
87
 
                SFEX_LOG_ERR("%s", "write_lockdata failed\n");
88
 
                exit(EXIT_FAILURE);
89
 
        }
90
 
 
91
 
        /* detect the collision of lock */
92
 
        /* The collision occurs when two or more nodes do the reservation 
93
 
           processing of the lock at the same time. It waits for collision_timeout 
94
 
           seconds to detect this,and whether the superscription of lock data by 
95
 
           another node is done is checked. If the superscription was done by 
96
 
           another node, the lock acquisition with the own node is given up.  
97
 
         */
98
 
        {
99
 
                unsigned int t = collision_timeout;
100
 
                while (t > 0)
101
 
                        t = sleep(t);
102
 
                if (read_lockdata(&cdata, &ldata_new, lock_index) == -1) {
103
 
                        SFEX_LOG_ERR("%s", "read_lockdata failed\n");
104
 
                }
105
 
                if (strncmp((char*)(ldata.nodename), (const char*)(ldata_new.nodename), sizeof(ldata.nodename))) {
106
 
                        SFEX_LOG_ERR("%s", "can\'t acquire lock: collision detected in the air.\n");
107
 
                        exit(2);
108
 
                }
109
 
        }
110
 
 
111
 
        /* extension of lock */
112
 
        /* Validly time of the lock is extended. It is because of spending at 
113
 
           the collision_timeout seconds to detect the collision. */
114
 
        ldata.count = SFEX_NEXT_COUNT(ldata.count);
115
 
        if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
116
 
                SFEX_LOG_ERR("%s\n", "write_lockdata failed");
117
 
                exit(EXIT_FAILURE);
118
 
        }
119
 
        SFEX_LOG_ERR("%s", "lock acquired\n");
120
 
}
121
 
 
122
 
static void error_todo (void)
123
 
{
124
 
        if (fork() == 0) {
125
 
                execl("/usr/sbin/crm_resource", "crm_resource", "-F", "-r", rsc_id, "-H", nodename, NULL);
126
 
        } else {
127
 
                exit(EXIT_FAILURE);
128
 
        }
129
 
}
130
 
 
131
 
static void failure_todo(void)
132
 
{
133
 
#ifdef SFEX_TESTING     
134
 
        exit(EXIT_FAILURE);
135
 
#else
136
 
        //execl("/usr/sbin/crm_resource", "crm_resource", "-F", "-r", rsc_id, "-H", nodename, NULL); 
137
 
        int ret;
138
 
        ret = write(sysrq_fd, "b\n", 2);
139
 
        if (ret == -1) {
140
 
                SFEX_LOG_ERR("%s\n", strerror(errno));
141
 
        }
142
 
        close(sysrq_fd);
143
 
        exit(EXIT_FAILURE);
144
 
#endif
145
 
}
146
 
 
147
 
static void update_lock(void)
148
 
{
149
 
        /* read lock data */
150
 
        if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
151
 
                error_todo();
152
 
                exit(EXIT_FAILURE);
153
 
        }
154
 
 
155
 
        /* check current lock status */
156
 
        /* if own node is not locking, lock update is failed */
157
 
        if (ldata.status != SFEX_STATUS_LOCK || strncmp((const char*)(ldata.nodename), nodename, sizeof(ldata.nodename))) {
158
 
                SFEX_LOG_ERR("can't update lock.\n");
159
 
                failure_todo();
160
 
                exit(EXIT_FAILURE); 
161
 
        }
162
 
 
163
 
        /* lock update */
164
 
        ldata.count = SFEX_NEXT_COUNT(ldata.count);
165
 
        if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
166
 
                error_todo();
167
 
                exit(EXIT_FAILURE);
168
 
        }
169
 
}
170
 
 
171
 
static void release_lock(void)
172
 
{
173
 
        /* The only thing I care about in release_lock(), is to terminate the process */
174
 
           
175
 
        /* read lock data */
176
 
        if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
177
 
                exit(EXIT_FAILURE);
178
 
        }
179
 
 
180
 
        /* check current lock status */
181
 
        /* if own node is not locking, we judge that lock has been released already */
182
 
        if (ldata.status != SFEX_STATUS_LOCK || strncmp((const char*)(ldata.nodename), nodename, sizeof(ldata.nodename))) {
183
 
                SFEX_LOG_ERR("lock was already released.\n");
184
 
                exit(1);
185
 
        }
186
 
 
187
 
        /* lock release */
188
 
        ldata.status = SFEX_STATUS_UNLOCK;
189
 
        if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
190
 
                //FIXME: We are going to self-stop
191
 
                exit(EXIT_FAILURE);
192
 
        }
193
 
        SFEX_LOG_INFO("lock released\n");
194
 
}
195
 
 
196
 
/*
 * SA_SIGINFO-style handler that main() installs for SIGTERM: log, release
 * the lock, and exit successfully.  None of the three arguments is used.
 *
 * NOTE(review): logging, lock I/O and exit() are probably not
 * async-signal-safe; consider setting a flag here and doing this work in
 * the main loop.
 */
static void quit_handler(int signo, siginfo_t *info, void *context)
{
	(void)signo;
	(void)info;
	(void)context;

	SFEX_LOG_INFO("quit_handler\n");
	release_lock();
	exit(EXIT_SUCCESS);
}
202
 
 
203
 
int main(int argc, char *argv[])
204
 
{       
205
 
 
206
 
        int ret;
207
 
 
208
 
        progname = get_progname(argv[0]);
209
 
        nodename = get_nodename();
210
 
 
211
 
 
212
 
#if 0
213
 
        openlog("SFex Daemon", LOG_PID|LOG_CONS|LOG_NDELAY, LOG_USER);
214
 
#endif
215
 
 
216
 
        /* read command line option */
217
 
        opterr = 0;
218
 
        while (1) {
219
 
                int c = getopt(argc, argv, "hi:c:t:m:n:r:d:");
220
 
                if (c == -1)
221
 
                        break;
222
 
                switch (c) {
223
 
                        case 'h':           /* help*/
224
 
                                usage(stdout);
225
 
                                exit(0);
226
 
                        case 'i':           /* -i <index> */
227
 
                                {
228
 
                                        unsigned long l = strtoul(optarg, NULL, 10);
229
 
                                        if (l < SFEX_MIN_NUMLOCKS || l > SFEX_MAX_NUMLOCKS) {
230
 
                                                SFEX_LOG_ERR(
231
 
                                                                "%s: ERROR: index %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
232
 
                                                                progname, optarg,
233
 
                                                                (unsigned long)SFEX_MIN_NUMLOCKS,
234
 
                                                                (unsigned long)SFEX_MAX_NUMLOCKS);
235
 
                                                exit(4);
236
 
                                        }
237
 
                                        lock_index = l;
238
 
                                }
239
 
                                break;
240
 
                        case 'c':           /* -c <collision_timeout> */
241
 
                                {
242
 
                                        unsigned long l = strtoul(optarg, NULL, 10);
243
 
                                        if (l < 1 || l > INT_MAX) {
244
 
                                                SFEX_LOG_ERR(
245
 
                                                                "%s: ERROR: collision_timeout %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
246
 
                                                                progname, optarg,
247
 
                                                                (unsigned long)1,
248
 
                                                                (unsigned long)INT_MAX);
249
 
                                                exit(4);
250
 
                                        }
251
 
                                        collision_timeout = l;
252
 
                                }
253
 
                                break;
254
 
                        case 'm':                       /* -m <monitor_interval> */
255
 
                                {
256
 
                                        unsigned long l = strtoul(optarg, NULL, 10);
257
 
                                        if (l < 1 || l > INT_MAX) {
258
 
                                                SFEX_LOG_ERR(
259
 
                                                                "%s: ERROR: monitor_interval %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
260
 
                                                                progname, optarg,
261
 
                                                                (unsigned long)1,
262
 
                                                                (unsigned long)INT_MAX);
263
 
                                                exit(4);
264
 
                                        }
265
 
                                        monitor_interval = l;
266
 
                                }
267
 
                                break;  
268
 
                        case 't':           /* -t <lock_timeout> */
269
 
                                {
270
 
                                        unsigned long l = strtoul(optarg, NULL, 10);
271
 
                                        if (l < 1 || l > INT_MAX) {
272
 
                                                SFEX_LOG_ERR(
273
 
                                                                "%s: ERROR: lock_timeout %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
274
 
                                                                progname, optarg,
275
 
                                                                (unsigned long)1,
276
 
                                                                (unsigned long)INT_MAX);
277
 
                                                exit(4);
278
 
                                        }
279
 
                                        lock_timeout = l;
280
 
                                }
281
 
                                break;
282
 
                        case 'n':
283
 
                                {
284
 
                                        free(nodename);
285
 
                                        if (strlen(optarg) > SFEX_MAX_NODENAME) {
286
 
                                                SFEX_LOG_ERR("%s: ERROR: nodename %s is too long. must be less than %d byte.\n",
287
 
                                                                progname, optarg,
288
 
                                                                (unsigned int)SFEX_MAX_NODENAME);
289
 
                                                exit(EXIT_FAILURE);
290
 
                                        }
291
 
                                        nodename = strdup(optarg);
292
 
                                }       
293
 
                                break;
294
 
                        case 'r':
295
 
                                {
296
 
                                        rsc_id = strdup(optarg);
297
 
                                }
298
 
                                break;
299
 
                        case 'd':
300
 
                                {
301
 
                                        rscpidfile = strdup(optarg);
302
 
                                }       
303
 
                                break;
304
 
                        case '?':           /* error */
305
 
                                usage(stderr);
306
 
                                exit(4);
307
 
                }
308
 
        }
309
 
        /* check parameter except the option */
310
 
        if (optind >= argc) {
311
 
                SFEX_LOG_ERR("%s: ERROR: no device specified.\n", progname);
312
 
                usage(stderr);
313
 
                exit(EXIT_FAILURE);
314
 
        } else if (optind + 1 < argc) {
315
 
                SFEX_LOG_ERR("%s: ERROR: too many arguments.\n", progname);
316
 
                usage(stderr);
317
 
                exit(EXIT_FAILURE);
318
 
        }
319
 
        device = argv[optind];
320
 
 
321
 
        if (rscpidfile == NULL) {
322
 
                SFEX_LOG_ERR("%s: ERROR: Directory for saving pid file is not specified.\n", progname);
323
 
                exit(EXIT_FAILURE);
324
 
        }
325
 
 
326
 
        prepare_lock(device);
327
 
#if !SFEX_TESTING
328
 
        sysrq_fd = open("/proc/sysrq-trigger", O_WRONLY);
329
 
        if (sysrq_fd == -1) {
330
 
                SFEX_LOG_ERR("failed to open /proc/sysrq-trigger due to %s\n", strerror(errno));
331
 
                exit(EXIT_FAILURE);
332
 
        }
333
 
#endif
334
 
 
335
 
        ret = lock_index_check();
336
 
        if (ret == -1)
337
 
                exit(EXIT_FAILURE);
338
 
 
339
 
        {
340
 
                struct sigaction sig_act;
341
 
                sigemptyset (&sig_act.sa_mask);
342
 
                sig_act.sa_flags = SA_SIGINFO;
343
 
 
344
 
                sig_act.sa_sigaction = quit_handler;
345
 
                ret = sigaction(SIGTERM, &sig_act, NULL);
346
 
                if (ret == -1) {
347
 
                        SFEX_LOG_ERR("sigaction failed\n");
348
 
                        exit(EXIT_FAILURE);
349
 
                }
350
 
        }
351
 
 
352
 
        SFEX_LOG_INFO("Starting SFeX Daemon...\n");
353
 
        
354
 
        /* acquire lock first.*/
355
 
        acquire_lock();
356
 
 
357
 
        if (daemon(0, 1) != 0) {
358
 
                cl_perror("%s::%d: daemon() failed.", __FUNCTION__, __LINE__);
359
 
                release_lock();
360
 
                exit(EXIT_FAILURE);
361
 
        }
362
 
        if (cl_lock_pidfile(rscpidfile) < 0) {
363
 
                SFEX_LOG_ERR("Creating pidfile failed.");
364
 
                release_lock();
365
 
                exit(EXIT_FAILURE);
366
 
        }
367
 
 
368
 
        cl_make_realtime(-1, -1, 128, 128);
369
 
        
370
 
        SFEX_LOG_INFO("SFeX Daemon started.\n");
371
 
        while (1) {
372
 
                sleep (monitor_interval);
373
 
                update_lock();
374
 
        }
375
 
}