~wb-munzinger/+junk/sanlock

« back to all changes in this revision

Viewing changes to lockspace.c

  • Committer: David Weber
  • Date: 2012-01-18 13:00:36 UTC
  • Revision ID: wb@munzinger.de-20120118130036-9a7wvhhmfuip7zx5
Tags: upstream-1.9
Import

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright 2010-2011 Red Hat, Inc.
 
3
 *
 
4
 * This copyrighted material is made available to anyone wishing to use,
 
5
 * modify, copy, or redistribute it subject to the terms and conditions
 
6
 * of the GNU General Public License v2 or (at your option) any later version.
 
7
 */
 
8
 
 
9
#include <inttypes.h>
 
10
#include <unistd.h>
 
11
#include <stdio.h>
 
12
#include <stdlib.h>
 
13
#include <stdint.h>
 
14
#include <stddef.h>
 
15
#include <fcntl.h>
 
16
#include <string.h>
 
17
#include <errno.h>
 
18
#include <limits.h>
 
19
#include <pthread.h>
 
20
#include <time.h>
 
21
#include <syslog.h>
 
22
#include <sys/types.h>
 
23
#include <sys/time.h>
 
24
#include <sys/un.h>
 
25
 
 
26
#include "sanlock_internal.h"
 
27
#include "sanlock_sock.h"
 
28
#include "diskio.h"
 
29
#include "log.h"
 
30
#include "delta_lease.h"
 
31
#include "lockspace.h"
 
32
#include "resource.h"
 
33
#include "watchdog.h"
 
34
#include "task.h"
 
35
#include "direct.h"
 
36
 
 
37
/* Next value assigned to sp->space_id by add_lockspace(); starts at 1 and is
   post-incremented there while spaces_mutex is held. */
static uint32_t space_id_counter = 1;
 
38
 
 
39
static struct space *_search_space(char *name,
 
40
                                   struct sync_disk *disk,
 
41
                                   uint64_t host_id,
 
42
                                   struct list_head *head1,
 
43
                                   struct list_head *head2,
 
44
                                   struct list_head *head3)
 
45
{
 
46
        struct space *sp;
 
47
 
 
48
        if (head1) {
 
49
                list_for_each_entry(sp, head1, list) {
 
50
                        if (name && strncmp(sp->space_name, name, NAME_ID_SIZE))
 
51
                                continue;
 
52
                        if (disk && strncmp(sp->host_id_disk.path, disk->path, SANLK_PATH_LEN))
 
53
                                continue;
 
54
                        if (disk && sp->host_id_disk.offset != disk->offset)
 
55
                                continue;
 
56
                        if (host_id && sp->host_id != host_id)
 
57
                                continue;
 
58
                        return sp;
 
59
                }
 
60
        }
 
61
        if (head2) {
 
62
                list_for_each_entry(sp, head2, list) {
 
63
                        if (name && strncmp(sp->space_name, name, NAME_ID_SIZE))
 
64
                                continue;
 
65
                        if (disk && strncmp(sp->host_id_disk.path, disk->path, SANLK_PATH_LEN))
 
66
                                continue;
 
67
                        if (disk && sp->host_id_disk.offset != disk->offset)
 
68
                                continue;
 
69
                        if (host_id && sp->host_id != host_id)
 
70
                                continue;
 
71
                        return sp;
 
72
                }
 
73
        }
 
74
        if (head3) {
 
75
                list_for_each_entry(sp, head3, list) {
 
76
                        if (name && strncmp(sp->space_name, name, NAME_ID_SIZE))
 
77
                                continue;
 
78
                        if (disk && strncmp(sp->host_id_disk.path, disk->path, SANLK_PATH_LEN))
 
79
                                continue;
 
80
                        if (disk && sp->host_id_disk.offset != disk->offset)
 
81
                                continue;
 
82
                        if (host_id && sp->host_id != host_id)
 
83
                                continue;
 
84
                        return sp;
 
85
                }
 
86
        }
 
87
        return NULL;
 
88
}
 
89
 
 
90
struct space *find_lockspace(char *name)
 
91
{
 
92
        return _search_space(name, NULL, 0, &spaces, &spaces_rem, &spaces_add);
 
93
}
 
94
 
 
95
int _lockspace_info(char *space_name, struct space *sp_out)
 
96
{
 
97
        struct space *sp;
 
98
 
 
99
        list_for_each_entry(sp, &spaces, list) {
 
100
                if (strncmp(sp->space_name, space_name, NAME_ID_SIZE))
 
101
                        continue;
 
102
                memcpy(sp_out, sp, sizeof(struct space));
 
103
                return 0;
 
104
        }
 
105
        return -1;
 
106
}
 
107
 
 
108
int lockspace_info(char *space_name, struct space *sp_out)
 
109
{
 
110
        int rv;
 
111
 
 
112
        pthread_mutex_lock(&spaces_mutex);
 
113
        rv = _lockspace_info(space_name, sp_out);
 
114
        pthread_mutex_unlock(&spaces_mutex);
 
115
 
 
116
        return rv;
 
117
}
 
118
 
 
119
int lockspace_disk(char *space_name, struct sync_disk *disk)
 
120
{
 
121
        struct space space;
 
122
        int rv;
 
123
 
 
124
        pthread_mutex_lock(&spaces_mutex);
 
125
        rv = _lockspace_info(space_name, &space);
 
126
        if (!rv) {
 
127
                memcpy(disk, &space.host_id_disk, sizeof(struct sync_disk));
 
128
                disk->fd = -1;
 
129
        }
 
130
        pthread_mutex_unlock(&spaces_mutex);
 
131
 
 
132
        return rv;
 
133
}
 
134
 
 
135
#if 0
 
136
/*
 * Clear the bit that represents host_id in bitmap.
 *
 * Uses the same layout as set_id_bit()/test_id_bit(): host ids are 1-based,
 * host_id N lives in byte (N-1)/8 at bit position (N-1)%8.  host_id must be
 * >= 1 and bitmap must be large enough to contain that byte.
 */
static void clear_bit(int host_id, char *bitmap)
{
        char *byte = bitmap + ((host_id - 1) / 8);
        unsigned int bit = (host_id - 1) % 8;

        /* mask out exactly one bit; work in unsigned to avoid sign issues */
        *byte = (char)((unsigned char)*byte & ~(1u << bit));
}
 
143
#endif
 
144
 
 
145
/*
 * Set the bit for a 1-based host_id in bitmap and hand back, through *c,
 * the resulting value of the byte that was modified.
 * host_id N maps to byte (N-1)/8, bit (N-1)%8.
 */
static void set_id_bit(int host_id, char *bitmap, char *c)
{
        int index = host_id - 1;
        unsigned int shift = index % 8;
        char *target = &bitmap[index / 8];
        char mask = 1 << shift;

        *target |= mask;
        *c = *target;
}
 
157
 
 
158
/* FIXME: another copy in direct_lib.c */
 
159
 
 
160
/*
 * Test the bit for a 1-based host_id in bitmap.
 * Returns zero when the bit is clear and a non-zero value (the byte ANDed
 * with the single-bit mask) when it is set.
 */
int test_id_bit(int host_id, char *bitmap)
{
        int index = host_id - 1;
        unsigned int shift = index % 8;
        char mask = 1 << shift;

        return bitmap[index / 8] & mask;
}
 
170
 
 
171
int host_status_set_bit(char *space_name, uint64_t host_id)
 
172
{
 
173
        struct space *sp;
 
174
        int found = 0;
 
175
 
 
176
        if (!host_id || host_id > DEFAULT_MAX_HOSTS)
 
177
                return -EINVAL;
 
178
 
 
179
        pthread_mutex_lock(&spaces_mutex);
 
180
        list_for_each_entry(sp, &spaces, list) {
 
181
                if (strncmp(sp->space_name, space_name, NAME_ID_SIZE))
 
182
                        continue;
 
183
                found = 1;
 
184
                break;
 
185
        }
 
186
        pthread_mutex_unlock(&spaces_mutex);
 
187
 
 
188
        if (!found)
 
189
                return -ENOSPC;
 
190
 
 
191
        pthread_mutex_lock(&sp->mutex);
 
192
        sp->host_status[host_id-1].set_bit_time = monotime();
 
193
        pthread_mutex_unlock(&sp->mutex);
 
194
        return 0;
 
195
}
 
196
 
 
197
int host_info(char *space_name, uint64_t host_id, struct host_status *hs_out)
 
198
{
 
199
        struct space *sp;
 
200
        int found = 0;
 
201
 
 
202
        if (!host_id || host_id > DEFAULT_MAX_HOSTS)
 
203
                return -EINVAL;
 
204
 
 
205
        pthread_mutex_lock(&spaces_mutex);
 
206
        list_for_each_entry(sp, &spaces, list) {
 
207
                if (strncmp(sp->space_name, space_name, NAME_ID_SIZE))
 
208
                        continue;
 
209
                memcpy(hs_out, &sp->host_status[host_id-1], sizeof(struct host_status));
 
210
                found = 1;
 
211
                break;
 
212
        }
 
213
        pthread_mutex_unlock(&spaces_mutex);
 
214
 
 
215
        if (!found)
 
216
                return -ENOSPC;
 
217
        return 0;
 
218
}
 
219
 
 
220
static void create_bitmap(struct task *task, struct space *sp, char *bitmap)
 
221
{
 
222
        uint64_t now;
 
223
        int i;
 
224
        char c;
 
225
 
 
226
        now = monotime();
 
227
 
 
228
        pthread_mutex_lock(&sp->mutex);
 
229
        for (i = 0; i < DEFAULT_MAX_HOSTS; i++) {
 
230
                if (i+1 == sp->host_id)
 
231
                        continue;
 
232
 
 
233
                if (!sp->host_status[i].set_bit_time)
 
234
                        continue;
 
235
 
 
236
                if (now - sp->host_status[i].set_bit_time > task->request_finish_seconds) {
 
237
                        log_space(sp, "bitmap clear host_id %d", i+1);
 
238
                        sp->host_status[i].set_bit_time = 0;
 
239
                } else {
 
240
                        set_id_bit(i+1, bitmap, &c);
 
241
                        log_space(sp, "bitmap set host_id %d byte %x", i+1, c);
 
242
                }
 
243
        }
 
244
        pthread_mutex_unlock(&sp->mutex);
 
245
}
 
246
 
 
247
void check_other_leases(struct task *task, struct space *sp, char *buf)
 
248
{
 
249
        struct leader_record *leader;
 
250
        struct sync_disk *disk;
 
251
        struct host_status *hs;
 
252
        char *bitmap;
 
253
        uint64_t now;
 
254
        int i, new;
 
255
 
 
256
        disk = &sp->host_id_disk;
 
257
 
 
258
        now = monotime();
 
259
        new = 0;
 
260
 
 
261
        for (i = 0; i < DEFAULT_MAX_HOSTS; i++) {
 
262
                hs = &sp->host_status[i];
 
263
                hs->last_check = now;
 
264
 
 
265
                leader = (struct leader_record *)(buf + (i * disk->sector_size));
 
266
 
 
267
                if (hs->owner_id == leader->owner_id &&
 
268
                    hs->owner_generation == leader->owner_generation &&
 
269
                    hs->timestamp == leader->timestamp) {
 
270
                        continue;
 
271
                }
 
272
 
 
273
                hs->owner_id = leader->owner_id;
 
274
                hs->owner_generation = leader->owner_generation;
 
275
                hs->timestamp = leader->timestamp;
 
276
                hs->last_live = now;
 
277
 
 
278
                if (i+1 == sp->host_id)
 
279
                        continue;
 
280
 
 
281
                bitmap = (char *)leader + HOSTID_BITMAP_OFFSET;
 
282
 
 
283
                if (!test_id_bit(sp->host_id, bitmap))
 
284
                        continue;
 
285
 
 
286
                /* this host has made a request for us, we won't take a new
 
287
                   request from this host for another request_finish_seconds */
 
288
 
 
289
                if (now - hs->last_req < task->request_finish_seconds)
 
290
                        continue;
 
291
 
 
292
                log_space(sp, "request from host_id %d", i+1);
 
293
                hs->last_req = now;
 
294
                new = 1;
 
295
        }
 
296
 
 
297
        if (new)
 
298
                set_resource_examine(sp->space_name, NULL);
 
299
}
 
300
 
 
301
/*
 
302
 * check if our_host_id_thread has renewed within timeout
 
303
 */
 
304
 
 
305
int check_our_lease(struct task *task, struct space *sp, int *check_all, char *check_buf)
 
306
{
 
307
        uint64_t last_success;
 
308
        int corrupt_result;
 
309
        int gap;
 
310
 
 
311
        pthread_mutex_lock(&sp->mutex);
 
312
        last_success = sp->lease_status.renewal_last_success;
 
313
        corrupt_result = sp->lease_status.corrupt_result;
 
314
 
 
315
        if (sp->lease_status.renewal_read_count > sp->lease_status.renewal_read_check) {
 
316
                /* main loop will pass this buf to check_other_leases next */
 
317
                sp->lease_status.renewal_read_check = sp->lease_status.renewal_read_count;
 
318
                *check_all = 1;
 
319
                if (check_buf)
 
320
                        memcpy(check_buf, sp->lease_status.renewal_read_buf, sp->align_size);
 
321
        }
 
322
        pthread_mutex_unlock(&sp->mutex);
 
323
 
 
324
        if (corrupt_result) {
 
325
                log_erros(sp, "check_our_lease corrupt %d", corrupt_result);
 
326
                return -1;
 
327
        }
 
328
 
 
329
        gap = monotime() - last_success;
 
330
 
 
331
        if (gap >= task->id_renewal_fail_seconds) {
 
332
                log_erros(sp, "check_our_lease failed %d", gap);
 
333
                return -1;
 
334
        }
 
335
 
 
336
        if (gap >= task->id_renewal_warn_seconds) {
 
337
                log_erros(sp, "check_our_lease warning %d last_success %llu",
 
338
                          gap, (unsigned long long)last_success);
 
339
        }
 
340
 
 
341
        if (com.debug_renew > 1) {
 
342
                log_space(sp, "check_our_lease good %d %llu",
 
343
                          gap, (unsigned long long)last_success);
 
344
        }
 
345
 
 
346
        return 0;
 
347
}
 
348
 
 
349
/* If a renewal result is one of the listed errors, it means our
 
350
   delta lease has been corrupted/overwritten/reinitialized out from
 
351
   under us, and we should stop using it immediately.  There's no
 
352
   point in retrying the renewal. */
 
353
 
 
354
static int corrupt_result(int result)
 
355
{
 
356
        switch (result) {
 
357
        case SANLK_RENEW_OWNER:
 
358
        case SANLK_RENEW_DIFF:
 
359
        case SANLK_LEADER_MAGIC:
 
360
        case SANLK_LEADER_VERSION:
 
361
        case SANLK_LEADER_SECTORSIZE:
 
362
        case SANLK_LEADER_LOCKSPACE:
 
363
        case SANLK_LEADER_CHECKSUM:
 
364
                return result;
 
365
        default:
 
366
                return 0;
 
367
        }
 
368
}
 
369
 
 
370
static void *lockspace_thread(void *arg_in)
 
371
{
 
372
        char bitmap[HOSTID_BITMAP_SIZE];
 
373
        struct task task;
 
374
        struct space *sp;
 
375
        struct leader_record leader;
 
376
        uint64_t delta_begin, last_success;
 
377
        int rv, delta_length, renewal_interval;
 
378
        int acquire_result, delta_result, read_result;
 
379
        int opened = 0;
 
380
        int stop = 0;
 
381
 
 
382
        sp = (struct space *)arg_in;
 
383
 
 
384
        memset(&task, 0, sizeof(struct task));
 
385
        setup_task_timeouts(&task, main_task.io_timeout_seconds);
 
386
        setup_task_aio(&task, main_task.use_aio, HOSTID_AIO_CB_SIZE);
 
387
        memcpy(task.name, sp->space_name, NAME_ID_SIZE);
 
388
 
 
389
        delta_begin = monotime();
 
390
 
 
391
        rv = open_disk(&sp->host_id_disk);
 
392
        if (rv < 0) {
 
393
                log_erros(sp, "open_disk %s error %d", sp->host_id_disk.path, rv);
 
394
                acquire_result = -ENODEV;
 
395
                goto set_status;
 
396
        }
 
397
        opened = 1;
 
398
 
 
399
        sp->align_size = direct_align(&sp->host_id_disk);
 
400
        if (sp->align_size < 0) {
 
401
                log_erros(sp, "direct_align error");
 
402
                acquire_result = sp->align_size;
 
403
                goto set_status;
 
404
        }
 
405
 
 
406
        sp->lease_status.renewal_read_buf = malloc(sp->align_size);
 
407
        if (!sp->lease_status.renewal_read_buf) {
 
408
                acquire_result = -ENOMEM;
 
409
                goto set_status;
 
410
        }
 
411
 
 
412
        /*
 
413
         * acquire the delta lease
 
414
         */
 
415
 
 
416
        delta_begin = monotime();
 
417
 
 
418
        delta_result = delta_lease_acquire(&task, sp, &sp->host_id_disk,
 
419
                                           sp->space_name, our_host_name_global,
 
420
                                           sp->host_id, &leader);
 
421
        delta_length = monotime() - delta_begin;
 
422
 
 
423
        if (delta_result == SANLK_OK)
 
424
                last_success = leader.timestamp;
 
425
 
 
426
        acquire_result = delta_result;
 
427
 
 
428
        /* we need to start the watchdog after we acquire the host_id but
 
429
           before we allow any pid's to begin running */
 
430
 
 
431
        if (delta_result == SANLK_OK) {
 
432
                rv = create_watchdog_file(sp, last_success);
 
433
                if (rv < 0) {
 
434
                        log_erros(sp, "create_watchdog failed %d", rv);
 
435
                        acquire_result = SANLK_ERROR;
 
436
                }
 
437
        }
 
438
 
 
439
 set_status:
 
440
        pthread_mutex_lock(&sp->mutex);
 
441
        sp->lease_status.acquire_last_result = acquire_result;
 
442
        sp->lease_status.acquire_last_attempt = delta_begin;
 
443
        if (delta_result == SANLK_OK)
 
444
                sp->lease_status.acquire_last_success = last_success;
 
445
        sp->lease_status.renewal_last_result = acquire_result;
 
446
        sp->lease_status.renewal_last_attempt = delta_begin;
 
447
        if (delta_result == SANLK_OK)
 
448
                sp->lease_status.renewal_last_success = last_success;
 
449
        pthread_mutex_unlock(&sp->mutex);
 
450
 
 
451
        if (acquire_result < 0)
 
452
                goto out;
 
453
 
 
454
        sp->host_generation = leader.owner_generation;
 
455
 
 
456
        while (1) {
 
457
                pthread_mutex_lock(&sp->mutex);
 
458
                stop = sp->thread_stop;
 
459
                pthread_mutex_unlock(&sp->mutex);
 
460
                if (stop)
 
461
                        break;
 
462
 
 
463
 
 
464
                /*
 
465
                 * wait between each renewal
 
466
                 */
 
467
 
 
468
                if (monotime() - last_success < task.id_renewal_seconds) {
 
469
                        sleep(1);
 
470
                        continue;
 
471
                } else {
 
472
                        /* don't spin too quickly if renew is failing
 
473
                           immediately and repeatedly */
 
474
                        usleep(500000);
 
475
                }
 
476
 
 
477
 
 
478
                /*
 
479
                 * do a renewal, measuring length of time spent in renewal,
 
480
                 * and the length of time between successful renewals
 
481
                 */
 
482
 
 
483
                memset(bitmap, 0, sizeof(bitmap));
 
484
                create_bitmap(&task, sp, bitmap);
 
485
 
 
486
                delta_begin = monotime();
 
487
 
 
488
                delta_result = delta_lease_renew(&task, sp, &sp->host_id_disk,
 
489
                                                 sp->space_name, bitmap,
 
490
                                                 delta_result, &read_result,
 
491
                                                 &leader, &leader);
 
492
                delta_length = monotime() - delta_begin;
 
493
 
 
494
                if (delta_result == SANLK_OK) {
 
495
                        renewal_interval = leader.timestamp - last_success;
 
496
                        last_success = leader.timestamp;
 
497
                }
 
498
 
 
499
 
 
500
                /*
 
501
                 * publish the results
 
502
                 */
 
503
 
 
504
                pthread_mutex_lock(&sp->mutex);
 
505
                sp->lease_status.renewal_last_result = delta_result;
 
506
                sp->lease_status.renewal_last_attempt = delta_begin;
 
507
 
 
508
                if (delta_result == SANLK_OK)
 
509
                        sp->lease_status.renewal_last_success = last_success;
 
510
 
 
511
                if (delta_result != SANLK_OK && !sp->lease_status.corrupt_result)
 
512
                        sp->lease_status.corrupt_result = corrupt_result(delta_result);
 
513
 
 
514
                if (read_result == SANLK_OK && task.iobuf) {
 
515
                        memcpy(sp->lease_status.renewal_read_buf, task.iobuf, sp->align_size);
 
516
                        sp->lease_status.renewal_read_count++;
 
517
                }
 
518
 
 
519
 
 
520
                /*
 
521
                 * pet the watchdog
 
522
                 * (don't update on thread_stop because it's probably unlinked)
 
523
                 */
 
524
 
 
525
                if (delta_result == SANLK_OK && !sp->thread_stop)
 
526
                        update_watchdog_file(sp, last_success);
 
527
 
 
528
                pthread_mutex_unlock(&sp->mutex);
 
529
 
 
530
 
 
531
                /*
 
532
                 * log the results
 
533
                 */
 
534
 
 
535
                if (delta_result != SANLK_OK) {
 
536
                        log_erros(sp, "renewal error %d delta_length %d last_success %llu",
 
537
                                  delta_result, delta_length, (unsigned long long)last_success);
 
538
                } else if (delta_length > task.id_renewal_seconds) {
 
539
                        log_erros(sp, "renewed %llu delta_length %d too long",
 
540
                                  (unsigned long long)last_success, delta_length);
 
541
                } else if (com.debug_renew) {
 
542
                        log_space(sp, "renewed %llu delta_length %d interval %d",
 
543
                                  (unsigned long long)last_success, delta_length, renewal_interval);
 
544
                }
 
545
        }
 
546
 
 
547
        /* watchdog unlink was done in main_loop when thread_stop was set, to
 
548
           get it done as quickly as possible in case the wd is about to fire. */
 
549
 
 
550
        close_watchdog_file(sp);
 
551
 out:
 
552
        if (delta_result == SANLK_OK)
 
553
                delta_lease_release(&task, sp, &sp->host_id_disk,
 
554
                                    sp->space_name, &leader, &leader);
 
555
 
 
556
        if (opened)
 
557
                close(sp->host_id_disk.fd);
 
558
 
 
559
        close_task_aio(&task);
 
560
        return NULL;
 
561
}
 
562
 
 
563
static void free_sp(struct space *sp)
 
564
{
 
565
        if (sp->lease_status.renewal_read_buf)
 
566
                free(sp->lease_status.renewal_read_buf);
 
567
        free(sp);
 
568
}
 
569
 
 
570
/*
 
571
 * When this function returns, it needs to be safe to begin processing lease
 
572
 * requests and allowing pid's to run, so we need to own our host_id, and the
 
573
 * watchdog needs to be active watching our host_id renewals.
 
574
 */
 
575
 
 
576
int add_lockspace(struct sanlk_lockspace *ls)
 
577
{
 
578
        struct space *sp, *sp2;
 
579
        int rv, result;
 
580
 
 
581
        if (!ls->name[0] || !ls->host_id || !ls->host_id_disk.path[0]) {
 
582
                log_error("add_lockspace bad args id %llu name %zu path %zu",
 
583
                          (unsigned long long)ls->host_id,
 
584
                          strlen(ls->name), strlen(ls->host_id_disk.path));
 
585
                return -EINVAL;
 
586
        }
 
587
 
 
588
        sp = malloc(sizeof(struct space));
 
589
        if (!sp)
 
590
                return -ENOMEM;
 
591
        memset(sp, 0, sizeof(struct space));
 
592
 
 
593
        memcpy(sp->space_name, ls->name, NAME_ID_SIZE);
 
594
        memcpy(&sp->host_id_disk, &ls->host_id_disk, sizeof(struct sanlk_disk));
 
595
        sp->host_id_disk.sector_size = 0;
 
596
        sp->host_id_disk.fd = -1;
 
597
        sp->host_id = ls->host_id;
 
598
        pthread_mutex_init(&sp->mutex, NULL);
 
599
 
 
600
        pthread_mutex_lock(&spaces_mutex);
 
601
 
 
602
        /* search all lists for an identical lockspace */
 
603
 
 
604
        sp2 = _search_space(sp->space_name, &sp->host_id_disk, sp->host_id,
 
605
                            &spaces, NULL, NULL);
 
606
        if (sp2) {
 
607
                pthread_mutex_unlock(&spaces_mutex);
 
608
                rv = -EEXIST;
 
609
                goto fail_free;
 
610
        }
 
611
 
 
612
        sp2 = _search_space(sp->space_name, &sp->host_id_disk, sp->host_id,
 
613
                            &spaces_add, NULL, NULL);
 
614
        if (sp2) {
 
615
                pthread_mutex_unlock(&spaces_mutex);
 
616
                rv = -EINPROGRESS;
 
617
                goto fail_free;
 
618
        }
 
619
 
 
620
        sp2 = _search_space(sp->space_name, &sp->host_id_disk, sp->host_id,
 
621
                            &spaces_rem, NULL, NULL);
 
622
        if (sp2) {
 
623
                pthread_mutex_unlock(&spaces_mutex);
 
624
                rv = -EAGAIN;
 
625
                goto fail_free;
 
626
        }
 
627
 
 
628
        /* search all lists for a lockspace with the same name */
 
629
 
 
630
        sp2 = _search_space(sp->space_name, NULL, 0,
 
631
                            &spaces, &spaces_add, &spaces_rem);
 
632
        if (sp2) {
 
633
                pthread_mutex_unlock(&spaces_mutex);
 
634
                rv = -EINVAL;
 
635
                goto fail_free;
 
636
        }
 
637
 
 
638
        /* search all lists for a lockspace with the same host_id_disk */
 
639
 
 
640
        sp2 = _search_space(NULL, &sp->host_id_disk, 0,
 
641
                            &spaces, &spaces_add, &spaces_rem);
 
642
        if (sp2) {
 
643
                pthread_mutex_unlock(&spaces_mutex);
 
644
                rv = -EINVAL;
 
645
                goto fail_free;
 
646
        }
 
647
 
 
648
        sp->space_id = space_id_counter++;
 
649
        list_add(&sp->list, &spaces_add);
 
650
        pthread_mutex_unlock(&spaces_mutex);
 
651
 
 
652
        /* save a record of what this space_id is for later debugging */
 
653
        log_level(sp->space_id, 0, NULL, LOG_WARNING,
 
654
                  "lockspace %.48s:%llu:%.256s:%llu",
 
655
                  sp->space_name,
 
656
                  (unsigned long long)sp->host_id,
 
657
                  sp->host_id_disk.path,
 
658
                  (unsigned long long)sp->host_id_disk.offset);
 
659
 
 
660
        rv = pthread_create(&sp->thread, NULL, lockspace_thread, sp);
 
661
        if (rv < 0) {
 
662
                log_erros(sp, "add_lockspace create thread failed");
 
663
                goto fail_del;
 
664
        }
 
665
 
 
666
        while (1) {
 
667
                pthread_mutex_lock(&sp->mutex);
 
668
                result = sp->lease_status.acquire_last_result;
 
669
                pthread_mutex_unlock(&sp->mutex);
 
670
                if (result)
 
671
                        break;
 
672
                sleep(1);
 
673
        }
 
674
 
 
675
        if (result != SANLK_OK) {
 
676
                /* the thread exits right away if acquire fails */
 
677
                pthread_join(sp->thread, NULL);
 
678
                rv = result;
 
679
                goto fail_del;
 
680
        }
 
681
 
 
682
        /* once we move sp to spaces list, tokens can begin using it,
 
683
           and the main loop will begin monitoring its renewals */
 
684
 
 
685
        pthread_mutex_lock(&spaces_mutex);
 
686
        if (sp->external_remove || external_shutdown) {
 
687
                rv = -1;
 
688
                pthread_mutex_unlock(&spaces_mutex);
 
689
                goto fail_del;
 
690
        }
 
691
        list_move(&sp->list, &spaces);
 
692
        pthread_mutex_unlock(&spaces_mutex);
 
693
        return 0;
 
694
 
 
695
 fail_del:
 
696
        pthread_mutex_lock(&spaces_mutex);
 
697
        list_del(&sp->list);
 
698
        pthread_mutex_unlock(&spaces_mutex);
 
699
 fail_free:
 
700
        free_sp(sp);
 
701
        return rv;
 
702
}
 
703
 
 
704
int inq_lockspace(struct sanlk_lockspace *ls)
 
705
{
 
706
        int rv;
 
707
        struct space *sp;
 
708
 
 
709
        pthread_mutex_lock(&spaces_mutex);
 
710
 
 
711
        sp = _search_space(ls->name, (struct sync_disk *)&ls->host_id_disk, ls->host_id,
 
712
                           &spaces, NULL, NULL);
 
713
 
 
714
        if (sp) {
 
715
                rv = 0;
 
716
                goto out;
 
717
        } else {
 
718
                rv = -ENOENT;
 
719
        }
 
720
 
 
721
        sp = _search_space(ls->name, (struct sync_disk *)&ls->host_id_disk, ls->host_id,
 
722
                           &spaces_add, &spaces_rem, NULL);
 
723
 
 
724
        if (sp)
 
725
                rv = -EINPROGRESS;
 
726
 
 
727
 out:
 
728
        pthread_mutex_unlock(&spaces_mutex);
 
729
        return rv;
 
730
}
 
731
 
 
732
int rem_lockspace(struct sanlk_lockspace *ls)
 
733
{
 
734
        struct space *sp, *sp2;
 
735
        unsigned int id;
 
736
        int rv, done;
 
737
 
 
738
        pthread_mutex_lock(&spaces_mutex);
 
739
 
 
740
        sp = _search_space(ls->name, (struct sync_disk *)&ls->host_id_disk, ls->host_id,
 
741
                           &spaces_rem, NULL, NULL);
 
742
        if (sp) {
 
743
                pthread_mutex_unlock(&spaces_mutex);
 
744
                rv = -EINPROGRESS;
 
745
                goto out;
 
746
        }
 
747
 
 
748
        sp = _search_space(ls->name, (struct sync_disk *)&ls->host_id_disk, ls->host_id,
 
749
                           &spaces_add, NULL, NULL);
 
750
        if (sp) {
 
751
                sp->external_remove = 1;
 
752
                pthread_mutex_unlock(&spaces_mutex);
 
753
                rv = 0;
 
754
                goto out;
 
755
        }
 
756
 
 
757
        sp = _search_space(ls->name, (struct sync_disk *)&ls->host_id_disk, ls->host_id,
 
758
                           &spaces, NULL, NULL);
 
759
        if (!sp) {
 
760
                pthread_mutex_unlock(&spaces_mutex);
 
761
                rv = -ENOENT;
 
762
                goto out;
 
763
        }
 
764
 
 
765
        sp->external_remove = 1;
 
766
        id = sp->space_id;
 
767
        pthread_mutex_unlock(&spaces_mutex);
 
768
 
 
769
        while (1) {
 
770
                pthread_mutex_lock(&spaces_mutex);
 
771
                sp2 = _search_space(ls->name, (struct sync_disk *)&ls->host_id_disk, ls->host_id,
 
772
                                    &spaces, &spaces_rem, NULL);
 
773
                if (sp2 && sp2->space_id == id)
 
774
                        done = 0;
 
775
                else
 
776
                        done = 1;
 
777
                pthread_mutex_unlock(&spaces_mutex);
 
778
 
 
779
                if (done)
 
780
                        break;
 
781
                sleep(1);
 
782
        }
 
783
        rv = 0;
 
784
 out:
 
785
        return rv;
 
786
}
 
787
 
 
788
/* 
 
789
 * we call stop_host_id() when all pids are gone and we're in a safe state, so
 
790
 * it's safe to unlink the watchdog right away here.  We want to do the unlink
 
791
 * as soon as it's safe, so we can reduce the chance we get killed by the
 
792
 * watchdog (we could actually call this in main_loop just before the break).
 
793
 * Getting this unlink done quickly is more important than doing at the more
 
794
 * "logical" point commented above in host_id_thread.
 
795
 */
 
796
 
 
797
static int stop_lockspace_thread(struct space *sp, int wait)
 
798
{
 
799
        int stop, rv;
 
800
 
 
801
        pthread_mutex_lock(&sp->mutex);
 
802
        stop = sp->thread_stop;
 
803
        sp->thread_stop = 1;
 
804
        pthread_mutex_unlock(&sp->mutex);
 
805
 
 
806
        if (!stop) {
 
807
                /* should never happen */
 
808
                log_erros(sp, "stop_lockspace_thread zero thread_stop");
 
809
                return -EINVAL;
 
810
        }
 
811
 
 
812
        if (wait)
 
813
                rv = pthread_join(sp->thread, NULL);
 
814
        else
 
815
                rv = pthread_tryjoin_np(sp->thread, NULL);
 
816
 
 
817
        return rv;
 
818
}
 
819
 
 
820
void free_lockspaces(int wait)
 
821
{
 
822
        struct space *sp, *safe;
 
823
        int rv;
 
824
 
 
825
        pthread_mutex_lock(&spaces_mutex);
 
826
        list_for_each_entry_safe(sp, safe, &spaces_rem, list) {
 
827
                rv = stop_lockspace_thread(sp, wait);
 
828
                if (!rv) {
 
829
                        log_space(sp, "free lockspace");
 
830
                        list_del(&sp->list);
 
831
                        free_sp(sp);
 
832
                }
 
833
        }
 
834
        pthread_mutex_unlock(&spaces_mutex);
 
835
}
 
836