1
/* -*- mode: c; c-basic-offset: 8; -*-
2
* vim: noexpandtab sw=8 ts=8 sts=0:
4
* Copyright (C) 2008 Oracle. All rights reserved.
6
* This copyrighted material is made available to anyone wishing to use,
7
* modify, copy, or redistribute it subject to the terms and conditions
8
* of the GNU General Public License v.2.
17
#include <sys/types.h>
26
#include <openais/saAis.h>
27
#include <openais/saCkpt.h>
29
#include "ocfs2_controld.h"
35
SaCkptCheckpointHandleT ch_handle;
38
/*
 * Our connection to the CKPT service.  It is filled in by
 * saCkptInitialize(), passed to saCkptCheckpointOpen() for every
 * checkpoint we open, and handed to saCkptFinalize() on disconnect.
 */
static SaCkptHandleT daemon_handle;
39
/*
 * Handle for the global ("controld") checkpoint.  ckpt_open_global()
 * passes its address to ckpt_new(), and ckpt_close_global() frees it and
 * resets it to NULL.  ckpt_global_store() and ckpt_global_get() treat
 * NULL as "the global checkpoint is not initialized".
 */
struct ckpt_handle *global_handle;
41
/* This is the version OpenAIS supports */
42
static SaVersionT version = { 'B', 1, 1 };
44
static SaCkptCallbacksT callbacks = {
50
* All of our checkpoints store 4K of data in 32 sections of 128 bytes. We
51
* probably won't actually use more than one section of each checkpoint,
52
* but we spec them larger so that we can use space later compatibly. Note
53
* that data space is only allocated when needed, so if we store one section
54
* of 10 bytes, the checkpoint uses 10 bytes, not 4K.
56
* Retention time is 0 - when a daemon exits, it should disappear.
58
* Max section ID size is basically big enough to hold a uuid (32
59
* characters) plus something extra. We don't use uuids in section names
60
* yet, but just in case.
62
/*
 * Largest amount of data, in bytes, a single section may hold.  Used as
 * maxSectionSize in ckpt_attributes, as the upper bound on data_len in
 * ckpt_section_store(), and as the size of the read buffer in
 * call_section_read().
 */
#define CKPT_MAX_SECTION_SIZE 128
63
/* Number of sections per checkpoint; used as maxSections in ckpt_attributes. */
#define CKPT_MAX_SECTIONS 32
64
/*
 * Longest section name we accept, in characters.  Used as
 * maxSectionIdSize in ckpt_attributes; ckpt_section_store() and
 * ckpt_section_get() reject names whose strlen() exceeds it.
 */
#define CKPT_MAX_SECTION_ID 40
65
static SaCkptCheckpointCreationAttributesT ckpt_attributes = {
66
.creationFlags = SA_CKPT_WR_ALL_REPLICAS,
67
.checkpointSize = 4096,
68
.retentionDuration = 0LL,
69
.maxSections = CKPT_MAX_SECTIONS,
70
.maxSectionSize = CKPT_MAX_SECTION_SIZE,
71
.maxSectionIdSize = CKPT_MAX_SECTION_ID,
74
static void ais_err_to_errno(SaAisErrorT error, int *rc, char **reason)
81
case SA_AIS_ERR_LIBRARY:
83
*reason = "Internal library error";
85
case SA_AIS_ERR_TIMEOUT:
87
*reason = "Timed out";
89
case SA_AIS_ERR_TRY_AGAIN:
91
*reason = "Try again";
93
case SA_AIS_ERR_INVALID_PARAM:
95
*reason = "Invalid parameter";
97
case SA_AIS_ERR_NO_MEMORY:
99
*reason = "Out of memory";
101
case SA_AIS_ERR_NO_RESOURCES:
103
*reason = "Insufficient resources";
105
case SA_AIS_ERR_VERSION:
107
*reason = "Protocol not compatible";
109
case SA_AIS_ERR_BAD_HANDLE:
111
*reason = "Bad Ckpt handle";
113
case SA_AIS_ERR_INIT:
115
*reason = "Initialization not complete";
117
case SA_AIS_ERR_NOT_EXIST:
119
*reason = "Object does not exist";
121
case SA_AIS_ERR_EXIST:
123
*reason = "Object already exists";
125
case SA_AIS_ERR_BAD_FLAGS:
127
*reason = "Invalid flags";
129
case SA_AIS_ERR_ACCESS:
131
*reason = "Permission denied";
135
*reason = "Unknown error";
136
log_error("Unknown error seen! (%d)", error);
142
* Our retention-time scheme of 0 means that we need to create any
143
* checkpoint we want to update. Nobody is writing to the same checkpoint
146
static int call_ckpt_open(struct ckpt_handle *handle, int write)
148
int rc, existcount = 0, againcount = 0;
151
int flags = SA_CKPT_CHECKPOINT_READ;
154
flags |= (SA_CKPT_CHECKPOINT_WRITE |
155
SA_CKPT_CHECKPOINT_CREATE);
158
log_debug("Opening checkpoint \"%.*s\" (try %d)",
159
handle->ch_name.length, handle->ch_name.value,
160
existcount + againcount + 1);
161
error = saCkptCheckpointOpen(daemon_handle,
163
write ? &ckpt_attributes : NULL,
164
flags, 0, &handle->ch_handle);
165
ais_err_to_errno(error, &rc, &reason);
167
log_debug("Opened checkpoint \"%.*s\" with handle 0x%llx",
168
handle->ch_name.length,
169
handle->ch_name.value,
173
if ((rc != -EAGAIN) &&
174
(!write || (rc != -EEXIST))){
175
log_error("Unable to open checkpoint \"%.*s\": %s",
176
handle->ch_name.length,
177
handle->ch_name.value,
181
if (write && (rc == -EEXIST)) {
183
* EEXIST means one of two things:
185
* 1) Another daemon is up and running. This
186
* one is just going to sit here printing to
187
* the log until it's killed or the other one
188
* dies. This will confuse people; they'll
189
* stop the running daemon, but not be able to
190
* unload the stack. We have to do this because
193
* 2) The daemon was stopped and then immediately
194
* restarted. AIS cleans up the checkpoint
195
* in a lazy fashion, so there is no guarantee
196
* the checkpoint is gone by the time the new
197
* daemon starts up. So we can get an EEXIST
198
* for a little while until AIS gets around to
199
* the cleanup. Because scheduling, etc, can
200
* take a while, we don't know how long that
201
* will be. So we keep retrying. Eventually,
202
* AIS will clean up the checkpoint from the
203
* daemon that exited and let us create our new
206
retry_warning(existcount,
207
"Checkpoint exists seen %d times "
208
"while opening checkpoint \"%.*s\", "
211
handle->ch_name.length,
212
handle->ch_name.value);
213
} else if (rc == -EAGAIN) {
214
/* TRY_AGAIN is Ckpt saying it's just busy. */
215
retry_warning(againcount,
216
"TRY_AGAIN seen %d times while "
217
"opening checkpoint \"%.*s\", "
220
handle->ch_name.length,
221
handle->ch_name.value);
231
static void call_ckpt_close(struct ckpt_handle *handle)
233
int rc, againcount = 0;
238
log_debug("Closing checkpoint \"%.*s\" (try %d)",
239
handle->ch_name.length, handle->ch_name.value,
241
error = saCkptCheckpointClose(handle->ch_handle);
242
ais_err_to_errno(error, &rc, &reason);
244
log_debug("Closed checkpoint \"%.*s\"",
245
handle->ch_name.length,
246
handle->ch_name.value);
250
log_error("Unable to close checkpoint \"%.*s\": %s",
251
handle->ch_name.length,
252
handle->ch_name.value,
257
retry_warning(againcount,
258
"TRY_AGAIN seen %d times while "
259
"closing checkpoint \"%.*s\", "
262
handle->ch_name.length,
263
handle->ch_name.value);
270
* All of our sections live for the life of the checkpoint. We don't need
273
static int call_section_create(struct ckpt_handle *handle, const char *name,
274
const char *data, size_t data_len)
276
int rc, againcount = 0;
279
SaCkptSectionIdT id = {
280
.idLen = strlen(name),
281
.id = (SaUint8T *)name,
283
SaCkptSectionCreationAttributesT attrs = {
285
.expirationTime = SA_TIME_END,
289
log_debug("Creating section \"%s\" on checkpoint "
291
name, handle->ch_name.length,
292
handle->ch_name.value, againcount + 1);
293
error = saCkptSectionCreate(handle->ch_handle, &attrs,
295
ais_err_to_errno(error, &rc, &reason);
297
log_debug("Created section \"%s\" on checkpoint "
299
name, handle->ch_name.length,
300
handle->ch_name.value);
304
log_error("Unable to create section \"%s\" on "
305
"checkpoint \"%.*s\": %s",
306
name, handle->ch_name.length,
307
handle->ch_name.value, reason);
311
retry_warning(againcount,
312
"TRY_AGAIN seen %d times while "
313
"creating section \"%s\" on checkpoint "
314
"\"%.*s\", still trying",
316
handle->ch_name.length,
317
handle->ch_name.value);
325
static int call_section_write(struct ckpt_handle *handle, const char *name,
326
const char *data, size_t data_len)
328
int rc, againcount = 0;
331
SaCkptSectionIdT id = {
332
.idLen = strlen(name),
333
.id = (SaUint8T *)name,
337
log_debug("Writing to section \"%s\" on checkpoint "
339
name, handle->ch_name.length,
340
handle->ch_name.value, againcount + 1);
341
error = saCkptSectionOverwrite(handle->ch_handle, &id,
343
ais_err_to_errno(error, &rc, &reason);
345
log_debug("Stored section \"%s\" on checkpoint "
347
name, handle->ch_name.length,
348
handle->ch_name.value);
352
/* If it doesn't exist, create it. */
354
rc = call_section_create(handle, name, data, data_len);
359
log_error("Unable to write section \"%s\" on "
360
"checkpoint \"%.*s\": %s",
361
name, handle->ch_name.length,
362
handle->ch_name.value, reason);
366
retry_warning(againcount,
367
"TRY_AGAIN seen %d times while "
368
"writing section \"%s\" on checkpoint "
369
"\"%.*s\", still trying",
371
handle->ch_name.length,
372
handle->ch_name.value);
380
static int call_section_read(struct ckpt_handle *handle, const char *name,
381
char **data, size_t *data_len)
383
int rc, againcount = 0;
385
char readbuf[CKPT_MAX_SECTION_SIZE];
387
SaCkptIOVectorElementT readvec[] = {
390
.idLen = strlen(name),
391
.id = (SaUint8T *)name,
393
.dataBuffer = readbuf,
394
.dataSize = CKPT_MAX_SECTION_SIZE,
399
log_debug("Reading from section \"%s\" on checkpoint "
401
name, handle->ch_name.length,
402
handle->ch_name.value, againcount + 1);
403
error = saCkptCheckpointRead(handle->ch_handle, readvec, 1,
405
ais_err_to_errno(error, &rc, &reason);
407
log_debug("Read section \"%s\" from checkpoint "
409
name, handle->ch_name.length,
410
handle->ch_name.value);
414
/* -ENOENT is a clean error for the caller to handle */
416
log_debug("Checkpoint \"%.*s\" does not have a "
417
"section named \"%s\"",
418
handle->ch_name.length,
419
handle->ch_name.value, name);
424
log_error("Unable to read section \"%s\" from "
425
"checkpoint \"%.*s\": %s",
426
name, handle->ch_name.length,
427
handle->ch_name.value, reason);
431
retry_warning(againcount,
432
"TRY_AGAIN seen %d times while "
433
"reading section \"%s\" on checkpoint "
434
"\"%.*s\", still trying",
436
handle->ch_name.length,
437
handle->ch_name.value);
445
p = malloc(sizeof(char) * readvec[0].readSize);
447
memcpy(p, readbuf, readvec[0].readSize);
449
*data_len = readvec[0].readSize;
451
log_error("Unable to allocate memory while reading section "
452
"\"%s\" from checkpoint \"%.*s\"",
453
name, handle->ch_name.length,
454
handle->ch_name.value);
463
int ckpt_section_store(struct ckpt_handle *handle, const char *section,
464
const char *data, size_t data_len)
466
if (strlen(section) > CKPT_MAX_SECTION_ID) {
467
log_error("Error: section id \"%s\" is too long "
469
section, CKPT_MAX_SECTION_ID);
472
if (data_len > CKPT_MAX_SECTION_SIZE) {
473
log_error("Error: attempt to store %d bytes in a section "
475
data_len, CKPT_MAX_SECTION_SIZE);
479
return call_section_write(handle, section, data, data_len);
482
int ckpt_global_store(const char *section, const char *data, size_t data_len)
484
if (!global_handle) {
485
log_error("Error: The global checkpoint is not initialized");
489
return ckpt_section_store(global_handle, section, data, data_len);
492
int ckpt_section_get(struct ckpt_handle *handle, const char *section,
493
char **data, size_t *data_len)
495
if (strlen(section) > CKPT_MAX_SECTION_ID) {
496
log_error("Error: section id \"%s\" is too long "
498
section, CKPT_MAX_SECTION_ID);
502
return call_section_read(handle, section, data, data_len);
505
int ckpt_global_get(const char *section, char **data, size_t *data_len)
507
if (!global_handle) {
508
log_error("Error: The global checkpoint is not initialized");
512
return call_section_read(global_handle, section, data, data_len);
516
* We name our checkpoints in one of three ways, all prefixed with 'ocfs2:'.
518
* The global checkpoint is named 'ocfs2:controld'.
519
* The node info checkpoint is named 'ocfs2:controld:<8-hex-char-nodeid>'
520
* A mount checkpoint is named 'ocfs2:<uuid>:<8-hex-char-nodeid>'
522
/*
 * Common prefix for every checkpoint name.  ckpt_new() adds its length to
 * the caller-supplied name's length when checking against
 * SA_MAX_NAME_LENGTH.
 */
#define CKPT_PREFIX "ocfs2:"
523
static int ckpt_new(const char *name, int write, struct ckpt_handle **handle)
526
size_t namelen = strlen(name) + strlen(CKPT_PREFIX);
527
struct ckpt_handle *h;
529
if (namelen > SA_MAX_NAME_LENGTH) {
530
log_error("Checkpoint name \"%s\" too long", name);
534
h = malloc(sizeof(struct ckpt_handle));
536
log_error("Unable to allocate checkpoint handle");
540
memset(h, 0, sizeof(struct ckpt_handle));
541
h->ch_name.length = snprintf((char *)(h->ch_name.value),
542
SA_MAX_NAME_LENGTH, "%s%s",
545
rc = call_ckpt_open(h, write);
554
static void ckpt_free(struct ckpt_handle *handle)
556
if (handle->ch_handle)
557
call_ckpt_close(handle);
562
int ckpt_open_global(int write)
567
return ckpt_new("controld", write, &global_handle);
570
void ckpt_close_global(void)
573
ckpt_free(global_handle);
574
global_handle = NULL;
578
int ckpt_open_node(int nodeid, struct ckpt_handle **handle)
580
char name[SA_MAX_NAME_LENGTH];
582
snprintf(name, SA_MAX_NAME_LENGTH, "controld:%08x", nodeid);
584
return ckpt_new(name, 0, handle);
587
int ckpt_open_this_node(struct ckpt_handle **handle)
589
char name[SA_MAX_NAME_LENGTH];
591
snprintf(name, SA_MAX_NAME_LENGTH, "controld:%08x", our_nodeid);
593
return ckpt_new(name, 1, handle);
596
void ckpt_close(struct ckpt_handle *handle)
603
int rc, againcount = 0;
608
log_debug("Initializing CKPT service (try %d)",
610
error = saCkptInitialize(&daemon_handle, &callbacks,
612
ais_err_to_errno(error, &rc, &reason);
614
log_debug("Connected to CKPT service with handle 0x%llx",
619
log_error("Unable to connect to CKPT: %s", reason);
622
retry_warning(againcount,
623
"TRY_AGAIN seen %d times while "
624
"connectiong to CKPT, still trying",
634
int rc, againcount = 0;
642
log_debug("Disconnecting from CKPT service (try %d)",
644
error = saCkptFinalize(daemon_handle);
645
ais_err_to_errno(error, &rc, &reason);
647
log_debug("Disconnected from CKPT service");
651
log_error("Unable to disconnect from CKPT: %s",
655
retry_warning(againcount,
656
"TRY_AGAIN seen %d times while "
657
"disconnecting to CKPT, still trying",
664
int dump_point, dump_wrap, daemon_debug_opt = 1;
665
char daemon_debug_buf[1024];
666
char dump_buf[DUMP_SIZE];
667
void daemon_dump_save(void)
671
len = strlen(daemon_debug_buf);
673
for (i = 0; i < len; i++) {
674
dump_buf[dump_point++] = daemon_debug_buf[i];
676
if (dump_point == DUMP_SIZE) {
684
int main(int argc, char *argv[])
689
struct ckpt_handle *h;
695
rc = ckpt_open_global(1);
698
rc = ckpt_global_store("version", "1.0", strlen("1.0"));
700
rc = ckpt_global_get("foo", &buf, &buflen);
702
log_error("read should not have found anything");
711
rc = ckpt_open_this_node(&h);
714
rc = ckpt_section_store(h, "foo", "bar", strlen("bar"));
716
rc = ckpt_section_get(h, "foo", &buf, &buflen);
718
if ((buflen != strlen("bar")) ||
719
memcmp(buf, "bar", strlen("bar"))) {
720
log_error("read returned bad value");
730
rc = ckpt_open_node(4, &h);
741
#endif /* DEBUG_EXE */