2
* CTDB Performance Metrics Domain Agent (PMDA) for Performance Co-Pilot (PCP)
4
* Copyright (c) 1995,2004 Silicon Graphics, Inc. All Rights Reserved.
5
* Copyright (c) 2011 David Disseldorp
7
* This program is free software; you can redistribute it and/or modify it
8
* under the terms of the GNU General Public License as published by the
9
* Free Software Foundation; either version 2 of the License, or (at your
10
* option) any later version.
12
* This program is distributed in the hope that it will be useful, but
13
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17
* You should have received a copy of the GNU General Public License along
18
* with this program; if not, write to the Free Software Foundation, Inc.,
19
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
#include <pcp/pmapi.h>
25
#include "../../include/includes.h"
26
#include "../../lib/tevent/tevent.h"
27
#include "../../include/ctdb.h"
28
#include "../../include/ctdb_private.h"
29
#include "../../include/ctdb_protocol.h"
35
* This PMDA connects to the locally running ctdbd daemon and pulls
36
* statistics for export via PCP. The ctdbd Unix domain socket path can be
37
* specified with the CTDB_SOCKET environment variable, otherwise the default
42
* All metrics supported in this PMDA - one table entry for each.
43
* The 4th field specifies the serial number of the instance domain
44
* for the metric, and must be either PM_INDOM_NULL (denoting a
45
* metric that only ever has a single value), or the serial number
46
* of one of the instance domains declared in the instance domain table
47
* (this PMDA declares no such table - pmdaInit() is passed NULL - so every entry uses PM_INDOM_NULL).
49
/*
 * One entry per exported metric. PMDA_PMID(cluster,item): the cluster is
 * what pmda_ctdb_fetch_cb() switches on, the item is what it hands to
 * fill_node()/fill_client()/fill_timeout() for clusters 10, 11 and 12.
 * PM_TYPE_U32 metrics are returned through atom->ul, PM_TYPE_DOUBLE ones
 * through atom->d.
 *
 * NOTE(review): names on entries that had no comment are inferred from the
 * order in which the fetch code reads struct ctdb_statistics - confirm
 * against the PMNS.
 * NOTE(review): the numbering is irregular (cluster 2 and items 1 and 26
 * are unused, item 27 appears under clusters 15 and 16). The PMIDs are
 * still distinct, but confirm they match the PMNS before renumbering.
 */
static pmdaMetric metrictab[] = {
51
/* num_clients */
{ NULL, { PMDA_PMID(0,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
52
PMDA_PMUNITS(0,0,0,0,0,0) }, },
54
/* frozen */
{ NULL, { PMDA_PMID(1,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
55
PMDA_PMUNITS(0,0,0,0,0,0) }, },
57
/* recovering */
{ NULL, { PMDA_PMID(3,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
58
PMDA_PMUNITS(0,0,0,0,0,0) }, },
59
/* client_packets_sent */
60
{ NULL, { PMDA_PMID(4,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
61
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
62
/* client_packets_recv */
63
{ NULL, { PMDA_PMID(5,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
64
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
65
/* node_packets_sent */
66
{ NULL, { PMDA_PMID(6,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
67
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
68
/* node_packets_recv */
69
{ NULL, { PMDA_PMID(7,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
70
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
71
/* keepalive_packets_sent */
72
{ NULL, { PMDA_PMID(8,8), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
73
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
74
/* keepalive_packets_recv */
75
{ NULL, { PMDA_PMID(9,9), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
76
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
78
/* node.req_call */
{ NULL, { PMDA_PMID(10,10), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
79
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
81
/* node.reply_call */
{ NULL, { PMDA_PMID(10,11), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
82
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
84
/* node.req_dmaster */
{ NULL, { PMDA_PMID(10,12), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
85
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
87
/* node.reply_dmaster */
{ NULL, { PMDA_PMID(10,13), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
88
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
90
/* node.reply_error */
{ NULL, { PMDA_PMID(10,14), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
91
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
93
/* node.req_message */
{ NULL, { PMDA_PMID(10,15), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
94
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
96
/* node.req_control */
{ NULL, { PMDA_PMID(10,16), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
97
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
99
/* node.reply_control */
{ NULL, { PMDA_PMID(10,17), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
100
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
102
/* client.req_call */
{ NULL, { PMDA_PMID(11,18), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
103
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
105
/* client.req_message */
{ NULL, { PMDA_PMID(11,19), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
106
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
108
/* client.req_control */
{ NULL, { PMDA_PMID(11,20), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
109
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
111
/* timeouts.call */
{ NULL, { PMDA_PMID(12,21), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
112
PMDA_PMUNITS(0,0,1,0,0,0) }, },
114
/* timeouts.control */
{ NULL, { PMDA_PMID(12,22), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
115
PMDA_PMUNITS(0,0,1,0,0,0) }, },
117
/* timeouts.traverse */
{ NULL, { PMDA_PMID(12,23), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
118
PMDA_PMUNITS(0,0,1,0,0,0) }, },
120
/* total_calls */
{ NULL, { PMDA_PMID(13,24), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
121
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
123
/* pending_calls */
{ NULL, { PMDA_PMID(14,25), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
124
PMDA_PMUNITS(0,0,0,0,0,0) }, },
126
/* lockwait_calls */
{ NULL, { PMDA_PMID(15,27), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
127
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
128
/* pending_lockwait_calls */
129
{ NULL, { PMDA_PMID(16,27), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
130
PMDA_PMUNITS(0,0,0,0,0,0) }, },
131
/* childwrite_calls */
132
{ NULL, { PMDA_PMID(17,28), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
133
PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
134
/* pending_childwrite_calls */
135
{ NULL, { PMDA_PMID(18,29), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
136
PMDA_PMUNITS(0,0,0,0,0,0) }, },
138
/* memory_used (bytes) */
{ NULL, { PMDA_PMID(19,30), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
139
PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0) }, },
141
/* max_hop_count */
{ NULL, { PMDA_PMID(20,31), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
142
PMDA_PMUNITS(0,0,0,0,0,0) }, },
143
/* max_reclock_ctdbd */
144
{ NULL, { PMDA_PMID(21,32), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
145
PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
146
/* max_reclock_recd */
147
{ NULL, { PMDA_PMID(22,33), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
148
PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
149
/* max_call_latency */
150
{ NULL, { PMDA_PMID(23,34), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
151
PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
152
/* max_lockwait_latency */
153
{ NULL, { PMDA_PMID(24,35), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
154
PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
155
/* max_childwrite_latency */
156
{ NULL, { PMDA_PMID(25,36), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
157
PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
159
/* num_recoveries */
{ NULL, { PMDA_PMID(26,37), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
160
PMDA_PMUNITS(0,0,0,0,0,0) }, },
163
/* Event context returned by event_context_init() in pmda_ctdb_daemon_connect(). */
static struct event_context *ev;
164
/*
 * ctdb client context returned by ctdb_init(ev); holds the daemon socket
 * (daemon.sd), the receive queue (daemon.queue) and the local pnn.
 */
static struct ctdb_context *ctdb;
165
/*
 * Statistics from the latest CTDB_CONTROL_STATISTICS reply; pointed at the
 * reply buffer (data.dptr) by pmda_ctdb_fetch() and read by the fetch
 * callback and its fill_node()/fill_client()/fill_timeout() helpers.
 */
static struct ctdb_statistics *stats;
168
/*
 * Queue read callback handed to ctdb_queue_setup() by
 * pmda_ctdb_daemon_connect(). It replaces the default handler so that a
 * lost ctdbd is reported on stderr; received data is forwarded unchanged
 * to ctdb_client_read_cb().
 *
 * data - received bytes
 * cnt  - number of bytes in data
 * args - opaque argument supplied at queue setup (the ctdb context)
 *
 * NOTE(review): the "ctdbd unreachable" branch is presumably taken when
 * cnt == 0 - confirm.
 */
pmda_ctdb_q_read_cb(uint8_t *data, size_t cnt, void *args)
171
fprintf(stderr, "ctdbd unreachable\n");
172
/* cleanup on request timeout */
176
ctdb_client_read_cb(data, cnt, args);
181
/*
 * Open a client connection to the local ctdbd.
 *
 * Creates the event and ctdb contexts, takes the socket path from the
 * CTDB_SOCKET environment variable (falling back to CTDB_PATH), connects
 * an AF_UNIX stream socket, attaches a queue whose read callback is
 * pmda_ctdb_q_read_cb() and finally queries the local pnn. Each failure
 * is reported on stderr.
 */
pmda_ctdb_daemon_connect(void)
183
const char *socket_name;
185
struct sockaddr_un addr;
187
ev = event_context_init(NULL);
189
fprintf(stderr, "Failed to init event ctx\n");
193
ctdb = ctdb_init(ev);
195
fprintf(stderr, "Failed to init ctdb\n");
199
socket_name = getenv("CTDB_SOCKET");
200
if (socket_name == NULL) {
201
socket_name = CTDB_PATH;
204
ret = ctdb_set_socketname(ctdb, socket_name);
206
fprintf(stderr, "ctdb_set_socketname failed - %s\n",
212
* ctdb_socket_connect() sets a default queue callback handler that
213
* calls exit() if ctdbd is unavailable on recv, use our own wrapper to
217
memset(&addr, 0, sizeof(addr));
218
addr.sun_family = AF_UNIX;
219
/*
 * Copy at most sizeof(sun_path) - 1 bytes: addr was zeroed by the memset()
 * above, so the last byte stays '\0' and sun_path is always terminated,
 * even when the configured socket name is too long.
 */
strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path) - 1);
221
ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
222
if (ctdb->daemon.sd == -1) {
223
fprintf(stderr, "Failed to open client socket\n");
227
set_nonblocking(ctdb->daemon.sd);
228
set_close_on_exec(ctdb->daemon.sd);
230
if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
231
fprintf(stderr, "Failed to connect to ctdb daemon via %s\n",
236
ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
238
pmda_ctdb_q_read_cb, ctdb,
240
if (ctdb->daemon.queue == NULL) {
241
fprintf(stderr, "Failed to setup queue\n");
245
ctdb->pnn = ctdb_ctrl_getpnn(ctdb, timeval_current_ofs(3, 0),
247
if (ctdb->pnn == (uint32_t)-1) {
248
fprintf(stderr, "Failed to get ctdb pnn\n");
254
close(ctdb->daemon.sd);
264
/*
 * Tear down the connection to ctdbd: invoke the transport shutdown method
 * and close the daemon socket when it is open (sd != -1). Called from
 * pmda_ctdb_fetch() after a failed statistics control.
 */
pmda_ctdb_daemon_disconnect(void)
267
ctdb->methods->shutdown(ctdb);
270
if (ctdb->daemon.sd != -1) {
271
close(ctdb->daemon.sd);
280
/*
 * Store one of the stats->node request/reply counters in atom->ul.
 *
 * item - PMID item number passed by pmda_ctdb_fetch_cb() (id->item)
 * atom - output value, written through the ul member
 *
 * NOTE(review): the fields below are in the same order as the cluster 10
 * entries of metrictab (items 10-17) - presumably one case per item;
 * confirm.
 */
fill_node(unsigned int item, pmAtomValue *atom)
284
atom->ul = stats->node.req_call;
287
atom->ul = stats->node.reply_call;
290
atom->ul = stats->node.req_dmaster;
293
atom->ul = stats->node.reply_dmaster;
296
atom->ul = stats->node.reply_error;
299
atom->ul = stats->node.req_message;
302
atom->ul = stats->node.req_control;
305
atom->ul = stats->node.reply_control;
315
/*
 * Store one of the stats->client request counters in atom->ul.
 *
 * item - PMID item number passed by pmda_ctdb_fetch_cb() (id->item)
 * atom - output value, written through the ul member
 *
 * NOTE(review): the fields below are in the same order as the cluster 11
 * entries of metrictab (items 18-20) - presumably one case per item;
 * confirm.
 */
fill_client(unsigned int item, pmAtomValue *atom)
319
atom->ul = stats->client.req_call;
322
atom->ul = stats->client.req_message;
325
atom->ul = stats->client.req_control;
335
/*
 * Store one of the stats->timeouts counters in atom->ul.
 *
 * item - PMID item number passed by pmda_ctdb_fetch_cb() (id->item)
 * atom - output value, written through the ul member
 *
 * NOTE(review): the fields below are in the same order as the cluster 12
 * entries of metrictab (items 21-23) - presumably one case per item;
 * confirm.
 */
fill_timeout(unsigned int item, pmAtomValue *atom)
339
atom->ul = stats->timeouts.call;
342
atom->ul = stats->timeouts.control;
345
atom->ul = stats->timeouts.traverse;
355
* callback provided to pmdaFetch
358
/*
 * Per-metric callback registered with pmdaSetFetchCallBack(): copies one
 * field of the current ctdb statistics (stats) into atom.
 *
 * mdesc - metric descriptor; its PMID cluster selects the field and its
 *         PMID item is handed to fill_node()/fill_client()/fill_timeout()
 * inst  - requested instance; compared against PM_IN_NULL because no
 *         metric in metrictab has an instance domain
 * atom  - output value; the union member written has to match the type
 *         declared for the metric in metrictab (ul for PM_TYPE_U32,
 *         d for PM_TYPE_DOUBLE)
 */
pmda_ctdb_fetch_cb(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
361
__pmID_int *id = (__pmID_int *)&(mdesc->m_desc.pmid);
363
if (inst != PM_IN_NULL) {
368
fprintf(stderr, "stats not available\n");
374
switch (id->cluster) {
376
atom->ul = stats->num_clients;
379
atom->ul = stats->frozen;
382
atom->ul = stats->recovering;
385
atom->ul = stats->client_packets_sent;
388
atom->ul = stats->client_packets_recv;
391
atom->ul = stats->node_packets_sent;
394
atom->ul = stats->node_packets_recv;
397
atom->ul = stats->keepalive_packets_sent;
400
atom->ul = stats->keepalive_packets_recv;
403
ret = fill_node(id->item, atom);
409
ret = fill_client(id->item, atom);
415
ret = fill_timeout(id->item, atom);
421
atom->ul = stats->total_calls;
424
atom->ul = stats->pending_calls;
427
atom->ul = stats->lockwait_calls;
430
atom->ul = stats->pending_lockwait_calls;
433
atom->ul = stats->childwrite_calls;
436
atom->ul = stats->pending_childwrite_calls;
439
atom->ul = stats->memory_used;
442
atom->ul = stats->max_hop_count;
445
atom->d = stats->reclock.ctdbd.max;
448
atom->d = stats->reclock.recd.max;
451
atom->d = stats->call_latency.max;
454
atom->d = stats->lockwait_latency.max;
457
atom->d = stats->childwrite_latency.max;
460
/*
 * num_recoveries is declared PM_TYPE_U32 in metrictab, so it has to be
 * returned through the unsigned 32-bit member, not the double one.
 */
atom->ul = stats->num_recoveries;
472
* This routine is called once for each pmFetch(3) operation, so is a
473
* good place to do once-per-fetch functions, such as value caching or
474
* instance domain evaluation.
477
/*
 * Fetch entry point installed as dp->version.two.fetch.
 *
 * Reconnects to ctdbd when needed, requests the current statistics with a
 * CTDB_CONTROL_STATISTICS control sent to the local pnn, points the global
 * stats at the reply buffer, checks that the reply has the size of
 * struct ctdb_statistics and then lets pmdaFetch() build the result, which
 * calls pmda_ctdb_fetch_cb() for the requested metrics.
 *
 * numpmid, pmidlist, resp, pmda - passed through unchanged to pmdaFetch()
 */
pmda_ctdb_fetch(int numpmid, pmID pmidlist[], pmResult **resp, pmdaExt *pmda)
482
struct timeval ctdb_timeout;
485
fprintf(stderr, "attempting reconnect to ctdbd\n");
486
ret = pmda_ctdb_daemon_connect();
488
fprintf(stderr, "reconnect failed\n");
493
/* NOTE(review): presumably a deadline one second from now - confirm timeval_current_ofs() semantics */
ctdb_timeout = timeval_current_ofs(1, 0);
494
ret = ctdb_control(ctdb, ctdb->pnn, 0,
495
CTDB_CONTROL_STATISTICS, 0, tdb_null,
496
ctdb, &data, &res, &ctdb_timeout, NULL);
498
/* either the transport (ret) or the control itself (res) failed: drop the connection */
if (ret != 0 || res != 0) {
499
fprintf(stderr, "ctdb control for statistics failed, reconnecting\n");
500
pmda_ctdb_daemon_disconnect();
505
stats = (struct ctdb_statistics *)data.dptr;
507
/* refuse a reply whose size does not match the structure this PMDA was built against */
if (data.dsize != sizeof(struct ctdb_statistics)) {
508
fprintf(stderr, "incorrect statistics size %zu - not %zu\n",
509
data.dsize, sizeof(struct ctdb_statistics));
514
ret = pmdaFetch(numpmid, pmidlist, resp, pmda);
523
* Initialise the agent
526
/*
 * Register this PMDA's callbacks and metric table with libpcp_pmda.
 *
 * dp - dispatch structure prepared by pmdaDaemon() in main()
 *
 * Installs pmda_ctdb_fetch() as the fetch entry point and
 * pmda_ctdb_fetch_cb() as the per-metric callback, then calls pmdaInit()
 * with no instance domain table (NULL, 0) and every entry of metrictab.
 */
pmda_ctdb_init(pmdaInterface *dp)
528
/* NOTE(review): a non-zero status presumably means earlier setup failed and init is skipped - confirm */
if (dp->status != 0) {
532
dp->version.two.fetch = pmda_ctdb_fetch;
533
pmdaSetFetchCallBack(dp, pmda_ctdb_fetch_cb);
535
pmdaInit(dp, NULL, 0, metrictab,
536
(sizeof(metrictab) / sizeof(metrictab[0])));
542
/*
 * Help file path "<PCP_PMDAS_DIR>/ctdb/help", formatted into a static
 * buffer so the storage outlives the call.
 * NOTE(review): presumably the body of helpfile(), whose result main()
 * passes to pmdaDaemon() - confirm.
 */
static char buf[MAXPATHLEN];
545
snprintf(buf, sizeof(buf), "%s/ctdb/help",
546
pmGetConfig("PCP_PMDAS_DIR"));
554
/*
 * Command-line usage text, written to stderr. The options listed match
 * the "d:i:l:pu:?" string that main() passes to pmdaGetOpt(); per the text
 * itself, -i, -p and -u are mutually exclusive.
 */
fprintf(stderr, "Usage: %s [options]\n\n", pmProgname);
556
" -d domain use domain (numeric) for metrics domain of PMDA\n"
557
" -l logfile write log into logfile rather than using default log name\n"
558
"\nExactly one of the following options may appear:\n"
559
" -i port expect PMCD to connect on given inet port (number or name)\n"
560
" -p expect PMCD to supply stdin/stdout (pipe)\n"
561
" -u socket expect PMCD to connect on given unix domain socket\n",
567
* Set up the agent if running as a daemon.
570
main(int argc, char **argv)
573
char log_file[] = "pmda_ctdb.log";
574
pmdaInterface dispatch;
576
__pmSetProgname(argv[0]);
578
pmdaDaemon(&dispatch, PMDA_INTERFACE_2, pmProgname, CTDB,
579
log_file, helpfile());
581
if (pmdaGetOpt(argc, argv, "d:i:l:pu:?", &dispatch, &err) != EOF) {
589
pmdaOpenLog(&dispatch);
590
pmda_ctdb_init(&dispatch);
591
pmdaConnect(&dispatch);