2
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
4
* This program is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU General Public
6
* License as published by the Free Software Foundation; either
7
* version 2.1 of the License, or (at your option) any later version.
9
* This software is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
* General Public License for more details.
14
* You should have received a copy of the GNU General Public
15
* License along with this library; if not, write to the Free Software
16
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
#include <lha_internal.h>
21
#include <sys/param.h>
24
#include <crm/msg_xml.h>
25
#include <crm/common/msg.h>
26
#include <crm/common/xml.h>
28
#include <heartbeat.h>
29
#include <clplumbing/Gmain_timeout.h>
30
#include <lrm/lrm_api.h>
32
char *failed_stop_offset = NULL;
33
char *failed_start_offset = NULL;
35
crm_data_t *need_abort(crm_data_t *update);
36
void process_graph_event(crm_data_t *event, const char *event_node);
37
int match_graph_event(int action_id, crm_data_t *event, const char *event_node,
38
int op_status, int op_rc, int target_rc);
41
need_abort(crm_data_t *update)
43
crm_data_t *section_xml = NULL;
44
const char *section = NULL;
50
xml_prop_iter(update, name, value,
51
if(safe_str_eq(name, XML_ATTR_HAVE_QUORUM)) {
53
} else if(safe_str_eq(name, XML_ATTR_NUMPEERS)) {
55
} else if(safe_str_eq(name, XML_ATTR_GENERATION)) {
57
} else if(safe_str_eq(name, XML_ATTR_GENERATION_ADMIN)) {
62
crm_debug("Aborting on change to %s", name);
63
crm_log_xml_debug(update, "Abort: CIB Attrs");
67
section = XML_CIB_TAG_NODES;
68
section_xml = get_object_root(section, update);
69
xml_child_iter(section_xml, child,
73
section = XML_CIB_TAG_RESOURCES;
74
section_xml = get_object_root(section, update);
75
xml_child_iter(section_xml, child,
79
section = XML_CIB_TAG_CONSTRAINTS;
80
section_xml = get_object_root(section, update);
81
xml_child_iter(section_xml, child,
85
section = XML_CIB_TAG_CRMCONFIG;
86
section_xml = get_object_root(section, update);
87
xml_child_iter(section_xml, child,
94
fail_incompletable_actions(crm_graph_t *graph, const char *down_node)
96
const char *target = NULL;
97
crm_data_t *last_action = NULL;
100
synapse, synapse_t, graph->synapses, lpc,
101
if (synapse->confirmed) {
106
action, crm_action_t, synapse->actions, lpc,
108
if(action->type == action_type_pseudo || action->confirmed) {
112
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
113
if(safe_str_eq(target, down_node)) {
114
action->failed = TRUE;
115
last_action = action->xml;
116
update_graph(graph, action);
117
crm_notice("Action %d (%s) is scheduled for %s (offline)",
118
action->id, ID(action->xml), down_node);
124
if(last_action != NULL) {
125
crm_warn("Node %s shutdown resulted in un-runnable actions", down_node);
126
abort_transition(INFINITY, tg_restart, "Node failure", last_action);
134
extract_event(crm_data_t *msg)
137
const char *event_node = NULL;
143
<node_state id="node1" state=CRMD_STATE_ACTIVE exp_state="active">
146
<rsc_state id="" rsc_id="rsc4" node_id="node1" rsc_state="stopped"/>
148
crm_debug_4("Extracting event from %s", crm_element_name(msg));
149
xml_child_iter_filter(
150
msg, node_state, XML_CIB_TAG_STATE,
152
crm_data_t *attrs = NULL;
153
crm_data_t *resources = NULL;
155
const char *ccm_state = crm_element_value(
156
node_state, XML_CIB_ATTR_INCCM);
157
const char *crmd_state = crm_element_value(
158
node_state, XML_CIB_ATTR_CRMDSTATE);
160
/* Transient node attribute changes... */
161
event_node = crm_element_value(node_state, XML_ATTR_ID);
162
crm_debug_2("Processing state update from %s", event_node);
163
crm_log_xml_debug_3(node_state, "Processing");
165
attrs = find_xml_node(
166
node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
169
crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" changes for %s", event_node);
170
abort_transition(INFINITY, tg_restart,
171
XML_TAG_TRANSIENT_NODEATTRS, attrs);
174
resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
175
resources = find_xml_node(
176
resources, XML_LRM_TAG_RESOURCES, FALSE);
178
/* LRM resource update... */
184
crm_log_xml_debug_3(rsc_op, "Processing resource update");
185
process_graph_event(rsc_op, event_node);
190
* node state update... possibly from a shutdown we requested
192
if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE)
193
|| safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) {
194
crm_action_t *shutdown = NULL;
195
shutdown = match_down_event(0, event_node, NULL);
197
if(shutdown != NULL) {
198
update_graph(transition_graph, shutdown);
202
crm_info("Stonith/shutdown of %s not matched", event_node);
203
abort_transition(INFINITY, tg_restart, "Node failure", node_state);
205
fail_incompletable_actions(transition_graph, event_node);
209
ha_msg_value_int(node_state, XML_CIB_ATTR_SHUTDOWN, &shutdown);
211
crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute for %s", event_node);
212
abort_transition(INFINITY, tg_restart, "Shutdown request", node_state);
220
update_failcount(crm_data_t *event, const char *event_node, int rc, int target_rc)
225
char *attr_name = NULL;
226
const char *id = ID(event);
227
const char *on_uuid = event_node;
228
const char *value = NULL;
231
/* this is an internal code for "we're busy, try again" */
234
} else if(rc == target_rc) {
238
if(failed_stop_offset == NULL) {
239
failed_stop_offset = crm_strdup(INFINITY_S);
242
if(failed_start_offset == NULL) {
243
failed_start_offset = crm_strdup(INFINITY_S);
246
CRM_CHECK(on_uuid != NULL, return);
248
CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval),
249
crm_err("Couldn't parse: %s", ID(event));
251
CRM_CHECK(task != NULL, goto bail);
252
CRM_CHECK(rsc_id != NULL, goto bail);
254
if(safe_str_eq(task, CRMD_ACTION_START)) {
256
value = failed_start_offset;
258
} else if(safe_str_eq(task, CRMD_ACTION_STOP)) {
260
value = failed_stop_offset;
263
if(value == NULL || safe_str_neq(value, INFINITY_S)) {
264
value = XML_NVPAIR_ATTR_VALUE"++";
269
attr_name = crm_concat("fail-count", rsc_id, '-');
270
crm_warn("Updating failcount for %s on %s after failed %s: rc=%d (update=%s)",
271
rsc_id, on_uuid, task, rc, value);
273
call_id = update_attr(te_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS,
274
on_uuid, NULL,NULL, attr_name, value, FALSE);
276
/* don't let notifications of these updates cause new transitions */
277
add_cib_op_callback(call_id, FALSE, NULL, cib_failcount_updated);
287
status_from_rc(crm_action_t *action, int orig_status, int rc, int target_rc)
289
int status = orig_status;
290
if(target_rc == rc) {
291
crm_debug_2("Target rc: == %d", rc);
292
if(status != LRM_OP_DONE) {
293
crm_debug_2("Re-mapping op status to"
294
" LRM_OP_DONE for rc=%d", rc);
295
status = LRM_OP_DONE;
299
crm_debug_2("Target rc: != %d", rc);
300
if(status != LRM_OP_ERROR) {
301
crm_info("Re-mapping op status to"
302
" LRM_OP_ERROR for rc=%d", rc);
303
status = LRM_OP_ERROR;
307
/* 99 is the code we use for direct nacks */
308
if(rc != 99 && status != LRM_OP_DONE) {
309
const char *task, *uname;
310
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
311
uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
312
crm_warn("Action %d (%s) on %s failed (target: %d vs. rc: %d): %s",
313
action->id, task, uname, target_rc, rc, op_status2text(status));
320
* returns the ID of the action if a match is found
321
* returns -1 if a match was not found
322
* returns -2 if a match was found but the action failed (and was
326
match_graph_event(int action_id, crm_data_t *event, const char *event_node,
327
int op_status, int op_rc, int target_rc)
329
const char *target = NULL;
330
const char *allow_fail = NULL;
331
const char *this_event = ID(event);
332
crm_action_t *action = NULL;
334
action = get_action(action_id, FALSE);
339
op_status = status_from_rc(action, op_status, op_rc, target_rc);
340
if(op_status != LRM_OP_DONE) {
341
update_failcount(event, event_node, op_rc, target_rc);
344
/* Process OP status */
347
crm_debug("Ignoring pending operation");
354
case LRM_OP_NOTSUPPORTED:
355
action->failed = TRUE;
357
case LRM_OP_CANCELLED:
359
crm_err("Dont know what to do for cancelled ops yet");
362
action->failed = TRUE;
363
crm_err("Unsupported action result: %d", op_status);
366
/* stop this event's timer if it had one */
367
stop_te_timer(action->timer);
368
action->confirmed = TRUE;
370
update_graph(transition_graph, action);
374
allow_fail = g_hash_table_lookup(
375
action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL));
376
if(crm_is_true(allow_fail)) {
377
action->failed = FALSE;
382
abort_transition(action->synapse->priority+1,
383
tg_restart, "Event failed", event);
386
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
387
te_log_action(LOG_INFO, "Action %s (%d) confirmed on %s (rc=%d)",
388
crm_str(this_event), action->id, crm_str(target),
395
get_action(int id, gboolean confirmed)
398
synapse, synapse_t, transition_graph->synapses, lpc,
401
action, crm_action_t, synapse->actions, lpc2,
403
if(action->id == id) {
405
stop_te_timer(action->timer);
406
action->confirmed = TRUE;
417
match_down_event(int id, const char *target, const char *filter)
419
const char *this_action = NULL;
420
const char *this_node = NULL;
421
crm_action_t *match = NULL;
424
synapse, synapse_t, transition_graph->synapses, lpc,
428
action, crm_action_t, synapse->actions, lpc2,
430
if(id > 0 && action->id == id) {
435
this_action = crm_element_value(
436
action->xml, XML_LRM_ATTR_TASK);
438
if(action->type != action_type_crm) {
441
} else if(safe_str_eq(this_action, CRM_OP_LRM_REFRESH)){
444
} else if(filter != NULL
445
&& safe_str_neq(this_action, filter)) {
449
this_node = crm_element_value(
450
action->xml, XML_LRM_ATTR_TARGET_UUID);
452
if(this_node == NULL) {
453
crm_log_xml_err(action->xml, "No node uuid");
456
if(safe_str_neq(this_node, target)) {
457
crm_debug("Action %d : Node mismatch: %s",
458
action->id, this_node);
466
/* stop this event's timer if it had one */
472
/* stop this event's timer if it had one */
473
crm_debug("Match found for action %d: %s on %s", id,
474
crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY),
476
stop_te_timer(match->timer);
477
match->confirmed = TRUE;
480
crm_err("No match for action %d", id);
482
crm_warn("No match for shutdown action on %s", target);
489
process_graph_event(crm_data_t *event, const char *event_node)
496
int transition_num = -1;
497
char *update_te_uuid = NULL;
499
gboolean passed = FALSE;
500
const char *id = NULL;
501
const char *magic = NULL;
503
CRM_ASSERT(event != NULL);
506
magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC);
513
CRM_CHECK(decode_transition_magic(
514
magic, &update_te_uuid, &transition_num, &action,
515
&status, &rc, &target_rc),
516
crm_err("Invalid event %s detected", id);
517
abort_transition(INFINITY, tg_restart,"Bad event", event);
520
if(status == LRM_OP_PENDING) {
524
if(transition_num == -1) {
525
crm_err("Action %s (%s) initiated outside of a transition",
527
abort_transition(INFINITY, tg_restart,"Unexpected event",event);
529
} else if(action < 0 || safe_str_neq(update_te_uuid, te_uuid)) {
530
crm_info("Action %s (%s) initiated by a different transitioner",
532
abort_transition(INFINITY, tg_restart,"Foreign event", event);
534
} else if(transition_graph->id != transition_num) {
535
crm_info("Detected action %s from a different transition:"
536
" %d vs. %d", id, transition_num, transition_graph->id);
537
abort_transition(INFINITY, tg_restart,"Old event", event);
539
} else if(transition_graph->complete) {
540
crm_info("Action %s arrived after a completed transition", id);
541
abort_transition(INFINITY, tg_restart, "Inactive graph", event);
543
} else if(match_graph_event(
544
action, event, event_node, status, rc, target_rc) < 0) {
545
crm_err("Unknown graph action %s", id);
546
abort_transition(INFINITY, tg_restart, "Unknown event", event);
550
crm_debug_2("Processed update to %s: %s", id, magic);
553
if(passed == FALSE && rc != EXECRA_OK) {
554
update_failcount(event, event_node, rc, target_rc);
558
crm_free(update_te_uuid);