~ampelbein/ubuntu/oneiric/heartbeat/lp-770743

« back to all changes in this revision

Viewing changes to crm/tengine/events.c

  • Committer: Bazaar Package Importer
  • Author(s): Ante Karamatic
  • Date: 2009-08-10 19:29:25 UTC
  • mfrom: (5.2.3 experimental)
  • Revision ID: james.westby@ubuntu.com-20090810192925-9zy2llcbgavbskf7
Tags: 2.99.2+sles11r9-5ubuntu1
* New upstream snapshot
* Adjusted heartbeat.install and rules for documentation path

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/* 
2
 
 * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3
 
 * 
4
 
 * This program is free software; you can redistribute it and/or
5
 
 * modify it under the terms of the GNU General Public
6
 
 * License as published by the Free Software Foundation; either
7
 
 * version 2.1 of the License, or (at your option) any later version.
8
 
 * 
9
 
 * This software is distributed in the hope that it will be useful,
10
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 
 * General Public License for more details.
13
 
 * 
14
 
 * You should have received a copy of the GNU General Public
15
 
 * License along with this library; if not, write to the Free Software
16
 
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 
 */
18
 
 
19
 
#include <lha_internal.h>
20
 
 
21
 
#include <sys/param.h>
22
 
#include <crm/crm.h>
23
 
#include <crm/cib.h>
24
 
#include <crm/msg_xml.h>
25
 
#include <crm/common/msg.h>
26
 
#include <crm/common/xml.h>
27
 
#include <tengine.h>
28
 
#include <heartbeat.h>
29
 
#include <clplumbing/Gmain_timeout.h>
30
 
#include <lrm/lrm_api.h>
31
 
 
32
 
/*
 * Fail-count update values used by update_failcount() for failed stop and
 * failed start operations.  update_failcount() sets each one to a copy of
 * INFINITY_S if it is still NULL when first needed.
 */
char *failed_stop_offset = NULL;
33
 
char *failed_start_offset = NULL;
34
 
 
35
 
/* Forward declarations of functions that are defined further down in this file. */
crm_data_t *need_abort(crm_data_t *update);
36
 
void process_graph_event(crm_data_t *event, const char *event_node);
37
 
int match_graph_event(int action_id, crm_data_t *event, const char *event_node,
38
 
                      int op_status, int op_rc, int target_rc);
39
 
 
40
 
/*
 * Inspect a CIB update and report which part of it, if any, is a reason to
 * abort.
 *
 * Returns:
 *   - NULL when update is NULL or when none of the checks below match;
 *   - update itself when it carries an attribute named XML_ATTR_HAVE_QUORUM,
 *     XML_ATTR_NUMPEERS, XML_ATTR_GENERATION or XML_ATTR_GENERATION_ADMIN;
 *   - otherwise the first of the XML_CIB_TAG_NODES, XML_CIB_TAG_RESOURCES,
 *     XML_CIB_TAG_CONSTRAINTS and XML_CIB_TAG_CRMCONFIG sections (looked up
 *     in that order) for which the child iteration visits at least one child.
 */
crm_data_t *
need_abort(crm_data_t *update)
{
    const char *sections[] = {
        XML_CIB_TAG_NODES,
        XML_CIB_TAG_RESOURCES,
        XML_CIB_TAG_CONSTRAINTS,
        XML_CIB_TAG_CRMCONFIG
    };
    const unsigned int num_sections = sizeof(sections) / sizeof(sections[0]);
    unsigned int idx = 0;
    crm_data_t *section_xml = NULL;

    if(update == NULL) {
        return NULL;
    }

    xml_prop_iter(update, name, value,
                  if(!(safe_str_eq(name, XML_ATTR_HAVE_QUORUM)
                       || safe_str_eq(name, XML_ATTR_NUMPEERS)
                       || safe_str_eq(name, XML_ATTR_GENERATION)
                       || safe_str_eq(name, XML_ATTR_GENERATION_ADMIN))) {
                      /* not one of the attributes we abort on */
                      continue;
                  }

                  crm_debug("Aborting on change to %s", name);
                  crm_log_xml_debug(update, "Abort: CIB Attrs");
                  return update;
        );

    for(idx = 0; idx < num_sections; idx++) {
        section_xml = get_object_root(sections[idx], update);

        /* returns as soon as the iteration visits a first child */
        xml_child_iter(section_xml, child,
                       return section_xml;
            );
    }

    return NULL;
}
92
 
 
93
 
static gboolean
94
 
fail_incompletable_actions(crm_graph_t *graph, const char *down_node) 
95
 
{
96
 
        const char *target = NULL;
97
 
        crm_data_t *last_action = NULL;
98
 
 
99
 
        slist_iter(
100
 
                synapse, synapse_t, graph->synapses, lpc,
101
 
                if (synapse->confirmed) {
102
 
                        continue;
103
 
                }
104
 
 
105
 
                slist_iter(
106
 
                        action, crm_action_t, synapse->actions, lpc,
107
 
 
108
 
                        if(action->type == action_type_pseudo || action->confirmed) {
109
 
                                continue;
110
 
                        }
111
 
                        
112
 
                        target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
113
 
                        if(safe_str_eq(target, down_node)) {
114
 
                                action->failed = TRUE;
115
 
                                last_action = action->xml;
116
 
                                update_graph(graph, action);
117
 
                                crm_notice("Action %d (%s) is scheduled for %s (offline)",
118
 
                                           action->id, ID(action->xml), down_node);
119
 
                        }
120
 
                        
121
 
                        );
122
 
                );
123
 
 
124
 
        if(last_action != NULL) {
125
 
                crm_warn("Node %s shutdown resulted in un-runnable actions", down_node);
126
 
                abort_transition(INFINITY, tg_restart, "Node failure", last_action);
127
 
                return TRUE;
128
 
        }
129
 
        
130
 
        return FALSE;
131
 
}
132
 
 
133
 
gboolean
134
 
extract_event(crm_data_t *msg)
135
 
{
136
 
        int shutdown = 0;
137
 
        const char *event_node = NULL;
138
 
 
139
 
/*
140
 
[cib fragment]
141
 
...
142
 
<status>
143
 
   <node_state id="node1" state=CRMD_STATE_ACTIVE exp_state="active">
144
 
     <lrm>
145
 
       <lrm_resources>
146
 
         <rsc_state id="" rsc_id="rsc4" node_id="node1" rsc_state="stopped"/>
147
 
*/
148
 
        crm_debug_4("Extracting event from %s", crm_element_name(msg));
149
 
        xml_child_iter_filter(
150
 
                msg, node_state, XML_CIB_TAG_STATE,
151
 
 
152
 
                crm_data_t *attrs = NULL;
153
 
                crm_data_t *resources = NULL;
154
 
 
155
 
                const char *ccm_state  = crm_element_value(
156
 
                        node_state, XML_CIB_ATTR_INCCM);
157
 
                const char *crmd_state  = crm_element_value(
158
 
                        node_state, XML_CIB_ATTR_CRMDSTATE);
159
 
 
160
 
                /* Transient node attribute changes... */
161
 
                event_node = crm_element_value(node_state, XML_ATTR_ID);
162
 
                crm_debug_2("Processing state update from %s", event_node);
163
 
                crm_log_xml_debug_3(node_state, "Processing");
164
 
 
165
 
                attrs = find_xml_node(
166
 
                        node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
167
 
 
168
 
                if(attrs != NULL) {
169
 
                        crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" changes for %s", event_node);
170
 
                        abort_transition(INFINITY, tg_restart,
171
 
                                         XML_TAG_TRANSIENT_NODEATTRS, attrs);
172
 
                }
173
 
                
174
 
                resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
175
 
                resources = find_xml_node(
176
 
                        resources, XML_LRM_TAG_RESOURCES, FALSE);
177
 
 
178
 
                /* LRM resource update... */
179
 
                xml_child_iter(
180
 
                        resources, rsc,  
181
 
                        xml_child_iter(
182
 
                                rsc, rsc_op,  
183
 
                                
184
 
                                crm_log_xml_debug_3(rsc_op, "Processing resource update");
185
 
                                process_graph_event(rsc_op, event_node);
186
 
                                );
187
 
                        );
188
 
 
189
 
                /*
190
 
                 * node state update... possibly from a shutdown we requested
191
 
                 */
192
 
                if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE)
193
 
                   || safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) {
194
 
                        crm_action_t *shutdown = NULL;
195
 
                        shutdown = match_down_event(0, event_node, NULL);
196
 
                        
197
 
                        if(shutdown != NULL) {
198
 
                                update_graph(transition_graph, shutdown);
199
 
                                trigger_graph();
200
 
 
201
 
                        } else {
202
 
                                crm_info("Stonith/shutdown of %s not matched", event_node);
203
 
                                abort_transition(INFINITY, tg_restart, "Node failure", node_state);
204
 
                        }                       
205
 
                        fail_incompletable_actions(transition_graph, event_node);
206
 
                }
207
 
 
208
 
                shutdown = 0;
209
 
                ha_msg_value_int(node_state, XML_CIB_ATTR_SHUTDOWN, &shutdown);
210
 
                if(shutdown != 0) {
211
 
                        crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute for %s", event_node);
212
 
                        abort_transition(INFINITY, tg_restart, "Shutdown request", node_state);
213
 
                }
214
 
                );
215
 
 
216
 
        return TRUE;
217
 
}
218
 
 
219
 
static void
220
 
update_failcount(crm_data_t *event, const char *event_node, int rc, int target_rc) 
221
 
{
222
 
        int interval = 0;
223
 
        char *task = NULL;
224
 
        char *rsc_id = NULL;
225
 
        char *attr_name = NULL;
226
 
        const char *id  = ID(event);
227
 
        const char *on_uuid  = event_node;
228
 
        const char *value = NULL;
229
 
 
230
 
        if(rc == 99) {
231
 
                /* this is an internal code for "we're busy, try again" */
232
 
                return;
233
 
 
234
 
        } else if(rc == target_rc) {
235
 
            return;
236
 
        }
237
 
 
238
 
        if(failed_stop_offset == NULL) {
239
 
            failed_stop_offset = crm_strdup(INFINITY_S);
240
 
        }
241
 
 
242
 
        if(failed_start_offset == NULL) {
243
 
            failed_start_offset = crm_strdup(INFINITY_S);
244
 
        }
245
 
        
246
 
        CRM_CHECK(on_uuid != NULL, return);
247
 
 
248
 
        CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval),
249
 
                  crm_err("Couldn't parse: %s", ID(event));
250
 
                  goto bail);
251
 
        CRM_CHECK(task != NULL, goto bail);
252
 
        CRM_CHECK(rsc_id != NULL, goto bail);
253
 
 
254
 
        if(safe_str_eq(task, CRMD_ACTION_START)) {
255
 
            interval = 1;
256
 
            value = failed_start_offset;
257
 
 
258
 
        } else if(safe_str_eq(task, CRMD_ACTION_STOP)) {
259
 
            interval = 1;
260
 
            value = failed_stop_offset;
261
 
        }
262
 
 
263
 
        if(value == NULL || safe_str_neq(value, INFINITY_S)) {
264
 
            value = XML_NVPAIR_ATTR_VALUE"++";
265
 
        }
266
 
 
267
 
        if(interval > 0) {
268
 
                int call_id = 0;
269
 
                attr_name = crm_concat("fail-count", rsc_id, '-');
270
 
                crm_warn("Updating failcount for %s on %s after failed %s: rc=%d (update=%s)",
271
 
                         rsc_id, on_uuid, task, rc, value);
272
 
 
273
 
                call_id = update_attr(te_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS,
274
 
                            on_uuid, NULL,NULL, attr_name, value, FALSE);
275
 
 
276
 
                /* don't let notificatios of these updates cause new transitions */
277
 
                add_cib_op_callback(call_id, FALSE, NULL, cib_failcount_updated);
278
 
                crm_free(attr_name);
279
 
        }
280
 
 
281
 
  bail:
282
 
        crm_free(rsc_id);
283
 
        crm_free(task);
284
 
}
285
 
 
286
 
static int
287
 
status_from_rc(crm_action_t *action, int orig_status, int rc, int target_rc)
288
 
{
289
 
        int status = orig_status;
290
 
        if(target_rc == rc) {
291
 
            crm_debug_2("Target rc: == %d", rc);
292
 
            if(status != LRM_OP_DONE) {
293
 
                crm_debug_2("Re-mapping op status to"
294
 
                            " LRM_OP_DONE for rc=%d", rc);
295
 
                status = LRM_OP_DONE;
296
 
            }
297
 
 
298
 
        } else {
299
 
            crm_debug_2("Target rc: != %d", rc);
300
 
            if(status != LRM_OP_ERROR) {
301
 
                crm_info("Re-mapping op status to"
302
 
                         " LRM_OP_ERROR for rc=%d", rc);
303
 
                status = LRM_OP_ERROR;
304
 
            }
305
 
        }
306
 
        
307
 
        /* 99 is the code we use for direct nack's */
308
 
        if(rc != 99 && status != LRM_OP_DONE) {
309
 
                const char *task, *uname;
310
 
                task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
311
 
                uname  = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
312
 
                crm_warn("Action %d (%s) on %s failed (target: %d vs. rc: %d): %s",
313
 
                         action->id, task, uname, target_rc, rc, op_status2text(status));
314
 
        }
315
 
 
316
 
        return status;
317
 
}
318
 
 
319
 
/*
320
 
 * returns the ID of the action if a match is found
321
 
 * returns -1 if a match was not found
322
 
 * returns -2 if a match was found but the action failed (and was
323
 
 *            not allowed to)
324
 
 */
325
 
int
326
 
match_graph_event(int action_id, crm_data_t *event, const char *event_node,
327
 
                  int op_status, int op_rc, int target_rc)
328
 
{
329
 
        const char *target = NULL;
330
 
        const char *allow_fail = NULL;
331
 
        const char *this_event = ID(event);
332
 
        crm_action_t *action = NULL;
333
 
 
334
 
        action = get_action(action_id, FALSE);
335
 
        if(action == NULL) {
336
 
                return -1;
337
 
        }
338
 
        
339
 
        op_status = status_from_rc(action, op_status, op_rc, target_rc);
340
 
        if(op_status != LRM_OP_DONE) {
341
 
            update_failcount(event, event_node, op_rc, target_rc);
342
 
        }
343
 
        
344
 
        /* Process OP status */
345
 
        switch(op_status) {
346
 
                case LRM_OP_PENDING:
347
 
                        crm_debug("Ignoring pending operation");
348
 
                        return action->id;
349
 
                        break;
350
 
                case LRM_OP_DONE:
351
 
                        break;
352
 
                case LRM_OP_ERROR:
353
 
                case LRM_OP_TIMEOUT:
354
 
                case LRM_OP_NOTSUPPORTED:
355
 
                        action->failed = TRUE;
356
 
                        break;
357
 
                case LRM_OP_CANCELLED:
358
 
                        /* do nothing?? */
359
 
                        crm_err("Dont know what to do for cancelled ops yet");
360
 
                        break;
361
 
                default:
362
 
                        action->failed = TRUE;
363
 
                        crm_err("Unsupported action result: %d", op_status);
364
 
        }
365
 
 
366
 
        /* stop this event's timer if it had one */
367
 
        stop_te_timer(action->timer);
368
 
        action->confirmed = TRUE;
369
 
        
370
 
        update_graph(transition_graph, action);
371
 
        trigger_graph();
372
 
        
373
 
        if(action->failed) {
374
 
                allow_fail = g_hash_table_lookup(
375
 
                        action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL));
376
 
                if(crm_is_true(allow_fail)) {
377
 
                        action->failed = FALSE;
378
 
                }
379
 
        }
380
 
 
381
 
        if(action->failed) {
382
 
                abort_transition(action->synapse->priority+1,
383
 
                                 tg_restart, "Event failed", event);
384
 
        }
385
 
 
386
 
        target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
387
 
        te_log_action(LOG_INFO, "Action %s (%d) confirmed on %s (rc=%d)",
388
 
                      crm_str(this_event), action->id, crm_str(target),
389
 
                      op_status);
390
 
 
391
 
        return action->id;
392
 
}
393
 
 
394
 
crm_action_t *
395
 
get_action(int id, gboolean confirmed)
396
 
{
397
 
        slist_iter(
398
 
                synapse, synapse_t, transition_graph->synapses, lpc,
399
 
 
400
 
                slist_iter(
401
 
                        action, crm_action_t, synapse->actions, lpc2,
402
 
 
403
 
                        if(action->id == id) {
404
 
                                if(confirmed) {
405
 
                                        stop_te_timer(action->timer);
406
 
                                        action->confirmed = TRUE;
407
 
                                }
408
 
                                return action;
409
 
                        }
410
 
                        )
411
 
                );
412
 
        return NULL;
413
 
}
414
 
 
415
 
 
416
 
crm_action_t *
417
 
match_down_event(int id, const char *target, const char *filter)
418
 
{
419
 
        const char *this_action = NULL;
420
 
        const char *this_node   = NULL;
421
 
        crm_action_t *match = NULL;
422
 
        
423
 
        slist_iter(
424
 
                synapse, synapse_t, transition_graph->synapses, lpc,
425
 
 
426
 
                /* lookup event */
427
 
                slist_iter(
428
 
                        action, crm_action_t, synapse->actions, lpc2,
429
 
 
430
 
                        if(id > 0 && action->id == id) {
431
 
                                match = action;
432
 
                                break;
433
 
                        }
434
 
                        
435
 
                        this_action = crm_element_value(
436
 
                                action->xml, XML_LRM_ATTR_TASK);
437
 
 
438
 
                        if(action->type != action_type_crm) {
439
 
                                continue;
440
 
 
441
 
                        } else if(safe_str_eq(this_action, CRM_OP_LRM_REFRESH)){
442
 
                                continue;
443
 
                                
444
 
                        } else if(filter != NULL
445
 
                                  && safe_str_neq(this_action, filter)) {
446
 
                                continue;
447
 
                        }
448
 
                        
449
 
                        this_node = crm_element_value(
450
 
                                action->xml, XML_LRM_ATTR_TARGET_UUID);
451
 
 
452
 
                        if(this_node == NULL) {
453
 
                                crm_log_xml_err(action->xml, "No node uuid");
454
 
                        }
455
 
                        
456
 
                        if(safe_str_neq(this_node, target)) {
457
 
                                crm_debug("Action %d : Node mismatch: %s",
458
 
                                         action->id, this_node);
459
 
                                continue;
460
 
                        }
461
 
 
462
 
                        match = action;
463
 
                        break;
464
 
                        );
465
 
                if(match != NULL) {
466
 
                        /* stop this event's timer if it had one */
467
 
                        break;
468
 
                }
469
 
                );
470
 
        
471
 
        if(match != NULL) {
472
 
                /* stop this event's timer if it had one */
473
 
                crm_debug("Match found for action %d: %s on %s", id,
474
 
                          crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY),
475
 
                          target);
476
 
                stop_te_timer(match->timer);
477
 
                match->confirmed = TRUE;
478
 
 
479
 
        } else if(id > 0) {
480
 
                crm_err("No match for action %d", id);
481
 
        } else {
482
 
                crm_warn("No match for shutdown action on %s", target);
483
 
        }
484
 
        return match;
485
 
}
486
 
 
487
 
 
488
 
void
489
 
process_graph_event(crm_data_t *event, const char *event_node)
490
 
{
491
 
        int rc = -1;
492
 
        int status = -1;
493
 
 
494
 
        int action = -1;
495
 
        int target_rc = -1;
496
 
        int transition_num = -1;
497
 
        char *update_te_uuid = NULL;
498
 
 
499
 
        gboolean passed = FALSE;
500
 
        const char *id = NULL;
501
 
        const char *magic = NULL;
502
 
        
503
 
        CRM_ASSERT(event != NULL);
504
 
 
505
 
        id = ID(event);
506
 
        magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC);
507
 
 
508
 
        if(magic == NULL) {
509
 
                /* non-change */
510
 
                return;
511
 
        }
512
 
        
513
 
        CRM_CHECK(decode_transition_magic(
514
 
                          magic, &update_te_uuid, &transition_num, &action,
515
 
                          &status, &rc, &target_rc),
516
 
                  crm_err("Invalid event %s detected", id);
517
 
                  abort_transition(INFINITY, tg_restart,"Bad event", event);
518
 
                );
519
 
 
520
 
        if(status == LRM_OP_PENDING) {
521
 
            goto bail;
522
 
        }
523
 
        
524
 
        if(transition_num == -1) {
525
 
                crm_err("Action %s (%s) initiated outside of a transition",
526
 
                        id, magic);
527
 
                abort_transition(INFINITY, tg_restart,"Unexpected event",event);
528
 
 
529
 
        } else if(action < 0 || safe_str_neq(update_te_uuid, te_uuid)) {
530
 
                crm_info("Action %s (%s) initiated by a different transitioner",
531
 
                         id, magic);
532
 
                abort_transition(INFINITY, tg_restart,"Foreign event", event);
533
 
                
534
 
        } else if(transition_graph->id != transition_num) {
535
 
                crm_info("Detected action %s from a different transition:"
536
 
                        " %d vs. %d", id, transition_num, transition_graph->id);
537
 
                abort_transition(INFINITY, tg_restart,"Old event", event);
538
 
                
539
 
        } else if(transition_graph->complete) {
540
 
                crm_info("Action %s arrived after a completed transition", id);
541
 
                abort_transition(INFINITY, tg_restart, "Inactive graph", event);
542
 
 
543
 
        } else if(match_graph_event(
544
 
                      action, event, event_node, status, rc, target_rc) < 0) {
545
 
                crm_err("Unknown graph action %s", id);
546
 
                abort_transition(INFINITY, tg_restart, "Unknown event", event);
547
 
 
548
 
        } else {
549
 
                passed = TRUE;
550
 
                crm_debug_2("Processed update to %s: %s", id, magic);
551
 
        }
552
 
 
553
 
        if(passed == FALSE && rc != EXECRA_OK) {
554
 
            update_failcount(event, event_node, rc, target_rc);
555
 
        }
556
 
 
557
 
  bail:
558
 
        crm_free(update_te_uuid);
559
 
        return;
560
 
}
561