3
% Description: Implements the "follow service" mechanism based on the Red Hat RIND event
6
% Author: Marc Grimme, Mark Hlawatschek, October 2008
7
% Support: support@atix.de
8
% License: GNU General Public License (GPL), version 2 or later
9
% Copyright: (c) 2008-2010 ATIX AG
12
debug("*** follow-service.sl");
16
% Returns the list of nodes that are online and are also members of the
% failover domain of the given service.
%
% service_name: name of the service whose failover domain is looked up.
% Returns: intersection of the online node list and the domain's node list.
define nodelist_online(service_name) {
   variable nodes, nofailback, restricted, ordered, node_list;

   % Without this assignment `nodes` would be used uninitialized below.
   nodes = nodes_online();

   % Only node_list is used here; the other three values are unpacked because
   % service_domain_info() returns all four together.
   (nofailback, restricted, ordered, node_list) = service_domain_info(service_name);

   return intersection(nodes, node_list);
}
29
% General-purpose function for a setup in which Service(svc1) and Service(svc2)
30
% should not be running on the same node even after failover.
31
% There are two options to influence the behaviour. If both services have to be
32
% running on the same node (only one node is left in the failover group), which
33
% service is the master and should both services be running or only the master
34
% service survives. If master is not svc1 or svc2 both services might run on the
35
% same node. If master is either svc1 or svc2 the specified one will be the
% surviving service.
37
% If followslave is not 0 the svc1 always follows svc2. That means it will be
38
% started on the same node as svc2. And if available svc2 will be relocated
% to another node.
41
define follow_service(svc1, svc2, master) %, followslave)
43
variable state_svc1, state_svc2, owner_svc1, owner_svc2;
44
variable nodes1, nodes2, allowed;
46
debug("*** FOLLOW_SERVICE: follow_service(",svc1,", ",svc2,", ", master, ")");
47
debug("*** FOLLOW_SERVICE: event_type: ", event_type, ", service_name: ", service_name, ", service_state: ", service_state);
52
if ((master != svc1) and (master != svc2)) {
53
debug("*** FOLLOW_SERVICE: master=NULL");
57
% get infos we need to decide further
58
(,,, owner_svc1, state_svc1) = service_status(svc1);
59
(,,, owner_svc2, state_svc2) = service_status(svc2);
60
nodes1 = nodelist_online(svc1);
61
nodes2 = nodelist_online(svc2);
62
debug("*** FOLLOW_SERVICE: service_status(",svc1,"): ", state_svc1);
63
debug("*** FOLLOW_SERVICE: owner_svc1: ", owner_svc1, ", owner_svc2: ", owner_svc2, ", nodes1: ", nodes1, ", nodes2: ", nodes2);
65
if (((event_type == EVENT_NODE) and (owner_svc1 == node_id) and (node_state == NODE_OFFLINE) and (owner_svc2 >=0)) or
66
((event_type == EVENT_SERVICE) and (service_name == svc1) and (service_state == "recovering" ) and (owner_svc2 >= 0))) {
68
% uh oh, the owner of the master server died. Restart it
69
% on the node running the slave server or if we should not
70
% follow the slave start it somewhere else.
71
% We should end up here if svc1 has to be restarted
74
% If this was a service event, don't execute the default event
75
% script trigger after this script completes.
77
if (event_type == EVENT_SERVICE) {
81
allowed=subtract(nodes2, owner_svc2);
82
if (length(allowed) > 1) {
83
allowed=subtract(allowed, service_last_owner);
85
debug("*** FOLLOW SERVICE: service event triggered following svc2 to ",owner_svc2, " svc2 on : ",allowed);
87
% either svc1 is the master or there are nodes where svc2 can be started
88
if ((master == svc1) or (length(allowed) > 0)) {
89
()=service_start(svc1, owner_svc2);
91
% either svc2 is the master or there are nodes where svc2 can be started
92
if ((master == svc2) or (length(allowed) > 0)) {
93
()=service_stop(svc2);
94
()=service_start(svc2, allowed);
97
else if (((event_type == EVENT_NODE) and (owner_svc2 == node_id) and (node_state == NODE_OFFLINE) and (owner_svc2 >=0)) or
98
((event_type == EVENT_SERVICE) and (service_name == svc2) and (service_state == "recovering" ) and (owner_svc1 >= 0))) {
100
% uh oh, the owner of the svc2 died. Restart it
101
% on any other node but not the one running the svc1.
102
% If svc1 is the only one left only start it there
105
% Just relocate svc2 or if svc2 is master stop svc1 and start svc2 on owner_svc1
108
% If this was a service event, don't execute the default event
109
% script trigger after this script completes.
112
if (event_type == EVENT_SERVICE) {
116
allowed=subtract(nodes2, owner_svc1);
117
if (length(allowed) > 1) {
118
allowed=subtract(allowed, service_last_owner);
121
debug("*** FOLLOW SERVICE: service event triggered relocating svc2 to ",allowed, " svc1 on : ",owner_svc1);
123
if (length(allowed) > 0) {
124
()=service_stop(svc2);
125
()=service_start(svc2, allowed);
126
} else if (master == svc2) {
127
()=service_stop(svc1);
128
()=service_start(svc2, owner_svc1);
131
else if (((event_type == EVENT_SERVICE) and (service_state == "started") and (owner_svc2 == owner_svc1) and (owner_svc1 > 0) and (owner_svc2 > 0)) or
132
((event_type == EVENT_CONFIG) and (owner_svc2 == owner_svc1))) {
133
allowed=subtract(nodes2, owner_svc1);
134
debug("*** FOLLOW SERVICE: service event both running on same node triggered.", allowed);
135
if (length(allowed) > 0) {
136
%()=service_stop(svc1);
137
%()=service_start(svc1, owner_svc2);
138
()=service_stop(svc2);
139
()=service_start(svc2, allowed);
140
} else if ((master == svc2) and (owner_svc2 > 0)){
141
debug("*** FOLLOW SERVICE: will stop service .", svc1);
142
()=service_stop(svc1);
143
} else if ((master == svc1) and (owner_svc1 > 0)) {
144
debug("*** FOLLOW SERVICE: will stop service .", svc2);
145
()=service_stop(svc2);
147
debug("*** FOLLOW SERVICE: both services running on the same node or only one is running.", allowed, ", ", master);