3
Failover protocol support code... */
6
* Copyright (c) 2004-2011 by Internet Systems Consortium, Inc. ("ISC")
7
* Copyright (c) 1999-2003 by Internet Software Consortium
9
* Permission to use, copy, modify, and distribute this software for any
10
* purpose with or without fee is hereby granted, provided that the above
11
* copyright notice and this permission notice appear in all copies.
13
* THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
14
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
16
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
19
* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21
* Internet Systems Consortium, Inc.
23
* Redwood City, CA 94063
25
* https://www.isc.org/
27
* This software has been written for Internet Systems Consortium
28
* by Ted Lemon in cooperation with Vixie Enterprises and Nominum, Inc.
29
* To learn more about Internet Systems Consortium, see
30
* ``https://www.isc.org/''. To learn more about Vixie Enterprises,
31
* see ``http://www.vix.com''. To learn more about Nominum, Inc., see
32
* ``http://www.nominum.com''.
36
#include <omapip/omapip_p.h>
38
#if defined (FAILOVER_PROTOCOL)
39
dhcp_failover_state_t *failover_states;
40
static isc_result_t do_a_failover_option (omapi_object_t *,
41
dhcp_failover_link_t *);
42
dhcp_failover_listener_t *failover_listeners;
44
static isc_result_t failover_message_reference (failover_message_t **,
46
const char *file, int line);
47
static isc_result_t failover_message_dereference (failover_message_t **,
48
const char *file, int line);
50
static void dhcp_failover_pool_balance(dhcp_failover_state_t *state);
51
static void dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state);
52
static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state,
53
isc_boolean_t *sendreq);
54
static inline int secondary_not_hoarding(dhcp_failover_state_t *state,
58
/* Bring up every failover relationship on the failover_states list.
   For each state this runs the startup state transition, treats a
   relationship with pool_count == 0 as a fatal configuration error,
   calls dhcp_failover_link_initiate and dhcp_failover_listen, and on
   failure arranges a retry 90 seconds after cur_time through
   dhcp_failover_reconnect or dhcp_failover_listener_restart.
   NOTE(review): this listing has gaps (the embedded line numbers skip),
   so the declarations of status and tv, the calls that consume tv and
   any branch that selects between initiating and listening are not
   visible here - confirm against the complete file. */
void dhcp_failover_startup ()
60
dhcp_failover_state_t *state;
64
for (state = failover_states; state; state = state -> next) {
65
dhcp_failover_state_transition (state, "startup");
67
if (state -> pool_count == 0) {
68
log_error ("failover peer declaration with no %s",
70
log_error ("In order to use failover, you MUST %s",
71
"refer to your main failover declaration");
72
log_error ("in each pool declaration. You MUST %s",
73
"NOT use range declarations outside");
74
log_fatal ("of pool declarations.");
76
/* In case the peer is already running, immediately try
77
to establish a connection with it. */
78
status = dhcp_failover_link_initiate ((omapi_object_t *)state);
79
/* DHCP_R_INCOMPLETE is not treated as a failure here; any other
   non-success status leads to the retry scheduling below. */
if (status != ISC_R_SUCCESS && status != DHCP_R_INCOMPLETE) {
80
#if defined (DEBUG_FAILOVER_TIMING)
81
log_info ("add_timeout +90 dhcp_failover_reconnect");
83
tv . tv_sec = cur_time + 90;
86
dhcp_failover_reconnect, state,
88
dhcp_failover_state_reference,
90
dhcp_failover_state_dereference);
91
log_error ("failover peer %s: %s", state -> name,
92
isc_result_totext (status));
95
status = (dhcp_failover_listen
96
((omapi_object_t *)state));
97
if (status != ISC_R_SUCCESS) {
98
#if defined (DEBUG_FAILOVER_TIMING)
99
log_info ("add_timeout +90 %s",
100
"dhcp_failover_listener_restart");
102
tv . tv_sec = cur_time + 90;
105
dhcp_failover_listener_restart,
107
(tvref_t)omapi_object_reference,
108
(tvunref_t)omapi_object_dereference);
113
/* Walk the failover_states list and call write_failover_state on each
   entry.
   NOTE(review): the statements that follow the failure test, and the
   return statements, are missing from this listing, so the meaning of
   the int result cannot be confirmed from here. */
int dhcp_failover_write_all_states ()
115
dhcp_failover_state_t *state;
117
for (state = failover_states; state; state = state -> next) {
118
if (!write_failover_state (state))
124
/* Register a failover state object on the global failover_states list.
   The list is first searched for an entry with the same name as peer.
   When none exists (ISC_R_NOTFOUND), the current list head is chained
   behind peer through peer -> next, peer becomes the new head, and
   ISC_R_SUCCESS is returned.  Otherwise the reference obtained on the
   existing entry is released again.
   NOTE(review): the return statements of the duplicate path are not
   visible in this listing; the value reported for a duplicate name
   needs to be confirmed against the complete file. */
isc_result_t enter_failover_peer (peer)
125
dhcp_failover_state_t *peer;
127
dhcp_failover_state_t *dup = (dhcp_failover_state_t *)0;
130
status = find_failover_peer (&dup, peer -> name, MDL);
131
if (status == ISC_R_NOTFOUND) {
132
if (failover_states) {
133
dhcp_failover_state_reference (&peer -> next,
134
failover_states, MDL);
135
dhcp_failover_state_dereference (&failover_states,
138
dhcp_failover_state_reference (&failover_states, peer, MDL);
139
return ISC_R_SUCCESS;
141
dhcp_failover_state_dereference (&dup, MDL);
142
if (status == ISC_R_SUCCESS)
147
/* Look up a failover state by name.
   peer: receives a reference to the matching state object.
   name: name to compare (strcmp) against each entry of failover_states.
   file, line: passed through to dhcp_failover_state_reference.
   Returns the result of dhcp_failover_state_reference when an entry is
   referenced, otherwise ISC_R_NOTFOUND.
   NOTE(review): the parameter declarations for name, file and line and
   the statements between the comparison and the reference call are
   missing from this listing. */
isc_result_t find_failover_peer (peer, name, file, line)
148
dhcp_failover_state_t **peer;
153
dhcp_failover_state_t *p;
155
for (p = failover_states; p; p = p -> next)
156
if (!strcmp (name, p -> name))
159
return dhcp_failover_state_reference (peer, p, file, line);
160
return ISC_R_NOTFOUND;
163
/* The failover protocol has three objects associated with it. For
164
each failover partner declaration in the dhcpd.conf file, primary
165
or secondary, there is a failover_state object. For any primary or
166
secondary state object that has a connection to its peer, there is
167
also a failover_link object, which has its own input state separate
168
from the failover protocol state for managing the actual bytes
169
coming in off the wire. Finally, there will be one listener object
170
for every distinct port number associated with a secondary
171
failover_state object. Normally all secondary failover_state
172
objects are expected to listen on the same port number, so there
173
need be only one listener object, but if different port numbers are
174
specified for each failover object, there could be as many as one
175
listener object for each secondary failover_state object. */
177
/* This, then, is the implementation of the failover link object. */
179
/* Create a failover link object for the state found in the object chain
   of h and start an outgoing connection to the partner.
   Visible steps:
   - walk to the outermost object, then inward, looking for an object of
     type dhcp_type_failover_state; DHCP_R_INVALIDARG is returned on the
     path where that search does not succeed;
   - allocate the link and take references to the partner address option
     cache and to the state, copying the partner port;
   - evaluate the partner address into ds (ISC_R_UNEXPECTED on failure)
     and build an omapi address list of ds.len / 4 AF_INET entries, each
     filled from 4 consecutive bytes of ds and the partner port;
   - work out the local address from state -> me.address, which must
     evaluate to exactly sizeof (struct in_addr) bytes (otherwise
     DHCP_R_INVALIDARG); the local port is left as 0;
   - call omapi_connect_list, then drop the local references to the
     address list and to the link.
   NOTE(review): this listing has gaps, so several declarations, the
   remaining arguments of omapi_connect_list and the final return are
   not visible here. */
isc_result_t dhcp_failover_link_initiate (omapi_object_t *h)
182
dhcp_failover_link_t *obj;
183
dhcp_failover_state_t *state;
186
struct data_string ds;
187
omapi_addr_list_t *addrs = (omapi_addr_list_t *)0;
188
omapi_addr_t local_addr;
190
/* Find the failover state in the object chain. */
191
for (o = h; o -> outer; o = o -> outer)
193
for (; o; o = o -> inner) {
194
if (o -> type == dhcp_type_failover_state)
198
return DHCP_R_INVALIDARG;
199
state = (dhcp_failover_state_t *)o;
201
obj = (dhcp_failover_link_t *)0;
202
status = dhcp_failover_link_allocate (&obj, MDL);
203
if (status != ISC_R_SUCCESS)
205
option_cache_reference (&obj -> peer_address,
206
state -> partner.address, MDL);
207
obj -> peer_port = state -> partner.port;
208
dhcp_failover_state_reference (&obj -> state_object, state, MDL);
210
memset (&ds, 0, sizeof ds);
211
if (!evaluate_option_cache (&ds, (struct packet *)0, (struct lease *)0,
212
(struct client_state *)0,
213
(struct option_state *)0,
214
(struct option_state *)0,
215
&global_scope, obj -> peer_address, MDL)) {
216
dhcp_failover_link_dereference (&obj, MDL);
217
return ISC_R_UNEXPECTED;
220
/* Make an omapi address list out of a buffer containing zero or more
222
status = omapi_addr_list_new (&addrs, ds.len / 4, MDL);
223
if (status != ISC_R_SUCCESS) {
224
dhcp_failover_link_dereference (&obj, MDL);
228
for (i = 0; i < addrs -> count; i++) {
229
addrs -> addresses [i].addrtype = AF_INET;
230
addrs -> addresses [i].addrlen = sizeof (struct in_addr);
231
memcpy (addrs -> addresses [i].address,
232
&ds.data [i * 4], sizeof (struct in_addr));
233
addrs -> addresses [i].port = obj -> peer_port;
235
data_string_forget (&ds, MDL);
237
/* Now figure out the local address that we're supposed to use. */
238
if (!state -> me.address ||
239
!evaluate_option_cache (&ds, (struct packet *)0,
241
(struct client_state *)0,
242
(struct option_state *)0,
243
(struct option_state *)0,
244
&global_scope, state -> me.address,
246
memset (&local_addr, 0, sizeof local_addr);
247
local_addr.addrtype = AF_INET;
248
local_addr.addrlen = sizeof (struct in_addr);
249
if (!state -> server_identifier.len) {
250
log_fatal ("failover peer %s: no local address.",
254
if (ds.len != sizeof (struct in_addr)) {
255
log_error("failover peer %s: 'address' parameter "
256
"fails to resolve to an IPv4 address",
258
data_string_forget (&ds, MDL);
259
dhcp_failover_link_dereference (&obj, MDL);
260
omapi_addr_list_dereference (&addrs, MDL);
261
return DHCP_R_INVALIDARG;
263
local_addr.addrtype = AF_INET;
264
local_addr.addrlen = ds.len;
265
memcpy (local_addr.address, ds.data, ds.len);
266
if (!state -> server_identifier.len)
267
data_string_copy (&state -> server_identifier,
269
data_string_forget (&ds, MDL);
270
local_addr.port = 0; /* Let the O.S. choose. */
273
status = omapi_connect_list ((omapi_object_t *)obj,
275
omapi_addr_list_dereference (&addrs, MDL);
277
dhcp_failover_link_dereference (&obj, MDL);
281
/* Signal handler for failover link objects.
   h must be of type dhcp_type_failover_link, otherwise ISC_R_UNEXPECTED
   is returned.  The signal name selects the behaviour:
   - connect: when the local state is primary, send a CONNECT message
     (on failure the outer connection is disconnected), then arm a
     15 second dhcp_failover_link_startup_timeout;
   - disconnect: mark the link disconnected, run the state transition
     when this link is the link_to_peer of the state, and schedule
     dhcp_failover_reconnect 5 seconds after cur_time;
   - status: for ISC_R_HOSTUNREACH or ISC_R_TIMEDOUT, mark the link
     disconnected, run the state transition and schedule a reconnect
     in the same way;
   - any other name except ready is forwarded to the signal handler of
     the inner object, or ISC_R_NOTFOUND when there is none;
   - ready: h -> outer must be an omapi_type_connection
     (DHCP_R_INVALIDARG otherwise); the input state machine in
     link -> state then reads the message length, header and options,
     validates CONNECT messages and hands complete messages to the
     state object through omapi_signal.
   NOTE(review): this listing has gaps, so several declarations (status,
   tv, c, errmsg, reason, sname, slen), case labels, braces and some
   statements are not visible here. */
isc_result_t dhcp_failover_link_signal (omapi_object_t *h,
282
const char *name, va_list ap)
285
dhcp_failover_link_t *link;
287
dhcp_failover_state_t *s, *state = (dhcp_failover_state_t *)0;
292
if (h -> type != dhcp_type_failover_link) {
293
/* XXX shouldn't happen. Put an assert here? */
294
return ISC_R_UNEXPECTED;
296
link = (dhcp_failover_link_t *)h;
298
if (!strcmp (name, "connect")) {
299
if (link -> state_object -> i_am == primary) {
300
status = dhcp_failover_send_connect (h);
301
if (status != ISC_R_SUCCESS) {
302
log_info ("dhcp_failover_send_connect: %s",
303
isc_result_totext (status));
304
omapi_disconnect (h -> outer, 1);
307
status = ISC_R_SUCCESS;
308
/* Allow the peer fifteen seconds to send us a
310
#if defined (DEBUG_FAILOVER_TIMING)
311
log_info ("add_timeout +15 %s",
312
"dhcp_failover_link_startup_timeout");
314
tv . tv_sec = cur_time + 15;
317
dhcp_failover_link_startup_timeout,
319
(tvref_t)dhcp_failover_link_reference,
320
(tvunref_t)dhcp_failover_link_dereference);
324
if (!strcmp (name, "disconnect")) {
325
if (link -> state_object) {
326
dhcp_failover_state_reference (&state,
327
link -> state_object, MDL);
328
link -> state = dhcp_flink_disconnected;
330
/* Make the transition. */
331
if (state->link_to_peer == link)
332
dhcp_failover_state_transition(link->state_object, name);
334
/* Schedule an attempt to reconnect. */
335
#if defined (DEBUG_FAILOVER_TIMING)
336
log_info("add_timeout +5 dhcp_failover_reconnect");
338
tv.tv_sec = cur_time + 5;
339
tv.tv_usec = cur_tv.tv_usec;
340
add_timeout(&tv, dhcp_failover_reconnect, state,
341
(tvref_t)dhcp_failover_state_reference,
342
(tvunref_t)dhcp_failover_state_dereference);
344
dhcp_failover_state_dereference (&state, MDL);
346
return ISC_R_SUCCESS;
349
if (!strcmp (name, "status")) {
350
if (link -> state_object) {
353
status = va_arg(ap, isc_result_t);
355
if ((status == ISC_R_HOSTUNREACH) || (status == ISC_R_TIMEDOUT)) {
356
dhcp_failover_state_reference (&state,
357
link -> state_object, MDL);
358
link -> state = dhcp_flink_disconnected;
360
/* Make the transition. */
361
dhcp_failover_state_transition (link -> state_object,
364
/* Start trying to reconnect. */
365
#if defined (DEBUG_FAILOVER_TIMING)
366
log_info ("add_timeout +5 %s",
367
"dhcp_failover_reconnect");
369
tv . tv_sec = cur_time + 5;
371
add_timeout (&tv, dhcp_failover_reconnect,
373
(tvref_t)dhcp_failover_state_reference,
374
(tvunref_t)dhcp_failover_state_dereference);
376
dhcp_failover_state_dereference (&state, MDL);
378
return ISC_R_SUCCESS;
381
/* Not a signal we recognize? */
382
if (strcmp (name, "ready")) {
383
if (h -> inner && h -> inner -> type -> signal_handler)
384
return (*(h -> inner -> type -> signal_handler))
385
(h -> inner, name, ap);
386
return ISC_R_NOTFOUND;
389
if (!h -> outer || h -> outer -> type != omapi_type_connection)
390
return DHCP_R_INVALIDARG;
393
/* We get here because we requested that we be woken up after
394
some number of bytes were read, and that number of bytes
395
has in fact been read. */
396
switch (link -> state) {
397
case dhcp_flink_start:
398
link -> state = dhcp_flink_message_length_wait;
399
if ((omapi_connection_require (c, 2)) != ISC_R_SUCCESS)
401
case dhcp_flink_message_length_wait:
403
link -> state = dhcp_flink_message_wait;
404
link -> imsg = dmalloc (sizeof (failover_message_t), MDL);
406
status = ISC_R_NOMEMORY;
409
failover_message_dereference (&link->imsg,
412
link -> state = dhcp_flink_disconnected;
413
log_info ("message length wait: %s",
414
isc_result_totext (status));
415
omapi_disconnect (c, 1);
416
/* XXX just blow away the protocol state now?
417
XXX or will disconnect blow it away? */
418
return ISC_R_UNEXPECTED;
420
memset (link -> imsg, 0, sizeof (failover_message_t));
421
link -> imsg -> refcnt = 1;
422
/* Get the length: */
423
omapi_connection_get_uint16 (c, &link -> imsg_len);
424
link -> imsg_count = 0; /* Bytes read. */
426
/* Ensure the message is of valid length. */
427
if (link->imsg_len < DHCP_FAILOVER_MIN_MESSAGE_SIZE ||
428
link->imsg_len > DHCP_FAILOVER_MAX_MESSAGE_SIZE) {
429
status = ISC_R_UNEXPECTED;
430
goto dhcp_flink_fail;
/* The 2-byte length field has already been read above, so only the
   remaining imsg_len - 2 bytes are requested from the connection. */
433
if ((omapi_connection_require (c, link -> imsg_len - 2U)) !=
436
case dhcp_flink_message_wait:
437
/* Read in the message. At this point we have the
438
entire message in the input buffer. For each
439
incoming value ID, set a bit in the bitmask
440
indicating that we've gotten it. Maybe flag an
441
error message if the bit is already set. Once
442
we're done reading, we can check the bitmask to
443
make sure that the required fields for each message
444
have been included. */
446
link -> imsg_count += 2; /* Count the length as read. */
448
/* Get message type. */
449
omapi_connection_copyout (&link -> imsg -> type, c, 1);
450
link -> imsg_count++;
452
/* Get message payload offset. */
453
omapi_connection_copyout (&link -> imsg_payoff, c, 1);
454
link -> imsg_count++;
456
/* Get message time. */
457
omapi_connection_get_uint32 (c, &link -> imsg -> time);
458
link -> imsg_count += 4;
460
/* Get transaction ID. */
461
omapi_connection_get_uint32 (c, &link -> imsg -> xid);
462
link -> imsg_count += 4;
464
#if defined (DEBUG_FAILOVER_MESSAGES)
465
# if !defined(DEBUG_FAILOVER_CONTACT_MESSAGES)
466
if (link->imsg->type == FTM_CONTACT)
469
log_info ("link: message %s payoff %d time %ld xid %ld",
470
dhcp_failover_message_name (link -> imsg -> type),
472
(unsigned long)link -> imsg -> time,
473
(unsigned long)link -> imsg -> xid);
474
# if !defined(DEBUG_FAILOVER_CONTACT_MESSAGES)
478
/* Skip over any portions of the message header that we
480
if (link -> imsg_payoff - link -> imsg_count) {
481
omapi_connection_copyout ((unsigned char *)0, c,
482
(link -> imsg_payoff -
483
link -> imsg_count));
484
link -> imsg_count = link -> imsg_payoff;
487
/* Now start sucking options off the wire. */
488
while (link -> imsg_count < link -> imsg_len) {
489
status = do_a_failover_option (c, link);
490
if (status != ISC_R_SUCCESS)
491
goto dhcp_flink_fail;
494
/* If it's a connect message, try to associate it with
496
/* XXX this should be authenticated! */
497
if (link -> imsg -> type == FTM_CONNECT) {
501
if (!(link->imsg->options_present &
502
FTB_RELATIONSHIP_NAME)) {
503
errmsg = "missing relationship-name";
504
reason = FTR_INVALID_PARTNER;
508
/* See if we can find a failover_state object that
509
matches this connection. This message should only
510
be received by a secondary from a primary. */
511
for (s = failover_states; s; s = s -> next) {
512
if (dhcp_failover_state_match_by_name(s,
513
&link->imsg->relationship_name))
517
/* If we can't find a failover protocol state
518
for this remote host, drop the connection */
520
errmsg = "unknown failover relationship name";
521
reason = FTR_INVALID_PARTNER;
524
/* XXX Send a refusal message first?
525
XXX Look in protocol spec for guidance. */
529
slen = strlen(sname);
530
} else if (link->imsg->options_present &
531
FTB_RELATIONSHIP_NAME) {
532
sname = (char *)link->imsg->
533
relationship_name.data;
534
slen = link->imsg->relationship_name.count;
537
slen = strlen(sname);
540
log_error("Failover CONNECT from %.*s: %s",
541
slen, sname, errmsg);
542
dhcp_failover_send_connectack
543
((omapi_object_t *)link, state,
545
log_info ("failover: disconnect: %s", errmsg);
546
omapi_disconnect (c, 0);
547
link -> state = dhcp_flink_disconnected;
548
return ISC_R_SUCCESS;
/* Reject the CONNECT when the message time differs from cur_time by
   more than 60 seconds in either direction. */
551
if ((cur_time > link -> imsg -> time &&
552
cur_time - link -> imsg -> time > 60) ||
553
(cur_time < link -> imsg -> time &&
554
link -> imsg -> time - cur_time > 60)) {
555
errmsg = "time offset too large";
556
reason = FTR_TIMEMISMATCH;
560
if (!(link -> imsg -> options_present & FTB_HBA) ||
561
link -> imsg -> hba.count != 32) {
562
errmsg = "invalid HBA";
563
reason = FTR_HBA_CONFLICT; /* XXX */
567
dfree (state -> hba, MDL);
568
state -> hba = dmalloc (32, MDL);
570
errmsg = "no memory";
571
reason = FTR_MISC_REJECT;
574
memcpy (state -> hba, link -> imsg -> hba.data, 32);
576
if (!link -> state_object)
577
dhcp_failover_state_reference
578
(&link -> state_object, state, MDL);
579
if (!link -> peer_address)
580
option_cache_reference
581
(&link -> peer_address,
582
state -> partner.address, MDL);
585
/* If we don't have a state object at this point, it's
586
some kind of bogus situation, so just drop the
588
if (!link -> state_object) {
589
log_info ("failover: connect: no matching state.");
590
omapi_disconnect (c, 1);
591
link -> state = dhcp_flink_disconnected;
592
return DHCP_R_INVALIDARG;
595
/* Once we have the entire message, and we've validated
596
it as best we can here, pass it to the parent. */
597
omapi_signal ((omapi_object_t *)link -> state_object,
599
link -> state = dhcp_flink_message_length_wait;
601
failover_message_dereference (&link -> imsg, MDL);
602
/* XXX This is dangerous because we could get into a tight
603
XXX loop reading input without servicing any other stuff.
604
XXX There needs to be a way to relinquish control but
605
XXX get it back immediately if there's no other work to
607
if ((omapi_connection_require (c, 2)) == ISC_R_SUCCESS)
612
log_fatal("Impossible case at %s:%d.", MDL);
615
return ISC_R_SUCCESS;
618
/* Read one option of the incoming failover message from connection c
   into link -> imsg, advancing link -> imsg_count by the bytes consumed.
   Visible behaviour:
   - the option code, the option length and the option data are each
     checked against link -> imsg_len before being read, and an overflow
     yields DHCP_R_PROTOCOLERROR;
   - codes above FTO_MAX or of type FT_UNDEF are skipped;
   - an FT_DIGEST option must end exactly at the end of the message and
     its data is discarded;
   - an option whose bit is already set in options_present is rejected
     as a duplicate;
   - options not allowed for the message type (fto_allowed) are skipped;
   - fixed-count options are stored at the offset given by ft_options,
     variable-count options get a dmalloc buffer;
   - on success the bit of the option is recorded in options_present and
     ISC_R_SUCCESS is returned.
   NOTE(review): this listing has gaps, so the declarations of op,
   op_size, op_count and i, several braces, case labels and statements
   are not visible here. */
static isc_result_t do_a_failover_option (c, link)
620
dhcp_failover_link_t *link;
622
u_int16_t option_code;
623
u_int16_t option_len;
629
if (link -> imsg_count + 2 > link -> imsg_len) {
630
log_error ("FAILOVER: message overflow at option code.");
631
return DHCP_R_PROTOCOLERROR;
634
/* Get option code. */
635
omapi_connection_get_uint16 (c, &option_code);
636
link -> imsg_count += 2;
638
if (link -> imsg_count + 2 > link -> imsg_len) {
639
log_error ("FAILOVER: message overflow at length.");
640
return DHCP_R_PROTOCOLERROR;
643
/* Get option length. */
644
omapi_connection_get_uint16 (c, &option_len);
645
link -> imsg_count += 2;
647
if (link -> imsg_count + option_len > link -> imsg_len) {
648
log_error ("FAILOVER: message overflow at data.");
649
return DHCP_R_PROTOCOLERROR;
652
/* If it's an unknown code, skip over it. */
653
if ((option_code > FTO_MAX) ||
654
(ft_options[option_code].type == FT_UNDEF)) {
655
#if defined (DEBUG_FAILOVER_MESSAGES)
656
log_debug (" option code %d (%s) len %d (not recognized)",
658
dhcp_failover_option_name (option_code),
661
omapi_connection_copyout ((unsigned char *)0, c, option_len);
662
link -> imsg_count += option_len;
663
return ISC_R_SUCCESS;
666
/* If it's the digest, do it now. */
667
if (ft_options [option_code].type == FT_DIGEST) {
668
link -> imsg_count += option_len;
669
if (link -> imsg_count != link -> imsg_len) {
670
log_error ("FAILOVER: digest not at end of message");
671
return DHCP_R_PROTOCOLERROR;
673
#if defined (DEBUG_FAILOVER_MESSAGES)
674
log_debug (" option %s len %d",
675
ft_options [option_code].name, option_len);
677
/* For now, just dump it. */
678
omapi_connection_copyout ((unsigned char *)0, c, option_len);
679
return ISC_R_SUCCESS;
682
/* Only accept an option once. */
683
if (link -> imsg -> options_present & ft_options [option_code].bit) {
684
log_error ("FAILOVER: duplicate option %s",
685
ft_options [option_code].name);
686
return DHCP_R_PROTOCOLERROR;
689
/* Make sure the option is appropriate for this type of message.
690
Really, any option is generally allowed for any message, and the
691
cases where this is not true are too complicated to represent in
692
this way - what this code is doing is to just avoid saving the
693
value of an option we don't have any way to use, which allows
694
us to make the failover_message structure smaller. */
695
if (ft_options [option_code].bit &&
696
!(fto_allowed [link -> imsg -> type] &
697
ft_options [option_code].bit)) {
698
omapi_connection_copyout ((unsigned char *)0, c, option_len);
699
link -> imsg_count += option_len;
700
return ISC_R_SUCCESS;
703
/* Figure out how many elements, how big they are, and where
705
if (ft_options [option_code].num_present) {
706
/* If this option takes a fixed number of elements,
707
we expect the space for them to be preallocated,
708
and we can just read the data in. */
710
op = ((unsigned char *)link -> imsg) +
711
ft_options [option_code].offset;
712
op_size = ft_sizes [ft_options [option_code].type];
713
op_count = ft_options [option_code].num_present;
715
if (option_len != op_size * op_count) {
716
log_error ("FAILOVER: option size (%d:%d), option %s",
718
(ft_sizes [ft_options [option_code].type] *
719
ft_options [option_code].num_present),
720
ft_options [option_code].name);
721
return DHCP_R_PROTOCOLERROR;
724
failover_option_t *fo;
726
/* FT_DDNS* are special - one or two bytes of status
727
followed by the client FQDN. */
/* NOTE(review): both operands of the test below compare against
   FT_DDNS1, so the second comparison is redundant.  The comment above
   and the one-or-two byte selection further down suggest the other
   operand was meant to name a second DDNS type - TODO confirm against
   the option type definitions before changing it. */
728
if (ft_options [option_code].type == FT_DDNS1 ||
729
ft_options [option_code].type == FT_DDNS1) {
732
(((char *)link -> imsg) +
733
ft_options [option_code].offset));
735
op_count = (ft_options [option_code].type == FT_DDNS1
738
omapi_connection_copyout (&ddns -> codes [0],
740
link -> imsg_count += op_count;
742
ddns -> codes [1] = 0;
744
op_count = option_len - op_count;
746
ddns -> length = op_count;
747
ddns -> data = dmalloc (op_count, MDL);
749
log_error ("FAILOVER: no memory getting%s(%d)",
750
" DNS data ", op_count);
752
/* Actually, NO_MEMORY, but if we lose here
753
we have to drop the connection. */
754
return DHCP_R_PROTOCOLERROR;
756
omapi_connection_copyout (ddns -> data, c, op_count);
760
/* A zero for num_present means that any number of
761
elements can appear, so we have to figure out how
762
many we got from the length of the option, and then
763
fill out a failover_option structure describing the
765
op_size = ft_sizes [ft_options [option_code].type];
767
/* Make sure that option data length is a multiple of the
768
size of the data type being sent. */
769
if (op_size > 1 && option_len % op_size) {
770
log_error ("FAILOVER: option_len %d not %s%d",
771
option_len, "multiple of ", op_size);
772
return DHCP_R_PROTOCOLERROR;
775
op_count = option_len / op_size;
777
fo = ((failover_option_t *)
778
(((char *)link -> imsg) +
779
ft_options [option_code].offset));
781
fo -> count = op_count;
782
fo -> data = dmalloc (option_len, MDL);
784
log_error ("FAILOVER: no memory getting %s (%d)",
785
"option data", op_count);
787
return DHCP_R_PROTOCOLERROR;
792
/* For single-byte message values and multi-byte values that
793
don't need swapping, just read them in all at once. */
794
if (op_size == 1 || ft_options [option_code].type == FT_IPADDR) {
795
omapi_connection_copyout ((unsigned char *)op, c, option_len);
796
link -> imsg_count += option_len;
799
* As of 3.1.0, many option codes were changed to conform to
800
* draft revision 12 (which alphabetized, then renumbered all
801
* the option codes without preserving the version option code
802
* nor bumping its value). As it turns out, the message codes
803
* for CONNECT and CONNECTACK turn out the same, so it tries
804
* its darndest to connect, and falls short (when TLS_REQUEST
805
* comes up size 2 rather than size 1 as draft revision 12 also
808
* The VENDOR_CLASS code in 3.0.x was 11, which is now the HBA
809
* code. Both work out to be arbitrarily long text-or-byte
810
* strings, so they pass parsing.
812
* Note that it is possible (or intentional), if highly
813
* improbable, for the HBA bit array to exactly match
814
* isc-V3.0.x. Warning here is not an issue; if it really is
815
* 3.0.x, there will be a protocol error later on. If it isn't
816
* actually 3.0.x, then I guess the lucky user will have to
817
* live with a weird warning.
819
if ((option_code == 11) && (option_len > 9) &&
820
(strncmp((const char *)op, "isc-V3.0.", 9) == 0)) {
821
log_error("WARNING: failover as of versions 3.1.0 and "
822
"on are not reverse compatible with "
829
/* For values that require swapping, read them in one at a time
830
using routines that swap bytes. */
831
for (i = 0; i < op_count; i++) {
832
switch (ft_options [option_code].type) {
834
omapi_connection_get_uint32 (c, (u_int32_t *)op);
836
link -> imsg_count += 4;
840
omapi_connection_get_uint16 (c, (u_int16_t *)op);
842
link -> imsg_count += 2;
846
/* Everything else should have been handled
848
log_error ("FAILOVER: option %s: bad type %d",
849
ft_options [option_code].name,
850
ft_options [option_code].type);
851
return DHCP_R_PROTOCOLERROR;
855
/* Remember that we got this option. */
856
link -> imsg -> options_present |= ft_options [option_code].bit;
857
return ISC_R_SUCCESS;
860
/* OMAPI set-value handler for failover link objects.
   The names link-port, link-name and link-state are singled out as
   never settable; every other name is forwarded to the set_value
   handler of the inner object when one exists, otherwise
   ISC_R_NOTFOUND is returned.
   NOTE(review): the type check here uses omapi_type_protocol, while
   dhcp_failover_link_destroy and dhcp_failover_link_stuff_values check
   for dhcp_type_failover_link - confirm which type is intended.
   NOTE(review): the id parameter line and the statement executed for
   the three read-only names are missing from this listing. */
isc_result_t dhcp_failover_link_set_value (omapi_object_t *h,
862
omapi_data_string_t *name,
863
omapi_typed_data_t *value)
865
if (h -> type != omapi_type_protocol)
866
return DHCP_R_INVALIDARG;
868
/* Never valid to set these. */
869
if (!omapi_ds_strcmp (name, "link-port") ||
870
!omapi_ds_strcmp (name, "link-name") ||
871
!omapi_ds_strcmp (name, "link-state"))
874
if (h -> inner && h -> inner -> type -> set_value)
875
return (*(h -> inner -> type -> set_value))
876
(h -> inner, id, name, value);
877
return ISC_R_NOTFOUND;
880
/* OMAPI get-value handler for failover link objects.
   link-port yields the peer port as an integer value.  link-state
   yields the matching entry of dhcp_flink_state_names, or the text
   invalid link state when link -> state is negative or not below
   dhcp_flink_state_max.  Any other name is forwarded to the get_value
   handler of the inner object when one exists, otherwise
   ISC_R_NOTFOUND is returned.
   NOTE(review): h is checked against omapi_type_protocol and then cast
   to dhcp_failover_link_t *, while dhcp_failover_link_destroy and
   dhcp_failover_link_stuff_values check for dhcp_type_failover_link -
   confirm which type is intended.
   NOTE(review): the id parameter line and parts of the link-state
   branch are missing from this listing. */
isc_result_t dhcp_failover_link_get_value (omapi_object_t *h,
882
omapi_data_string_t *name,
883
omapi_value_t **value)
885
dhcp_failover_link_t *link;
887
if (h -> type != omapi_type_protocol)
888
return DHCP_R_INVALIDARG;
889
link = (dhcp_failover_link_t *)h;
891
if (!omapi_ds_strcmp (name, "link-port")) {
892
return omapi_make_int_value (value, name,
893
(int)link -> peer_port, MDL);
894
} else if (!omapi_ds_strcmp (name, "link-state")) {
895
if (link -> state < 0 ||
896
link -> state >= dhcp_flink_state_max)
897
return omapi_make_string_value (value, name,
898
"invalid link state",
900
return omapi_make_string_value
902
dhcp_flink_state_names [link -> state], MDL);
905
if (h -> inner && h -> inner -> type -> get_value)
906
return (*(h -> inner -> type -> get_value))
907
(h -> inner, id, name, value);
908
return ISC_R_NOTFOUND;
911
/* Destructor for failover link objects.
   h must be of type dhcp_type_failover_link (DHCP_R_INVALIDARG
   otherwise).  Releases the references the link holds on its peer
   address option cache, its pending input message and its state
   object, passing file and line through to the dereference calls, and
   returns ISC_R_SUCCESS.
   NOTE(review): the line preceding the failover_message_dereference
   call is missing from this listing, so it cannot be confirmed from
   here whether that call is guarded by a check of link -> imsg. */
isc_result_t dhcp_failover_link_destroy (omapi_object_t *h,
912
const char *file, int line)
914
dhcp_failover_link_t *link;
915
if (h -> type != dhcp_type_failover_link)
916
return DHCP_R_INVALIDARG;
917
link = (dhcp_failover_link_t *)h;
919
if (link -> peer_address)
920
option_cache_dereference (&link -> peer_address, file, line);
922
failover_message_dereference (&link -> imsg, file, line);
923
if (link -> state_object)
924
dhcp_failover_state_dereference (&link -> state_object,
926
return ISC_R_SUCCESS;
929
/* Write all the published values associated with the object through the
930
specified connection. */
932
/* Emit the link-port and link-state values of a failover link object
   on connection c.
   l must be of type dhcp_type_failover_link (DHCP_R_INVALIDARG
   otherwise).  link-port is written as a name, a length of
   sizeof (int) and the peer port as a 32-bit value; link-state is
   written as a name followed by the matching dhcp_flink_state_names
   string, or the text invalid link state when the state is out of
   range.  Afterwards the stuff_values handler of the inner object is
   invoked when one exists; otherwise ISC_R_SUCCESS is returned.
   NOTE(review): the id and l parameter lines and the statements that
   run after each failed status check are missing from this listing. */
isc_result_t dhcp_failover_link_stuff_values (omapi_object_t *c,
936
dhcp_failover_link_t *link;
939
if (l -> type != dhcp_type_failover_link)
940
return DHCP_R_INVALIDARG;
941
link = (dhcp_failover_link_t *)l;
943
status = omapi_connection_put_name (c, "link-port");
944
if (status != ISC_R_SUCCESS)
946
status = omapi_connection_put_uint32 (c, sizeof (int));
947
if (status != ISC_R_SUCCESS)
949
status = omapi_connection_put_uint32 (c, link -> peer_port);
950
if (status != ISC_R_SUCCESS)
953
status = omapi_connection_put_name (c, "link-state");
954
if (status != ISC_R_SUCCESS)
956
if (link -> state < 0 ||
957
link -> state >= dhcp_flink_state_max)
958
status = omapi_connection_put_string (c, "invalid link state");
960
status = (omapi_connection_put_string
961
(c, dhcp_flink_state_names [link -> state]));
962
if (status != ISC_R_SUCCESS)
965
if (link -> inner && link -> inner -> type -> stuff_values)
966
return (*(link -> inner -> type -> stuff_values)) (c, id,
968
return ISC_R_SUCCESS;
971
/* Set up a listener for the omapi protocol. The handle stored points to
972
a listener object, not a protocol object. */
974
/* Make sure a failover listener exists for the local address and port
   published by object h.
   Visible steps:
   - read the local-port and local-address values of h through
     omapi_get_value_str; a value object without a value yields
     DHCP_R_INVALIDARG;
   - the address is tested for type omapi_datatype_data and a length of
     sizeof (struct in_addr), and is recorded as an AF_INET address;
   - scan failover_listeners for an entry with the same port, address
     type, length and bytes; the path commented as already listening
     returns ISC_R_SUCCESS;
   - otherwise allocate a listener, start listening with
     omapi_listen_addr, cross-reference it with h (h -> outer and
     obj -> inner), push it onto the head of failover_listeners, and
     return the result of dropping the local reference.
   NOTE(review): this listing has gaps, so the declarations of status
   and port, the returns after failed status checks and the action
   taken for a malformed address are not visible here. */
isc_result_t dhcp_failover_listen (omapi_object_t *h)
977
dhcp_failover_listener_t *obj, *l;
978
omapi_value_t *value = (omapi_value_t *)0;
979
omapi_addr_t local_addr;
982
status = omapi_get_value_str (h, (omapi_object_t *)0,
983
"local-port", &value);
984
if (status != ISC_R_SUCCESS)
986
if (!value -> value) {
987
omapi_value_dereference (&value, MDL);
988
return DHCP_R_INVALIDARG;
991
status = omapi_get_int_value (&port, value -> value);
992
omapi_value_dereference (&value, MDL);
993
if (status != ISC_R_SUCCESS)
995
local_addr.port = port;
997
status = omapi_get_value_str (h, (omapi_object_t *)0,
998
"local-address", &value);
999
if (status != ISC_R_SUCCESS)
1001
if (!value -> value) {
1003
omapi_value_dereference (&value, MDL);
1004
return DHCP_R_INVALIDARG;
1007
if (value -> value -> type != omapi_datatype_data ||
1008
value -> value -> u.buffer.len != sizeof (struct in_addr))
1011
memcpy (local_addr.address, value -> value -> u.buffer.value,
1012
value -> value -> u.buffer.len);
1013
local_addr.addrlen = value -> value -> u.buffer.len;
1014
local_addr.addrtype = AF_INET;
1016
omapi_value_dereference (&value, MDL);
1018
/* Are we already listening on this port and address? */
1019
for (l = failover_listeners; l; l = l -> next) {
1020
if (l -> address.port == local_addr.port &&
1021
l -> address.addrtype == local_addr.addrtype &&
1022
l -> address.addrlen == local_addr.addrlen &&
1023
!memcmp (l -> address.address, local_addr.address,
1024
local_addr.addrlen))
1027
/* Already listening. */
1029
return ISC_R_SUCCESS;
1031
obj = (dhcp_failover_listener_t *)0;
1032
status = dhcp_failover_listener_allocate (&obj, MDL);
1033
if (status != ISC_R_SUCCESS)
1035
obj -> address = local_addr;
1037
status = omapi_listen_addr ((omapi_object_t *)obj, &obj -> address, 1);
1038
if (status != ISC_R_SUCCESS)
1041
status = omapi_object_reference (&h -> outer,
1042
(omapi_object_t *)obj, MDL);
1043
if (status != ISC_R_SUCCESS) {
1044
dhcp_failover_listener_dereference (&obj, MDL);
1047
status = omapi_object_reference (&obj -> inner, h, MDL);
1048
if (status != ISC_R_SUCCESS) {
1049
dhcp_failover_listener_dereference (&obj, MDL);
1053
/* Put this listener on the list. */
1054
if (failover_listeners) {
1055
dhcp_failover_listener_reference (&obj -> next,
1056
failover_listeners, MDL);
1057
dhcp_failover_listener_dereference (&failover_listeners, MDL);
1059
dhcp_failover_listener_reference (&failover_listeners, obj, MDL);
1061
return dhcp_failover_listener_dereference (&obj, MDL);
1064
/* Signal handler for protocol listener - if we get a connect signal,
1065
create a new protocol connection, otherwise pass the signal down. */
1067
/* Signal handler for failover listener objects.
   o must be a non-null object of type dhcp_type_failover_listener
   (DHCP_R_INVALIDARG otherwise).  Signals other than connect are
   forwarded to the signal handler of the inner object, or answered with
   ISC_R_NOTFOUND when there is none.  For connect, the connection
   object is taken from ap and must be of type omapi_type_connection;
   the failover_states list is searched with dhcp_failover_state_match
   on the remote address of the connection, and when nothing matches the
   connection is dropped and ISC_R_NOTFOUND returned.  Otherwise a link
   object is allocated, given the remote port in host byte order, cross
   referenced with the connection (obj -> outer and c -> inner) and with
   the state, sent a connect signal through omapi_signal_in, and the
   result of dropping the local reference is returned.
   NOTE(review): this listing has gaps, so the statements run after a
   successful match and after failed status checks, and the second
   argument of the state reference call, are not visible here. */
isc_result_t dhcp_failover_listener_signal (omapi_object_t *o,
1068
const char *name, va_list ap)
1070
isc_result_t status;
1071
omapi_connection_object_t *c;
1072
dhcp_failover_link_t *obj;
1073
dhcp_failover_listener_t *p;
1074
dhcp_failover_state_t *s, *state = (dhcp_failover_state_t *)0;
1076
if (!o || o -> type != dhcp_type_failover_listener)
1077
return DHCP_R_INVALIDARG;
1078
p = (dhcp_failover_listener_t *)o;
1080
/* Not a signal we recognize? */
1081
if (strcmp (name, "connect")) {
1082
if (p -> inner && p -> inner -> type -> signal_handler)
1083
return (*(p -> inner -> type -> signal_handler))
1084
(p -> inner, name, ap);
1085
return ISC_R_NOTFOUND;
1088
c = va_arg (ap, omapi_connection_object_t *);
1089
if (!c || c -> type != omapi_type_connection)
1090
return DHCP_R_INVALIDARG;
1092
/* See if we can find a failover_state object that
1093
matches this connection. */
1094
for (s = failover_states; s; s = s -> next) {
1095
if (dhcp_failover_state_match
1096
(s, (u_int8_t *)&c -> remote_addr.sin_addr,
1097
sizeof c -> remote_addr.sin_addr)) {
1103
log_info ("failover: listener: no matching state");
1104
omapi_disconnect ((omapi_object_t *)c, 1);
1105
return(ISC_R_NOTFOUND);
1108
obj = (dhcp_failover_link_t *)0;
1109
status = dhcp_failover_link_allocate (&obj, MDL);
1110
if (status != ISC_R_SUCCESS)
1112
obj -> peer_port = ntohs (c -> remote_addr.sin_port);
1114
status = omapi_object_reference (&obj -> outer,
1115
(omapi_object_t *)c, MDL);
1116
if (status != ISC_R_SUCCESS) {
1118
dhcp_failover_link_dereference (&obj, MDL);
1119
log_info ("failover: listener: picayune failure.");
1120
omapi_disconnect ((omapi_object_t *)c, 1);
1124
status = omapi_object_reference (&c -> inner,
1125
(omapi_object_t *)obj, MDL);
1126
if (status != ISC_R_SUCCESS)
1129
status = dhcp_failover_state_reference (&obj -> state_object,
1131
if (status != ISC_R_SUCCESS)
1134
omapi_signal_in ((omapi_object_t *)obj, "connect");
1136
return dhcp_failover_link_dereference (&obj, MDL);
1139
/*
 * OMAPI set_value hook for failover listener objects.
 *
 * The visible code sets no value on the listener itself: after checking
 * that h is of type dhcp_type_failover_listener (DHCP_R_INVALIDARG
 * otherwise) the request is passed, together with id, name and value, to
 * the set_value method of h->inner when that object and method exist.
 * When there is nothing to delegate to, ISC_R_NOTFOUND is returned.
 */
isc_result_t dhcp_failover_listener_set_value (omapi_object_t *h,
1141
omapi_data_string_t *name,
1142
omapi_typed_data_t *value)
1144
if (h -> type != dhcp_type_failover_listener)
1145
return DHCP_R_INVALIDARG;
1147
if (h -> inner && h -> inner -> type -> set_value)
1148
return (*(h -> inner -> type -> set_value))
1149
(h -> inner, id, name, value);
1150
return ISC_R_NOTFOUND;
1153
/*
 * OMAPI get_value hook for failover listener objects.
 *
 * The visible code reads no value from the listener itself: after checking
 * that h is of type dhcp_type_failover_listener (DHCP_R_INVALIDARG
 * otherwise) the lookup is passed, together with id, name and value, to the
 * get_value method of h->inner when that object and method exist.  When
 * there is nothing to delegate to, ISC_R_NOTFOUND is returned.
 */
isc_result_t dhcp_failover_listener_get_value (omapi_object_t *h,
1155
omapi_data_string_t *name,
1156
omapi_value_t **value)
1158
if (h -> type != dhcp_type_failover_listener)
1159
return DHCP_R_INVALIDARG;
1161
if (h -> inner && h -> inner -> type -> get_value)
1162
return (*(h -> inner -> type -> get_value))
1163
(h -> inner, id, name, value);
1164
return ISC_R_NOTFOUND;
1167
/*
 * OMAPI destroy hook for failover listener objects.
 *
 * h          - object being destroyed; anything that is not of type
 *              dhcp_type_failover_listener yields DHCP_R_INVALIDARG.
 * file, line - caller location, handed on to the dereference call.
 *
 * Releases the reference the listener holds on the next listener in the
 * list (l->next) and returns ISC_R_SUCCESS.
 */
isc_result_t dhcp_failover_listener_destroy (omapi_object_t *h,
1168
const char *file, int line)
1170
dhcp_failover_listener_t *l;
1172
if (h -> type != dhcp_type_failover_listener)
1173
return DHCP_R_INVALIDARG;
1174
l = (dhcp_failover_listener_t *)h;
1176
dhcp_failover_listener_dereference (&l -> next, file, line);
1178
return ISC_R_SUCCESS;
1181
/* Write all the published values associated with the object through the
1182
specified connection. */
1184
/*
 * OMAPI stuff_values hook for failover listener objects.
 *
 * c is the connection the values are written through.  The object being
 * written (p) must be of type dhcp_type_failover_listener, otherwise
 * DHCP_R_INVALIDARG is returned.  The visible code writes nothing for the
 * listener itself; it only chains to the stuff_values method of p->inner
 * when that object and method exist, and otherwise returns ISC_R_SUCCESS.
 */
isc_result_t dhcp_failover_listener_stuff (omapi_object_t *c,
1188
if (p -> type != dhcp_type_failover_listener)
1189
return DHCP_R_INVALIDARG;
1191
if (p -> inner && p -> inner -> type -> stuff_values)
1192
return (*(p -> inner -> type -> stuff_values)) (c, id,
1194
return ISC_R_SUCCESS;
1197
/* Set up master state machine for the failover protocol. */
1199
/*
 * Create a failover state object for h and start listening for the peer.
 *
 * h - OMAPI object that must provide a "local-port" value.
 *
 * Steps visible here:
 *   1. "local-port" is fetched from h with omapi_get_value_str(); a value
 *      object without data yields DHCP_R_INVALIDARG.
 *   2. The value is converted to an integer port with omapi_get_int_value()
 *      and the value reference is released.
 *   3. A dhcp_failover_state_t is allocated, me.port is set, and
 *      omapi_listen() is started on that port with the new object as the
 *      listener's owner.
 *   4. h and the new object are cross-referenced (h->outer, obj->inner).
 *   5. The local reference to the new object is dropped.
 *
 * On the visible failure paths after allocation the local reference is
 * released before leaving.
 */
isc_result_t dhcp_failover_register (omapi_object_t *h)
1201
isc_result_t status;
1202
dhcp_failover_state_t *obj;
1204
omapi_value_t *value = (omapi_value_t *)0;
1206
status = omapi_get_value_str (h, (omapi_object_t *)0,
1207
"local-port", &value);
1208
if (status != ISC_R_SUCCESS)
1210
if (!value -> value) {
1211
omapi_value_dereference (&value, MDL);
1212
return DHCP_R_INVALIDARG;
1215
status = omapi_get_int_value (&port, value -> value);
1216
omapi_value_dereference (&value, MDL);
1217
if (status != ISC_R_SUCCESS)
1220
obj = (dhcp_failover_state_t *)0;
1221
/* NOTE(review): the result of dhcp_failover_state_allocate() is not
 * examined before obj is dereferenced on the next statement - confirm
 * that allocation cannot fail here, or add a check. */
dhcp_failover_state_allocate (&obj, MDL);
1222
obj -> me.port = port;
1224
status = omapi_listen ((omapi_object_t *)obj, port, 1);
1225
if (status != ISC_R_SUCCESS) {
1226
dhcp_failover_state_dereference (&obj, MDL);
1230
status = omapi_object_reference (&h -> outer, (omapi_object_t *)obj,
1232
if (status != ISC_R_SUCCESS) {
1233
dhcp_failover_state_dereference (&obj, MDL);
1236
status = omapi_object_reference (&obj -> inner, h, MDL);
1237
dhcp_failover_state_dereference (&obj, MDL);
1241
/* Signal handler for protocol state machine. */
1243
/*
 * OMAPI signal handler for a failover state object.
 *
 * o must be non-NULL and of type dhcp_type_failover_state, otherwise
 * DHCP_R_INVALIDARG is returned.  Signals other than "disconnect" and
 * "message" are handed to the inner object's signal_handler when there is
 * one; without one the result is ISC_R_NOTFOUND.
 *
 * "disconnect":
 *   The link is read from ap, the state's link_to_peer reference is
 *   released and dhcp_failover_state_transition (state, "disconnect") is
 *   run.  When this server is the primary, a dhcp_failover_reconnect
 *   timeout is scheduled for cur_time + 90.
 *
 * "message":
 *   The link is read from ap and link->imsg->type selects the handling.
 *   FTM_CONNECT     - answered with a connectack carrying an error text and
 *                     followed by a disconnect when a link to the peer
 *                     already exists or the message has no MCLT option.
 *                     Otherwise the link is recorded in link_to_peer, a
 *                     connectack is sent, the peer's max_unacked and
 *                     receive_timer options (when present) and its MCLT are
 *                     stored, our state is sent and the link startup
 *                     timeout is cancelled.
 *   FTM_CONNECTACK  - the link startup timeout is cancelled and the message
 *                     is checked for a relationship name, a reject reason,
 *                     a matching relationship name, an already existing
 *                     link and a clock difference of more than 60 seconds.
 *                     On acceptance the link is recorded, our state is
 *                     sent, the peer's limits are stored, and timeouts for
 *                     dhcp_failover_send_contact and dhcp_failover_timeout
 *                     are added.
 *   FTM_DISCONNECT  - a non-zero reject reason is logged and the connection
 *                     is disconnected.
 *   FTM_BNDUPD, FTM_BNDACK, FTM_UPDREQ, FTM_UPDREQALL, FTM_UPDDONE,
 *   FTM_POOLREQ, FTM_STATE
 *                   - passed to the corresponding processing function.
 *   FTM_POOLRESP    - only logged.
 *
 *   After a message, when the link is still the state's link_to_peer and is
 *   not in dhcp_flink_disconnected, a dhcp_failover_timeout timeout is
 *   added for cur_time + me.max_response_delay.
 *
 * The rejection paths visible here return ISC_R_SUCCESS, as does the final
 * return statement.
 */
isc_result_t dhcp_failover_state_signal (omapi_object_t *o,
1244
const char *name, va_list ap)
1246
isc_result_t status;
1247
dhcp_failover_state_t *state;
1248
dhcp_failover_link_t *link;
1251
if (!o || o -> type != dhcp_type_failover_state)
1252
return DHCP_R_INVALIDARG;
1253
state = (dhcp_failover_state_t *)o;
1255
/* Not a signal we recognize? */
1256
if (strcmp (name, "disconnect") &&
1257
strcmp (name, "message")) {
1258
if (state -> inner && state -> inner -> type -> signal_handler)
1259
return (*(state -> inner -> type -> signal_handler))
1260
(state -> inner, name, ap);
1261
return ISC_R_NOTFOUND;
1264
/* Handle connect signals by seeing what state we're in
1265
and potentially doing a state transition. */
1266
if (!strcmp (name, "disconnect")) {
1267
link = va_arg (ap, dhcp_failover_link_t *);
1269
dhcp_failover_link_dereference (&state -> link_to_peer, MDL);
1270
dhcp_failover_state_transition (state, "disconnect");
1271
if (state -> i_am == primary) {
1272
#if defined (DEBUG_FAILOVER_TIMING)
1273
log_info ("add_timeout +90 %s",
1274
"dhcp_failover_reconnect");
1276
tv . tv_sec = cur_time + 90;
1278
add_timeout (&tv, dhcp_failover_reconnect,
1280
(tvref_t)dhcp_failover_state_reference,
1282
dhcp_failover_state_dereference);
1284
} else if (!strcmp (name, "message")) {
1285
link = va_arg (ap, dhcp_failover_link_t *);
1287
if (link -> imsg -> type == FTM_CONNECT) {
1288
/* If we already have a link to the peer, it must be
1290
XXX Is this the right thing to do?
1291
XXX Probably not - what if both peers start at
1292
XXX the same time? */
1293
if (state -> link_to_peer) {
1294
dhcp_failover_send_connectack
1295
((omapi_object_t *)link, state,
1297
"already connected");
1298
omapi_disconnect (link -> outer, 1);
1299
return ISC_R_SUCCESS;
1301
if (!(link -> imsg -> options_present & FTB_MCLT)) {
1302
dhcp_failover_send_connectack
1303
((omapi_object_t *)link, state,
1305
"no MCLT provided");
1306
omapi_disconnect (link -> outer, 1);
1307
return ISC_R_SUCCESS;
1310
dhcp_failover_link_reference (&state -> link_to_peer,
1312
status = (dhcp_failover_send_connectack
1313
((omapi_object_t *)link, state, 0, 0));
1314
if (status != ISC_R_SUCCESS) {
1315
dhcp_failover_link_dereference
1316
(&state -> link_to_peer, MDL);
1317
log_info ("dhcp_failover_send_connectack: %s",
1318
isc_result_totext (status));
1319
omapi_disconnect (link -> outer, 1);
1320
return ISC_R_SUCCESS;
1322
if (link -> imsg -> options_present & FTB_MAX_UNACKED)
1323
state -> partner.max_flying_updates =
1324
link -> imsg -> max_unacked;
1325
if (link -> imsg -> options_present & FTB_RECEIVE_TIMER)
1326
state -> partner.max_response_delay =
1327
link -> imsg -> receive_timer;
1328
state -> mclt = link -> imsg -> mclt;
1329
dhcp_failover_send_state (state);
1330
cancel_timeout (dhcp_failover_link_startup_timeout,
1332
} else if (link -> imsg -> type == FTM_CONNECTACK) {
1337
cancel_timeout (dhcp_failover_link_startup_timeout,
1340
if (!(link->imsg->options_present &
1341
FTB_RELATIONSHIP_NAME)) {
1342
errmsg = "missing relationship-name";
1343
reason = FTR_INVALID_PARTNER;
1347
if (link->imsg->options_present & FTB_REJECT_REASON) {
1348
/* XXX: add message option to text output. */
1349
log_error ("Failover CONNECT to %s rejected: %s",
1350
state ? state->name : "unknown",
1351
(dhcp_failover_reject_reason_print
1352
(link -> imsg -> reject_reason)));
1353
/* XXX print message from peer if peer sent message. */
1354
omapi_disconnect (link -> outer, 1);
1355
return ISC_R_SUCCESS;
1358
if (!dhcp_failover_state_match_by_name(state,
1359
&link->imsg->relationship_name)) {
1360
/* XXX: Overflow results in log truncation, safe. */
1361
snprintf(errbuf, sizeof(errbuf), "remote failover "
1362
"relationship name %.*s does not match",
1363
(int)link->imsg->relationship_name.count,
1364
link->imsg->relationship_name.data);
1366
reason = FTR_INVALID_PARTNER;
1368
log_error("Failover CONNECTACK from %s: %s",
1369
state->name, errmsg);
1370
dhcp_failover_send_disconnect ((omapi_object_t *)link,
1372
omapi_disconnect (link -> outer, 0);
1373
return ISC_R_SUCCESS;
1376
if (state -> link_to_peer) {
1377
errmsg = "already connected";
1378
reason = FTR_DUP_CONNECTION;
1382
if ((cur_time > link -> imsg -> time &&
1383
cur_time - link -> imsg -> time > 60) ||
1384
(cur_time < link -> imsg -> time &&
1385
link -> imsg -> time - cur_time > 60)) {
1386
errmsg = "time offset too large";
1387
reason = FTR_TIMEMISMATCH;
1391
dhcp_failover_link_reference (&state -> link_to_peer,
1394
/* XXX This is probably the right thing to do, but
1395
XXX for release three, to make the smallest possible
1396
XXX change, we are doing this when the peer state
1397
XXX changes instead. */
1398
if (state -> me.state == startup)
1399
dhcp_failover_set_state (state,
1400
state -> saved_state);
1403
dhcp_failover_send_state (state);
1405
if (link -> imsg -> options_present & FTB_MAX_UNACKED)
1406
state -> partner.max_flying_updates =
1407
link -> imsg -> max_unacked;
1408
if (link -> imsg -> options_present & FTB_RECEIVE_TIMER)
1409
state -> partner.max_response_delay =
1410
link -> imsg -> receive_timer;
1411
#if defined (DEBUG_FAILOVER_CONTACT_TIMING)
1412
log_info ("add_timeout +%d %s",
1413
(int)state -> partner.max_response_delay / 3,
1414
"dhcp_failover_send_contact");
1416
tv . tv_sec = cur_time +
1417
(int)state -> partner.max_response_delay / 3;
1420
dhcp_failover_send_contact, state,
1421
(tvref_t)dhcp_failover_state_reference,
1422
(tvunref_t)dhcp_failover_state_dereference);
1423
#if defined (DEBUG_FAILOVER_CONTACT_TIMING)
1424
log_info ("add_timeout +%d %s",
1425
(int)state -> me.max_response_delay,
1426
"dhcp_failover_timeout");
1428
tv . tv_sec = cur_time +
1429
(int)state -> me.max_response_delay;
1432
dhcp_failover_timeout, state,
1433
(tvref_t)dhcp_failover_state_reference,
1434
(tvunref_t)dhcp_failover_state_dereference);
1435
} else if (link -> imsg -> type == FTM_DISCONNECT) {
1436
if (link -> imsg -> reject_reason) {
1437
log_error ("Failover DISCONNECT from %s: %s",
1438
state ? state->name : "unknown",
1439
(dhcp_failover_reject_reason_print
1440
(link -> imsg -> reject_reason)));
1442
omapi_disconnect (link -> outer, 1);
1443
} else if (link -> imsg -> type == FTM_BNDUPD) {
1444
dhcp_failover_process_bind_update (state,
1446
} else if (link -> imsg -> type == FTM_BNDACK) {
1447
dhcp_failover_process_bind_ack (state, link -> imsg);
1448
} else if (link -> imsg -> type == FTM_UPDREQ) {
1449
dhcp_failover_process_update_request (state,
1451
} else if (link -> imsg -> type == FTM_UPDREQALL) {
1452
dhcp_failover_process_update_request_all
1453
(state, link -> imsg);
1454
} else if (link -> imsg -> type == FTM_UPDDONE) {
1455
dhcp_failover_process_update_done (state,
1457
} else if (link -> imsg -> type == FTM_POOLREQ) {
1458
dhcp_failover_pool_reqbalance(state);
1459
} else if (link -> imsg -> type == FTM_POOLRESP) {
1460
log_info ("pool response: %ld leases",
1462
link -> imsg -> addresses_transferred);
1463
} else if (link -> imsg -> type == FTM_STATE) {
1464
dhcp_failover_peer_state_changed (state,
1468
/* Add a timeout so that if the partner doesn't send
1469
another message for the maximum transmit idle time
1470
plus a grace of one second, we close the
1472
if (state -> link_to_peer &&
1473
state -> link_to_peer == link &&
1474
state -> link_to_peer -> state != dhcp_flink_disconnected)
1476
#if defined (DEBUG_FAILOVER_CONTACT_TIMING)
1477
log_info ("add_timeout +%d %s",
1478
(int)state -> me.max_response_delay,
1479
"dhcp_failover_timeout");
1481
tv . tv_sec = cur_time +
1482
(int)state -> me.max_response_delay;
1485
dhcp_failover_timeout, state,
1486
(tvref_t)dhcp_failover_state_reference,
1487
(tvunref_t)dhcp_failover_state_dereference);
1492
/* Handle all the events we care about... */
1493
return ISC_R_SUCCESS;
1496
/*
 * Apply a named event to the local side of the failover state machine.
 *
 * state - failover state object to act on.
 * name  - event name; the values tested here are "disconnect", "connect",
 *         "startup" and "connect-timeout".
 *
 * "disconnect":
 *   When a link to the peer exists, the loss is logged and the link (plus
 *   the state reference held through link->state_object) is released.  The
 *   send_contact, timeout and startup_timeout timers are cancelled.  The
 *   next state is then chosen by a switch on the current state, using
 *   saved_state in place of me.state while me.state is startup.
 * "connect":
 *   A switch on me.state; for example communications_interrupted moves to
 *   normal and sends pending updates, and resolution_interrupted moves to
 *   potential_conflict.
 * "startup":
 *   Enters the startup state and returns ISC_R_SUCCESS.
 * "connect-timeout":
 *   A switch on me.state that either leaves the state alone or moves to
 *   communications_interrupted or resolution_interrupted.
 *
 * Where a transition is made the result of dhcp_failover_set_state() is
 * returned; paths that need no transition return ISC_R_SUCCESS; the final
 * fall-through return is DHCP_R_INVALIDARG.  Unexpected states reach
 * log_fatal().
 */
isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *state,
1499
isc_result_t status;
1501
/* XXX Check these state transitions against the spec! */
1502
if (!strcmp (name, "disconnect")) {
1503
if (state -> link_to_peer) {
1504
log_info ("peer %s: disconnected", state -> name);
1505
if (state -> link_to_peer -> state_object)
1506
dhcp_failover_state_dereference
1507
(&state -> link_to_peer -> state_object, MDL);
1508
dhcp_failover_link_dereference (&state -> link_to_peer,
1511
cancel_timeout (dhcp_failover_send_contact, state);
1512
cancel_timeout (dhcp_failover_timeout, state);
1513
cancel_timeout (dhcp_failover_startup_timeout, state);
1515
switch (state -> me.state == startup ?
1516
state -> saved_state : state -> me.state) {
1517
/* In these situations, we remain in the current
1518
* state, or if in startup enter those states.
1520
case communications_interrupted:
1527
case resolution_interrupted:
1529
/* Already in the right state? */
1530
if (state -> me.state == startup)
1531
return (dhcp_failover_set_state
1532
(state, state -> saved_state));
1533
return ISC_R_SUCCESS;
1535
case potential_conflict:
1536
return dhcp_failover_set_state
1537
(state, resolution_interrupted);
1540
return dhcp_failover_set_state
1541
(state, communications_interrupted);
1544
return dhcp_failover_set_state
1545
(state, resolution_interrupted);
1548
log_fatal("Impossible case at %s:%d.", MDL);
1549
break; /* can't happen. */
1551
} else if (!strcmp (name, "connect")) {
1552
switch (state -> me.state) {
1553
case communications_interrupted:
1554
status = dhcp_failover_set_state (state, normal);
1555
dhcp_failover_send_updates (state);
1558
case resolution_interrupted:
1559
return dhcp_failover_set_state (state,
1560
potential_conflict);
1564
case potential_conflict:
1573
return dhcp_failover_send_state (state);
1576
log_fatal("Impossible case at %s:%d.", MDL);
1579
} else if (!strcmp (name, "startup")) {
1580
dhcp_failover_set_state (state, startup);
1581
return ISC_R_SUCCESS;
1582
} else if (!strcmp (name, "connect-timeout")) {
1583
switch (state -> me.state) {
1584
case communications_interrupted:
1586
case resolution_interrupted:
1591
return ISC_R_SUCCESS;
1598
return dhcp_failover_set_state
1599
(state, communications_interrupted);
1601
case potential_conflict:
1602
return dhcp_failover_set_state
1603
(state, resolution_interrupted);
1606
log_fatal("Impossible case at %s:%d.", MDL);
1610
return DHCP_R_INVALIDARG;
1613
/*
 * Recompute state->service_state, and the explanatory text state->nrr,
 * from the local and partner failover states.
 *
 * A first switch on me.state assigns one of not_responding,
 * service_partner_down, cooperating, not_cooperating or service_startup.
 * Every not_responding assignment, and the service_startup one, also sets
 * nrr to a short parenthesised reason such as " (recovering)" or
 * " (startup)".  An unexpected local state reaches log_fatal().
 *
 * If the result is anything other than not_responding, a second switch on
 * partner.state may still force not_responding, with an nrr text beginning
 * "(peer demands: ...".
 *
 * The final return is ISC_R_SUCCESS.
 */
isc_result_t dhcp_failover_set_service_state (dhcp_failover_state_t *state)
1615
switch (state -> me.state) {
1617
state -> service_state = not_responding;
1618
state -> nrr = " (my state unknown)";
1622
state -> service_state = service_partner_down;
1627
state -> service_state = cooperating;
1631
case communications_interrupted:
1632
state -> service_state = not_cooperating;
1636
case resolution_interrupted:
1637
case potential_conflict:
1639
state -> service_state = not_responding;
1640
state -> nrr = " (resolving conflicts)";
1644
state -> service_state = not_responding;
1645
state -> nrr = " (recovering)";
1649
state -> service_state = not_responding;
1650
state -> nrr = " (shut down)";
1654
state -> service_state = not_responding;
1655
state -> nrr = " (paused)";
1659
state -> service_state = not_responding;
1660
state -> nrr = " (recover wait)";
1664
state -> service_state = not_responding;
1665
state -> nrr = " (recover done)";
1669
state -> service_state = service_startup;
1670
state -> nrr = " (startup)";
1674
log_fatal("Impossible case at %s:%d.\n", MDL);
1678
/* Some peer states can require us not to respond, even if our
1680
/* XXX hm. I suspect this isn't true anymore. */
1681
if (state -> service_state != not_responding) {
1682
switch (state -> partner.state) {
1684
state -> service_state = not_responding;
1685
state -> nrr = " (peer demands: recovering)";
1688
case potential_conflict:
1690
case resolution_interrupted:
1691
state -> service_state = not_responding;
1692
state -> nrr = " (peer demands: resolving conflicts)";
1695
/* Other peer states don't affect our behaviour. */
1701
return ISC_R_SUCCESS;
1704
/*
 * Move the local server to new_state, record the change and carry out the
 * work that goes with entering the new state.
 *
 * state     - failover state object to change.
 * new_state - state to enter.
 *
 * Sequence visible here:
 *   1. Depending on the current me.state, a non-empty ack queue is put back
 *      in front of the update queue: ON_ACK_QUEUE is cleared on each lease,
 *      the queues are spliced, cur_unacked_updates is reset to 0 and the
 *      keepalive timeout is cancelled.
 *   2. me.state and me.stos are saved, then changed tentatively.  stos is
 *      set to cur_time unless the new state is recover_wait or either the
 *      old or the new state is startup.  When moving from shut_down to
 *      recover with the partner in partner_down and both queues empty, stos
 *      is set to cur_time - mclt.  On entry to startup the previous state
 *      is kept in state->saved_state.
 *   3. The change is persisted with write_failover_state() and
 *      commit_leases(); if either fails, me.state and me.stos are restored
 *      and ISC_R_IOERROR is returned.
 *   4. The move is logged, the startup timeout is cancelled when leaving
 *      startup, the auto_partner_down timeout is cancelled, the service
 *      state is recomputed and, when a link exists, the new state is sent
 *      to the peer.
 *   5. A switch on new_state performs entry actions, among them: an
 *      auto_partner_down timeout for communications_interrupted when
 *      state->auto_partner_down is non-zero; pool balancing, update queue
 *      generation and sending of updates; an update request from the
 *      primary for potential_conflict; a 15 second
 *      dhcp_failover_startup_timeout; a dhcp_failover_recover_done timeout
 *      at stos + mclt (or an immediate call when that time has passed); an
 *      update-request-all when a link exists; and, for pools belonging to
 *      this peer, setting tsfp to stos + mclt on expired leases and adding
 *      a pool_timer timeout.
 *
 * The final return is ISC_R_SUCCESS.
 *
 * NOTE(review): under DEBUG_FAILOVER_TIMING the message logged next to the
 * dhcp_failover_recover_done timeout names "dhcp_failover_startup_timeout",
 * and the pool timer message prints cur_time - p->next_event_time - confirm
 * whether both are intended.
 */
isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state,
1705
enum failover_state new_state)
1707
enum failover_state saved_state;
1710
struct shared_network *s;
1714
/* If we're in certain states where we're sending updates, and the peer
1715
* state changes, we need to re-schedule any pending updates just to
1716
* be on the safe side. This results in retransmission.
1718
switch (state -> me.state) {
1720
case potential_conflict:
1722
if (state -> ack_queue_tail) {
1725
/* Zap the flags. */
1726
for (lp = state -> ack_queue_head; lp; lp = lp -> next_pending)
1727
lp -> flags = ((lp -> flags & ~ON_ACK_QUEUE) |
1730
/* Now hook the ack queue to the beginning of the update
1732
if (state -> update_queue_head) {
1733
lease_reference (&state -> ack_queue_tail -> next_pending,
1734
state -> update_queue_head, MDL);
1735
lease_dereference (&state -> update_queue_head, MDL);
1737
lease_reference (&state -> update_queue_head,
1738
state -> ack_queue_head, MDL);
1739
if (!state -> update_queue_tail) {
1740
#if defined (POINTER_DEBUG)
1741
if (state -> ack_queue_tail -> next_pending) {
1742
log_error ("next pending on ack queue tail.");
1746
lease_reference (&state -> update_queue_tail,
1747
state -> ack_queue_tail, MDL);
1749
lease_dereference (&state -> ack_queue_tail, MDL);
1750
lease_dereference (&state -> ack_queue_head, MDL);
1751
state -> cur_unacked_updates = 0;
1753
/* We will re-queue a timeout later, if applicable. */
1754
cancel_timeout (dhcp_failover_keepalive, state);
1761
/* Tentatively make the transition. */
1762
saved_state = state -> me.state;
1763
saved_stos = state -> me.stos;
1765
/* Keep the old stos if we're going into recover_wait or if we're
1766
coming into or out of startup. */
1767
if (new_state != recover_wait && new_state != startup &&
1768
saved_state != startup)
1769
state -> me.stos = cur_time;
1771
/* If we're in shutdown, peer is in partner_down, and we're moving
1772
to recover, we can skip waiting for MCLT to expire. This happens
1773
when a server is moved administratively into shutdown prior to
1774
actually shutting down. Of course, if there are any updates
1775
pending we can't actually do this. */
1776
if (new_state == recover && saved_state == shut_down &&
1777
state -> partner.state == partner_down &&
1778
!state -> update_queue_head && !state -> ack_queue_head)
1779
state -> me.stos = cur_time - state -> mclt;
1781
state -> me.state = new_state;
1782
if (new_state == startup && saved_state != startup)
1783
state -> saved_state = saved_state;
1785
/* If we can't record the new state, we can't make a state transition. */
1786
if (!write_failover_state (state) || !commit_leases ()) {
1787
log_error ("Unable to record current failover state for %s",
1789
state -> me.state = saved_state;
1790
state -> me.stos = saved_stos;
1791
return ISC_R_IOERROR;
1794
log_info ("failover peer %s: I move from %s to %s",
1795
state -> name, dhcp_failover_state_name_print (saved_state),
1796
dhcp_failover_state_name_print (state -> me.state));
1798
/* If we were in startup and we just left it, cancel the timeout. */
1799
if (new_state != startup && saved_state == startup)
1800
cancel_timeout (dhcp_failover_startup_timeout, state);
1803
* If the state changes for any reason, cancel 'delayed auto state
1804
* changes' (currently there is just the one).
1806
cancel_timeout(dhcp_failover_auto_partner_down, state);
1808
/* Set our service state. */
1809
dhcp_failover_set_service_state (state);
1811
/* Tell the peer about it. */
1812
if (state -> link_to_peer)
1813
dhcp_failover_send_state (state);
1815
switch (new_state) {
1816
case communications_interrupted:
1818
* There is an optional feature to automatically enter partner
1819
* down after a timer expires, upon entering comms-interrupted.
1820
* This feature is generally not safe except in specific
1823
* A zero value (also the default) disables it.
1825
if (state->auto_partner_down == 0)
1828
#if defined (DEBUG_FAILOVER_TIMING)
1829
log_info("add_timeout +%lu dhcp_failover_auto_partner_down",
1830
(unsigned long)state->auto_partner_down);
1832
tv.tv_sec = cur_time + state->auto_partner_down;
1834
add_timeout(&tv, dhcp_failover_auto_partner_down, state,
1835
(tvref_t)omapi_object_reference,
1836
(tvunref_t)omapi_object_dereference);
1840
/* Upon entering normal state, the server is expected to retransmit
1841
* all pending binding updates. This is a good opportunity to
1842
* rebalance the pool (potentially making new pending updates),
1843
* which also schedules the next pool rebalance.
1845
dhcp_failover_pool_balance(state);
1846
dhcp_failover_generate_update_queue(state, 0);
1848
if (state->update_queue_tail != NULL) {
1849
dhcp_failover_send_updates(state);
1850
log_info("Sending updates to %s.", state->name);
1855
case potential_conflict:
1856
if (state -> i_am == primary)
1857
dhcp_failover_send_update_request (state);
1861
#if defined (DEBUG_FAILOVER_TIMING)
1862
log_info ("add_timeout +15 %s",
1863
"dhcp_failover_startup_timeout");
1865
tv . tv_sec = cur_time + 15;
1868
dhcp_failover_startup_timeout,
1870
(tvref_t)omapi_object_reference,
1872
omapi_object_dereference);
1875
/* If we come back in recover_wait and there's still waiting
1876
to do, set a timeout. */
1878
if (state -> me.stos + state -> mclt > cur_time) {
1879
#if defined (DEBUG_FAILOVER_TIMING)
1880
log_info ("add_timeout +%d %s",
1882
state -> me.stos + state -> mclt),
1883
"dhcp_failover_startup_timeout");
1885
tv . tv_sec = (int)(state -> me.stos + state -> mclt);
1888
dhcp_failover_recover_done,
1890
(tvref_t)omapi_object_reference,
1892
omapi_object_dereference);
1894
dhcp_failover_recover_done (state);
1898
/* XXX: We're supposed to calculate if updreq or updreqall is
1899
* needed. In theory, we should only have to updreqall if we
1900
* are positive we lost our stable storage.
1902
if (state -> link_to_peer)
1903
dhcp_failover_send_update_request_all (state);
1907
/* For every expired lease, set a timeout for it to become free. */
1908
for (s = shared_networks; s; s = s -> next) {
1909
for (p = s -> pools; p; p = p -> next) {
1910
if (p -> failover_peer == state) {
1911
for (l = p->expired ; l ; l = l->next) {
1912
l->tsfp = state->me.stos + state->mclt;
1913
l->sort_time = (l->tsfp > l->ends) ?
1917
(p->expired->sort_time < p->next_event_time)) {
1919
p->next_event_time = p->expired->sort_time;
1920
#if defined (DEBUG_FAILOVER_TIMING)
1921
log_info ("add_timeout +%d %s",
1922
(int)(cur_time - p->next_event_time),
1925
tv.tv_sec = p->next_event_time;
1927
add_timeout(&tv, pool_timer, p,
1928
(tvref_t)pool_reference,
1929
(tvunref_t)pool_dereference);
1941
return ISC_R_SUCCESS;
1944
/*
 * React to a state report received from the failover partner.
 *
 * state - local failover state object.
 * msg   - received message; server_state is the partner's new state and
 *         the FTF_SERVER_STARTUP bit of server_flags is read into startupp.
 *
 * Sequence visible here:
 *   1. If the partner's state is unchanged and me.state is non-zero, a
 *      switch on me.state decides whether the report can be ignored
 *      (returning ISC_R_SUCCESS, in one case after restoring saved_state)
 *      or must still be processed, as it is for communications_interrupted
 *      and resolution_interrupted.
 *   2. partner.state is updated, the move is logged, and the state is
 *      persisted with write_failover_state() and commit_leases(); a failure
 *      there is logged only.
 *   3. new_state is validated by a switch; for an invalid value an error is
 *      logged, the local server is moved to shut_down and ISC_R_SUCCESS is
 *      returned.
 *   4. An outer switch on the local state (saved_state is used while
 *      me.state is startup) with inner switches on new_state performs the
 *      required local transitions through dhcp_failover_set_state() and,
 *      in places, sends update requests or pending updates.  Combinations
 *      regarded as invalid are logged and lead to shut_down, or to
 *      log_fatal().
 *   5. If the local server is still in startup and saved_state is not
 *      startup, it is moved to saved_state.
 *   6. The service state is recomputed.
 *
 * The final return is ISC_R_SUCCESS.
 */
isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *state,
1945
failover_message_t *msg)
1947
enum failover_state previous_state = state -> partner.state;
1948
enum failover_state new_state;
1951
new_state = msg -> server_state;
1952
startupp = (msg -> server_flags & FTF_SERVER_STARTUP) ? 1 : 0;
1954
if (state -> partner.state == new_state && state -> me.state) {
1955
switch (state -> me.state) {
1957
dhcp_failover_set_state (state, state -> saved_state);
1958
return ISC_R_SUCCESS;
1962
case potential_conflict:
1967
return ISC_R_SUCCESS;
1969
/* If we get a peer state change when we're
1970
disconnected, we always process it. */
1972
case communications_interrupted:
1973
case resolution_interrupted:
1979
log_fatal("Impossible case at %s:%d.", MDL);
1984
state -> partner.state = new_state;
1986
log_info ("failover peer %s: peer moves from %s to %s",
1988
dhcp_failover_state_name_print (previous_state),
1989
dhcp_failover_state_name_print (state -> partner.state));
1991
if (!write_failover_state (state) || !commit_leases ()) {
1992
/* This is bad, but it's not fatal. Of course, if we
1993
can't write to the lease database, we're not going to
1994
get much done anyway. */
1995
log_error ("Unable to record current failover state for %s",
1999
/* Quickly validate the new state as being one of the 13 known
2002
switch (new_state) {
2006
case communications_interrupted:
2008
case potential_conflict:
2013
case resolution_interrupted:
2019
log_error("failover peer %s: Invalid state: %d", state->name,
2021
dhcp_failover_set_state(state, shut_down);
2022
return ISC_R_SUCCESS;
2025
/* Do any state transitions that are required as a result of the
2026
peer's state transition. */
2028
switch (state -> me.state == startup ?
2029
state -> saved_state : state -> me.state) {
2031
switch (new_state) {
2033
dhcp_failover_state_pool_check (state);
2037
if (state -> me.state == startup)
2038
dhcp_failover_set_state (state, recover);
2040
dhcp_failover_set_state (state,
2041
potential_conflict);
2044
case potential_conflict:
2045
case resolution_interrupted:
2047
/* None of these transitions should ever occur. */
2048
log_error("Peer %s: Invalid state transition %s "
2049
"to %s.", state->name,
2050
dhcp_failover_state_name_print(previous_state),
2051
dhcp_failover_state_name_print(new_state));
2052
dhcp_failover_set_state (state, shut_down);
2057
dhcp_failover_set_state (state, partner_down);
2061
dhcp_failover_set_state (state,
2062
communications_interrupted);
2066
/* recover_wait, recover_done, unknown_state, startup,
2067
* communications_interrupted
2074
switch (new_state) {
2076
log_info ("failover peer %s: requesting %s",
2077
state -> name, "full update from peer");
2078
/* Don't send updreqall if we're really in the
2079
startup state, because that will result in two
2081
if (state -> me.state == recover)
2082
dhcp_failover_send_update_request_all (state);
2085
case potential_conflict:
2086
case resolution_interrupted:
2089
dhcp_failover_set_state (state, potential_conflict);
2093
case communications_interrupted:
2094
/* We're supposed to send an update request at this
2096
/* XXX we don't currently have code here to do any
2097
XXX clever detection of when we should send an
2098
XXX UPDREQALL message rather than an UPDREQ
2099
XXX message. What to do, what to do? */
2100
/* Currently when we enter recover state, no matter
2101
* the reason, we send an UPDREQALL. So, it makes
2102
* the most sense to stick to that until something
2104
* Furthermore, we only want to send the update
2105
* request if we are not in startup state.
2107
if (state -> me.state == recover)
2108
dhcp_failover_send_update_request_all (state);
2112
/* XXX We're not explicitly told what to do in this
2113
XXX case, but this transition is consistent with
2114
XXX what is elsewhere in the draft. */
2115
dhcp_failover_set_state (state, partner_down);
2118
/* We can't really do anything in this case. */
2120
/* paused, recover_done, recover_wait, unknown_state,
2127
case potential_conflict:
2128
switch (new_state) {
2130
/* This is an illegal transition. */
2131
log_error("Peer %s moves to normal during conflict "
2132
"resolution - panic, shutting down.",
2134
dhcp_failover_set_state(state, shut_down);
2138
if (previous_state == potential_conflict)
2139
dhcp_failover_send_update_request (state);
2141
log_error("Peer %s: Unexpected move to "
2142
"conflict-done.", state->name);
2147
case potential_conflict:
2149
case communications_interrupted:
2150
case resolution_interrupted:
2155
dhcp_failover_set_state (state, recover);
2159
dhcp_failover_set_state (state, partner_down);
2163
/* unknown_state, startup */
2169
switch (new_state) {
2172
dhcp_failover_set_state(state, new_state);
2176
log_fatal("Peer %s: Invalid attempt to move from %s "
2177
"to %s while local state is conflict-done.",
2179
dhcp_failover_state_name_print(previous_state),
2180
dhcp_failover_state_name_print(new_state));
2185
/* Take no action if other server is starting up. */
2189
switch (new_state) {
2190
/* This is where we should be. */
2196
dhcp_failover_set_state (state, normal);
2200
case potential_conflict:
2202
case communications_interrupted:
2203
case resolution_interrupted:
2205
dhcp_failover_set_state (state, potential_conflict);
2209
/* shut_down, paused, unknown_state, startup */
2214
case communications_interrupted:
2215
switch (new_state) {
2217
/* Stick with the status quo. */
2220
/* If we're in communications-interrupted and an
2221
amnesic peer connects, go to the partner_down
2222
state immediately. */
2224
dhcp_failover_set_state (state, partner_down);
2228
case communications_interrupted:
2231
/* XXX so we don't need to do this specially in
2232
XXX the CONNECT and CONNECTACK handlers. */
2233
dhcp_failover_send_updates (state);
2234
dhcp_failover_set_state (state, normal);
2237
case potential_conflict:
2239
case resolution_interrupted:
2241
dhcp_failover_set_state (state, potential_conflict);
2245
dhcp_failover_set_state (state, partner_down);
2249
/* unknown_state, startup */
2254
case resolution_interrupted:
2255
switch (new_state) {
2258
case potential_conflict:
2260
case communications_interrupted:
2261
case resolution_interrupted:
2265
dhcp_failover_set_state (state, potential_conflict);
2269
dhcp_failover_set_state (state, partner_down);
2273
/* paused, unknown_state, startup */
2278
/* Make no transitions while in recover_wait...just wait. */
2283
switch (new_state) {
2285
log_error("Both servers have entered recover-done!");
2287
dhcp_failover_set_state (state, normal);
2291
dhcp_failover_set_state (state, partner_down);
2295
/* potential_conflict, partner_down,
2296
* communications_interrupted, resolution_interrupted,
2297
* paused, recover, recover_wait, unknown_state,
2304
/* We are essentially dead in the water when we're in
2305
either shut_down or paused states, and do not do any
2306
automatic state transitions. */
2311
/* XXX: Shouldn't this be a fatal condition? */
2316
log_fatal("Impossible condition at %s:%d.", MDL);
2321
/* If we didn't make a transition out of startup as a result of
2322
the peer's state change, do it now as a result of the fact that
2323
we got a state change from the peer. */
2324
if (state -> me.state == startup && state -> saved_state != startup)
2325
dhcp_failover_set_state (state, state -> saved_state);
2327
/* For now, just set the service state based on the peer's state
2329
dhcp_failover_set_service_state (state);
2331
return ISC_R_SUCCESS;
2335
* Balance operation manual entry; startup, entrance to normal state. No
2336
* sense sending a POOLREQ at this stage; the peer is likely about to schedule
2337
* their own rebalance event upon entering normal themselves.
2340
/*
 * Run a pool balance immediately for the given failover state.
 *
 * Any pending dhcp_failover_pool_rebalance timeout for this state is
 * cancelled and sched_balance is cleared, then the common balancing routine
 * is called with a NULL sendreq pointer.  Its result is not used here.
 */
dhcp_failover_pool_balance(dhcp_failover_state_t *state)
2342
/* Cancel pending event. */
2343
cancel_timeout(dhcp_failover_pool_rebalance, state);
2344
state->sched_balance = 0;
2346
dhcp_failover_pool_dobalance(state, NULL);
2350
* Balance operation entry from timer event. Once per timer interval is
2351
* the only time we want to emit POOLREQs (asserting an interrupt in our
2355
/*
 * Pool balance entry point that takes the failover state as an untyped
 * pointer (failover_state is cast to dhcp_failover_state_t *).
 *
 * sched_balance is cleared, then the common balancing routine is run with
 * the address of a local sendreq flag that starts out ISC_FALSE.  A
 * non-zero result from that routine causes dhcp_failover_send_updates() to
 * be called.  POOLREQ messages are emitted through
 * dhcp_failover_send_poolreq().
 */
dhcp_failover_pool_rebalance(void *failover_state)
2357
dhcp_failover_state_t *state;
2358
isc_boolean_t sendreq = ISC_FALSE;
2360
state = (dhcp_failover_state_t *)failover_state;
2362
/* Clear scheduled event indicator. */
2363
state->sched_balance = 0;
2365
if (dhcp_failover_pool_dobalance(state, &sendreq))
2366
dhcp_failover_send_updates(state);
2369
dhcp_failover_send_poolreq(state);
2373
* Balance operation entry from POOLREQ protocol message. Do not permit a
2374
* POOLREQ to send back a POOLREQ. Ping pong.
2377
/*
 * Pool balance entry point used when the peer sends a POOLREQ.
 *
 * Any pending dhcp_failover_pool_rebalance timeout is cancelled and
 * sched_balance is cleared.  The common balancing routine is then run with
 * a NULL sendreq pointer, so it cannot ask for a POOLREQ in return.  The
 * value it returns (queued) is reported to the peer with
 * dhcp_failover_send_poolresp().  dhcp_failover_send_updates() pushes
 * queued updates out, and the "answering negatively" message is the log
 * entry for a request that could not be satisfied.
 */
dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state)
2381
/* Cancel pending event. */
2382
cancel_timeout(dhcp_failover_pool_rebalance, state);
2383
state->sched_balance = 0;
2385
queued = dhcp_failover_pool_dobalance(state, NULL);
2387
dhcp_failover_send_poolresp(state, queued);
2390
dhcp_failover_send_updates(state);
2392
log_info("peer %s: Got POOLREQ, answering negatively! "
2393
"Peer may be out of leases or database inconsistent.",
2398
* Do the meat of the work common to all forms of pool rebalance. If the
2399
* caller deems it appropriate to transmit POOLREQ messages, it can use the
2400
* sendreq pointer to pass in the address of a FALSE value which this function
2401
* will conditionally turn TRUE if a POOLREQ is determined to be necessary.
2402
* A NULL value may be passed, in which case no action is taken.
2405
dhcp_failover_pool_dobalance(dhcp_failover_state_t *state,
2406
isc_boolean_t *sendreq)
2408
int lts, total, thresh, hold, panic, pass;
2409
int leases_queued = 0;
2410
struct lease *lp = (struct lease *)0;
2411
struct lease *next = (struct lease *)0;
2412
struct shared_network *s;
2414
binding_state_t peer_lease_state;
2415
binding_state_t my_lease_state;
2417
int (*log_func)(const char *, ...);
2418
const char *result, *reqlog;
2420
if (state -> me.state != normal)
2423
state->last_balance = cur_time;
2425
for (s = shared_networks ; s ; s = s->next) {
2426
for (p = s->pools ; p ; p = p->next) {
2427
if (p->failover_peer != state)
2430
/* Right now we're giving the peer half of the free leases.
2431
If we have more leases than the peer (i.e., more than
2432
half), then the number of leases we have, less the number
2433
of leases the peer has, will be how many more leases we
2434
have than the peer has. So if we send half that number
2435
to the peer, we should be even. */
2436
if (p->failover_peer->i_am == primary) {
2437
lts = (p->free_leases - p->backup_leases) / 2;
2438
peer_lease_state = FTS_BACKUP;
2439
my_lease_state = FTS_FREE;
2442
lts = (p->backup_leases - p->free_leases) / 2;
2443
peer_lease_state = FTS_FREE;
2444
my_lease_state = FTS_BACKUP;
2448
total = p->backup_leases + p->free_leases;
2450
thresh = ((total * state->max_lease_misbalance) + 50) / 100;
2451
hold = ((total * state->max_lease_ownership) + 50) / 100;
2454
* If we need leases (so lts is negative) more than negative
2455
* double the thresh%, panic and send poolreq to hopefully wake
2456
* up the peer (but more likely the db is inconsistent). But,
2457
* if this comes out zero, switch to -1 so that the POOLREQ is
2458
* sent on lts == -2 rather than right away at -1.
2460
* Note that we do not subtract -1 from panic all the time
2461
* because thresh% and hold% may come out to the same number,
2462
* and that is correct operation...where thresh% and hold% are
2463
* both -1, we want to send poolreq when lts reaches -3. So,
2464
* "-3 < -2", lts < panic.
2466
panic = thresh * -2;
2471
if ((sendreq != NULL) && (lts < panic)) {
2472
reqlog = " (requesting peer rebalance!)";
2473
*sendreq = ISC_TRUE;
2477
log_info("balancing pool %lx %s total %d free %d "
2478
"backup %d lts %d max-own (+/-)%d%s",
2480
(p->shared_network ?
2481
p->shared_network->name : ""), p->lease_count,
2482
p->free_leases, p->backup_leases, lts, hold,
2485
/* In the first pass, try to allocate leases to the
2486
* peer which it would normally be responsible for (if
2487
* the lease has a hardware address or client-identifier,
2488
* and the load-balance-algorithm chooses the peer to
2489
* answer that address), up to a hold% excess in the peer's
2490
* favor. In the second pass, just send the oldest (first
2491
* on the list) leases up to a hold% excess in our favor.
2493
* This could make for additional pool rebalance
2494
* events, but preserving MAC possession should be
2498
lease_reference(&lp, *lq, MDL);
2502
lease_dereference(&next, MDL);
2504
lease_reference(&next, lp->next, MDL);
2507
* Stop if the pool is 'balanced enough.'
2509
* The pool is balanced enough if:
2511
* 1) We're on the first run through and the peer has
2512
* its fair share of leases already (lts reaches
2514
* 2) We're on the second run through, we are shifting
2515
* never-used leases, and there is a perfectly even
2516
* balance (lts reaches zero).
2517
* 3) Second run through, we are shifting previously
2518
* used leases, and the local system has its fair
2519
* share but no more (lts reaches hold).
2521
* Note that this is implemented below in 3,2,1 order.
2531
} else if (lts <= -hold)
2534
if (pass || peer_wants_lease(lp)) {
2537
lp->next_binding_state = peer_lease_state;
2538
lp->tstp = cur_time;
2539
lp->starts = cur_time;
2541
if (!supersede_lease(lp, NULL, 0, 1, 0) ||
2543
log_error("can't commit lease %s on "
2544
"giveaway", piaddr(lp->ip_addr));
2547
lease_dereference(&lp, MDL);
2549
lease_reference(&lp, next, MDL);
2552
lease_reference(&lp, *lq, MDL);
2557
lease_dereference(&next, MDL);
2559
lease_dereference(&lp, MDL);
2562
result = "IMBALANCED";
2563
log_func = log_error;
2565
result = "balanced";
2566
log_func = log_info;
2569
log_func("%s pool %lx %s total %d free %d backup %d "
2570
"lts %d max-misbal %d", result, (unsigned long)p,
2571
(p->shared_network ?
2572
p->shared_network->name : ""), p->lease_count,
2573
p->free_leases, p->backup_leases, lts, thresh);
2575
/* Recalculate next rebalance event timer. */
2576
dhcp_failover_pool_check(p);
2583
return leases_queued;
2586
/* dhcp_failover_pool_check: Called whenever FREE or BACKUP leases change
2587
* states, on both servers. Check the scheduled time to rebalance the pool
2588
* and lower it if applicable.
2591
dhcp_failover_pool_check(struct pool *pool)
2593
dhcp_failover_state_t *peer;
2597
peer = pool->failover_peer;
2599
if(!peer || peer->me.state != normal)
2602
/* Estimate the time left until lease exhaustion.
2603
* The first lease on the backup or free lists is also the oldest
2604
* lease. It is reasonable to guess that it will take at least
2605
* as much time for a pool to run out of leases, as the present
2606
* age of the oldest lease (seconds since it expired).
2608
* Note that this isn't so sane of an assumption if the oldest
2609
* lease is a virgin (ends = 0), we wind up sending this against
2610
* the max_balance bounds check.
2612
if(pool->free && pool->free->ends < cur_time)
2613
est1 = cur_time - pool->free->ends;
2617
if(pool->backup && pool->backup->ends < cur_time)
2618
est2 = cur_time - pool->backup->ends;
2622
/* We don't want to schedule rebalance for when we think we'll run
2623
* out of leases, we want to schedule the rebalance for when we think
2624
* the disparity will be 'large enough' to warrant action.
2626
est1 = ((est1 * peer->max_lease_misbalance) + 50) / 100;
2627
est2 = ((est2 * peer->max_lease_misbalance) + 50) / 100;
2629
/* Guess when the local system will begin issuing POOLREQ panic
2630
* attacks because "max_lease_misbalance*2" has been exceeded.
2632
if(peer->i_am == primary)
2637
/* Select the smallest time. */
2641
/* Bounded by the maximum configured value. */
2642
if(est1 > peer->max_balance)
2643
est1 = peer->max_balance;
2645
/* Project this time into the future. */
2648
/* Do not move the time down under the minimum. */
2649
est2 = peer->last_balance + peer->min_balance;
2650
if(peer->last_balance && (est1 < est2))
2653
/* Introduce a random delay. */
2654
est1 += random() % 5;
2656
/* Do not move the time forward, or reset to the same time. */
2657
if(peer->sched_balance) {
2658
if (est1 >= peer->sched_balance)
2661
/* We are about to schedule the time down, cancel the
2664
cancel_timeout(dhcp_failover_pool_rebalance, peer);
2667
/* The time is different, and lower, use it. */
2668
peer->sched_balance = est1;
2670
#if defined(DEBUG_FAILOVER_TIMING)
2671
log_info("add_timeout +%d dhcp_failover_pool_rebalance",
2672
(int)(est1 - cur_time));
2676
add_timeout(&tv, dhcp_failover_pool_rebalance, peer,
2677
(tvref_t)dhcp_failover_state_reference,
2678
(tvunref_t)dhcp_failover_state_dereference);
2681
int dhcp_failover_state_pool_check (dhcp_failover_state_t *state)
2683
struct shared_network *s;
2686
for (s = shared_networks; s; s = s -> next) {
2687
for (p = s -> pools; p; p = p -> next) {
2688
if (p -> failover_peer != state)
2690
dhcp_failover_pool_check (p);
2696
isc_result_t dhcp_failover_send_updates (dhcp_failover_state_t *state)
2698
struct lease *lp = (struct lease *)0;
2699
isc_result_t status;
2701
/* Can't update peer if we're not talking to it! */
2702
if (!state -> link_to_peer)
2703
return ISC_R_SUCCESS;
2705
/* If there are acks pending, transmit them prior to potentially
2706
* sending new updates for the same lease.
2708
if (state->toack_queue_head != NULL)
2709
dhcp_failover_send_acks(state);
2711
while ((state -> partner.max_flying_updates >
2712
state -> cur_unacked_updates) && state -> update_queue_head) {
2713
/* Grab the head of the update queue. */
2714
lease_reference (&lp, state -> update_queue_head, MDL);
2716
/* Send the update to the peer. */
2717
status = dhcp_failover_send_bind_update (state, lp);
2718
if (status != ISC_R_SUCCESS) {
2719
lease_dereference (&lp, MDL);
2722
lp -> flags &= ~ON_UPDATE_QUEUE;
2724
/* Take it off the head of the update queue and put the next
2725
item in the update queue at the head. */
2726
lease_dereference (&state -> update_queue_head, MDL);
2727
if (lp -> next_pending) {
2728
lease_reference (&state -> update_queue_head,
2729
lp -> next_pending, MDL);
2730
lease_dereference (&lp -> next_pending, MDL);
2732
lease_dereference (&state -> update_queue_tail, MDL);
2735
if (state -> ack_queue_head) {
2737
(&state -> ack_queue_tail -> next_pending,
2739
lease_dereference (&state -> ack_queue_tail, MDL);
2741
lease_reference (&state -> ack_queue_head, lp, MDL);
2743
#if defined (POINTER_DEBUG)
2744
if (lp -> next_pending) {
2745
log_error ("ack_queue_tail: lp -> next_pending");
2749
lease_reference (&state -> ack_queue_tail, lp, MDL);
2750
lp -> flags |= ON_ACK_QUEUE;
2751
lease_dereference (&lp, MDL);
2753
/* Count the object as an unacked update. */
2754
state -> cur_unacked_updates++;
2756
return ISC_R_SUCCESS;
2759
/* Queue an update for a lease. Always returns 1 at this point - it's
2760
not an error for this to be called on a lease for which there's no
2763
int dhcp_failover_queue_update (struct lease *lease, int immediate)
2765
dhcp_failover_state_t *state;
2767
if (!lease -> pool ||
2768
!lease -> pool -> failover_peer)
2771
/* If it's already on the update queue, leave it there. */
2772
if (lease -> flags & ON_UPDATE_QUEUE)
2775
/* Get the failover state structure for this lease. */
2776
state = lease -> pool -> failover_peer;
2778
/* If it's on the ack queue, take it off. */
2779
if (lease -> flags & ON_ACK_QUEUE)
2780
dhcp_failover_ack_queue_remove (state, lease);
2782
if (state -> update_queue_head) {
2783
lease_reference (&state -> update_queue_tail -> next_pending,
2785
lease_dereference (&state -> update_queue_tail, MDL);
2787
lease_reference (&state -> update_queue_head, lease, MDL);
2789
#if defined (POINTER_DEBUG)
2790
if (lease -> next_pending) {
2791
log_error ("next pending on update queue lease.");
2792
#if defined (DEBUG_RC_HISTORY)
2793
dump_rc_history (lease);
2798
lease_reference (&state -> update_queue_tail, lease, MDL);
2799
lease -> flags |= ON_UPDATE_QUEUE;
2801
dhcp_failover_send_updates (state);
2805
int dhcp_failover_send_acks (dhcp_failover_state_t *state)
2807
failover_message_t *msg = (failover_message_t *)0;
2809
/* Must commit all leases prior to acking them. */
2810
if (!commit_leases ())
2813
while (state -> toack_queue_head) {
2814
failover_message_reference
2815
(&msg, state -> toack_queue_head, MDL);
2816
failover_message_dereference
2817
(&state -> toack_queue_head, MDL);
2819
failover_message_reference
2820
(&state -> toack_queue_head, msg -> next, MDL);
2823
dhcp_failover_send_bind_ack (state, msg, 0, (const char *)0);
2825
failover_message_dereference (&msg, MDL);
2828
if (state -> toack_queue_tail)
2829
failover_message_dereference (&state -> toack_queue_tail, MDL);
2830
state -> pending_acks = 0;
2835
void dhcp_failover_toack_queue_timeout (void *vs)
2837
dhcp_failover_state_t *state = vs;
2839
#if defined (DEBUG_FAILOVER_TIMING)
2840
log_info ("dhcp_failover_toack_queue_timeout");
2843
dhcp_failover_send_acks (state);
2846
/* Queue an ack for a message. There is currently no way to queue a
2847
negative ack -- these need to be sent directly. */
2849
int dhcp_failover_queue_ack (dhcp_failover_state_t *state,
2850
failover_message_t *msg)
2854
if (state -> toack_queue_head) {
2855
failover_message_reference
2856
(&state -> toack_queue_tail -> next, msg, MDL);
2857
failover_message_dereference (&state -> toack_queue_tail, MDL);
2859
failover_message_reference (&state -> toack_queue_head,
2862
failover_message_reference (&state -> toack_queue_tail, msg, MDL);
2864
state -> pending_acks++;
2866
/* Flush the toack queue whenever we exceed half the number of
2867
allowed unacked updates. */
2868
if (state -> pending_acks >= state -> partner.max_flying_updates / 2) {
2869
dhcp_failover_send_acks (state);
2872
/* Schedule a timeout to flush the ack queue. */
2873
if (state -> pending_acks > 0) {
2874
#if defined (DEBUG_FAILOVER_TIMING)
2875
log_info ("add_timeout +2 %s",
2876
"dhcp_failover_toack_queue_timeout");
2878
tv . tv_sec = cur_time + 2;
2881
dhcp_failover_toack_queue_timeout, state,
2882
(tvref_t)dhcp_failover_state_reference,
2883
(tvunref_t)dhcp_failover_state_dereference);
2889
void dhcp_failover_ack_queue_remove (dhcp_failover_state_t *state,
2890
struct lease *lease)
2894
if (!(lease -> flags & ON_ACK_QUEUE))
2897
if (state -> ack_queue_head == lease) {
2898
lease_dereference (&state -> ack_queue_head, MDL);
2899
if (lease -> next_pending) {
2900
lease_reference (&state -> ack_queue_head,
2901
lease -> next_pending, MDL);
2902
lease_dereference (&lease -> next_pending, MDL);
2904
lease_dereference (&state -> ack_queue_tail, MDL);
2907
for (lp = state -> ack_queue_head;
2908
lp && lp -> next_pending != lease;
2909
lp = lp -> next_pending)
2915
lease_dereference (&lp -> next_pending, MDL);
2916
if (lease -> next_pending) {
2917
lease_reference (&lp -> next_pending,
2918
lease -> next_pending, MDL);
2919
lease_dereference (&lease -> next_pending, MDL);
2921
lease_dereference (&state -> ack_queue_tail, MDL);
2922
if (lp -> next_pending) {
2923
log_error ("state -> ack_queue_tail");
2926
lease_reference (&state -> ack_queue_tail, lp, MDL);
2930
lease -> flags &= ~ON_ACK_QUEUE;
2931
/* Multiple acks on one XID is an error and may cause badness. */
2932
lease->last_xid = 0;
2933
/* XXX: this violates draft-failover. We can't send another
2934
* update just because we forgot about an old one that hasn't
2937
state -> cur_unacked_updates--;
2940
* When updating leases as a result of an ack, we defer the commit
2941
* for performance reasons. When there are no more acks pending,
2944
if (state -> cur_unacked_updates == 0) {
2949
isc_result_t dhcp_failover_state_set_value (omapi_object_t *h,
2951
omapi_data_string_t *name,
2952
omapi_typed_data_t *value)
2954
isc_result_t status;
2956
if (h -> type != dhcp_type_failover_state)
2957
return DHCP_R_INVALIDARG;
2959
/* This list of successful returns is completely wrong, but the
2960
fastest way to make dhcpctl do something vaguely sane when
2961
you try to change the local state. */
2963
if (!omapi_ds_strcmp (name, "name")) {
2964
return ISC_R_SUCCESS;
2965
} else if (!omapi_ds_strcmp (name, "partner-address")) {
2966
return ISC_R_SUCCESS;
2967
} else if (!omapi_ds_strcmp (name, "local-address")) {
2968
return ISC_R_SUCCESS;
2969
} else if (!omapi_ds_strcmp (name, "partner-port")) {
2970
return ISC_R_SUCCESS;
2971
} else if (!omapi_ds_strcmp (name, "local-port")) {
2972
return ISC_R_SUCCESS;
2973
} else if (!omapi_ds_strcmp (name, "max-outstanding-updates")) {
2974
return ISC_R_SUCCESS;
2975
} else if (!omapi_ds_strcmp (name, "mclt")) {
2976
return ISC_R_SUCCESS;
2977
} else if (!omapi_ds_strcmp (name, "load-balance-max-secs")) {
2978
return ISC_R_SUCCESS;
2979
} else if (!omapi_ds_strcmp (name, "load-balance-hba")) {
2980
return ISC_R_SUCCESS;
2981
} else if (!omapi_ds_strcmp (name, "partner-state")) {
2982
return ISC_R_SUCCESS;
2983
} else if (!omapi_ds_strcmp (name, "local-state")) {
2985
status = omapi_get_int_value (&l, value);
2986
if (status != ISC_R_SUCCESS)
2988
return dhcp_failover_set_state ((dhcp_failover_state_t *)h, l);
2989
} else if (!omapi_ds_strcmp (name, "partner-stos")) {
2990
return ISC_R_SUCCESS;
2991
} else if (!omapi_ds_strcmp (name, "local-stos")) {
2992
return ISC_R_SUCCESS;
2993
} else if (!omapi_ds_strcmp (name, "hierarchy")) {
2994
return ISC_R_SUCCESS;
2995
} else if (!omapi_ds_strcmp (name, "last-packet-sent")) {
2996
return ISC_R_SUCCESS;
2997
} else if (!omapi_ds_strcmp (name, "last-timestamp-received")) {
2998
return ISC_R_SUCCESS;
2999
} else if (!omapi_ds_strcmp (name, "skew")) {
3000
return ISC_R_SUCCESS;
3001
} else if (!omapi_ds_strcmp (name, "max-response-delay")) {
3002
return ISC_R_SUCCESS;
3003
} else if (!omapi_ds_strcmp (name, "cur-unacked-updates")) {
3004
return ISC_R_SUCCESS;
3007
if (h -> inner && h -> inner -> type -> set_value)
3008
return (*(h -> inner -> type -> set_value))
3009
(h -> inner, id, name, value);
3010
return ISC_R_NOTFOUND;
/* Keepalive timeout callback.  Intentionally empty: no statements of this
   function are present in the file. */

void dhcp_failover_keepalive (void *vs)
{
}
3017
void dhcp_failover_reconnect (void *vs)
3019
dhcp_failover_state_t *state = vs;
3020
isc_result_t status;
3023
#if defined (DEBUG_FAILOVER_TIMING)
3024
log_info ("dhcp_failover_reconnect");
3026
/* If we already connected the other way, let the connection
3027
recovery code initiate any retry that may be required. */
3028
if (state -> link_to_peer)
3031
status = dhcp_failover_link_initiate ((omapi_object_t *)state);
3032
if (status != ISC_R_SUCCESS && status != DHCP_R_INCOMPLETE) {
3033
log_info ("failover peer %s: %s", state -> name,
3034
isc_result_totext (status));
3035
#if defined (DEBUG_FAILOVER_TIMING)
3036
log_info("add_timeout +90 dhcp_failover_reconnect");
3038
tv . tv_sec = cur_time + 90;
3040
add_timeout(&tv, dhcp_failover_reconnect, state,
3041
(tvref_t)dhcp_failover_state_reference,
3042
(tvunref_t)dhcp_failover_state_dereference);
3046
void dhcp_failover_startup_timeout (void *vs)
3048
dhcp_failover_state_t *state = vs;
3050
#if defined (DEBUG_FAILOVER_TIMING)
3051
log_info ("dhcp_failover_startup_timeout");
3054
dhcp_failover_state_transition (state, "disconnect");
3057
void dhcp_failover_link_startup_timeout (void *vl)
3059
dhcp_failover_link_t *link = vl;
3062
for (p = (omapi_object_t *)link; p -> inner; p = p -> inner)
3064
for (; p; p = p -> outer)
3065
if (p -> type == omapi_type_connection)
3068
log_info ("failover: link startup timeout");
3069
omapi_disconnect (p, 1);
3073
void dhcp_failover_listener_restart (void *vs)
3075
dhcp_failover_state_t *state = vs;
3076
isc_result_t status;
3079
#if defined (DEBUG_FAILOVER_TIMING)
3080
log_info ("dhcp_failover_listener_restart");
3083
status = dhcp_failover_listen ((omapi_object_t *)state);
3084
if (status != ISC_R_SUCCESS) {
3085
log_info ("failover peer %s: %s", state -> name,
3086
isc_result_totext (status));
3087
#if defined (DEBUG_FAILOVER_TIMING)
3088
log_info ("add_timeout +90 %s",
3089
"dhcp_failover_listener_restart");
3091
tv . tv_sec = cur_time + 90;
3094
dhcp_failover_listener_restart, state,
3095
(tvref_t)dhcp_failover_state_reference,
3096
(tvunref_t)dhcp_failover_state_dereference);
3101
dhcp_failover_auto_partner_down(void *vs)
3103
dhcp_failover_state_t *state = vs;
3105
#if defined (DEBUG_FAILOVER_TIMING)
3106
log_info("dhcp_failover_auto_partner_down");
3109
dhcp_failover_set_state(state, partner_down);
3112
isc_result_t dhcp_failover_state_get_value (omapi_object_t *h,
3114
omapi_data_string_t *name,
3115
omapi_value_t **value)
3117
dhcp_failover_state_t *s;
3118
struct option_cache *oc;
3119
struct data_string ds;
3120
isc_result_t status;
3122
if (h -> type != dhcp_type_failover_state)
3123
return DHCP_R_INVALIDARG;
3124
s = (dhcp_failover_state_t *)h;
3126
if (!omapi_ds_strcmp (name, "name")) {
3128
return omapi_make_string_value (value,
3129
name, s -> name, MDL);
3130
return ISC_R_NOTFOUND;
3131
} else if (!omapi_ds_strcmp (name, "partner-address")) {
3132
oc = s -> partner.address;
3134
memset (&ds, 0, sizeof ds);
3135
if (!evaluate_option_cache (&ds, (struct packet *)0,
3137
(struct client_state *)0,
3138
(struct option_state *)0,
3139
(struct option_state *)0,
3140
&global_scope, oc, MDL)) {
3141
return ISC_R_NOTFOUND;
3143
status = omapi_make_const_value (value,
3144
name, ds.data, ds.len, MDL);
3145
/* Disgusting kludge: */
3146
if (oc == s -> me.address && !s -> server_identifier.len)
3147
data_string_copy (&s -> server_identifier, &ds, MDL);
3148
data_string_forget (&ds, MDL);
3150
} else if (!omapi_ds_strcmp (name, "local-address")) {
3151
oc = s -> me.address;
3153
} else if (!omapi_ds_strcmp (name, "partner-port")) {
3154
return omapi_make_int_value (value, name,
3155
s -> partner.port, MDL);
3156
} else if (!omapi_ds_strcmp (name, "local-port")) {
3157
return omapi_make_int_value (value,
3158
name, s -> me.port, MDL);
3159
} else if (!omapi_ds_strcmp (name, "max-outstanding-updates")) {
3160
return omapi_make_uint_value (value, name,
3161
s -> me.max_flying_updates,
3163
} else if (!omapi_ds_strcmp (name, "mclt")) {
3164
return omapi_make_uint_value (value, name, s -> mclt, MDL);
3165
} else if (!omapi_ds_strcmp (name, "load-balance-max-secs")) {
3166
return omapi_make_int_value (value, name,
3167
s -> load_balance_max_secs, MDL);
3168
} else if (!omapi_ds_strcmp (name, "load-balance-hba")) {
3170
return omapi_make_const_value (value, name,
3172
return ISC_R_NOTFOUND;
3173
} else if (!omapi_ds_strcmp (name, "partner-state")) {
3174
return omapi_make_uint_value (value, name,
3175
s -> partner.state, MDL);
3176
} else if (!omapi_ds_strcmp (name, "local-state")) {
3177
return omapi_make_uint_value (value, name,
3178
s -> me.state, MDL);
3179
} else if (!omapi_ds_strcmp (name, "partner-stos")) {
3180
return omapi_make_int_value (value, name,
3181
s -> partner.stos, MDL);
3182
} else if (!omapi_ds_strcmp (name, "local-stos")) {
3183
return omapi_make_int_value (value, name,
3185
} else if (!omapi_ds_strcmp (name, "hierarchy")) {
3186
return omapi_make_uint_value (value, name, s -> i_am, MDL);
3187
} else if (!omapi_ds_strcmp (name, "last-packet-sent")) {
3188
return omapi_make_int_value (value, name,
3189
s -> last_packet_sent, MDL);
3190
} else if (!omapi_ds_strcmp (name, "last-timestamp-received")) {
3191
return omapi_make_int_value (value, name,
3192
s -> last_timestamp_received,
3194
} else if (!omapi_ds_strcmp (name, "skew")) {
3195
return omapi_make_int_value (value, name, s -> skew, MDL);
3196
} else if (!omapi_ds_strcmp (name, "max-response-delay")) {
3197
return omapi_make_uint_value (value, name,
3198
s -> me.max_response_delay,
3200
} else if (!omapi_ds_strcmp (name, "cur-unacked-updates")) {
3201
return omapi_make_int_value (value, name,
3202
s -> cur_unacked_updates, MDL);
3205
if (h -> inner && h -> inner -> type -> get_value)
3206
return (*(h -> inner -> type -> get_value))
3207
(h -> inner, id, name, value);
3208
return ISC_R_NOTFOUND;
3211
isc_result_t dhcp_failover_state_destroy (omapi_object_t *h,
3212
const char *file, int line)
3214
dhcp_failover_state_t *s;
3216
if (h -> type != dhcp_type_failover_state)
3217
return DHCP_R_INVALIDARG;
3218
s = (dhcp_failover_state_t *)h;
3220
if (s -> link_to_peer)
3221
dhcp_failover_link_dereference (&s -> link_to_peer, file, line);
3223
dfree (s -> name, MDL);
3224
s -> name = (char *)0;
3226
if (s -> partner.address)
3227
option_cache_dereference (&s -> partner.address, file, line);
3228
if (s -> me.address)
3229
option_cache_dereference (&s -> me.address, file, line);
3231
dfree (s -> hba, file, line);
3232
s -> hba = (u_int8_t *)0;
3234
if (s -> update_queue_head)
3235
lease_dereference (&s -> update_queue_head, file, line);
3236
if (s -> update_queue_tail)
3237
lease_dereference (&s -> update_queue_tail, file, line);
3238
if (s -> ack_queue_head)
3239
lease_dereference (&s -> ack_queue_head, file, line);
3240
if (s -> ack_queue_tail)
3241
lease_dereference (&s -> ack_queue_tail, file, line);
3242
if (s -> send_update_done)
3243
lease_dereference (&s -> send_update_done, file, line);
3244
if (s -> toack_queue_head)
3245
failover_message_dereference (&s -> toack_queue_head,
3247
if (s -> toack_queue_tail)
3248
failover_message_dereference (&s -> toack_queue_tail,
3250
return ISC_R_SUCCESS;
3253
/* Write all the published values associated with the object through the
3254
specified connection. */
3256
isc_result_t dhcp_failover_state_stuff (omapi_object_t *c,
3260
dhcp_failover_state_t *s;
3261
omapi_connection_object_t *conn;
3262
isc_result_t status;
3264
if (c -> type != omapi_type_connection)
3265
return DHCP_R_INVALIDARG;
3266
conn = (omapi_connection_object_t *)c;
3268
if (h -> type != dhcp_type_failover_state)
3269
return DHCP_R_INVALIDARG;
3270
s = (dhcp_failover_state_t *)h;
3272
status = omapi_connection_put_name (c, "name");
3273
if (status != ISC_R_SUCCESS)
3275
status = omapi_connection_put_string (c, s -> name);
3276
if (status != ISC_R_SUCCESS)
3279
status = omapi_connection_put_name (c, "partner-address");
3280
if (status != ISC_R_SUCCESS)
3282
status = omapi_connection_put_uint32 (c, sizeof s -> partner.address);
3283
if (status != ISC_R_SUCCESS)
3285
status = omapi_connection_copyin (c, (u_int8_t *)&s -> partner.address,
3286
sizeof s -> partner.address);
3287
if (status != ISC_R_SUCCESS)
3290
status = omapi_connection_put_name (c, "partner-port");
3291
if (status != ISC_R_SUCCESS)
3293
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3294
if (status != ISC_R_SUCCESS)
3296
status = omapi_connection_put_uint32 (c, (u_int32_t)s -> partner.port);
3297
if (status != ISC_R_SUCCESS)
3300
status = omapi_connection_put_name (c, "local-address");
3301
if (status != ISC_R_SUCCESS)
3303
status = omapi_connection_put_uint32 (c, sizeof s -> me.address);
3304
if (status != ISC_R_SUCCESS)
3306
status = omapi_connection_copyin (c, (u_int8_t *)&s -> me.address,
3307
sizeof s -> me.address);
3308
if (status != ISC_R_SUCCESS)
3311
status = omapi_connection_put_name (c, "local-port");
3312
if (status != ISC_R_SUCCESS)
3314
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3315
if (status != ISC_R_SUCCESS)
3317
status = omapi_connection_put_uint32 (c, (u_int32_t)s -> me.port);
3318
if (status != ISC_R_SUCCESS)
3321
status = omapi_connection_put_name (c, "max-outstanding-updates");
3322
if (status != ISC_R_SUCCESS)
3324
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3325
if (status != ISC_R_SUCCESS)
3327
status = omapi_connection_put_uint32 (c,
3328
s -> me.max_flying_updates);
3329
if (status != ISC_R_SUCCESS)
3332
status = omapi_connection_put_name (c, "mclt");
3333
if (status != ISC_R_SUCCESS)
3335
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3336
if (status != ISC_R_SUCCESS)
3338
status = omapi_connection_put_uint32 (c, s -> mclt);
3339
if (status != ISC_R_SUCCESS)
3342
status = omapi_connection_put_name (c, "load-balance-max-secs");
3343
if (status != ISC_R_SUCCESS)
3345
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3346
if (status != ISC_R_SUCCESS)
3348
status = (omapi_connection_put_uint32
3349
(c, (u_int32_t)s -> load_balance_max_secs));
3350
if (status != ISC_R_SUCCESS)
3355
status = omapi_connection_put_name (c, "load-balance-hba");
3356
if (status != ISC_R_SUCCESS)
3358
status = omapi_connection_put_uint32 (c, 32);
3359
if (status != ISC_R_SUCCESS)
3361
status = omapi_connection_copyin (c, s -> hba, 32);
3362
if (status != ISC_R_SUCCESS)
3366
status = omapi_connection_put_name (c, "partner-state");
3367
if (status != ISC_R_SUCCESS)
3369
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3370
if (status != ISC_R_SUCCESS)
3372
status = omapi_connection_put_uint32 (c, s -> partner.state);
3373
if (status != ISC_R_SUCCESS)
3376
status = omapi_connection_put_name (c, "local-state");
3377
if (status != ISC_R_SUCCESS)
3379
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3380
if (status != ISC_R_SUCCESS)
3382
status = omapi_connection_put_uint32 (c, s -> me.state);
3383
if (status != ISC_R_SUCCESS)
3386
status = omapi_connection_put_name (c, "partner-stos");
3387
if (status != ISC_R_SUCCESS)
3389
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3390
if (status != ISC_R_SUCCESS)
3392
status = omapi_connection_put_uint32 (c,
3393
(u_int32_t)s -> partner.stos);
3394
if (status != ISC_R_SUCCESS)
3397
status = omapi_connection_put_name (c, "local-stos");
3398
if (status != ISC_R_SUCCESS)
3400
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3401
if (status != ISC_R_SUCCESS)
3403
status = omapi_connection_put_uint32 (c, (u_int32_t)s -> me.stos);
3404
if (status != ISC_R_SUCCESS)
3407
status = omapi_connection_put_name (c, "hierarchy");
3408
if (status != ISC_R_SUCCESS)
3410
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3411
if (status != ISC_R_SUCCESS)
3413
status = omapi_connection_put_uint32 (c, s -> i_am);
3414
if (status != ISC_R_SUCCESS)
3417
status = omapi_connection_put_name (c, "last-packet-sent");
3418
if (status != ISC_R_SUCCESS)
3420
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3421
if (status != ISC_R_SUCCESS)
3423
status = (omapi_connection_put_uint32
3424
(c, (u_int32_t)s -> last_packet_sent));
3425
if (status != ISC_R_SUCCESS)
3428
status = omapi_connection_put_name (c, "last-timestamp-received");
3429
if (status != ISC_R_SUCCESS)
3431
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3432
if (status != ISC_R_SUCCESS)
3434
status = (omapi_connection_put_uint32
3435
(c, (u_int32_t)s -> last_timestamp_received));
3436
if (status != ISC_R_SUCCESS)
3439
status = omapi_connection_put_name (c, "skew");
3440
if (status != ISC_R_SUCCESS)
3442
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3443
if (status != ISC_R_SUCCESS)
3445
status = omapi_connection_put_uint32 (c, (u_int32_t)s -> skew);
3446
if (status != ISC_R_SUCCESS)
3449
status = omapi_connection_put_name (c, "max-response-delay");
3450
if (status != ISC_R_SUCCESS)
3452
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3453
if (status != ISC_R_SUCCESS)
3455
status = (omapi_connection_put_uint32
3456
(c, (u_int32_t)s -> me.max_response_delay));
3457
if (status != ISC_R_SUCCESS)
3460
status = omapi_connection_put_name (c, "cur-unacked-updates");
3461
if (status != ISC_R_SUCCESS)
3463
status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
3464
if (status != ISC_R_SUCCESS)
3466
status = (omapi_connection_put_uint32
3467
(c, (u_int32_t)s -> cur_unacked_updates));
3468
if (status != ISC_R_SUCCESS)
3471
if (h -> inner && h -> inner -> type -> stuff_values)
3472
return (*(h -> inner -> type -> stuff_values)) (c, id,
3474
return ISC_R_SUCCESS;
3477
isc_result_t dhcp_failover_state_lookup (omapi_object_t **sp,
3479
omapi_object_t *ref)
3481
omapi_value_t *tv = (omapi_value_t *)0;
3482
isc_result_t status;
3483
dhcp_failover_state_t *s;
3486
return DHCP_R_NOKEYS;
3488
/* First see if we were sent a handle. */
3489
status = omapi_get_value_str (ref, id, "handle", &tv);
3490
if (status == ISC_R_SUCCESS) {
3491
status = omapi_handle_td_lookup (sp, tv -> value);
3493
omapi_value_dereference (&tv, MDL);
3494
if (status != ISC_R_SUCCESS)
3497
/* Don't return the object if the type is wrong. */
3498
if ((*sp) -> type != dhcp_type_failover_state) {
3499
omapi_object_dereference (sp, MDL);
3500
return DHCP_R_INVALIDARG;
3504
/* Look the failover state up by peer name. */
3505
status = omapi_get_value_str (ref, id, "name", &tv);
3506
if (status == ISC_R_SUCCESS) {
3507
for (s = failover_states; s; s = s -> next) {
3508
unsigned l = strlen (s -> name);
3509
if (l == tv -> value -> u.buffer.len &&
3511
tv -> value -> u.buffer.value, l))
3514
omapi_value_dereference (&tv, MDL);
3516
/* If we already have a lease, and it's not the same one,
3517
then the query was invalid. */
3518
if (*sp && *sp != (omapi_object_t *)s) {
3519
omapi_object_dereference (sp, MDL);
3520
return DHCP_R_KEYCONFLICT;
3523
omapi_object_dereference (sp, MDL);
3524
return ISC_R_NOTFOUND;
3526
/* XXX fix so that hash lookup itself creates
3527
XXX the reference. */
3528
omapi_object_reference (sp, (omapi_object_t *)s, MDL);
3531
/* If we get to here without finding a lease, no valid key was
3534
return DHCP_R_NOKEYS;
3535
return ISC_R_SUCCESS;
3538
isc_result_t dhcp_failover_state_create (omapi_object_t **sp,
3541
return ISC_R_NOTIMPLEMENTED;
3544
isc_result_t dhcp_failover_state_remove (omapi_object_t *sp,
3547
return ISC_R_NOTIMPLEMENTED;
3550
// Check addr (addrlen bytes) against the address data obtained by evaluating
// state->partner.address.
// NOTE(review): the return statements and the remaining memcmp arguments are
// not visible in this listing; presumably nonzero means a match -- confirm.
int dhcp_failover_state_match (dhcp_failover_state_t *state,
3551
u_int8_t *addr, unsigned addrlen)
3553
struct data_string ds;
3556
memset (&ds, 0, sizeof ds);
3557
if (evaluate_option_cache (&ds, (struct packet *)0,
3559
(struct client_state *)0,
3560
(struct option_state *)0,
3561
(struct option_state *)0,
3563
state -> partner.address, MDL)) {
3564
// Step through ds in addrlen-sized strides; the loop bound keeps every
// compared slice fully inside ds.data.
for (i = 0; i + addrlen - 1 < ds.len; i += addrlen) {
3565
if (!memcmp (&ds.data [i],
3567
// ds is released on both of the visible exit paths.
data_string_forget (&ds, MDL);
3571
data_string_forget (&ds, MDL);
3577
// Compare the relationship name held in a received option against
// state->name: the lengths must be equal and the bytes must match. memcmp is
// used over name->count bytes, so name->data needs no terminator.
// The definition uses old-style (K and R) parameter declarations.
// NOTE(review): the return type and return statements are not visible here.
dhcp_failover_state_match_by_name(state, name)
3578
dhcp_failover_state_t *state;
3579
failover_option_t *name;
3581
if ((strlen(state->name) == name->count) &&
3582
(memcmp(state->name, name->data, name->count) == 0))
3588
// Map a failover reject-reason code to a human-readable string.
// Codes outside 0..255 yield a fixed out-of-range message. Codes with no
// dedicated case are formatted into the static buffer resbuf, so that text is
// overwritten by the next call that formats an undefined code.
const char *dhcp_failover_reject_reason_print (int reason)
3590
// Sized from the longest text the format below can produce for a value that
// passed the 0..255 range check.
static char resbuf[sizeof("Undefined-255: This reason code is not defined "
3591
"in the protocol standard.")];
3593
if ((reason > 0xff) || (reason < 0))
3594
return "Reason code out of range.";
3597
case FTR_ILLEGAL_IP_ADDR:
3598
return "Illegal IP address (not part of any address pool).";
3600
case FTR_FATAL_CONFLICT:
3601
return "Fatal conflict exists: address in use by other client.";
3603
case FTR_MISSING_BINDINFO:
3604
return "Missing binding information.";
3606
case FTR_TIMEMISMATCH:
3607
return "Connection rejected, time mismatch too great.";
3609
case FTR_INVALID_MCLT:
3610
return "Connection rejected, invalid MCLT.";
3612
case FTR_MISC_REJECT:
3613
return "Connection rejected, unknown reason.";
3615
case FTR_DUP_CONNECTION:
3616
return "Connection rejected, duplicate connection.";
3618
case FTR_INVALID_PARTNER:
3619
return "Connection rejected, invalid failover partner.";
3621
case FTR_TLS_UNSUPPORTED:
3622
return "TLS not supported.";
3624
case FTR_TLS_UNCONFIGURED:
3625
return "TLS supported but not configured.";
3627
case FTR_TLS_REQUIRED:
3628
return "TLS required but not supported by partner.";
3630
case FTR_DIGEST_UNSUPPORTED:
3631
return "Message digest not supported.";
3633
case FTR_DIGEST_UNCONFIGURED:
3634
return "Message digest not configured.";
3636
case FTR_VERSION_MISMATCH:
3637
return "Protocol version mismatch.";
3639
case FTR_OUTDATED_BIND_INFO:
3640
return "Outdated binding information.";
3642
case FTR_LESS_CRIT_BIND_INFO:
3643
return "Less critical binding information.";
3645
case FTR_NO_TRAFFIC:
3646
return "No traffic within sufficient time.";
3648
case FTR_HBA_CONFLICT:
3649
return "Hash bucket assignment conflict.";
3651
case FTR_IP_NOT_RESERVED:
3652
return "IP not reserved on this server.";
3654
case FTR_IP_DIGEST_FAILURE:
3655
return "Message digest failed to compare.";
3657
case FTR_IP_MISSING_DIGEST:
3658
return "Missing message digest.";
3661
return "Unknown Error.";
3664
// Fallback for codes without a dedicated case; the range check above bounds
// the number of digits, so the text fits resbuf.
sprintf(resbuf, "Undefined-%d: This reason code is not defined in the "
3665
"protocol standard.", reason);
3670
// Map a failover_state enumerator to its printable name. Every visible
// branch returns a string literal, so callers never own the result.
// NOTE(review): several case labels and returns are missing from this
// listing, so the full mapping cannot be confirmed from here.
const char *dhcp_failover_state_name_print (enum failover_state state)
3675
return "unknown-state";
3678
return "partner-down";
3684
return "conflict-done";
3686
case communications_interrupted:
3687
return "communications-interrupted";
3689
case resolution_interrupted:
3690
return "resolution-interrupted";
3692
case potential_conflict:
3693
return "potential-conflict";
3699
return "recover-done";
3702
return "recover-wait";
3715
// Map a failover message type to its printable name. Known types return
// string literals; any other value is formatted into the static buffer
// messbuf, which is overwritten by the next call that takes that path.
// NOTE(review): several case labels and returns are missing from this
// listing, so the full mapping cannot be confirmed from here.
const char *dhcp_failover_message_name (unsigned type)
3717
// Sized for a three-digit type value.
static char messbuf[sizeof("unknown-message-255")];
3720
return "invalid-message";
3724
return "pool-request";
3727
return "pool-response";
3730
return "bind-update";
3738
case FTM_CONNECTACK:
3739
return "connect-ack";
3742
return "update-request";
3745
return "update-done";
3748
return "update-request-all";
3756
case FTM_DISCONNECT:
3757
return "disconnect";
3760
// type is a full unsigned, so it can need more digits than messbuf was sized
// for; the bounded call truncates instead of writing past the buffer.
snprintf(messbuf, sizeof messbuf, "unknown-message-%u", type);
3765
// Map a failover option code to its printable name. Known codes return
// string literals; any other value is formatted into the static buffer
// optbuf, which is overwritten by the next call that takes that path.
// NOTE(review): several case labels and returns are missing from this
// listing, so the full mapping cannot be confirmed from here.
const char *dhcp_failover_option_name (unsigned type)
3767
// Sized for a five-digit option code.
static char optbuf[sizeof("unknown-option-65535")];
3770
return "invalid-option";
3773
case FTO_ADDRESSES_TRANSFERRED:
3774
return "addresses-transferred";
3776
case FTO_ASSIGNED_IP_ADDRESS:
3777
return "assigned-ip-address";
3779
case FTO_BINDING_STATUS:
3780
return "binding-status";
3782
case FTO_CLIENT_IDENTIFIER:
3783
return "client-identifier";
3794
case FTO_DELAYED_SERVICE:
3795
return "delayed-service";
3803
case FTO_LEASE_EXPIRY:
3804
return "lease-expiry";
3806
case FTO_MAX_UNACKED:
3807
return "max-unacked";
3815
case FTO_MESSAGE_DIGEST:
3816
return "message-digest";
3818
case FTO_POTENTIAL_EXPIRY:
3819
return "potential-expiry";
3821
case FTO_PROTOCOL_VERSION:
3822
return "protocol-version";
3824
case FTO_RECEIVE_TIMER:
3825
return "receive-timer";
3827
case FTO_REJECT_REASON:
3828
return "reject-reason";
3830
case FTO_RELATIONSHIP_NAME:
3831
return "relationship-name";
3833
case FTO_REPLY_OPTIONS:
3834
return "reply-options";
3836
case FTO_REQUEST_OPTIONS:
3837
return "request-options";
3839
case FTO_SERVER_FLAGS:
3840
return "server-flags";
3842
case FTO_SERVER_STATE:
3843
return "server-state";
3851
case FTO_TLS_REQUEST:
3852
return "tls-request";
3854
case FTO_VENDOR_CLASS:
3855
return "vendor-class";
3857
case FTO_VENDOR_OPTIONS:
3858
return "vendor-options";
3861
// type is a full unsigned, so it can need more digits than optbuf was sized
// for; the bounded call truncates instead of writing past the buffer.
snprintf(optbuf, sizeof optbuf, "unknown-option-%u", type);
3866
// Format text printf-style into tbuf and wrap it in a failover option by
// handing strlen(tbuf) and tbuf to dhcp_failover_make_option.
// The comparison against sizeof tbuf is done in an unsigned type, so a
// negative vsnprintf result also takes the log_fatal branch.
// NOTE(review): the declarations of tbuf and va, and the remaining
// parameters, are not visible in this listing.
failover_option_t *dhcp_failover_option_printf (unsigned code,
3870
const char *fmt, ...)
3875
/* %Audit% Truncation causes panic. %2004.06.17,Revisit%
3876
* It is unclear what the effects of truncation here are, or
3877
* how that condition should be handled. It seems that this
3878
* function is used for formatting messages in the failover
3879
* command channel. For now the safest thing is for
3880
* overflow-truncation to cause a fatal log.
3883
if (vsnprintf (tbuf, sizeof tbuf, fmt, va) >= sizeof tbuf)
3884
log_fatal ("%s: vsnprintf would truncate",
3885
"dhcp_failover_make_option");
3888
return dhcp_failover_make_option (code, obuf, obufix, obufmax,
3889
strlen (tbuf), tbuf);
3892
// Build one outgoing failover option for the given option code.
// The variadic arguments after obufmax depend on the type recorded in
// ft_options[code]; the visible code reads an element count (unless
// info->num_present supplies it) followed by the values themselves.
// Visible layout of option.data: a 2-byte code, a 2-byte length equal to
// size - 4, then the payload starting at offset 4.
// The address of null_failover_option is returned on every visible failure
// path. obuf, obufix and obufmax are only used for the debug trace built
// when DEBUG_FAILOVER_MESSAGES is defined.
// NOTE(review): many lines (case labels, breaks, closing braces, some
// declarations) are missing from this listing, so the per-type branches
// below cannot all be attributed to a specific option type.
failover_option_t *dhcp_failover_make_option (unsigned code,
3893
char *obuf, unsigned *obufix,
3894
unsigned obufmax, ...)
3897
struct failover_option_info *info;
3899
unsigned size, count;
3905
#if defined (DEBUG_FAILOVER_MESSAGES)
3909
/* Note that the failover_option structure is used differently on
3910
input than on output - on input, count is an element count, and
3911
on output it's the number of bytes total in the option, including
3912
the option code and option length. */
3913
failover_option_t option, *op;
3916
/* Bogus option code? */
3917
if (code < 1 || code > FTO_MAX || ft_options [code].type == FT_UNDEF) {
3918
return &null_failover_option;
3920
info = &ft_options [code];
3922
va_start (va, obufmax);
3924
/* Get the number of elements and the size of the buffer we need
3926
if (info -> type == FT_DDNS || info -> type == FT_DDNS1) {
3927
count = info -> type == FT_DDNS ? 1 : 2;
3928
size = va_arg (va, int) + count;
3930
/* Find out how many items in this list. */
3931
if (info -> num_present)
3932
count = info -> num_present;
3934
count = va_arg (va, int);
3936
/* Figure out size. */
3937
switch (info -> type) {
3944
case FT_TEXT_OR_BYTES:
3946
txt = va_arg (va, char *);
3951
ilen = va_arg (va, unsigned);
3952
size = count * ilen;
3964
/* shouldn't get here. */
3965
log_fatal ("bogus type in failover_make_option: %d",
3967
return &null_failover_option;
3973
/* Allocate a buffer for the option. */
3974
option.count = size;
3975
option.data = dmalloc (option.count, MDL);
3978
return &null_failover_option;
3981
/* Put in the option code and option length. */
3982
putUShort (option.data, code);
3983
putUShort (&option.data [2], size - 4);
3985
#if defined (DEBUG_FAILOVER_MESSAGES)
3986
/* %Audit% Truncation causes panic. %2004.06.17,Revisit%
3987
* It is unclear what the effects of truncation here are, or
3988
* how that condition should be handled. It seems that this
3989
* message may be sent over the failover command channel.
3990
* For now the safest thing is for overflow-truncation to cause
3993
if (snprintf (tbuf, sizeof tbuf, " (%s<%d>", info -> name,
3994
option.count) >= sizeof tbuf)
3995
log_fatal ("dhcp_failover_make_option: tbuf overflow");
3996
failover_print (obuf, obufix, obufmax, tbuf);
3999
/* Now put in the data. */
4000
switch (info -> type) {
4002
for (i = 0; i < count; i++) {
4003
val = va_arg (va, unsigned);
4004
#if defined (DEBUG_FAILOVER_MESSAGES)
4005
/* %Audit% Cannot exceed 24 bytes. %2004.06.17,Safe% */
4006
sprintf (tbuf, " %d", val);
4007
failover_print (obuf, obufix, obufmax, tbuf);
4009
option.data [i + 4] = val;
4014
for (i = 0; i < count; i++) {
4015
iaddr = va_arg (va, u_int8_t *);
4017
dfree (option.data, MDL);
4018
log_error ("IP addrlen=%d, should be 4.",
4021
return &null_failover_option;
4024
#if defined (DEBUG_FAILOVER_MESSAGES)
4025
/*%Audit% Cannot exceed 17 bytes. %2004.06.17,Safe%*/
4026
sprintf (tbuf, " %u.%u.%u.%u",
4027
iaddr [0], iaddr [1], iaddr [2], iaddr [3]);
4028
failover_print (obuf, obufix, obufmax, tbuf);
4030
memcpy (&option.data [4 + i * ilen], iaddr, ilen);
4035
for (i = 0; i < count; i++) {
4036
val = va_arg (va, unsigned);
4037
#if defined (DEBUG_FAILOVER_MESSAGES)
4038
/*%Audit% Cannot exceed 24 bytes. %2004.06.17,Safe%*/
4039
sprintf (tbuf, " %d", val);
4040
failover_print (obuf, obufix, obufmax, tbuf);
4042
putULong (&option.data [4 + i * 4], val);
4048
bval = va_arg (va, u_int8_t *);
4049
#if defined (DEBUG_FAILOVER_MESSAGES)
4050
for (i = 0; i < count; i++) {
4051
/* 23 bytes plus nul, safe. */
4052
sprintf (tbuf, " %d", bval [i]);
4053
failover_print (obuf, obufix, obufmax, tbuf);
4056
memcpy (&option.data [4], bval, count);
4059
/* On output, TEXT_OR_BYTES is _always_ text, and always NUL
4060
terminated. Note that the caller should be careful not
4061
to provide a format and data that amount to more than 256
4062
bytes of data, since it will cause a fatal error. */
4063
case FT_TEXT_OR_BYTES:
4065
#if defined (DEBUG_FAILOVER_MESSAGES)
4066
/* %Audit% Truncation causes panic. %2004.06.17,Revisit%
4067
* It is unclear what the effects of truncation here are, or
4068
* how that condition should be handled. It seems that this
4069
* function is used for formatting messages in the failover
4070
* command channel. For now the safest thing is for
4071
* overflow-truncation to cause a fatal log.
4073
if (snprintf (tbuf, sizeof tbuf, "\"%s\"", txt) >= sizeof tbuf)
4074
log_fatal ("dhcp_failover_make_option: tbuf overflow");
4075
failover_print (obuf, obufix, obufmax, tbuf);
4077
memcpy (&option.data [4], txt, count);
4082
option.data [4] = va_arg (va, unsigned);
4084
option.data [5] = va_arg (va, unsigned);
4085
bval = va_arg (va, u_int8_t *);
4086
memcpy (&option.data [4 + count], bval, size - count - 4);
4087
#if defined (DEBUG_FAILOVER_MESSAGES)
4088
for (i = 4; i < size; i++) {
4089
/*%Audit% Cannot exceed 24 bytes. %2004.06.17,Safe%*/
4090
sprintf (tbuf, " %d", option.data [i]);
4091
failover_print (obuf, obufix, obufmax, tbuf);
4097
for (i = 0; i < count; i++) {
4098
val = va_arg (va, u_int32_t);
4099
#if defined (DEBUG_FAILOVER_MESSAGES)
4100
/*%Audit% Cannot exceed 24 bytes. %2004.06.17,Safe%*/
4101
sprintf (tbuf, " %d", val);
4102
failover_print (obuf, obufix, obufmax, tbuf);
4104
putUShort (&option.data [4 + i * 2], val);
4113
#if defined DEBUG_FAILOVER_MESSAGES
4114
failover_print (obuf, obufix, obufmax, ")");
4118
/* Now allocate a place to store what we just set up. */
4119
op = dmalloc (sizeof (failover_option_t), MDL);
4121
dfree (option.data, MDL);
4122
return &null_failover_option;
4129
/* Send a failover message header. */
4131
// Assemble and transmit one failover message on connection.
// The variadic tail is a list of failover_option_t pointers ended by a null
// pointer. It is walked twice: once to total the option bytes and detect a
// null_failover_option entry, and once to copy each option into opbuf and
// release it. Every entry other than the null and skip sentinels is passed
// to dfree (both its data and the option itself), so callers must only hand
// in options that may be freed that way.
// After the options the visible code writes the header fields (length as
// size + 12, two single bytes, the current time, xid) and then opbuf.
// On the visible failure path a message is logged and the connection is
// disconnected.
// NOTE(review): several declarations, returns and the timeout registration
// call are missing from this listing.
isc_result_t dhcp_failover_put_message (dhcp_failover_link_t *link,
4132
omapi_object_t *connection,
4133
int msg_type, u_int32_t xid, ...)
4139
failover_option_t *option;
4140
unsigned char *opbuf;
4141
isc_result_t status = ISC_R_SUCCESS;
4145
/* Run through the argument list once to compute the length of
4146
the option portion of the message. */
4147
va_start (list, xid);
4148
while ((option = va_arg (list, failover_option_t *))) {
4149
if (option != &skip_failover_option)
4150
size += option -> count;
4151
if (option == &null_failover_option)
4156
/* Allocate an option buffer, unless we got an error. */
4157
if (!bad_option && size) {
4158
opbuf = dmalloc (size, MDL);
4160
status = ISC_R_NOMEMORY;
4162
opbuf = (unsigned char *)0;
4164
// Second pass: copy the option bytes when a buffer exists, and free every
// real option whether or not it was copied.
va_start (list, xid);
4165
while ((option = va_arg (list, failover_option_t *))) {
4166
if (option == &skip_failover_option)
4168
if (!bad_option && opbuf)
4169
memcpy (&opbuf [opix],
4170
option -> data, option -> count);
4171
if (option != &null_failover_option &&
4172
option != &skip_failover_option) {
4173
opix += option -> count;
4174
dfree (option -> data, MDL);
4175
dfree (option, MDL);
4181
return DHCP_R_INVALIDARG;
4183
/* Now send the message header. */
4185
/* Message length. */
4186
status = omapi_connection_put_uint16 (connection, size + 12);
4187
if (status != ISC_R_SUCCESS)
4192
status = omapi_connection_copyin (connection, &cbuf, 1);
4193
if (status != ISC_R_SUCCESS)
4196
/* Payload offset. */
4198
status = omapi_connection_copyin (connection, &cbuf, 1);
4199
if (status != ISC_R_SUCCESS)
4203
status = omapi_connection_put_uint32 (connection, (u_int32_t)cur_time);
4204
if (status != ISC_R_SUCCESS)
4207
/* Transaction ID. */
4208
status = omapi_connection_put_uint32(connection, xid);
4209
if (status != ISC_R_SUCCESS)
4214
status = omapi_connection_copyin (connection, opbuf, size);
4215
if (status != ISC_R_SUCCESS)
4219
// Only when this link is the one registered as the state object link_to_peer
// is the next contact time computed, as cur_time plus one third of the
// partner max_response_delay (the cast to int is applied before dividing).
if (link -> state_object &&
4220
link -> state_object -> link_to_peer == link) {
4221
#if defined (DEBUG_FAILOVER_CONTACT_TIMING)
4222
log_info ("add_timeout +%d %s",
4223
(int)(link -> state_object ->
4224
partner.max_response_delay) / 3,
4225
"dhcp_failover_send_contact");
4227
tv . tv_sec = cur_time +
4228
(int)(link -> state_object ->
4229
partner.max_response_delay) / 3;
4232
dhcp_failover_send_contact, link -> state_object,
4233
(tvref_t)dhcp_failover_state_reference,
4234
(tvunref_t)dhcp_failover_state_dereference);
4241
log_info ("dhcp_failover_put_message: something went wrong.");
4242
omapi_disconnect (connection, 1);
4246
// Timer callback taking a failover state as an untyped pointer. After the
// visible type checks on the state and on the outer object of its peer link,
// it logs the timeout and disconnects link->outer.
// NOTE(review): the early-return statements and part of the second
// condition are missing from this listing.
void dhcp_failover_timeout (void *vstate)
4248
dhcp_failover_state_t *state = vstate;
4249
dhcp_failover_link_t *link;
4251
#if defined (DEBUG_FAILOVER_TIMING)
4252
log_info ("dhcp_failover_timeout");
4255
if (!state || state -> type != dhcp_type_failover_state)
4257
link = state -> link_to_peer;
4260
link -> outer -> type != omapi_type_connection)
4263
log_error ("timeout waiting for failover peer %s", state -> name);
4265
/* If we haven't gotten a timely response, blow away the connection.
4266
This will cause the state to change automatically. */
4267
omapi_disconnect (link -> outer, 1);
4270
// Timer callback that sends an FTM_CONTACT message with no options on the
// peer link of the given failover state, using link->xid and then
// incrementing it. The debug trace is only built when both
// DEBUG_FAILOVER_MESSAGES and DEBUG_FAILOVER_CONTACT_MESSAGES are defined.
// NOTE(review): the early-return statements and part of the link check are
// missing from this listing.
void dhcp_failover_send_contact (void *vstate)
4272
dhcp_failover_state_t *state = vstate;
4273
dhcp_failover_link_t *link;
4274
isc_result_t status;
4276
#if defined(DEBUG_FAILOVER_MESSAGES) && \
4277
defined(DEBUG_FAILOVER_CONTACT_MESSAGES)
4279
unsigned obufix = 0;
4281
failover_print(obuf, &obufix, sizeof(obuf), "(contact");
4284
#if defined (DEBUG_FAILOVER_CONTACT_TIMING)
4285
log_info ("dhcp_failover_send_contact");
4288
if (!state || state -> type != dhcp_type_failover_state)
4290
link = state -> link_to_peer;
4293
link -> outer -> type != omapi_type_connection)
4296
status = (dhcp_failover_put_message
4297
(link, link -> outer,
4298
FTM_CONTACT, link->xid++,
4299
(failover_option_t *)0));
4301
#if defined(DEBUG_FAILOVER_MESSAGES) && \
4302
defined(DEBUG_FAILOVER_CONTACT_MESSAGES)
4303
if (status != ISC_R_SUCCESS)
4304
failover_print(obuf, &obufix, sizeof(obuf), " (failed)");
4305
failover_print(obuf, &obufix, sizeof(obuf), ")");
4307
log_debug ("%s", obuf);
4313
// Send an FTM_STATE message describing this server to the peer.
// Options sent: server state (saved_state is substituted while me.state is
// startup), server flags (FTF_SERVER_STARTUP while service_state is
// service_startup, otherwise 0) and me.stos.
// Returns DHCP_R_INVALIDARG when the state or its peer link fails the
// visible type checks.
// FMA expands to the debug-trace buffer arguments, or to null placeholders
// when DEBUG_FAILOVER_MESSAGES is not defined.
// NOTE(review): the final visible return is ISC_R_SUCCESS regardless of
// status -- confirm that ignoring the send result is intended.
isc_result_t dhcp_failover_send_state (dhcp_failover_state_t *state)
4315
dhcp_failover_link_t *link;
4316
isc_result_t status;
4318
#if defined (DEBUG_FAILOVER_MESSAGES)
4320
unsigned obufix = 0;
4322
# define FMA obuf, &obufix, sizeof obuf
4323
failover_print (FMA, "(state");
4325
# define FMA (char *)0, (unsigned *)0, 0
4328
if (!state || state -> type != dhcp_type_failover_state)
4329
return DHCP_R_INVALIDARG;
4330
link = state -> link_to_peer;
4333
link -> outer -> type != omapi_type_connection)
4334
return DHCP_R_INVALIDARG;
4336
status = (dhcp_failover_put_message
4337
(link, link -> outer,
4338
FTM_STATE, link->xid++,
4339
dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
4340
(state -> me.state == startup
4341
? state -> saved_state
4342
: state -> me.state)),
4343
dhcp_failover_make_option
4344
(FTO_SERVER_FLAGS, FMA,
4345
(state -> service_state == service_startup
4346
? FTF_SERVER_STARTUP : 0)),
4347
dhcp_failover_make_option (FTO_STOS, FMA, state -> me.stos),
4348
(failover_option_t *)0));
4350
#if defined (DEBUG_FAILOVER_MESSAGES)
4351
if (status != ISC_R_SUCCESS)
4352
failover_print (FMA, " (failed)");
4353
failover_print (FMA, ")");
4355
log_debug ("%s", obuf);
4358
return ISC_R_SUCCESS;
4361
/* Send a connect message. */
4363
// Send an FTM_CONNECT message on failover link l.
// Visible options: relationship name (state->name), max unacked updates,
// receive timer, a vendor class string built from PACKAGE_VERSION, the
// protocol version, a TLS request, the MCLT and, conditionally, a 32-byte
// HBA taken from state->hba.
// Returns DHCP_R_INVALIDARG when l is not a failover link or its outer
// object is not an OMAPI connection.
// NOTE(review): state is read from link->state_object and dereferenced
// without a visible null check -- confirm callers guarantee it is set.
// Some argument lines of the message are missing from this listing.
isc_result_t dhcp_failover_send_connect (omapi_object_t *l)
4365
dhcp_failover_link_t *link;
4366
dhcp_failover_state_t *state;
4367
isc_result_t status;
4368
#if defined (DEBUG_FAILOVER_MESSAGES)
4370
unsigned obufix = 0;
4372
# define FMA obuf, &obufix, sizeof obuf
4373
failover_print (FMA, "(connect");
4375
# define FMA (char *)0, (unsigned *)0, 0
4378
if (!l || l -> type != dhcp_type_failover_link)
4379
return DHCP_R_INVALIDARG;
4380
link = (dhcp_failover_link_t *)l;
4381
state = link -> state_object;
4382
if (!l -> outer || l -> outer -> type != omapi_type_connection)
4383
return DHCP_R_INVALIDARG;
4386
(dhcp_failover_put_message
4388
FTM_CONNECT, link->xid++,
4389
dhcp_failover_make_option(FTO_RELATIONSHIP_NAME, FMA,
4390
strlen(state->name), state->name),
4391
dhcp_failover_make_option (FTO_MAX_UNACKED, FMA,
4392
state -> me.max_flying_updates),
4393
dhcp_failover_make_option (FTO_RECEIVE_TIMER, FMA,
4394
state -> me.max_response_delay),
4395
dhcp_failover_option_printf(FTO_VENDOR_CLASS, FMA,
4396
"isc-%s", PACKAGE_VERSION),
4397
dhcp_failover_make_option (FTO_PROTOCOL_VERSION, FMA,
4398
DHCP_FAILOVER_VERSION),
4399
dhcp_failover_make_option (FTO_TLS_REQUEST, FMA,
4401
dhcp_failover_make_option (FTO_MCLT, FMA,
4404
? dhcp_failover_make_option (FTO_HBA, FMA, 32, state -> hba)
4405
: &skip_failover_option),
4406
(failover_option_t *)0));
4408
#if defined (DEBUG_FAILOVER_MESSAGES)
4409
if (status != ISC_R_SUCCESS)
4410
failover_print (FMA, " (failed)");
4411
failover_print (FMA, ")");
4413
log_debug ("%s", obuf);
4419
// Send an FTM_CONNECTACK on failover link l in reply to the message held in
// link->imsg, reusing that message xid.
// state may be null; in that case the relationship name is echoed from the
// incoming message when it carried one, and the options that read from
// state are replaced by the skip sentinel.
// reason and errmsg feed the reject-reason and message options.
// Returns DHCP_R_INVALIDARG when l is not a failover link or its outer
// object is not an OMAPI connection.
// NOTE(review): some condition lines of the option list are missing from
// this listing.
isc_result_t dhcp_failover_send_connectack (omapi_object_t *l,
4420
dhcp_failover_state_t *state,
4421
int reason, const char *errmsg)
4423
dhcp_failover_link_t *link;
4424
isc_result_t status;
4425
#if defined (DEBUG_FAILOVER_MESSAGES)
4427
unsigned obufix = 0;
4429
# define FMA obuf, &obufix, sizeof obuf
4430
failover_print (FMA, "(connectack");
4432
# define FMA (char *)0, (unsigned *)0, 0
4435
if (!l || l -> type != dhcp_type_failover_link)
4436
return DHCP_R_INVALIDARG;
4437
link = (dhcp_failover_link_t *)l;
4438
if (!l -> outer || l -> outer -> type != omapi_type_connection)
4439
return DHCP_R_INVALIDARG;
4442
(dhcp_failover_put_message
4444
FTM_CONNECTACK, link->imsg->xid,
4446
? dhcp_failover_make_option(FTO_RELATIONSHIP_NAME, FMA,
4447
strlen(state->name), state->name)
4448
: (link->imsg->options_present & FTB_RELATIONSHIP_NAME)
4449
// The received name must be copied into a freshly built option.
// dhcp_failover_put_message copies option->data as complete wire bytes and
// then frees both the data and the option, so the member embedded in
// link->imsg must never be handed to it directly.
? dhcp_failover_make_option(FTO_RELATIONSHIP_NAME, FMA,
link->imsg->relationship_name.count,
link->imsg->relationship_name.data)
4450
: &skip_failover_option,
4452
? dhcp_failover_make_option (FTO_MAX_UNACKED, FMA,
4453
state -> me.max_flying_updates)
4454
: &skip_failover_option,
4456
? dhcp_failover_make_option (FTO_RECEIVE_TIMER, FMA,
4457
state -> me.max_response_delay)
4458
: &skip_failover_option,
4459
dhcp_failover_option_printf(FTO_VENDOR_CLASS, FMA,
4460
"isc-%s", PACKAGE_VERSION),
4461
dhcp_failover_make_option (FTO_PROTOCOL_VERSION, FMA,
4462
DHCP_FAILOVER_VERSION),
4463
(link->imsg->options_present & FTB_TLS_REQUEST)
4464
? dhcp_failover_make_option(FTO_TLS_REPLY, FMA,
4466
: &skip_failover_option,
4468
? dhcp_failover_make_option (FTO_REJECT_REASON,
4470
: &skip_failover_option,
4472
? dhcp_failover_make_option (FTO_MESSAGE, FMA,
4473
strlen (errmsg), errmsg)
4474
: &skip_failover_option,
4475
(failover_option_t *)0));
4477
#if defined (DEBUG_FAILOVER_MESSAGES)
4478
if (status != ISC_R_SUCCESS)
4479
failover_print (FMA, " (failed)");
4480
failover_print (FMA, ")");
4482
log_debug ("%s", obuf);
4488
// Send an FTM_DISCONNECT message on failover link l carrying a reject
// reason and, when one is available, a text message. If message is null and
// reason is nonzero, the text comes from dhcp_failover_reject_reason_print.
// Returns DHCP_R_INVALIDARG when l is not a failover link or its outer
// object is not an OMAPI connection.
// NOTE(review): the line declaring the reason parameter and some argument
// lines of the message are missing from this listing.
isc_result_t dhcp_failover_send_disconnect (omapi_object_t *l,
4490
const char *message)
4492
dhcp_failover_link_t *link;
4493
dhcp_failover_state_t *state;
4494
isc_result_t status;
4495
#if defined (DEBUG_FAILOVER_MESSAGES)
4497
unsigned obufix = 0;
4499
# define FMA obuf, &obufix, sizeof obuf
4500
failover_print (FMA, "(disconnect");
4502
# define FMA (char *)0, (unsigned *)0, 0
4505
if (!l || l -> type != dhcp_type_failover_link)
4506
return DHCP_R_INVALIDARG;
4507
link = (dhcp_failover_link_t *)l;
4508
state = link -> state_object;
4509
if (!l -> outer || l -> outer -> type != omapi_type_connection)
4510
return DHCP_R_INVALIDARG;
4512
if (!message && reason)
4513
message = dhcp_failover_reject_reason_print (reason);
4515
status = (dhcp_failover_put_message
4517
FTM_DISCONNECT, link->xid++,
4518
dhcp_failover_make_option (FTO_REJECT_REASON,
4521
? dhcp_failover_make_option (FTO_MESSAGE, FMA,
4522
strlen (message), message)
4523
: &skip_failover_option),
4524
(failover_option_t *)0));
4526
#if defined (DEBUG_FAILOVER_MESSAGES)
4527
if (status != ISC_R_SUCCESS)
4528
failover_print (FMA, " (failed)");
4529
failover_print (FMA, ")");
4531
log_debug ("%s", obuf);
4537
/* Send a Bind Update message. */
4539
// Send an FTM_BNDUPD message for lease to the failover peer of state.
// The lease is stamped with a fresh transaction id (lease->last_xid) taken
// from link->xid, and its rewind_binding_state is brought up to date with
// binding_state before the message is built.
// flags accumulates FTF_IP_FLAG_RESERVE and FTF_IP_FLAG_BOOTP from the lease
// flags; the IP-flags option is only included when flags is nonzero.
// Returns DHCP_R_INVALIDARG when the peer link or its outer connection fails
// the visible type checks.
// NOTE(review): several argument lines and statements are missing from this
// listing.
isc_result_t dhcp_failover_send_bind_update (dhcp_failover_state_t *state,
4540
struct lease *lease)
4542
dhcp_failover_link_t *link;
4543
isc_result_t status;
4545
binding_state_t transmit_state;
4546
#if defined (DEBUG_FAILOVER_MESSAGES)
4548
unsigned obufix = 0;
4550
# define FMA obuf, &obufix, sizeof obuf
4551
failover_print (FMA, "(bndupd");
4553
# define FMA (char *)0, (unsigned *)0, 0
4556
if (!state -> link_to_peer ||
4557
state -> link_to_peer -> type != dhcp_type_failover_link)
4558
return DHCP_R_INVALIDARG;
4559
link = (dhcp_failover_link_t *)state -> link_to_peer;
4561
if (!link -> outer || link -> outer -> type != omapi_type_connection)
4562
return DHCP_R_INVALIDARG;
4564
// NOTE(review): transmit_state is computed and adjusted below, but the
// FTO_BINDING_STATUS option further down is built from
// lease->desired_binding_state -- confirm which value is meant to be sent.
transmit_state = lease->desired_binding_state;
4565
if (lease->flags & RESERVED_LEASE) {
4566
/* If we are listing an allocable (not yet ACTIVE etc) lease
4567
* as reserved, toggle to the peer's 'free state', per the
4568
* draft. This gives the peer permission to alloc it to the
4569
* chaddr/uid-named client.
4571
if ((state->i_am == primary) && (transmit_state == FTS_FREE))
4572
transmit_state = FTS_BACKUP;
4573
else if ((state->i_am == secondary) &&
4574
(transmit_state == FTS_BACKUP))
4575
transmit_state = FTS_FREE;
4577
flags |= FTF_IP_FLAG_RESERVE;
4579
if (lease->flags & BOOTP_LEASE)
4580
flags |= FTF_IP_FLAG_BOOTP;
4582
/* last_xid == 0 is illegal, seek past zero if we hit it. */
4586
lease->last_xid = link->xid++;
4589
* Our very next action is to transmit a binding update relating to
4590
* this lease over the wire, and although there is a BNDACK, there is
4591
* no BNDACKACK or BNDACKACKACK...the basic issue as we send a BNDUPD,
4592
* we may not receive a BNDACK. This non-reception does not imply the
4593
* peer did not receive and process the BNDUPD. So at this point, we
4594
* must divest any state that would be dangerous to retain under the
4595
* impression the peer has been updated. Normally state changes like
4596
* this are processed in supersede_lease(), but in this case we need a
4597
* very late binding.
4599
* In failover rules, a server is permitted to work forward in certain
4600
* directions from a given lease's state; active leases may be
4601
* extended, so forth. There is an 'optimization' in the failover
4602
* draft that permits a server to 'rewind' any work they have not
4603
* informed the peer. Since we can't know if the peer received our
4604
* update but was unable to acknowledge it, we make this change on
4605
* transmit rather than upon receiving the acknowledgement.
4607
* XXX: Frequent lease commits are undesirable. This should hopefully
4608
* only trigger when a server is sending a lease /state change/, and
4609
* not merely an update such as with a renewal.
4611
if (lease->rewind_binding_state != lease->binding_state) {
4612
lease->rewind_binding_state = lease->binding_state;
4618
/* Send the update. */
4619
status = (dhcp_failover_put_message
4620
(link, link -> outer,
4621
FTM_BNDUPD, lease->last_xid,
4622
dhcp_failover_make_option (FTO_ASSIGNED_IP_ADDRESS, FMA,
4623
lease -> ip_addr.len,
4624
lease -> ip_addr.iabuf),
4625
dhcp_failover_make_option (FTO_BINDING_STATUS, FMA,
4626
lease -> desired_binding_state),
4628
? dhcp_failover_make_option (FTO_CLIENT_IDENTIFIER, FMA,
4631
: &skip_failover_option,
4632
lease -> hardware_addr.hlen
4633
? dhcp_failover_make_option (FTO_CHADDR, FMA,
4634
lease -> hardware_addr.hlen,
4635
lease -> hardware_addr.hbuf)
4636
: &skip_failover_option,
4637
dhcp_failover_make_option (FTO_LEASE_EXPIRY, FMA,
4639
dhcp_failover_make_option (FTO_POTENTIAL_EXPIRY, FMA,
4641
dhcp_failover_make_option (FTO_STOS, FMA,
4643
(lease->cltt != 0) ?
4644
dhcp_failover_make_option(FTO_CLTT, FMA, lease->cltt) :
4645
&skip_failover_option, /* No CLTT */
4646
flags ? dhcp_failover_make_option(FTO_IP_FLAGS, FMA,
4648
&skip_failover_option, /* No IP_FLAGS */
4649
&skip_failover_option, /* XXX DDNS */
4650
&skip_failover_option, /* XXX request options */
4651
&skip_failover_option, /* XXX reply options */
4652
(failover_option_t *)0));
4654
#if defined (DEBUG_FAILOVER_MESSAGES)
4655
if (status != ISC_R_SUCCESS)
4656
failover_print (FMA, " (failed)");
4657
failover_print (FMA, ")");
4659
log_debug ("%s", obuf);
4665
/* Send a Bind ACK message. */
4667
// Send an FTM_BNDACK message answering the bind update msg, reusing msg->xid.
// The assigned address is always echoed. A reject reason and a text message
// are added conditionally; when message is null and reason is nonzero the
// text comes from dhcp_failover_reject_reason_print.
// The larger set of echoed options is only compiled in when
// DO_BNDACK_SHOULD_NOT is defined.
// Returns DHCP_R_INVALIDARG when the peer link or its outer connection fails
// the visible type checks.
// NOTE(review): some condition and argument lines are missing from this
// listing.
isc_result_t dhcp_failover_send_bind_ack (dhcp_failover_state_t *state,
4668
failover_message_t *msg,
4669
int reason, const char *message)
4671
dhcp_failover_link_t *link;
4672
isc_result_t status;
4673
#if defined (DEBUG_FAILOVER_MESSAGES)
4675
unsigned obufix = 0;
4677
# define FMA obuf, &obufix, sizeof obuf
4678
failover_print (FMA, "(bndack");
4680
# define FMA (char *)0, (unsigned *)0, 0
4683
if (!state -> link_to_peer ||
4684
state -> link_to_peer -> type != dhcp_type_failover_link)
4685
return DHCP_R_INVALIDARG;
4686
link = (dhcp_failover_link_t *)state -> link_to_peer;
4688
if (!link -> outer || link -> outer -> type != omapi_type_connection)
4689
return DHCP_R_INVALIDARG;
4691
if (!message && reason)
4692
message = dhcp_failover_reject_reason_print (reason);
4694
/* Send the update. */
4695
status = (dhcp_failover_put_message
4696
(link, link -> outer,
4697
FTM_BNDACK, msg->xid,
4698
dhcp_failover_make_option (FTO_ASSIGNED_IP_ADDRESS, FMA,
4699
sizeof msg -> assigned_addr,
4700
&msg -> assigned_addr),
4701
#ifdef DO_BNDACK_SHOULD_NOT
4702
dhcp_failover_make_option (FTO_BINDING_STATUS, FMA,
4703
msg -> binding_status),
4704
(msg -> options_present & FTB_CLIENT_IDENTIFIER)
4705
? dhcp_failover_make_option (FTO_CLIENT_IDENTIFIER, FMA,
4706
msg -> client_identifier.count,
4707
msg -> client_identifier.data)
4708
: &skip_failover_option,
4709
(msg -> options_present & FTB_CHADDR)
4710
? dhcp_failover_make_option (FTO_CHADDR, FMA,
4711
msg -> chaddr.count,
4713
: &skip_failover_option,
4714
dhcp_failover_make_option (FTO_LEASE_EXPIRY, FMA,
4716
dhcp_failover_make_option (FTO_POTENTIAL_EXPIRY, FMA,
4717
msg -> potential_expiry),
4718
dhcp_failover_make_option (FTO_STOS, FMA,
4720
(msg->options_present & FTB_CLTT) ?
4721
dhcp_failover_make_option(FTO_CLTT, FMA, msg->cltt) :
4722
&skip_failover_option, /* No CLTT in the msg to ack. */
4723
((msg->options_present & FTB_IP_FLAGS) && msg->ip_flags) ?
4724
dhcp_failover_make_option(FTO_IP_FLAGS, FMA,
4726
: &skip_failover_option,
4727
#endif /* DO_BNDACK_SHOULD_NOT */
4729
? dhcp_failover_make_option(FTO_REJECT_REASON, FMA, reason)
4730
: &skip_failover_option,
4732
? dhcp_failover_make_option (FTO_MESSAGE, FMA,
4733
strlen (message), message)
4734
: &skip_failover_option,
4735
#ifdef DO_BNDACK_SHOULD_NOT
4736
&skip_failover_option, /* XXX DDNS */
4737
&skip_failover_option, /* XXX request options */
4738
&skip_failover_option, /* XXX reply options */
4739
#endif /* DO_BNDACK_SHOULD_NOT */
4740
(failover_option_t *)0));
4742
#if defined (DEBUG_FAILOVER_MESSAGES)
4743
if (status != ISC_R_SUCCESS)
4744
failover_print (FMA, " (failed)");
4745
failover_print (FMA, ")");
4747
log_debug ("%s", obuf);
4753
// Send an FTM_POOLREQ message with no options to the failover peer of state,
// using link->xid and then incrementing it.
// Returns DHCP_R_INVALIDARG when the peer link or its outer connection fails
// the visible type checks.
// NOTE(review): the final return is not visible in this listing.
isc_result_t dhcp_failover_send_poolreq (dhcp_failover_state_t *state)
4755
dhcp_failover_link_t *link;
4756
isc_result_t status;
4757
#if defined (DEBUG_FAILOVER_MESSAGES)
4759
unsigned obufix = 0;
4761
# define FMA obuf, &obufix, sizeof obuf
4762
failover_print (FMA, "(poolreq");
4764
# define FMA (char *)0, (unsigned *)0, 0
4767
if (!state -> link_to_peer ||
4768
state -> link_to_peer -> type != dhcp_type_failover_link)
4769
return DHCP_R_INVALIDARG;
4770
link = (dhcp_failover_link_t *)state -> link_to_peer;
4772
if (!link -> outer || link -> outer -> type != omapi_type_connection)
4773
return DHCP_R_INVALIDARG;
4775
status = (dhcp_failover_put_message
4776
(link, link -> outer,
4777
FTM_POOLREQ, link->xid++,
4778
(failover_option_t *)0));
4780
#if defined (DEBUG_FAILOVER_MESSAGES)
4781
if (status != ISC_R_SUCCESS)
4782
failover_print (FMA, " (failed)");
4783
failover_print (FMA, ")");
4785
log_debug ("%s", obuf);
4791
// Send an FTM_POOLRESP message to the failover peer of state, reusing the
// xid of the message held in link->imsg and carrying one
// addresses-transferred option.
// Returns DHCP_R_INVALIDARG when the peer link or its outer connection fails
// the visible type checks.
// NOTE(review): the second parameter, the option value and the final return
// are not visible in this listing.
isc_result_t dhcp_failover_send_poolresp (dhcp_failover_state_t *state,
4794
dhcp_failover_link_t *link;
4795
isc_result_t status;
4796
#if defined (DEBUG_FAILOVER_MESSAGES)
4798
unsigned obufix = 0;
4800
# define FMA obuf, &obufix, sizeof obuf
4801
failover_print (FMA, "(poolresp");
4803
# define FMA (char *)0, (unsigned *)0, 0
4806
if (!state -> link_to_peer ||
4807
state -> link_to_peer -> type != dhcp_type_failover_link)
4808
return DHCP_R_INVALIDARG;
4809
link = (dhcp_failover_link_t *)state -> link_to_peer;
4811
if (!link -> outer || link -> outer -> type != omapi_type_connection)
4812
return DHCP_R_INVALIDARG;
4814
status = (dhcp_failover_put_message
4815
(link, link -> outer,
4816
FTM_POOLRESP, link->imsg->xid,
4817
dhcp_failover_make_option (FTO_ADDRESSES_TRANSFERRED, FMA,
4819
(failover_option_t *)0));
4821
#if defined (DEBUG_FAILOVER_MESSAGES)
4822
if (status != ISC_R_SUCCESS)
4823
failover_print (FMA, " (failed)");
4824
failover_print (FMA, ")");
4826
log_debug ("%s", obuf);
4832
// Send an FTM_UPDREQ message with no options to the failover peer of state.
// Returns ISC_R_ALREADYRUNNING when state->curUPD is already set, and
// DHCP_R_INVALIDARG when the peer link or its outer connection fails the
// visible type checks. On a successful send, state->curUPD records
// FTM_UPDREQ.
// NOTE(review): the condition guarding the log_info call and the final
// return are not visible in this listing.
isc_result_t dhcp_failover_send_update_request (dhcp_failover_state_t *state)
4834
dhcp_failover_link_t *link;
4835
isc_result_t status;
4836
#if defined (DEBUG_FAILOVER_MESSAGES)
4838
unsigned obufix = 0;
4840
# define FMA obuf, &obufix, sizeof obuf
4841
failover_print (FMA, "(updreq");
4843
# define FMA (char *)0, (unsigned *)0, 0
4846
if (!state -> link_to_peer ||
4847
state -> link_to_peer -> type != dhcp_type_failover_link)
4848
return DHCP_R_INVALIDARG;
4849
link = (dhcp_failover_link_t *)state -> link_to_peer;
4851
if (!link -> outer || link -> outer -> type != omapi_type_connection)
4852
return DHCP_R_INVALIDARG;
4854
if (state -> curUPD)
4855
return ISC_R_ALREADYRUNNING;
4857
status = (dhcp_failover_put_message
4858
(link, link -> outer,
4859
FTM_UPDREQ, link->xid++,
4860
(failover_option_t *)0));
4862
if (status == ISC_R_SUCCESS)
4863
state -> curUPD = FTM_UPDREQ;
4865
#if defined (DEBUG_FAILOVER_MESSAGES)
4866
if (status != ISC_R_SUCCESS)
4867
failover_print (FMA, " (failed)");
4868
failover_print (FMA, ")");
4870
log_debug ("%s", obuf);
4873
log_info ("Sent update request message to %s", state -> name);
4877
// Send an FTM_UPDREQALL message with no options to the failover peer.
// An update already in progress blocks the request with
// ISC_R_ALREADYRUNNING unless that update is an FTM_UPDREQ, which this call
// is allowed to supersede. On a successful send, state->curUPD records
// FTM_UPDREQALL.
// Returns DHCP_R_INVALIDARG when the peer link or its outer connection fails
// the visible type checks.
// NOTE(review): the end of the parameter list, the condition guarding the
// log_info call and the final return are not visible in this listing.
isc_result_t dhcp_failover_send_update_request_all (dhcp_failover_state_t
4880
dhcp_failover_link_t *link;
4881
isc_result_t status;
4882
#if defined (DEBUG_FAILOVER_MESSAGES)
4884
unsigned obufix = 0;
4886
# define FMA obuf, &obufix, sizeof obuf
4887
failover_print (FMA, "(updreqall");
4889
# define FMA (char *)0, (unsigned *)0, 0
4892
if (!state -> link_to_peer ||
4893
state -> link_to_peer -> type != dhcp_type_failover_link)
4894
return DHCP_R_INVALIDARG;
4895
link = (dhcp_failover_link_t *)state -> link_to_peer;
4897
if (!link -> outer || link -> outer -> type != omapi_type_connection)
4898
return DHCP_R_INVALIDARG;
4900
/* If there is an UPDREQ in progress, then upgrade to UPDREQALL. */
4901
if (state -> curUPD && (state -> curUPD != FTM_UPDREQ))
4902
return ISC_R_ALREADYRUNNING;
4904
status = (dhcp_failover_put_message
4905
(link, link -> outer,
4906
FTM_UPDREQALL, link->xid++,
4907
(failover_option_t *)0));
4909
if (status == ISC_R_SUCCESS)
4910
state -> curUPD = FTM_UPDREQALL;
4912
#if defined (DEBUG_FAILOVER_MESSAGES)
4913
if (status != ISC_R_SUCCESS)
4914
failover_print (FMA, " (failed)");
4915
failover_print (FMA, ")");
4917
log_debug ("%s", obuf);
4920
log_info ("Sent update request all message to %s", state -> name);
4924
isc_result_t dhcp_failover_send_update_done (dhcp_failover_state_t *state)
4926
dhcp_failover_link_t *link;
4927
isc_result_t status;
4928
#if defined (DEBUG_FAILOVER_MESSAGES)
4930
unsigned obufix = 0;
4932
# define FMA obuf, &obufix, sizeof obuf
4933
failover_print (FMA, "(upddone");
4935
# define FMA (char *)0, (unsigned *)0, 0
4938
if (!state -> link_to_peer ||
4939
state -> link_to_peer -> type != dhcp_type_failover_link)
4940
return DHCP_R_INVALIDARG;
4941
link = (dhcp_failover_link_t *)state -> link_to_peer;
4943
if (!link -> outer || link -> outer -> type != omapi_type_connection)
4944
return DHCP_R_INVALIDARG;
4946
status = (dhcp_failover_put_message
4947
(link, link -> outer,
4948
FTM_UPDDONE, state->updxid,
4949
(failover_option_t *)0));
4951
#if defined (DEBUG_FAILOVER_MESSAGES)
4952
if (status != ISC_R_SUCCESS)
4953
failover_print (FMA, " (failed)");
4954
failover_print (FMA, ")");
4956
log_debug ("%s", obuf);
4960
log_info ("Sent update done message to %s", state -> name);
4962
state->updxid--; /* Paranoia, just so it mismatches. */
4964
/* There may be uncommitted leases at this point (since
4965
dhcp_failover_process_bind_ack() doesn't commit leases);
4966
commit the lease file. */
4973
* failover_lease_is_better() compares the binding update in 'msg' with
4974
* the current lease in 'lease'. If the determination is that the binding
4975
* update shouldn't be allowed to update/crush more critical binding info
4976
* on the lease, the lease is preferred. A value of true is returned if the
4977
* local lease is preferred, or false if the remote binding update is
4980
* For now this function is hopefully simplistic and trivial. It may be that
4981
* a more detailed system of preferences is required, so this is something we
4982
* should monitor as we gain experience with these dueling events.
4984
static isc_boolean_t
4985
failover_lease_is_better(dhcp_failover_state_t *state, struct lease *lease,
4986
failover_message_t *msg)
4988
binding_state_t local_state;
4991
if (lease->binding_state != lease->desired_binding_state)
4992
local_state = lease->desired_binding_state;
4994
local_state = lease->binding_state;
4996
if ((msg->options_present & FTB_CLTT) != 0)
4997
msg_cltt = msg->cltt;
5001
switch(local_state) {
5003
if (msg->binding_status == FTS_ACTIVE) {
5004
if (msg_cltt < lease->cltt)
5006
else if (msg_cltt > lease->cltt)
5008
else if (state->i_am == primary)
5012
} else if (msg->binding_status == FTS_EXPIRED) {
5023
if (msg->binding_status == FTS_ACTIVE)
5025
else if (state->i_am == primary)
5029
/* FALL THROUGH to impossible condition */
5032
log_fatal("Impossible condition at %s:%d.", MDL);
5035
log_fatal("Impossible condition at %s:%d.", MDL);
5036
/* Silence compiler warning. */
5040
isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state,
5041
failover_message_t *msg)
5043
struct lease *lt, *lease;
5045
int reason = FTR_MISC_REJECT;
5046
const char *message;
5047
int new_binding_state;
5048
int send_to_backup = 0;
5049
int required_options;
5050
isc_boolean_t chaddr_changed = ISC_FALSE;
5051
isc_boolean_t ident_changed = ISC_FALSE;
5053
/* Validate the binding update. */
5054
required_options = FTB_ASSIGNED_IP_ADDRESS | FTB_BINDING_STATUS;
5055
if ((msg->options_present & required_options) != required_options) {
5056
message = "binding update lacks required options";
5057
reason = FTR_MISSING_BINDINFO;
5061
ia.len = sizeof msg -> assigned_addr;
5062
memcpy (ia.iabuf, &msg -> assigned_addr, ia.len);
5064
lease = (struct lease *)0;
5065
lt = (struct lease *)0;
5066
if (!find_lease_by_ip_addr (&lease, ia, MDL)) {
5067
message = "unknown IP address";
5068
reason = FTR_ILLEGAL_IP_ADDR;
5073
* If this lease is covered by a different failover peering
5074
* relationship, assert an error.
5076
if ((lease->pool == NULL) || (lease->pool->failover_peer == NULL) ||
5077
(lease->pool->failover_peer != state)) {
5078
message = "IP address is covered by a different failover "
5079
"relationship state";
5080
reason = FTR_ILLEGAL_IP_ADDR;
5085
* Dueling updates: This happens when both servers send a BNDUPD
5086
* at the same time. We want the best update to win, which means
5087
* we reject if we think ours is better, or cancel if we think the
5088
* peer's is better. We only assert a problem if the lease is on
5089
* the ACK queue, not on the UPDATE queue. This means that after
5090
* accepting this server's BNDUPD, we will send our own BNDUPD
5091
* /after/ sending the BNDACK (this order was recently enforced in
5092
* queue processing).
5094
if ((lease->flags & ON_ACK_QUEUE) != 0) {
5095
if (failover_lease_is_better(state, lease, msg)) {
5096
message = "incoming update is less critical than "
5098
reason = FTR_LESS_CRIT_BIND_INFO;
5101
/* This makes it so we ignore any spurious ACKs. */
5102
dhcp_failover_ack_queue_remove(state, lease);
5106
/* Install the new info. Start by taking a copy to markup. */
5107
if (!lease_copy (<, lease, MDL)) {
5108
message = "no memory";
5112
if (msg -> options_present & FTB_CHADDR) {
5113
if (msg->binding_status == FTS_ABANDONED) {
5114
message = "BNDUPD to ABANDONED with a CHADDR";
5117
if (msg -> chaddr.count > sizeof lt -> hardware_addr.hbuf) {
5118
message = "chaddr too long";
5122
if ((lt->hardware_addr.hlen != msg->chaddr.count) ||
5123
(memcmp(lt->hardware_addr.hbuf, msg->chaddr.data,
5124
msg->chaddr.count) != 0))
5125
chaddr_changed = ISC_TRUE;
5127
lt -> hardware_addr.hlen = msg -> chaddr.count;
5128
memcpy (lt -> hardware_addr.hbuf, msg -> chaddr.data,
5129
msg -> chaddr.count);
5130
} else if (msg->binding_status == FTS_ACTIVE ||
5131
msg->binding_status == FTS_EXPIRED ||
5132
msg->binding_status == FTS_RELEASED) {
5133
message = "BNDUPD without CHADDR";
5134
reason = FTR_MISSING_BINDINFO;
5136
} else if (msg->binding_status == FTS_ABANDONED) {
5137
chaddr_changed = ISC_TRUE;
5138
lt->hardware_addr.hlen = 0;
5140
binding_scope_dereference(<->scope, MDL);
5143
/* There is no explicit message content to indicate that the client
5144
* supplied no client-identifier. So if we don't hear of a value,
5145
* we discard the last one.
5147
if (msg->options_present & FTB_CLIENT_IDENTIFIER) {
5148
if (msg->binding_status == FTS_ABANDONED) {
5149
message = "BNDUPD to ABANDONED with client-id";
5153
if ((lt->uid_len != msg->client_identifier.count) ||
5154
(lt->uid == NULL) || /* Sanity; should never happen. */
5155
(memcmp(lt->uid, msg->client_identifier.data,
5157
ident_changed = ISC_TRUE;
5159
lt->uid_len = msg->client_identifier.count;
5161
/* Allocate the lt->uid buffer if we haven't already, or
5162
* re-allocate the lt-uid buffer if we have one that is not
5163
* large enough. Otherwise, just use the extant buffer.
5165
if (!lt->uid || lt->uid == lt->uid_buf ||
5166
lt->uid_len > lt->uid_max) {
5167
if (lt->uid && lt->uid != lt->uid_buf)
5168
dfree(lt->uid, MDL);
5170
if (lt->uid_len > sizeof(lt->uid_buf)) {
5171
lt->uid_max = lt->uid_len;
5172
lt->uid = dmalloc(lt->uid_len, MDL);
5174
message = "no memory";
5178
lt->uid_max = sizeof(lt->uid_buf);
5179
lt->uid = lt->uid_buf;
5183
msg -> client_identifier.data, lt -> uid_len);
5184
} else if (lt->uid && msg->binding_status != FTS_RESET &&
5185
msg->binding_status != FTS_FREE &&
5186
msg->binding_status != FTS_BACKUP) {
5187
ident_changed = ISC_TRUE;
5188
if (lt->uid != lt->uid_buf)
5189
dfree (lt->uid, MDL);
5191
lt->uid_max = lt->uid_len = 0;
5195
* A server's configuration can assign a 'binding scope';
5197
* set var = "value";
5199
* The problem with these binding scopes is that they are refreshed
5200
* when the server processes a client's DHCP packet. A local binding
5201
* scope is trash, then, when the lease has been assigned by the
5202
* partner server. There is no real way to detect this, a peer may
5203
* be updating us (as through potential conflict) with a binding we
5204
* sent them, but we can trivially detect the /problematic/ case;
5207
* primary allocates lease to client A, assigns ddns name A.
5209
* secondary enters partner down.
5210
* lease expires, and is set free.
5211
* lease is allocated to client B and given ddns name B.
5214
* The binding update in this case will be active->active, but the
5215
* client identification on the lease will have changed. The ddns
5216
* update on client A will have leaked if we just remove the binding
5219
if (msg->binding_status == FTS_ACTIVE &&
5220
(chaddr_changed || ident_changed)) {
5221
ddns_removals(lease, NULL, NULL);
5223
if (lease->scope != NULL)
5224
binding_scope_dereference(&lease->scope, MDL);
5227
/* XXX Times may need to be adjusted based on clock skew! */
5228
if (msg -> options_present & FTB_STOS) {
5229
lt -> starts = msg -> stos;
5231
if (msg -> options_present & FTB_LEASE_EXPIRY) {
5232
lt -> ends = msg -> expiry;
5234
if (msg->options_present & FTB_POTENTIAL_EXPIRY) {
5235
lt->atsfp = lt->tsfp = msg->potential_expiry;
5237
if (msg->options_present & FTB_IP_FLAGS) {
5238
if (msg->ip_flags & FTF_IP_FLAG_RESERVE) {
5239
if ((((state->i_am == primary) &&
5240
(lease->binding_state == FTS_FREE)) ||
5241
((state->i_am == secondary) &&
5242
(lease->binding_state == FTS_BACKUP))) &&
5243
!(lease->flags & RESERVED_LEASE)) {
5244
message = "Address is not reserved.";
5245
reason = FTR_IP_NOT_RESERVED;
5249
lt->flags |= RESERVED_LEASE;
5251
lt->flags &= ~RESERVED_LEASE;
5253
if (msg->ip_flags & FTF_IP_FLAG_BOOTP) {
5254
if ((((state->i_am == primary) &&
5255
(lease->binding_state == FTS_FREE)) ||
5256
((state->i_am == secondary) &&
5257
(lease->binding_state == FTS_BACKUP))) &&
5258
!(lease->flags & BOOTP_LEASE)) {
5259
message = "Address is not allocated to BOOTP.";
5262
lt->flags |= BOOTP_LEASE;
5264
lt->flags &= ~BOOTP_LEASE;
5266
if (msg->ip_flags & ~(FTF_IP_FLAG_RESERVE | FTF_IP_FLAG_BOOTP))
5267
log_info("Unknown IP-flags set in BNDUPD (0x%x).",
5269
} else /* Flags may only not appear if the values are zero. */
5270
lt->flags &= ~(RESERVED_LEASE | BOOTP_LEASE);
5272
#if defined (DEBUG_LEASE_STATE_TRANSITIONS)
5273
log_info ("processing state transition for %s: %s to %s",
5274
piaddr (lease -> ip_addr),
5275
binding_state_print (lease -> binding_state),
5276
binding_state_print (msg -> binding_status));
5279
/* If we're in normal state, make sure the state transition
5281
if (state -> me.state == normal) {
5283
(normal_binding_state_transition_check
5284
(lease, state, msg -> binding_status,
5285
msg -> potential_expiry));
5286
/* XXX if the transition the peer asked for isn't
5287
XXX allowed, maybe we should make the transition
5288
XXX into potential-conflict at this point. */
5291
(conflict_binding_state_transition_check
5292
(lease, state, msg -> binding_status,
5293
msg -> potential_expiry));
5295
if (new_binding_state != msg -> binding_status) {
5298
if (snprintf (outbuf, sizeof outbuf,
5299
"%s: invalid state transition: %s to %s",
5300
piaddr (lease -> ip_addr),
5301
binding_state_print (lease -> binding_state),
5302
binding_state_print (msg -> binding_status))
5304
log_fatal ("%s: impossible outbuf overflow",
5305
"dhcp_failover_process_bind_update");
5307
dhcp_failover_send_bind_ack (state, msg,
5312
if (new_binding_state == FTS_EXPIRED ||
5313
new_binding_state == FTS_RELEASED ||
5314
new_binding_state == FTS_RESET) {
5315
lt -> next_binding_state = FTS_FREE;
5317
/* Mac address affinity. Assign the lease to
5318
* BACKUP state if we are the primary and the
5319
* peer is more likely to reallocate this lease
5320
* to a returning client.
5322
if ((state->i_am == primary) &&
5323
!(lt->flags & (RESERVED_LEASE | BOOTP_LEASE)))
5324
send_to_backup = peer_wants_lease(lt);
5326
lt -> next_binding_state = new_binding_state;
5328
msg -> binding_status = lt -> next_binding_state;
5331
* If we accept a peer's binding update, then we can't rewind a
5332
* lease behind the peer's state.
5334
lease->rewind_binding_state = lt->next_binding_state;
5336
/* Try to install the new information. */
5337
if (!supersede_lease (lease, lt, 0, 0, 0) ||
5338
!write_lease (lease)) {
5339
message = "database update failed";
5341
dhcp_failover_send_bind_ack (state, msg, reason, message);
5344
dhcp_failover_queue_ack (state, msg);
5347
/* If it is probably wise, assign lease to backup state if the peer
5348
* is not already hoarding leases.
5350
if (send_to_backup && secondary_not_hoarding(state, lease->pool)) {
5351
lease->next_binding_state = FTS_BACKUP;
5352
lease->tstp = cur_time;
5353
lease->starts = cur_time;
5355
if (!supersede_lease(lease, NULL, 0, 1, 0) ||
5356
!write_lease(lease))
5357
log_error("can't commit lease %s for mac addr "
5358
"affinity", piaddr(lease->ip_addr));
5360
dhcp_failover_send_updates(state);
5365
lease_dereference (<, MDL);
5367
lease_dereference (&lease, MDL);
5369
return ISC_R_SUCCESS;
5372
/* This was hairy enough I didn't want to do it all in an if statement.
5374
* Returns: Truth is the secondary is allowed to get more leases based upon
5375
* MAC address affinity. False otherwise.
5378
secondary_not_hoarding(dhcp_failover_state_t *state, struct pool *p) {
5383
total = p->free_leases + p->backup_leases;
5385
/* How many leases is one side or the other allowed to "hold"? */
5386
hold = ((total * state->max_lease_ownership) + 50) / 100;
5388
/* If we were to send leases (or if the secondary were to send us
5389
* leases in the negative direction), how many would that be?
5391
lts = (p->free_leases - p->backup_leases) / 2;
5393
/* The peer is not hoarding leases if we would send them more leases
5394
* (or they would take fewer leases) than the maximum they are allowed
5395
* to hold (the negative hold).
5397
return(lts > -hold);
5400
isc_result_t dhcp_failover_process_bind_ack (dhcp_failover_state_t *state,
5401
failover_message_t *msg)
5403
struct lease *lt = (struct lease *)0;
5404
struct lease *lease = (struct lease *)0;
5406
const char *message = "no memory";
5407
u_int32_t pot_expire;
5408
int send_to_backup = ISC_FALSE;
5411
ia.len = sizeof msg -> assigned_addr;
5412
memcpy (ia.iabuf, &msg -> assigned_addr, ia.len);
5414
if (!find_lease_by_ip_addr (&lease, ia, MDL)) {
5415
message = "no such lease";
5419
/* XXX check for conflicts. */
5420
if (msg -> options_present & FTB_REJECT_REASON) {
5421
log_error ("bind update on %s from %s rejected: %.*s",
5422
piaddr (ia), state -> name,
5423
(int)((msg -> options_present & FTB_MESSAGE)
5424
? msg -> message.count
5425
: strlen (dhcp_failover_reject_reason_print
5426
(msg -> reject_reason))),
5427
(msg -> options_present & FTB_MESSAGE)
5428
? (const char *)(msg -> message.data)
5429
: (dhcp_failover_reject_reason_print
5430
(msg -> reject_reason)));
5434
/* Silently discard acks for leases we did not update (or multiple
5437
if (!lease->last_xid)
5440
if (lease->last_xid != msg->xid) {
5441
message = "xid mismatch";
5445
/* XXX Times may need to be adjusted based on clock skew! */
5446
if (msg->options_present & FTO_POTENTIAL_EXPIRY)
5447
pot_expire = msg->potential_expiry;
5449
pot_expire = lease->tstp;
5451
/* If the lease was desired to enter a binding state, we set
5452
* such a value upon transmitting a bndupd. We do not clear it
5453
* if we receive a bndupd in the meantime (or change the state
5454
* of the lease again ourselves), but we do set binding_state
5455
* if we get a bndupd.
5457
* So desired_binding_state tells us what we sent a bndupd for,
5458
* and binding_state tells us what we have since determined in
5461
if (lease->desired_binding_state == FTS_EXPIRED ||
5462
lease->desired_binding_state == FTS_RESET ||
5463
lease->desired_binding_state == FTS_RELEASED)
5465
/* It is not a problem to do this directly as we call
5466
* supersede_lease immediately after: the lease is requeued
5467
* even if its sort order (tsfp) has changed.
5469
lease->atsfp = lease->tsfp = pot_expire;
5470
if ((state->i_am == secondary) &&
5471
(lease->flags & RESERVED_LEASE))
5472
lease->next_binding_state = FTS_BACKUP;
5474
lease->next_binding_state = FTS_FREE;
5476
/* Clear this condition for the next go-round. */
5477
lease->desired_binding_state = lease->next_binding_state;
5479
/* The peer will have made this state change, so set rewind. */
5480
lease->rewind_binding_state = lease->next_binding_state;
5482
supersede_lease(lease, (struct lease *)0, 0, 0, 0);
5485
/* Lease has returned to FREE state from the
5486
* transitional states. If the lease 'belongs'
5487
* to a client that would be served by the
5488
* peer, process a binding update now to send
5489
* the lease to backup state. But not if we
5490
* think we already have.
5492
if (state->i_am == primary &&
5493
!(lease->flags & (RESERVED_LEASE | BOOTP_LEASE)) &&
5494
peer_wants_lease(lease))
5495
send_to_backup = ISC_TRUE;
5497
if (!send_to_backup && state->me.state == normal)
5500
/* XXX It could be a problem to do this directly if the lease
5501
* XXX is sorted by tsfp.
5503
lease->atsfp = lease->tsfp = pot_expire;
5504
if (lease->desired_binding_state != lease->binding_state) {
5505
lease->next_binding_state =
5506
lease->desired_binding_state;
5507
supersede_lease(lease,
5508
(struct lease *)0, 0, 0, 0);
5511
/* Commit the lease only after a two-second timeout,
5512
so that if we get a bunch of acks in quick
5513
succession (e.g., when stealing leases from the
5514
secondary), we do not do an immediate commit for
5516
tv.tv_sec = cur_time + 2;
5518
add_timeout(&tv, commit_leases_timeout, (void *)0, 0, 0);
5522
dhcp_failover_ack_queue_remove (state, lease);
5524
/* If we are supposed to send an update done after we send
5525
this lease, go ahead and send it. */
5526
if (state -> send_update_done == lease) {
5527
lease_dereference (&state -> send_update_done, MDL);
5528
dhcp_failover_send_update_done (state);
5531
/* Now that the lease is off the ack queue, consider putting it
5532
* back on the update queue for mac address affinity.
5534
if (send_to_backup && secondary_not_hoarding(state, lease->pool)) {
5535
lease->next_binding_state = FTS_BACKUP;
5536
lease->tstp = lease->starts = cur_time;
5538
if (!supersede_lease(lease, NULL, 0, 1, 0) ||
5539
!write_lease(lease))
5540
log_error("can't commit lease %s for "
5541
"client affinity", piaddr(lease->ip_addr));
5543
if (state->me.state == normal)
5547
/* If there are updates pending, we've created space to send at
5549
dhcp_failover_send_updates (state);
5552
lease_dereference (&lease, MDL);
5554
lease_dereference (<, MDL);
5556
return ISC_R_SUCCESS;
5559
log_info ("bind update on %s got ack from %s: %s.",
5560
piaddr (ia), state -> name, message);
5564
isc_result_t dhcp_failover_generate_update_queue (dhcp_failover_state_t *state,
5567
struct shared_network *s;
5571
#define FREE_LEASES 0
5572
#define ACTIVE_LEASES 1
5573
#define EXPIRED_LEASES 2
5574
#define ABANDONED_LEASES 3
5575
#define BACKUP_LEASES 4
5576
#define RESERVED_LEASES 5
5577
struct lease **lptr[RESERVED_LEASES+1];
5579
/* Loop through each pool in each shared network and call the
5580
expiry routine on the pool. */
5581
for (s = shared_networks; s; s = s -> next) {
5582
for (p = s -> pools; p; p = p -> next) {
5583
if (p->failover_peer != state)
5586
lptr[FREE_LEASES] = &p->free;
5587
lptr[ACTIVE_LEASES] = &p->active;
5588
lptr[EXPIRED_LEASES] = &p->expired;
5589
lptr[ABANDONED_LEASES] = &p->abandoned;
5590
lptr[BACKUP_LEASES] = &p->backup;
5591
lptr[RESERVED_LEASES] = &p->reserved;
5593
for (i = FREE_LEASES; i <= RESERVED_LEASES; i++) {
5594
for (l = *(lptr [i]); l; l = l -> next) {
5595
if ((l->flags & ON_QUEUE) == 0 &&
5597
(l->tstp > l->atsfp) ||
5598
(i == EXPIRED_LEASES))) {
5599
l -> desired_binding_state = l -> binding_state;
5600
dhcp_failover_queue_update (l, 0);
5606
return ISC_R_SUCCESS;
5610
dhcp_failover_process_update_request (dhcp_failover_state_t *state,
5611
failover_message_t *msg)
5613
if (state->send_update_done) {
5614
log_info("Received update request while old update still "
5615
"flying! Silently discarding old request.");
5616
lease_dereference(&state->send_update_done, MDL);
5619
/* Generate a fresh update queue. */
5620
dhcp_failover_generate_update_queue (state, 0);
5622
state->updxid = msg->xid;
5624
/* If there's anything on the update queue (there shouldn't be
5625
anything on the ack queue), trigger an update done message
5626
when we get an ack for that lease. */
5627
if (state -> update_queue_tail) {
5628
lease_reference (&state -> send_update_done,
5629
state -> update_queue_tail, MDL);
5630
dhcp_failover_send_updates (state);
5631
log_info ("Update request from %s: sending update",
5634
/* Otherwise, there are no updates to send, so we can
5635
just send an UPDDONE message immediately. */
5636
dhcp_failover_send_update_done (state);
5637
log_info ("Update request from %s: nothing pending",
5641
return ISC_R_SUCCESS;
5645
dhcp_failover_process_update_request_all (dhcp_failover_state_t *state,
5646
failover_message_t *msg)
5648
if (state->send_update_done) {
5649
log_info("Received update request while old update still "
5650
"flying! Silently discarding old request.");
5651
lease_dereference(&state->send_update_done, MDL);
5654
/* Generate a fresh update queue that includes every lease. */
5655
dhcp_failover_generate_update_queue (state, 1);
5657
state->updxid = msg->xid;
5659
if (state -> update_queue_tail) {
5660
lease_reference (&state -> send_update_done,
5661
state -> update_queue_tail, MDL);
5662
dhcp_failover_send_updates (state);
5663
log_info ("Update request all from %s: sending update",
5666
/* This should really never happen, but it could happen
5667
on a server that currently has no leases configured. */
5668
dhcp_failover_send_update_done (state);
5669
log_info ("Update request all from %s: nothing pending",
5673
return ISC_R_SUCCESS;
5677
dhcp_failover_process_update_done (dhcp_failover_state_t *state,
5678
failover_message_t *msg)
5682
log_info ("failover peer %s: peer update completed.",
5685
state -> curUPD = 0;
5687
switch (state -> me.state) {
5691
case communications_interrupted:
5692
case resolution_interrupted:
5698
break; /* shouldn't happen. */
5700
/* We got the UPDDONE, so we can go into normal state! */
5701
case potential_conflict:
5702
if (state->partner.state == conflict_done) {
5703
if (state->i_am == secondary) {
5704
dhcp_failover_set_state (state, normal);
5706
log_error("Secondary is in conflict_done "
5707
"state after conflict resolution, "
5708
"this is illegal.");
5709
dhcp_failover_set_state (state, shut_down);
5712
if (state->i_am == primary)
5713
dhcp_failover_set_state (state, conflict_done);
5715
log_error("Spurious update-done message.");
5721
log_error("Spurious update-done message.");
5725
/* Wait for MCLT to expire before moving to recover_done,
5726
except that if both peers come up in recover, there is
5727
no point in waiting for MCLT to expire - this probably
5728
indicates the initial startup of a newly-configured
5730
if (state -> me.stos + state -> mclt > cur_time &&
5731
state -> partner.state != recover &&
5732
state -> partner.state != recover_done) {
5733
dhcp_failover_set_state (state, recover_wait);
5734
#if defined (DEBUG_FAILOVER_TIMING)
5735
log_info ("add_timeout +%d %s",
5737
state -> me.stos + state -> mclt),
5738
"dhcp_failover_recover_done");
5740
tv . tv_sec = (int)(state -> me.stos + state -> mclt);
5743
dhcp_failover_recover_done,
5745
(tvref_t)omapi_object_reference,
5747
omapi_object_dereference);
5749
dhcp_failover_recover_done (state);
5752
return ISC_R_SUCCESS;
5755
void dhcp_failover_recover_done (void *sp)
5757
dhcp_failover_state_t *state = sp;
5759
#if defined (DEBUG_FAILOVER_TIMING)
5760
log_info ("dhcp_failover_recover_done");
5763
dhcp_failover_set_state (state, recover_done);
#if defined (DEBUG_FAILOVER_MESSAGES)
/* Print hunks of failover messages, doing line breaks as appropriate.
   Note that this assumes syslog is being used, rather than, e.g., the
   Windows NT logging facility, where just dumping the whole message in
   one hunk would be more appropriate.

   'obuf' is the line buffer, '*obufix' the number of characters
   currently in it and 'obufmax' its size.  's' is appended to the
   buffer; if it does not fit, the buffered text is logged first, and
   if it would not even fit into an empty buffer it is logged directly
   and the buffer is left empty. */

void failover_print (char *obuf,
		     unsigned *obufix, unsigned obufmax, const char *s)
{
	int len = strlen (s);

	while (len + *obufix + 1 >= obufmax) {
		if (!*obufix) {
			/* Nothing is buffered, so 's' can never fit.
			   Log it as it is; do not log the buffer, which
			   holds either nothing yet or text that has
			   already been logged. */
			log_debug ("%s", s);
			return;
		}
		/* Flush what we have and try again with an empty buffer. */
		log_debug ("%s", obuf);
		*obufix = 0;
	}
	strcpy (&obuf [*obufix], s);
	*obufix += len;
}
#endif /* defined (DEBUG_FAILOVER_MESSAGES) */
/* Taken from draft-ietf-dhc-loadb-01.txt: */
/* A "mixing table" of 256 distinct values, in pseudo-random order. */
unsigned char loadb_mx_tbl[256] = {
    251, 175, 119, 215,  81,  14,  79, 191, 103,  49,
    181, 143, 186, 157,   0, 232,  31,  32,  55,  60,
    152,  58,  17, 237, 174,  70, 160, 144, 220,  90,
     57, 223,  59,   3,  18, 140, 111, 166, 203, 196,
    134, 243, 124,  95, 222, 179, 197,  65, 180,  48,
     36,  15, 107,  46, 233, 130, 165,  30, 123, 161,
    209,  23,  97,  16,  40,  91, 219,  61, 100,  10,
    210, 109, 250, 127,  22, 138,  29, 108, 244,  67,
    207,   9, 178, 204,  74,  98, 126, 249, 167, 116,
     34,  77, 193, 200, 121,   5,  20, 113,  71,  35,
    128,  13, 182,  94,  25, 226, 227, 199,  75,  27,
     41, 245, 230, 224,  43, 225, 177,  26, 155, 150,
    212, 142, 218, 115, 241,  73,  88, 105,  39, 114,
     62, 255, 192, 201, 145, 214, 168, 158, 221, 148,
    154, 122,  12,  84,  82, 163,  44, 139, 228, 236,
    205, 242, 217,  11, 187, 146, 159,  64,  86, 239,
    195,  42, 106, 198, 118, 112, 184, 172,  87,   2,
    173, 117, 176, 229, 247, 253, 137, 185,  99, 164,
    102, 147,  45,  66, 231,  52, 141, 211, 194, 206,
    246, 238,  56, 110,  78, 248,  63, 240, 189,  93,
     92,  51,  53, 183,  19, 171,  72,  50,  33, 104,
    101,  69,   8, 252,  83, 120,  76, 135,  85,  54,
    202, 125, 188, 213,  96, 235, 136, 208, 162, 129,
    190, 132, 156,  38,  47,   1,   7, 254,  24,   4,
    216, 131,  89,  21,  28, 133,  37, 153, 149,  80,
    170,  68,   6, 169, 234, 151 };

static unsigned char loadb_p_hash (const unsigned char *, unsigned);

/* Hash the 'len' bytes at 'key' down to a single byte.  The hash starts
   out as the low eight bits of 'len'; the key bytes are then folded in
   from the last byte to the first, each step replacing the hash with the
   mixing table entry at (hash XOR byte).  A zero-length key hashes to 0
   and 'key' is not touched. */
static unsigned char loadb_p_hash (const unsigned char *key, unsigned len)
{
	unsigned char hash = (unsigned char)len;
	unsigned i;	/* unsigned like 'len', so no narrowing to int */

	for (i = len; i > 0; )
		hash = loadb_mx_tbl [hash ^ key [--i]];
	return hash;
}
5831
int load_balance_mine (struct packet *packet, dhcp_failover_state_t *state)
5833
struct option_cache *oc;
5834
struct data_string ds;
5835
unsigned char hbaix;
5838
if (state -> load_balance_max_secs < ntohs (packet -> raw -> secs)) {
5842
/* If we don't have a hash bucket array, we can't tell if this
5843
one's ours, so we assume it's not. */
5847
oc = lookup_option (&dhcp_universe, packet -> options,
5848
DHO_DHCP_CLIENT_IDENTIFIER);
5849
memset (&ds, 0, sizeof ds);
5851
evaluate_option_cache (&ds, packet, (struct lease *)0,
5852
(struct client_state *)0,
5853
packet -> options, (struct option_state *)0,
5854
&global_scope, oc, MDL)) {
5855
hbaix = loadb_p_hash (ds.data, ds.len);
5857
data_string_forget(&ds, MDL);
5859
hbaix = loadb_p_hash (packet -> raw -> chaddr,
5860
packet -> raw -> hlen);
5863
hm = state->hba[(hbaix >> 3) & 0x1F] & (1 << (hbaix & 0x07));
5865
if (state -> i_am == primary)
5871
/* The inverse of load_balance_mine ("load balance theirs"). We can't
5872
* use the regular load_balance_mine() and invert it because of the case
5873
* where there might not be an HBA, and we want to indicate false here
5874
* in this case only.
5877
peer_wants_lease(struct lease *lp)
5879
dhcp_failover_state_t *state;
5880
unsigned char hbaix;
5886
state = lp->pool->failover_peer;
5888
if (!state || !state->hba)
5892
hbaix = loadb_p_hash(lp->uid, lp->uid_len);
5893
else if (lp->hardware_addr.hlen > 1)
5894
/* Skip the first byte, which is the hardware type, and is
5895
* not included during actual load balancing checks above
5896
* since it is separate from the packet header chaddr field.
5897
* The remainder of the hardware address should be identical
5898
* to the chaddr contents.
5900
hbaix = loadb_p_hash(lp->hardware_addr.hbuf + 1,
5901
lp->hardware_addr.hlen - 1);
5902
else /* impossible to categorize into LBA */
5905
hm = state->hba[(hbaix >> 3) & 0x1F] & (1 << (hbaix & 0x07));
5907
if (state->i_am == primary)
5913
/* This deals with what to do with bind updates when
5914
we're in the normal state
5916
Note that tsfp had better be set from the latest bind update
5917
_before_ this function is called! */
5920
normal_binding_state_transition_check (struct lease *lease,
5921
dhcp_failover_state_t *state,
5922
binding_state_t binding_state,
5925
binding_state_t new_state;
5927
/* If there is no transition, it's no problem. */
5928
if (binding_state == lease -> binding_state)
5929
return binding_state;
5931
switch (lease -> binding_state) {
5934
switch (binding_state) {
5941
/* If the lease was free, and our peer is primary,
5942
then it can make it active, or abandoned, or
5943
backup. Abandoned is treated like free in
5945
if (state -> i_am == secondary)
5946
return binding_state;
5948
/* Otherwise, it can't legitimately do any sort of
5949
state transition. Because the lease was free,
5950
and the error has already been made, we allow the
5951
peer to change its state anyway, but log a warning
5952
message in hopes that the error will be fixed. */
5953
case FTS_FREE: /* for compiler */
5954
new_state = binding_state;
5958
log_fatal ("Impossible case at %s:%d.", MDL);
5962
/* The secondary can't change the state of an active
5964
if (state -> i_am == primary) {
5965
/* Except that the client may send the DHCPRELEASE
5966
to the secondary, and we have to accept that. */
5967
if (binding_state == FTS_RELEASED)
5968
return binding_state;
5969
new_state = lease -> binding_state;
5973
/* So this is only for transitions made by the primary: */
5974
switch (binding_state) {
5977
/* Can't set a lease to free or backup until the
5978
peer agrees that it's expired. */
5979
if (tsfp > cur_time) {
5980
new_state = lease -> binding_state;
5983
return binding_state;
5986
/* XXX 65 should be the clock skew between the peers
5987
XXX plus a fudge factor. This code will result
5988
XXX in problems if MCLT is really short or the
5989
XXX max-lease-time is really short (less than the
5990
XXX fudge factor. */
5991
if (lease -> ends - 65 > cur_time) {
5992
new_state = lease -> binding_state;
6000
return binding_state;
6003
log_fatal ("Impossible case at %s:%d.", MDL);
6008
switch (binding_state) {
6011
/* Can't set a lease to free or backup until the
6012
peer agrees that it's expired. */
6013
if (tsfp > cur_time) {
6014
new_state = lease -> binding_state;
6017
return binding_state;
6024
return binding_state;
6027
log_fatal ("Impossible case at %s:%d.", MDL);
6031
switch (binding_state) {
6035
/* These are invalid state transitions - should we
6042
return binding_state;
6045
log_fatal ("Impossible case at %s:%d.", MDL);
6049
switch (binding_state) {
6052
/* Can't set a lease to free or backup until the
6053
peer agrees that it's expired. */
6054
if (tsfp > cur_time) {
6055
new_state = lease -> binding_state;
6058
return binding_state;
6065
return binding_state;
6068
log_fatal ("Impossible case at %s:%d.", MDL);
6072
switch (binding_state) {
6078
/* If the lease was in backup, and our peer
6079
is secondary, then it can make it active
6081
if (state -> i_am == primary)
6082
return binding_state;
6084
/* Either the primary or the secondary can
6085
reasonably move a lease from the backup
6086
state to the free state. */
6088
return binding_state;
6091
new_state = lease -> binding_state;
6095
log_fatal ("Impossible case at %s:%d.", MDL);
6100
log_fatal ("Impossible case at %s:%d.", MDL);
6107
/* Determine whether the state transition is okay when we're potentially
6108
in conflict with the peer. */
6110
conflict_binding_state_transition_check (struct lease *lease,
6111
dhcp_failover_state_t *state,
6112
binding_state_t binding_state,
6115
binding_state_t new_state;
6117
/* If there is no transition, it's no problem. */
6118
if (binding_state == lease -> binding_state)
6119
new_state = binding_state;
6121
switch (lease -> binding_state) {
6122
/* If we think the lease is not in use, then the
6123
state into which the partner put it is just fine,
6131
new_state = binding_state;
6134
/* If we think the lease *is* in use, then we're not
6135
going to take the partner's change if the partner
6136
thinks it's free. */
6138
switch (binding_state) {
6141
new_state = lease -> binding_state;
6145
/* If we don't agree about expiry, it's
6146
* invalid. 65 should allow for max
6147
* clock skew (60) plus some fudge.
6148
* XXX: should we refetch cur_time?
6150
if ((lease->ends - 65) > cur_time)
6151
new_state = lease->binding_state;
6153
new_state = binding_state;
6156
/* RELEASED, RESET, and ABANDONED indicate
6157
* that our partner has information about
6158
* this lease that we did not witness. Our
6164
new_state = binding_state;
6168
log_fatal ("Impossible case at %s:%d.", MDL);
6174
log_fatal ("Impossible case at %s:%d.", MDL);
6181
/* We can reallocate a lease under the following circumstances:
6183
(1) It belongs to us - it's FTS_FREE, and we're primary, or it's
6184
FTS_BACKUP, and we're secondary.
6185
(2) We're in partner_down, and the lease is not active, and we
6186
can be sure that the other server didn't make it active.
6187
We can only be sure that the server didn't make it active
6188
when we are in the partner_down state and one of the following
6189
two conditions holds:
6190
(a) in the case that the time sent from the peer is earlier than
6191
the time we entered the partner_down state, at least MCLT has
6192
gone by since we entered partner_down, or
6193
(b) in the case that the time sent from the peer is later than
6194
the time when we entered partner_down, the current time is
6195
later than the time sent from the peer by at least MCLT. */
6197
/*
 * lease_mine_to_reallocate - decide whether this server may give the
 * supplied lease to a new client.
 *
 * lease: the lease being tested.  The failover rules are applied only when
 *        lease, lease->pool and lease->pool->failover_peer are all non-null.
 *
 * The result is an int used as a truth value.  When a failover peer is
 * present the answer is derived from lease->binding_state, from peer->i_am
 * (primary or secondary), from whether peer->service_state equals
 * service_partner_down, and from how lease->tsfp, peer->me.stos and
 * peer->mclt compare with cur_time.  For leases in a transitional state
 * lease->rewind_binding_state is consulted as well.  The last return
 * statement reports whether the binding state is FTS_FREE or FTS_BACKUP.
 *
 * log_fatal() is called on the branch annotated "All lease states appear
 * above".
 *
 * NOTE(review): the case labels and the constant return statements are not
 * visible in this copy of the function; confirm which binding states select
 * each branch, and that the final return is the path taken when the lease
 * has no failover peer.
 */
int lease_mine_to_reallocate (struct lease *lease)
6199
dhcp_failover_state_t *peer;
6201
if (lease && lease->pool &&
6202
(peer = lease->pool->failover_peer)) {
6204
* In addition to the normal rules governing wether a server
6205
* is allowed to operate changes on a lease, the server is
6206
* allowed to operate on a lease from the standpoint of the
6207
* most conservative guess of the peer's state for this lease.
6209
switch (lease->binding_state) {
6211
/* ACTIVE leases may not be reallocated. */
6216
/* FREE leases may only be allocated by the primary,
6217
* unless the secondary is acting in partner_down
6218
* state and stos+mclt or tsfp+mclt has expired,
6219
* whichever is greater.
6221
* ABANDONED are treated the same as FREE for all
6222
* purposes here. Note that servers will only try
6223
* for ABANDONED leases as a last resort anyway.
6225
if (peer -> i_am == primary)
6228
return(peer->service_state == service_partner_down &&
6229
((lease->tsfp < peer->me.stos) ?
6230
(peer->me.stos + peer->mclt < cur_time) :
6231
(lease->tsfp + peer->mclt < cur_time)));
6236
* These leases are generally untouchable until the
6237
* peer acknowledges their state change. However, as
6238
* this is impossible if the peer is offline, the
6239
* failover protocol permits an 'optimization' to
6240
* rewind the lease to a previous state that the server
6241
* is allowed to operate on, if that was the state that
6242
* was last acknowledged by the peer.
6244
* So if a lease was free, was allocated by this
6245
* server, and expired without ever being transmitted
6246
* to the peer, it can be returned to free and given
6247
* to any new client legally.
6249
if ((peer->i_am == primary) &&
6250
(lease->rewind_binding_state == FTS_FREE))
6252
if ((peer->i_am == secondary) &&
6253
(lease->rewind_binding_state == FTS_BACKUP))
6256
/* FALL THROUGH (released, expired, reset) */
6259
* Released, expired, and reset leases go onto the
6260
* 'expired' queue all together. Upon entry into
6261
* partner-down state, this queue of leases has their
6262
* tsfp values modified to equal stos+mclt, the point
6263
* at which the server is allowed to remove them from
6264
* these transitional states.
6266
* Note that although tsfp has been possibly extended
6267
* past the actual tsfp we received from the peer, we
6268
* don't have to take any special action. Since tsfp
6269
* will be equal to the current time when the lease
6270
* transitions to free, tsfp will not be used to grant
6271
* lease-times longer than the MCLT to clients, which
6272
* is the only danger for this sort of modification.
6274
return((peer->service_state == service_partner_down) &&
6275
(lease->tsfp < cur_time));
6278
/* Only the secondary may allocate BACKUP leases,
6279
* unless in partner_down state in which case at
6280
* least TSFP+MCLT or STOS+MCLT must have expired,
6281
* whichever is greater.
6283
if (peer->i_am == secondary)
6286
return((peer->service_state == service_partner_down) &&
6287
((lease->tsfp < peer->me.stos) ?
6288
(peer->me.stos + peer->mclt < cur_time) :
6289
(lease->tsfp + peer->mclt < cur_time)));
6292
/* All lease states appear above. */
6293
log_fatal("Impossible case at %s:%d.", MDL);
6299
return(lease->binding_state == FTS_FREE ||
6300
lease->binding_state == FTS_BACKUP);
6305
/*
 * failover_message_reference - take a reference to a failover message.
 *
 * mp:         address of the message pointer supplied by the caller.
 * m:          the failover message supplied by the caller.
 * file, line: caller's source position; no statement visible here uses them.
 *
 * Returns ISC_R_SUCCESS.
 *
 * NOTE(review): the statements between the signature and the return are not
 * visible in this copy.  Presumably they store m in *mp and increase the
 * message's refcnt (the matching dereference routine tests m -> refcnt) -
 * confirm against the complete source.
 */
static isc_result_t failover_message_reference (failover_message_t **mp,
6306
failover_message_t *m,
6307
const char *file, int line)
6311
return ISC_R_SUCCESS;
6314
/*
 * failover_message_dereference - release the failover message reference
 * held in *mp.
 *
 * mp:         address of the message pointer being released.
 * file, line: caller's source position, forwarded to dfree().
 *
 * After testing m -> refcnt == 0 the routine calls itself on m -> next,
 * hands the message's data buffers (chaddr, client_identifier, hba, message,
 * reply_options, request_options, vendor_class, vendor_options, ddns) to
 * dfree(), and then frees the object that *mp points at.
 *
 * Returns ISC_R_SUCCESS.
 *
 * NOTE(review): the statements that load m from *mp, adjust refcnt, guard
 * the recursive call and the hba/ddns frees, and close the if block are not
 * visible in this copy; confirm them against the complete source.
 */
static isc_result_t failover_message_dereference (failover_message_t **mp,
6315
const char *file, int line)
6317
failover_message_t *m;
6320
if (m -> refcnt == 0) {
6322
/* Release the reference this message holds on m -> next. */
failover_message_dereference (&m -> next,
6324
/* Free each data buffer the message carries. */
if (m -> chaddr.data)
6325
dfree (m -> chaddr.data, file, line);
6326
if (m -> client_identifier.data)
6327
dfree (m -> client_identifier.data, file, line);
6329
dfree (m -> hba.data, file, line);
6330
if (m -> message.data)
6331
dfree (m -> message.data, file, line);
6332
if (m -> reply_options.data)
6333
dfree (m -> reply_options.data, file, line);
6334
if (m -> request_options.data)
6335
dfree (m -> request_options.data, file, line);
6336
if (m -> vendor_class.data)
6337
dfree (m -> vendor_class.data, file, line);
6338
if (m -> vendor_options.data)
6339
dfree (m -> vendor_options.data, file, line);
6341
dfree (m -> ddns.data, file, line);
6342
/* Free the object *mp points at. */
dfree (*mp, file, line);
6345
return ISC_R_SUCCESS;
6348
/*
 * Invoke OMAPI_OBJECT_ALLOC once for each failover object kind (state,
 * listener and link), pairing a name prefix with its C type and its
 * dhcp_type_* identifier.
 *
 * NOTE(review): the macro is defined outside this part of the file;
 * presumably it generates the allocate/reference/dereference helpers for
 * each object type - confirm against the omapip headers.
 */
OMAPI_OBJECT_ALLOC (dhcp_failover_state, dhcp_failover_state_t,
6349
dhcp_type_failover_state)
6350
OMAPI_OBJECT_ALLOC (dhcp_failover_listener, dhcp_failover_listener_t,
6351
dhcp_type_failover_listener)
6352
OMAPI_OBJECT_ALLOC (dhcp_failover_link, dhcp_failover_link_t,
6353
dhcp_type_failover_link)
6354
#endif /* defined (FAILOVER_PROTOCOL) */
6356
const char *binding_state_print (enum failover_state state)