35
36
#include <types/global.h>
39
#include <types/ssl_sock.h>
40
#include <proto/ssl_sock.h>
41
#endif /* USE_OPENSSL */
37
43
#include <proto/backend.h>
38
44
#include <proto/checks.h>
39
#include <proto/buffers.h>
45
#include <proto/dumpstats.h>
40
46
#include <proto/fd.h>
41
47
#include <proto/log.h>
42
48
#include <proto/queue.h>
43
49
#include <proto/port_range.h>
44
50
#include <proto/proto_http.h>
45
51
#include <proto/proto_tcp.h>
52
#include <proto/protocol.h>
46
53
#include <proto/proxy.h>
54
#include <proto/raw_sock.h>
47
55
#include <proto/server.h>
56
#include <proto/session.h>
57
#include <proto/stream_interface.h>
48
58
#include <proto/task.h>
50
60
static int httpchk_expect(struct server *s, int done);
61
static int tcpcheck_get_step_id(struct server *);
62
static void tcpcheck_main(struct connection *);
52
const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
53
[HCHK_STATUS_UNKNOWN] = { SRV_CHK_UNKNOWN, "UNK", "Unknown" },
54
[HCHK_STATUS_INI] = { SRV_CHK_UNKNOWN, "INI", "Initializing" },
64
static const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
65
[HCHK_STATUS_UNKNOWN] = { CHK_RES_UNKNOWN, "UNK", "Unknown" },
66
[HCHK_STATUS_INI] = { CHK_RES_UNKNOWN, "INI", "Initializing" },
55
67
[HCHK_STATUS_START] = { /* SPECIAL STATUS*/ },
57
[HCHK_STATUS_HANA] = { SRV_CHK_ERROR, "HANA", "Health analyze" },
59
[HCHK_STATUS_SOCKERR] = { SRV_CHK_ERROR, "SOCKERR", "Socket error" },
61
[HCHK_STATUS_L4OK] = { SRV_CHK_RUNNING, "L4OK", "Layer4 check passed" },
62
[HCHK_STATUS_L4TOUT] = { SRV_CHK_ERROR, "L4TOUT", "Layer4 timeout" },
63
[HCHK_STATUS_L4CON] = { SRV_CHK_ERROR, "L4CON", "Layer4 connection problem" },
65
[HCHK_STATUS_L6OK] = { SRV_CHK_RUNNING, "L6OK", "Layer6 check passed" },
66
[HCHK_STATUS_L6TOUT] = { SRV_CHK_ERROR, "L6TOUT", "Layer6 timeout" },
67
[HCHK_STATUS_L6RSP] = { SRV_CHK_ERROR, "L6RSP", "Layer6 invalid response" },
69
[HCHK_STATUS_L7TOUT] = { SRV_CHK_ERROR, "L7TOUT", "Layer7 timeout" },
70
[HCHK_STATUS_L7RSP] = { SRV_CHK_ERROR, "L7RSP", "Layer7 invalid response" },
69
/* Below we have finished checks */
70
[HCHK_STATUS_CHECKED] = { CHK_RES_NEUTRAL, "CHECKED", "No status change" },
71
[HCHK_STATUS_HANA] = { CHK_RES_FAILED, "HANA", "Health analyze" },
73
[HCHK_STATUS_SOCKERR] = { CHK_RES_FAILED, "SOCKERR", "Socket error" },
75
[HCHK_STATUS_L4OK] = { CHK_RES_PASSED, "L4OK", "Layer4 check passed" },
76
[HCHK_STATUS_L4TOUT] = { CHK_RES_FAILED, "L4TOUT", "Layer4 timeout" },
77
[HCHK_STATUS_L4CON] = { CHK_RES_FAILED, "L4CON", "Layer4 connection problem" },
79
[HCHK_STATUS_L6OK] = { CHK_RES_PASSED, "L6OK", "Layer6 check passed" },
80
[HCHK_STATUS_L6TOUT] = { CHK_RES_FAILED, "L6TOUT", "Layer6 timeout" },
81
[HCHK_STATUS_L6RSP] = { CHK_RES_FAILED, "L6RSP", "Layer6 invalid response" },
83
[HCHK_STATUS_L7TOUT] = { CHK_RES_FAILED, "L7TOUT", "Layer7 timeout" },
84
[HCHK_STATUS_L7RSP] = { CHK_RES_FAILED, "L7RSP", "Layer7 invalid response" },
72
86
[HCHK_STATUS_L57DATA] = { /* DUMMY STATUS */ },
74
[HCHK_STATUS_L7OKD] = { SRV_CHK_RUNNING, "L7OK", "Layer7 check passed" },
75
[HCHK_STATUS_L7OKCD] = { SRV_CHK_RUNNING | SRV_CHK_DISABLE, "L7OKC", "Layer7 check conditionally passed" },
76
[HCHK_STATUS_L7STS] = { SRV_CHK_ERROR, "L7STS", "Layer7 wrong status" },
88
[HCHK_STATUS_L7OKD] = { CHK_RES_PASSED, "L7OK", "Layer7 check passed" },
89
[HCHK_STATUS_L7OKCD] = { CHK_RES_CONDPASS, "L7OKC", "Layer7 check conditionally passed" },
90
[HCHK_STATUS_L7STS] = { CHK_RES_FAILED, "L7STS", "Layer7 wrong status" },
79
const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = { /* 0: ignore, 1: error, 2: OK */
93
static const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = { /* 0: ignore, 1: error, 2: OK */
80
94
[HANA_STATUS_UNKNOWN] = { "Unknown", { 0, 0 }},
82
96
[HANA_STATUS_L4_OK] = { "L4 successful connection", { 2, 0 }},
143
157
return analyze_statuses[HANA_STATUS_UNKNOWN].desc;
146
#define SSP_O_HCHK 0x0002
148
static void server_status_printf(struct chunk *msg, struct server *s, unsigned options, int xferred) {
151
chunk_printf(msg, " via %s/%s",
152
s->tracked->proxy->id, s->tracked->id);
154
if (options & SSP_O_HCHK) {
155
chunk_printf(msg, ", reason: %s", get_check_status_description(s->check_status));
157
if (s->check_status >= HCHK_STATUS_L57DATA)
158
chunk_printf(msg, ", code: %d", s->check_code);
160
if (*s->check_desc) {
163
chunk_printf(msg, ", info: \"");
165
chunk_initlen(&src, s->check_desc, 0, strlen(s->check_desc));
166
chunk_asciiencode(msg, &src, '"');
168
chunk_printf(msg, "\"");
171
if (s->check_duration >= 0)
172
chunk_printf(msg, ", check duration: %ldms", s->check_duration);
176
if (!(s->state & SRV_RUNNING))
177
chunk_printf(msg, ". %d active and %d backup servers left.%s"
178
" %d sessions active, %d requeued, %d remaining in queue",
179
s->proxy->srv_act, s->proxy->srv_bck,
180
(s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
181
s->cur_sess, xferred, s->nbpend);
183
chunk_printf(msg, ". %d active and %d backup servers online.%s"
184
" %d sessions requeued, %d total in queue",
185
s->proxy->srv_act, s->proxy->srv_bck,
186
(s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
160
/* Builds a string containing some information about the health check's result.
161
* The output string is allocated from the trash chunks. If the check is NULL,
162
* NULL is returned. This is designed to be used when emitting logs about health
165
static const char *check_reason_string(struct check *check)
172
msg = get_trash_chunk();
173
chunk_printf(msg, "reason: %s", get_check_status_description(check->status));
175
if (check->status >= HCHK_STATUS_L57DATA)
176
chunk_appendf(msg, ", code: %d", check->code);
181
chunk_appendf(msg, ", info: \"");
183
chunk_initlen(&src, check->desc, 0, strlen(check->desc));
184
chunk_asciiencode(msg, &src, '"');
186
chunk_appendf(msg, "\"");
189
if (check->duration >= 0)
190
chunk_appendf(msg, ", check duration: %ldms", check->duration);
192
* Set s->check_status, update s->check_duration and fill s->result with
193
* an adequate SRV_CHK_* value.
196
* Set check->status, update check->duration and fill check->result with
197
* an adequate CHK_RES_* value. The new check->health is computed based
195
200
* Show information in logs about failed health check if server is UP
196
201
* or succeeded health checks if server is DOWN.
198
static void set_server_check_status(struct server *s, short status, char *desc) {
203
static void set_server_check_status(struct check *check, short status, const char *desc)
205
struct server *s = check->server;
206
short prev_status = check->status;
202
209
if (status == HCHK_STATUS_START) {
203
s->result = SRV_CHK_UNKNOWN; /* no result yet */
204
s->check_desc[0] = '\0';
205
s->check_start = now;
210
check->result = CHK_RES_UNKNOWN; /* no result yet */
211
check->desc[0] = '\0';
209
if (!s->check_status)
212
219
if (desc && *desc) {
213
strncpy(s->check_desc, desc, HCHK_DESC_LEN-1);
214
s->check_desc[HCHK_DESC_LEN-1] = '\0';
220
strncpy(check->desc, desc, HCHK_DESC_LEN-1);
221
check->desc[HCHK_DESC_LEN-1] = '\0';
216
s->check_desc[0] = '\0';
223
check->desc[0] = '\0';
218
s->check_status = status;
225
check->status = status;
219
226
if (check_statuses[status].result)
220
s->result = check_statuses[status].result;
227
check->result = check_statuses[status].result;
222
229
if (status == HCHK_STATUS_HANA)
223
s->check_duration = -1;
224
else if (!tv_iszero(&s->check_start)) {
230
check->duration = -1;
231
else if (!tv_iszero(&check->start)) {
225
232
/* set_server_check_status() may be called more than once */
226
s->check_duration = tv_ms_elapsed(&s->check_start, &now);
227
tv_zero(&s->check_start);
233
check->duration = tv_ms_elapsed(&check->start, &now);
234
tv_zero(&check->start);
237
/* no change is expected if no state change occurred */
238
if (check->result == CHK_RES_NEUTRAL)
243
switch (check->result) {
245
/* Failure to connect to the agent as a secondary check should not
246
* cause the server to be marked down.
248
if ((!(check->state & CHK_ST_AGENT) ||
249
(check->status >= HCHK_STATUS_L7TOUT)) &&
250
(check->health >= check->rise)) {
251
s->counters.failed_checks++;
254
if (check->health < check->rise)
260
case CHK_RES_CONDPASS: /* "condpass" cannot make the first step but it OK after a "passed" */
261
if ((check->health < check->rise + check->fall - 1) &&
262
(check->result == CHK_RES_PASSED || check->health > 0)) {
266
if (check->health >= check->rise)
267
check->health = check->rise + check->fall - 1; /* OK now */
270
/* clear consecutive_errors if observing is enabled */
272
s->consecutive_errors = 0;
230
279
if (s->proxy->options2 & PR_O2_LOGHCHKS &&
231
(((s->health != 0) && (s->result & SRV_CHK_ERROR)) ||
232
((s->health != s->rise + s->fall - 1) && (s->result & SRV_CHK_RUNNING)) ||
233
((s->state & SRV_GOINGDOWN) && !(s->result & SRV_CHK_DISABLE)) ||
234
(!(s->state & SRV_GOINGDOWN) && (s->result & SRV_CHK_DISABLE)))) {
236
int health, rise, fall, state;
238
chunk_init(&msg, trash, trashlen);
240
/* FIXME begin: calculate local version of the health/rise/fall/state */
246
if (s->result & SRV_CHK_ERROR) {
248
health--; /* still good */
251
state &= ~(SRV_RUNNING | SRV_GOINGDOWN);
257
if (s->result & SRV_CHK_RUNNING) {
258
if (health < rise + fall - 1) {
259
health++; /* was bad, stays for a while */
262
state |= SRV_RUNNING;
265
health = rise + fall - 1; /* OK now */
268
/* clear consecutive_errors if observing is enabled */
270
s->consecutive_errors = 0;
272
/* FIXME end: calculate local version of the health/rise/fall/state */
275
"Health check for %sserver %s/%s %s%s",
276
s->state & SRV_BACKUP ? "backup " : "",
278
(s->result & SRV_CHK_DISABLE)?"conditionally ":"",
279
(s->result & SRV_CHK_RUNNING)?"succeeded":"failed");
281
server_status_printf(&msg, s, SSP_O_HCHK, -1);
283
chunk_printf(&msg, ", status: %d/%d %s",
284
(state & SRV_RUNNING) ? (health - rise + 1) : (health),
285
(state & SRV_RUNNING) ? (fall) : (rise),
286
(state & SRV_RUNNING)?"UP":"DOWN");
288
Warning("%s.\n", trash);
289
send_log(s->proxy, LOG_NOTICE, "%s.\n", trash);
293
/* sends a log message when a backend goes down, and also sets last
296
static void set_backend_down(struct proxy *be)
298
be->last_change = now.tv_sec;
301
Alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id);
302
send_log(be, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(be), be->id);
305
/* Redistribute pending connections when a server goes down. The number of
306
* connections redistributed is returned.
308
static int redistribute_pending(struct server *s)
310
struct pendconn *pc, *pc_bck, *pc_end;
313
FOREACH_ITEM_SAFE(pc, pc_bck, &s->pendconns, pc_end, struct pendconn *, list) {
314
struct session *sess = pc->sess;
315
if ((sess->be->options & (PR_O_REDISP|PR_O_PERSIST)) == PR_O_REDISP &&
316
!(sess->flags & SN_FORCE_PRST)) {
317
/* The REDISP option was specified. We will ignore
318
* cookie and force to balance or use the dispatcher.
321
/* it's left to the dispatcher to choose a server */
322
sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
325
task_wakeup(sess->task, TASK_WOKEN_RES);
332
/* Check for pending connections at the backend, and assign some of them to
333
* the server coming up. The server's weight is checked before being assigned
334
* connections it may not be able to handle. The total number of transferred
335
* connections is returned.
337
static int check_for_pending(struct server *s)
344
for (xferred = 0; !s->maxconn || xferred < srv_dynamic_maxconn(s); xferred++) {
345
struct session *sess;
348
p = pendconn_from_px(s->proxy);
354
task_wakeup(sess->task, TASK_WOKEN_RES);
359
/* Sets server <s> down, notifies by all available means, recounts the
360
* remaining servers on the proxy and transfers queued sessions whenever
361
* possible to other servers. It automatically recomputes the number of
362
* servers, but not the map.
364
void set_server_down(struct server *s)
370
if (s->state & SRV_MAINTAIN) {
374
if (s->health == s->rise || s->tracked) {
375
int srv_was_paused = s->state & SRV_GOINGDOWN;
376
int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act;
378
s->last_change = now.tv_sec;
379
s->state &= ~(SRV_RUNNING | SRV_GOINGDOWN);
380
if (s->proxy->lbprm.set_server_status_down)
381
s->proxy->lbprm.set_server_status_down(s);
383
/* we might have sessions queued on this server and waiting for
384
* a connection. Those which are redispatchable will be queued
385
* to another server or to the proxy itself.
387
xferred = redistribute_pending(s);
389
chunk_init(&msg, trash, trashlen);
391
if (s->state & SRV_MAINTAIN) {
393
"%sServer %s/%s is DOWN for maintenance", s->state & SRV_BACKUP ? "Backup " : "",
394
s->proxy->id, s->id);
397
"%sServer %s/%s is DOWN", s->state & SRV_BACKUP ? "Backup " : "",
398
s->proxy->id, s->id);
400
server_status_printf(&msg, s,
401
((!s->tracked && !(s->proxy->options2 & PR_O2_LOGHCHKS))?SSP_O_HCHK:0),
404
Warning("%s.\n", trash);
406
/* we don't send an alert if the server was previously paused */
408
send_log(s->proxy, LOG_NOTICE, "%s.\n", trash);
410
send_log(s->proxy, LOG_ALERT, "%s.\n", trash);
412
if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
413
set_backend_down(s->proxy);
415
s->counters.down_trans++;
417
if (s->state & SRV_CHECKED)
418
for(srv = s->tracknext; srv; srv = srv->tracknext)
419
if (! (srv->state & SRV_MAINTAIN))
420
/* Only notify tracking servers that are not already in maintenance. */
421
set_server_down(srv);
424
s->health = 0; /* failure */
427
void set_server_up(struct server *s) {
432
unsigned int old_state = s->state;
434
if (s->state & SRV_MAINTAIN) {
438
if (s->health == s->rise || s->tracked) {
439
if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
440
if (s->proxy->last_change < now.tv_sec) // ignore negative times
441
s->proxy->down_time += now.tv_sec - s->proxy->last_change;
442
s->proxy->last_change = now.tv_sec;
445
if (s->last_change < now.tv_sec) // ignore negative times
446
s->down_time += now.tv_sec - s->last_change;
448
s->last_change = now.tv_sec;
449
s->state |= SRV_RUNNING;
450
s->state &= ~SRV_MAINTAIN;
452
if (s->slowstart > 0) {
453
s->state |= SRV_WARMINGUP;
454
task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
457
server_recalc_eweight(s);
459
/* check if we can handle some connections queued at the proxy. We
460
* will take as many as we can handle.
462
xferred = check_for_pending(s);
464
chunk_init(&msg, trash, trashlen);
466
if (old_state & SRV_MAINTAIN) {
468
"%sServer %s/%s is UP (leaving maintenance)", s->state & SRV_BACKUP ? "Backup " : "",
469
s->proxy->id, s->id);
472
"%sServer %s/%s is UP", s->state & SRV_BACKUP ? "Backup " : "",
473
s->proxy->id, s->id);
475
server_status_printf(&msg, s,
476
((!s->tracked && !(s->proxy->options2 & PR_O2_LOGHCHKS))?SSP_O_HCHK:0),
480
Warning("%s.\n", trash);
481
send_log(s->proxy, LOG_NOTICE, "%s.\n", trash);
483
if (s->state & SRV_CHECKED)
484
for(srv = s->tracknext; srv; srv = srv->tracknext)
485
if (! (srv->state & SRV_MAINTAIN))
486
/* Only notify tracking servers if they're not in maintenance. */
490
if (s->health >= s->rise)
491
s->health = s->rise + s->fall - 1; /* OK now */
495
static void set_server_disabled(struct server *s) {
501
s->state |= SRV_GOINGDOWN;
502
if (s->proxy->lbprm.set_server_status_down)
503
s->proxy->lbprm.set_server_status_down(s);
505
/* we might have sessions queued on this server and waiting for
506
* a connection. Those which are redispatchable will be queued
507
* to another server or to the proxy itself.
509
xferred = redistribute_pending(s);
511
chunk_init(&msg, trash, trashlen);
514
"Load-balancing on %sServer %s/%s is disabled",
515
s->state & SRV_BACKUP ? "Backup " : "",
516
s->proxy->id, s->id);
518
server_status_printf(&msg, s,
519
((!s->tracked && !(s->proxy->options2 & PR_O2_LOGHCHKS))?SSP_O_HCHK:0),
522
Warning("%s.\n", trash);
523
send_log(s->proxy, LOG_NOTICE, "%s.\n", trash);
525
if (!s->proxy->srv_bck && !s->proxy->srv_act)
526
set_backend_down(s->proxy);
528
if (s->state & SRV_CHECKED)
529
for(srv = s->tracknext; srv; srv = srv->tracknext)
530
set_server_disabled(srv);
533
static void set_server_enabled(struct server *s) {
539
s->state &= ~SRV_GOINGDOWN;
540
if (s->proxy->lbprm.set_server_status_up)
541
s->proxy->lbprm.set_server_status_up(s);
543
/* check if we can handle some connections queued at the proxy. We
544
* will take as many as we can handle.
546
xferred = check_for_pending(s);
548
chunk_init(&msg, trash, trashlen);
551
"Load-balancing on %sServer %s/%s is enabled again",
552
s->state & SRV_BACKUP ? "Backup " : "",
553
s->proxy->id, s->id);
555
server_status_printf(&msg, s,
556
((!s->tracked && !(s->proxy->options2 & PR_O2_LOGHCHKS))?SSP_O_HCHK:0),
559
Warning("%s.\n", trash);
560
send_log(s->proxy, LOG_NOTICE, "%s.\n", trash);
562
if (s->state & SRV_CHECKED)
563
for(srv = s->tracknext; srv; srv = srv->tracknext)
564
set_server_enabled(srv);
567
void health_adjust(struct server *s, short status) {
280
(status != prev_status || report)) {
282
"%s check for %sserver %s/%s %s%s",
283
(check->state & CHK_ST_AGENT) ? "Agent" : "Health",
284
s->flags & SRV_F_BACKUP ? "backup " : "",
286
(check->result == CHK_RES_CONDPASS) ? "conditionally ":"",
287
(check->result >= CHK_RES_PASSED) ? "succeeded" : "failed");
289
srv_append_status(&trash, s, check_reason_string(check), -1, 0);
291
chunk_appendf(&trash, ", status: %d/%d %s",
292
(check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
293
(check->health >= check->rise) ? check->fall : check->rise,
294
(check->health >= check->rise) ? (s->uweight ? "UP" : "DRAIN") : "DOWN");
296
Warning("%s.\n", trash.str);
297
send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
301
/* Marks the check <check>'s server down if the current check is already failed
302
* and the server is not down yet nor in maintenance.
304
static void check_notify_failure(struct check *check)
306
struct server *s = check->server;
308
/* The agent secondary check should only cause a server to be marked
309
* as down if check->status is HCHK_STATUS_L7STS, which indicates
310
* that the agent returned "fail", "stopped" or "down".
311
* The implication here is that failure to connect to the agent
312
* as a secondary check should not cause the server to be marked
314
if ((check->state & CHK_ST_AGENT) && check->status != HCHK_STATUS_L7STS)
317
if (check->health > 0)
320
/* We only report a reason for the check if we did not do so previously */
321
srv_set_stopped(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL);
324
/* Marks the check <check> as valid and tries to set its server up, provided
325
* it isn't in maintenance, it is not tracking a down server and other checks
326
* comply. The rule is simple : by default, a server is up, unless any of the
327
* following conditions is true :
328
* - health check failed (check->health < rise)
329
* - agent check failed (agent->health < rise)
330
* - the server tracks a down server (track && track->state == STOPPED)
331
* Note that if the server has a slowstart, it will switch to STARTING instead
332
* of RUNNING. Also, only the health checks support the nolb mode, so the
333
* agent's success may not take the server out of this mode.
335
static void check_notify_success(struct check *check)
337
struct server *s = check->server;
339
if (s->admin & SRV_ADMF_MAINT)
342
if (s->track && s->track->state == SRV_ST_STOPPED)
345
if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
348
if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
351
if ((check->state & CHK_ST_AGENT) && s->state == SRV_ST_STOPPING)
354
srv_set_running(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL);
357
/* Marks the check <check> as valid and tries to set its server into stopping mode
358
* if it was running or starting, and provided it isn't in maintenance and other
359
* checks comply. The conditions for the server to be marked in stopping mode are
360
* the same as for it to be turned up. Also, only the health checks support the
363
static void check_notify_stopping(struct check *check)
365
struct server *s = check->server;
367
if (s->admin & SRV_ADMF_MAINT)
370
if (check->state & CHK_ST_AGENT)
373
if (s->track && s->track->state == SRV_ST_STOPPED)
376
if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
379
if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
382
srv_set_stopping(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL);
385
/* note: use health_adjust() only, which first checks that the observe mode is
388
void __health_adjust(struct server *s, short status)
572
/* return now if observing or health check is not enabled */
573
if (!s->observe || !s->check)
576
393
if (s->observe >= HANA_OBS_SIZE)
525
/* Check the connection. If an error has already been reported or the socket is
526
* closed, keep errno intact as it is supposed to contain the valid error code.
527
* If no error is reported, check the socket's error queue using getsockopt().
528
* Warning, this must be done only once when returning from poll, and never
529
* after an I/O error was attempted, otherwise the error queue might contain
530
* inconsistent errors. If an error is detected, the CO_FL_ERROR is set on the
531
* socket. Returns non-zero if an error was reported, zero if everything is
532
* clean (including a properly closed socket).
534
static int retrieve_errno_from_socket(struct connection *conn)
537
socklen_t lskerr = sizeof(skerr);
539
if (conn->flags & CO_FL_ERROR && ((errno && errno != EAGAIN) || !conn->ctrl))
542
if (!conn_ctrl_ready(conn))
545
if (getsockopt(conn->t.sock.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == 0)
552
/* we could not retrieve an error, that does not mean there is
553
* none. Just don't change anything and only report the prior
556
if (conn->flags & CO_FL_ERROR)
562
conn->flags |= CO_FL_ERROR | CO_FL_SOCK_WR_SH | CO_FL_SOCK_RD_SH;
566
/* Try to collect as much information as possible on the connection status,
567
* and adjust the server status accordingly. It may make use of <errno_bck>
568
* if non-null when the caller is absolutely certain of its validity (eg:
569
* checked just after a syscall). If the caller doesn't have a valid errno,
570
* it can pass zero, and retrieve_errno_from_socket() will be called to try
571
* to extract errno from the socket. If no error is reported, it will consider
572
* the <expired> flag. This is intended to be used when a connection error was
573
* reported in conn->flags or when a timeout was reported in <expired>. The
574
* function takes care of not updating a server status which was already set.
575
* All situations where at least one of <expired> or CO_FL_ERROR are set
578
static void chk_report_conn_err(struct connection *conn, int errno_bck, int expired)
580
struct check *check = conn->owner;
584
if (check->result != CHK_RES_UNKNOWN)
588
if (!errno || errno == EAGAIN)
589
retrieve_errno_from_socket(conn);
591
if (!(conn->flags & CO_FL_ERROR) && !expired)
594
/* we'll try to build a meaningful error message depending on the
595
* context of the error possibly present in conn->err_code, and the
596
* socket error possibly collected above. This is useful to know the
597
* exact step of the L6 layer (eg: SSL handshake).
599
chk = get_trash_chunk();
601
if (check->type == PR_O2_TCPCHK_CHK) {
602
chunk_printf(chk, " at step %d of tcp-check", tcpcheck_get_step_id(check->server));
603
/* we were looking for a string */
604
if (check->current_step && check->current_step->action == TCPCHK_ACT_CONNECT) {
605
chunk_appendf(chk, " (connect)");
607
else if (check->current_step && check->current_step->action == TCPCHK_ACT_EXPECT) {
608
if (check->current_step->string)
609
chunk_appendf(chk, " (string '%s')", check->current_step->string);
610
else if (check->current_step->expect_regex)
611
chunk_appendf(chk, " (expect regex)");
613
else if (check->current_step && check->current_step->action == TCPCHK_ACT_SEND) {
614
chunk_appendf(chk, " (send)");
618
if (conn->err_code) {
619
if (errno && errno != EAGAIN)
620
chunk_printf(&trash, "%s (%s)%s", conn_err_code_str(conn), strerror(errno), chk->str);
622
chunk_printf(&trash, "%s%s", conn_err_code_str(conn), chk->str);
626
if (errno && errno != EAGAIN) {
627
chunk_printf(&trash, "%s%s", strerror(errno), chk->str);
635
if ((conn->flags & (CO_FL_CONNECTED|CO_FL_WAIT_L4_CONN)) == CO_FL_WAIT_L4_CONN) {
636
/* L4 not established (yet) */
637
if (conn->flags & CO_FL_ERROR)
638
set_server_check_status(check, HCHK_STATUS_L4CON, err_msg);
640
set_server_check_status(check, HCHK_STATUS_L4TOUT, err_msg);
642
else if ((conn->flags & (CO_FL_CONNECTED|CO_FL_WAIT_L6_CONN)) == CO_FL_WAIT_L6_CONN) {
643
/* L6 not established (yet) */
644
if (conn->flags & CO_FL_ERROR)
645
set_server_check_status(check, HCHK_STATUS_L6RSP, err_msg);
647
set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
649
else if (conn->flags & CO_FL_ERROR) {
650
/* I/O error after connection was established and before we could diagnose */
651
set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
654
/* connection established but expired check */
655
if (check->type == PR_O2_SSL3_CHK)
656
set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
657
else /* HTTP, SMTP, ... */
658
set_server_check_status(check, HCHK_STATUS_L7TOUT, err_msg);
717
665
* This function is used only for server health-checks. It handles
718
666
* the connection acknowledgement. If the proxy requires L7 health-checks,
719
667
* it sends the request. In other cases, it calls set_server_check_status()
720
* to set s->check_status, s->check_duration and s->result.
721
* The function itself returns 0 if it needs some polling before being called
722
* again, otherwise 1.
668
* to set check->status, check->duration and check->result.
724
static int event_srv_chk_w(int fd)
670
static void event_srv_chk_w(struct connection *conn)
726
__label__ out_wakeup, out_nowake, out_poll, out_error;
727
struct task *t = fdtab[fd].owner;
728
struct server *s = t->context;
730
//fprintf(stderr, "event_srv_chk_w, state=%ld\n", unlikely(fdtab[fd].state));
731
if (unlikely(fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR))) {
732
int skerr, err = errno;
733
socklen_t lskerr = sizeof(skerr);
735
if (!getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) && skerr)
738
set_server_check_status(s, HCHK_STATUS_L4CON, strerror(err));
742
/* here, we know that the connection is established */
744
if (!(s->result & SRV_CHK_ERROR)) {
745
/* we don't want to mark 'UP' a server on which we detected an error earlier */
746
if ((s->proxy->options & PR_O_HTTP_CHK) ||
747
(s->proxy->options & PR_O_SMTP_CHK) ||
748
(s->proxy->options2 & PR_O2_SSL3_CHK) ||
749
(s->proxy->options2 & PR_O2_MYSQL_CHK) ||
750
(s->proxy->options2 & PR_O2_LDAP_CHK)) {
752
const char *check_req = s->proxy->check_req;
753
int check_len = s->proxy->check_len;
755
/* we want to check if this host replies to HTTP or SSLv3 requests
756
* so we'll send the request, and won't wake the checker up now.
759
if (s->proxy->options2 & PR_O2_SSL3_CHK) {
760
/* SSL requires that we put Unix time in the request */
761
int gmt_time = htonl(date.tv_sec);
762
memcpy(s->proxy->check_req + 11, &gmt_time, 4);
764
else if (s->proxy->options & PR_O_HTTP_CHK) {
765
memcpy(trash, check_req, check_len);
767
if (s->proxy->options2 & PR_O2_CHK_SNDST)
768
check_len += httpchk_build_status_header(s, trash + check_len);
770
trash[check_len++] = '\r';
771
trash[check_len++] = '\n';
772
trash[check_len] = '\0';
776
ret = send(fd, check_req, check_len, MSG_DONTWAIT | MSG_NOSIGNAL);
777
if (ret == check_len) {
778
/* we allow up to <timeout.check> if nonzero for a response */
779
if (s->proxy->timeout.check) {
780
t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
783
EV_FD_SET(fd, DIR_RD); /* prepare for reading reply */
786
else if (ret == 0 || errno == EAGAIN)
792
set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
796
set_server_check_status(s, HCHK_STATUS_SOCKERR, strerror(errno));
803
/* We have no data to send to check the connection, and
804
* getsockopt() will not inform us whether the connection
805
* is still pending. So we'll reuse connect() to check the
806
* state of the socket. This has the advantage of giving us
807
* the following info :
809
* - connecting (EALREADY, EINPROGRESS)
810
* - connected (EISCONN, 0)
813
struct sockaddr_in sa;
815
sa = (s->check_addr.sin_addr.s_addr) ? s->check_addr : s->addr;
816
sa.sin_port = htons(s->check_port);
818
if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) == 0)
821
if (errno == EALREADY || errno == EINPROGRESS)
824
if (errno && errno != EISCONN) {
825
set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
829
/* good TCP connection is enough */
830
set_server_check_status(s, HCHK_STATUS_L4OK, NULL);
832
/* avoid accumulating TIME_WAIT on connect-only checks */
834
setsockopt(fd, SOL_SOCKET, SO_LINGER,
835
(struct linger *) &nolinger, sizeof(struct linger));
672
struct check *check = conn->owner;
673
struct server *s = check->server;
674
struct task *t = check->task;
676
if (unlikely(check->result == CHK_RES_FAILED))
679
if (conn->flags & CO_FL_HANDSHAKE)
682
if (retrieve_errno_from_socket(conn)) {
683
chk_report_conn_err(conn, errno, 0);
684
__conn_data_stop_both(conn);
688
if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH)) {
689
/* if the output is closed, we can't do anything */
690
conn->flags |= CO_FL_ERROR;
691
chk_report_conn_err(conn, 0, 0);
695
/* here, we know that the connection is established. That's enough for
701
if (check->type == PR_O2_TCPCHK_CHK) {
707
conn->xprt->snd_buf(conn, check->bo, 0);
708
if (conn->flags & CO_FL_ERROR) {
709
chk_report_conn_err(conn, errno, 0);
710
__conn_data_stop_both(conn);
717
/* full request sent, we allow up to <timeout.check> if nonzero for a response */
718
if (s->proxy->timeout.check) {
719
t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
840
725
task_wakeup(t, TASK_WOKEN_IO);
842
EV_FD_CLR(fd, DIR_WR); /* nothing more to write */
843
fdtab[fd].ev &= ~FD_POLL_OUT;
846
/* The connection is still pending. We'll have to poll it
847
* before attempting to go further. */
848
fdtab[fd].ev &= ~FD_POLL_OUT;
851
fdtab[fd].state = FD_STERROR;
727
__conn_data_stop_send(conn); /* nothing more to write */
857
731
* This function is used only for server health-checks. It handles the server's
858
732
* reply to an HTTP request, SSL HELLO or MySQL client Auth. It calls
859
* set_server_check_status() to update s->check_status, s->check_duration
733
* set_server_check_status() to update check->status, check->duration
862
736
* The set_server_check_status function is called with HCHK_STATUS_L7OKD if
863
737
* an HTTP server replies HTTP 2xx or 3xx (valid responses), if an SMTP server
964
831
goto wait_more_data;
966
833
/* check the reply : HTTP/1.X 2xx and 3xx are OK */
967
else if (*(s->check_data + 9) == '2' || *(s->check_data + 9) == '3') {
834
else if (*(check->bi->data + 9) == '2' || *(check->bi->data + 9) == '3') {
969
set_server_check_status(s, HCHK_STATUS_L7OKD, desc);
836
set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
973
set_server_check_status(s, HCHK_STATUS_L7STS, desc);
840
set_server_check_status(check, HCHK_STATUS_L7STS, desc);
976
else if (s->proxy->options2 & PR_O2_SSL3_CHK) {
977
if (!done && s->check_data_len < 5)
845
if (!done && check->bi->i < 5)
978
846
goto wait_more_data;
980
848
/* Check for SSLv3 alert or handshake */
981
if ((s->check_data_len >= 5) && (*s->check_data == 0x15 || *s->check_data == 0x16))
982
set_server_check_status(s, HCHK_STATUS_L6OK, NULL);
849
if ((check->bi->i >= 5) && (*check->bi->data == 0x15 || *check->bi->data == 0x16))
850
set_server_check_status(check, HCHK_STATUS_L6OK, NULL);
984
set_server_check_status(s, HCHK_STATUS_L6RSP, NULL);
986
else if (s->proxy->options & PR_O_SMTP_CHK) {
987
if (!done && s->check_data_len < strlen("000\r"))
852
set_server_check_status(check, HCHK_STATUS_L6RSP, NULL);
856
if (!done && check->bi->i < strlen("000\r"))
988
857
goto wait_more_data;
990
859
/* Check if the server speaks SMTP */
991
if ((s->check_data_len < strlen("000\r")) ||
992
(*(s->check_data + 3) != ' ' && *(s->check_data + 3) != '\r') ||
993
!isdigit((unsigned char) *s->check_data) || !isdigit((unsigned char) *(s->check_data + 1)) ||
994
!isdigit((unsigned char) *(s->check_data + 2))) {
995
cut_crlf(s->check_data);
996
set_server_check_status(s, HCHK_STATUS_L7RSP, s->check_data);
860
if ((check->bi->i < strlen("000\r")) ||
861
(*(check->bi->data + 3) != ' ' && *(check->bi->data + 3) != '\r') ||
862
!isdigit((unsigned char) *check->bi->data) || !isdigit((unsigned char) *(check->bi->data + 1)) ||
863
!isdigit((unsigned char) *(check->bi->data + 2))) {
864
cut_crlf(check->bi->data);
865
set_server_check_status(check, HCHK_STATUS_L7RSP, check->bi->data);
1001
s->check_code = str2uic(s->check_data);
870
check->code = str2uic(check->bi->data);
1003
desc = ltrim(s->check_data + 3, ' ');
872
desc = ltrim(check->bi->data + 3, ' ');
1006
875
/* Check for SMTP code 2xx (should be 250) */
1007
if (*s->check_data == '2')
1008
set_server_check_status(s, HCHK_STATUS_L7OKD, desc);
1010
set_server_check_status(s, HCHK_STATUS_L7STS, desc);
876
if (*check->bi->data == '2')
877
set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
879
set_server_check_status(check, HCHK_STATUS_L7STS, desc);
882
case PR_O2_LB_AGENT_CHK: {
883
int status = HCHK_STATUS_CHECKED;
884
const char *hs = NULL; /* health status */
885
const char *as = NULL; /* admin status */
886
const char *ps = NULL; /* performance status */
887
const char *err = NULL; /* first error to report */
888
const char *wrn = NULL; /* first warning to report */
891
/* We're getting an agent check response. The agent could
892
* have been disabled in the mean time with a long check
893
* still pending. It is important that we ignore the whole
896
if (!(check->server->agent.state & CHK_ST_ENABLED))
899
/* The agent supports strings made of a single line ended by the
900
* first CR ('\r') or LF ('\n'). This line is composed of words
901
* delimited by spaces (' '), tabs ('\t'), or commas (','). The
902
* line may optionally contain a description of a state change
903
* after a sharp ('#'), which is only considered if a health state
906
* Words may be composed of :
907
* - a numeric weight suffixed by the percent character ('%').
908
* - a health status among "up", "down", "stopped", and "fail".
909
* - an admin status among "ready", "drain", "maint".
911
* These words may appear in any order. If multiple words of the
912
* same category appear, the last one wins.
916
while (*p && *p != '\n' && *p != '\r')
923
/* at least inform the admin that the agent is mis-behaving */
924
set_server_check_status(check, check->status, "Ignoring incomplete line from agent");
929
cmd = check->bi->data;
932
/* look for next word */
933
if (*cmd == ' ' || *cmd == '\t' || *cmd == ',') {
939
/* this is the beginning of a health status description,
940
* skip the sharp and blanks.
943
while (*cmd == '\t' || *cmd == ' ')
948
/* find the end of the word so that we have a null-terminated
949
* word between <cmd> and <p>.
952
while (*p && *p != '\t' && *p != ' ' && *p != '\n' && *p != ',')
957
/* first, health statuses */
958
if (strcasecmp(cmd, "up") == 0) {
959
check->health = check->rise + check->fall - 1;
960
status = HCHK_STATUS_L7OKD;
963
else if (strcasecmp(cmd, "down") == 0) {
965
status = HCHK_STATUS_L7STS;
968
else if (strcasecmp(cmd, "stopped") == 0) {
970
status = HCHK_STATUS_L7STS;
973
else if (strcasecmp(cmd, "fail") == 0) {
975
status = HCHK_STATUS_L7STS;
979
else if (strcasecmp(cmd, "ready") == 0) {
982
else if (strcasecmp(cmd, "drain") == 0) {
985
else if (strcasecmp(cmd, "maint") == 0) {
988
/* else try to parse a weight here and keep the last one */
989
else if (isdigit((unsigned char)*cmd) && strchr(cmd, '%') != NULL) {
993
/* keep a copy of the first error */
997
/* skip to next word */
1000
/* here, cmd points either to \0 or to the beginning of a
1001
* description. Skip possible leading spaces.
1003
while (*cmd == ' ' || *cmd == '\n')
1006
/* First, update the admin status so that we avoid sending other
1007
* possibly useless warnings and can also update the health if
1008
* present after going back up.
1011
if (strcasecmp(as, "drain") == 0)
1012
srv_adm_set_drain(check->server);
1013
else if (strcasecmp(as, "maint") == 0)
1014
srv_adm_set_maint(check->server);
1016
srv_adm_set_ready(check->server);
1019
/* now change weights */
1023
msg = server_parse_weight_change_request(s, ps);
1028
/* and finally health status */
1030
/* We'll report some of the warnings and errors we have
1031
* here. Down reports are critical, we leave them untouched.
1032
* Lack of report, or report of 'UP' leaves the room for
1033
* ERR first, then WARN.
1035
const char *msg = cmd;
1038
if (!*msg || status == HCHK_STATUS_L7OKD) {
1041
else if (wrn && *wrn)
1045
t = get_trash_chunk();
1046
chunk_printf(t, "via agent : %s%s%s%s",
1047
hs, *msg ? " (" : "",
1048
msg, *msg ? ")" : "");
1050
set_server_check_status(check, status, t->str);
1052
else if (err && *err) {
1053
/* No status change but we'd like to report something odd.
1054
* Just report the current state and copy the message.
1056
chunk_printf(&trash, "agent reports an error : %s", err);
1057
set_server_check_status(check, status/*check->status*/, trash.str);
1060
else if (wrn && *wrn) {
1061
/* No status change but we'd like to report something odd.
1062
* Just report the current state and copy the message.
1064
chunk_printf(&trash, "agent warns : %s", wrn);
1065
set_server_check_status(check, status/*check->status*/, trash.str);
1068
set_server_check_status(check, status, NULL);
1012
else if (s->proxy->options2 & PR_O2_MYSQL_CHK) {
1013
if (!done && s->check_data_len < 5)
1072
case PR_O2_PGSQL_CHK:
1073
if (!done && check->bi->i < 9)
1074
goto wait_more_data;
1076
if (check->bi->data[0] == 'R') {
1077
set_server_check_status(check, HCHK_STATUS_L7OKD, "PostgreSQL server is ok");
1080
if ((check->bi->data[0] == 'E') && (check->bi->data[5]!=0) && (check->bi->data[6]!=0))
1081
desc = &check->bi->data[6];
1083
desc = "PostgreSQL unknown error";
1085
set_server_check_status(check, HCHK_STATUS_L7STS, desc);
1089
case PR_O2_REDIS_CHK:
1090
if (!done && check->bi->i < 7)
1091
goto wait_more_data;
1093
if (strcmp(check->bi->data, "+PONG\r\n") == 0) {
1094
set_server_check_status(check, HCHK_STATUS_L7OKD, "Redis server is ok");
1097
set_server_check_status(check, HCHK_STATUS_L7STS, check->bi->data);
1101
case PR_O2_MYSQL_CHK:
1102
if (!done && check->bi->i < 5)
1014
1103
goto wait_more_data;
1016
1105
if (s->proxy->check_len == 0) { // old mode
1017
if (*(s->check_data + 4) != '\xff') {
1106
if (*(check->bi->data + 4) != '\xff') {
1018
1107
/* We set the MySQL Version in description for information purpose
1019
1108
* FIXME : it can be cool to use MySQL Version for other purpose,
1020
1109
* like mark as down old MySQL server.
1022
if (s->check_data_len > 51) {
1023
desc = ltrim(s->check_data + 5, ' ');
1024
set_server_check_status(s, HCHK_STATUS_L7OKD, desc);
1111
if (check->bi->i > 51) {
1112
desc = ltrim(check->bi->data + 5, ' ');
1113
set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
1195
1333
/* by default, plan on stopping the task */
1196
1334
t->expire = TICK_ETERNITY;
1197
if ((s->state & (SRV_RUNNING|SRV_WARMINGUP|SRV_MAINTAIN)) != (SRV_RUNNING|SRV_WARMINGUP))
1335
if ((s->admin & SRV_ADMF_MAINT) ||
1336
(s->state != SRV_ST_STARTING))
1339
/* recalculate the weights and update the state */
1200
1340
server_recalc_eweight(s);
1202
1342
/* probably that we can refill this server with a bit more connections */
1203
check_for_pending(s);
1343
pendconn_grab_from_px(s);
1205
1345
/* get back there in 1 second or 1/20th of the slowstart interval,
1206
1346
* whichever is greater, resulting in small 5% steps.
1208
if (s->state & SRV_WARMINGUP)
1348
if (s->state == SRV_ST_STARTING)
1209
1349
t->expire = tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20)));
1354
* establish a server health-check.
1356
* It can return one of :
1357
* - SN_ERR_NONE if everything's OK and tcpcheck_main() was not called
1358
* - SN_ERR_UP if everything's OK and tcpcheck_main() was called
1359
* - SN_ERR_SRVTO if there are no more servers
1360
* - SN_ERR_SRVCL if the connection was refused by the server
1361
* - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1362
* - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1363
* - SN_ERR_INTERNAL for any other purely internal errors
1364
* Additionally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1365
* Note that we try to prevent the network stack from sending the ACK during the
1366
* connect() when a pure TCP check is used (without PROXY protocol).
1368
static int connect_chk(struct task *t)
1370
struct check *check = t->context;
1371
struct server *s = check->server;
1372
struct connection *conn = check->conn;
1373
struct protocol *proto;
1376
/* tcpcheck send/expect initialisation */
1377
if (check->type == PR_O2_TCPCHK_CHK)
1378
check->current_step = NULL;
1380
/* prepare the check buffer.
1381
* This should not be used if check is the secondary agent check
1382
* of a server as s->proxy->check_req will relate to the
1383
* configuration of the primary check. Similarly, tcp-check uses
1386
if (check->type && check->type != PR_O2_TCPCHK_CHK && !(check->state & CHK_ST_AGENT)) {
1387
bo_putblk(check->bo, s->proxy->check_req, s->proxy->check_len);
1389
/* we want to check if this host replies to HTTP or SSLv3 requests
1390
* so we'll send the request, and won't wake the checker up now.
1392
if ((check->type) == PR_O2_SSL3_CHK) {
1393
/* SSL requires that we put Unix time in the request */
1394
int gmt_time = htonl(date.tv_sec);
1395
memcpy(check->bo->data + 11, &gmt_time, 4);
1397
else if ((check->type) == PR_O2_HTTP_CHK) {
1398
if (s->proxy->options2 & PR_O2_CHK_SNDST)
1399
bo_putblk(check->bo, trash.str, httpchk_build_status_header(s, trash.str, trash.size));
1400
bo_putstr(check->bo, "\r\n");
1401
*check->bo->p = '\0'; /* to make gdb output easier to read */
1405
/* prepare a new connection */
1407
conn_prepare(conn, s->check_common.proto, s->check_common.xprt);
1408
conn_attach(conn, check, &check_conn_cb);
1409
conn->target = &s->obj_type;
1411
/* no client address */
1412
clear_addr(&conn->addr.from);
1414
if (is_addr(&s->check_common.addr)) {
1416
/* we'll connect to the check addr specified on the server */
1417
conn->addr.to = s->check_common.addr;
1418
proto = s->check_common.proto;
1421
/* we'll connect to the addr on the server */
1422
conn->addr.to = s->addr;
1427
set_host_port(&conn->addr.to, check->port);
1430
if (check->type == PR_O2_TCPCHK_CHK) {
1431
struct tcpcheck_rule *r = (struct tcpcheck_rule *) s->proxy->tcpcheck_rules.n;
1432
/* if first step is a 'connect', then tcpcheck_main must run it */
1433
if (r->action == TCPCHK_ACT_CONNECT) {
1434
tcpcheck_main(conn);
1439
ret = SN_ERR_INTERNAL;
1441
ret = proto->connect(conn, check->type, (check->type) ? 0 : 2);
1442
conn->flags |= CO_FL_WAKE_DATA;
1443
if (s->check.send_proxy) {
1444
conn->send_proxy_ofs = 1;
1445
conn->flags |= CO_FL_SEND_PROXY;
1214
1452
* manages a server health-check. Returns
1215
1453
* the time the task accepts to wait, or TIME_ETERNITY for infinity.
1217
struct task *process_chk(struct task *t)
1455
static struct task *process_chk(struct task *t)
1220
struct server *s = t->context;
1221
struct sockaddr_in sa;
1457
struct check *check = t->context;
1458
struct server *s = check->server;
1459
struct connection *conn = check->conn;
1462
int expired = tick_is_expired(t->expire, now_ms);
1226
if (attempts++ > 0) {
1227
/* we always fail to create a server, let's stop insisting... */
1228
while (tick_is_expired(t->expire, now_ms))
1229
t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
1233
if (fd < 0) { /* no check currently running */
1234
if (!tick_is_expired(t->expire, now_ms)) /* woke up too early */
1464
if (!(check->state & CHK_ST_INPROGRESS)) {
1465
/* no check currently running */
1466
if (!expired) /* woke up too early */
1237
/* we don't send any health-checks when the proxy is stopped or when
1238
* the server should not be checked.
1469
/* we don't send any health-checks when the proxy is
1470
* stopped, the server should not be checked or the check
1240
if (!(s->state & SRV_CHECKED) || s->proxy->state == PR_STSTOPPED || (s->state & SRV_MAINTAIN)) {
1241
while (tick_is_expired(t->expire, now_ms))
1242
t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
1473
if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
1474
s->proxy->state == PR_STSTOPPED)
1246
1477
/* we'll initiate a new check */
1247
set_server_check_status(s, HCHK_STATUS_START, NULL);
1248
if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) != -1) {
1249
if ((fd < global.maxsock) &&
1250
(fcntl(fd, F_SETFL, O_NONBLOCK) != -1) &&
1251
(setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) != -1)) {
1252
//fprintf(stderr, "process_chk: 3\n");
1254
if (s->proxy->options & PR_O_TCP_NOLING) {
1255
/* We don't want to useless data */
1256
setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1259
if (s->check_addr.sin_addr.s_addr)
1260
/* we'll connect to the check addr specified on the server */
1263
/* we'll connect to the addr on the server */
1266
/* we'll connect to the check port on the server */
1267
sa.sin_port = htons(s->check_port);
1269
/* allow specific binding :
1270
* - server-specific at first
1271
* - proxy-specific next
1273
if (s->state & SRV_BIND_SRC) {
1274
struct sockaddr_in *remote = NULL;
1277
#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
1278
if ((s->state & SRV_TPROXY_MASK) == SRV_TPROXY_ADDR) {
1279
remote = (struct sockaddr_in *)&s->tproxy_addr;
1283
#ifdef SO_BINDTODEVICE
1284
/* Note: this might fail if not CAP_NET_RAW */
1286
setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
1287
s->iface_name, s->iface_len + 1);
1289
if (s->sport_range) {
1290
int bind_attempts = 10; /* should be more than enough to find a spare port */
1291
struct sockaddr_in src;
1294
src = s->source_addr;
1297
/* note: in case of retry, we may have to release a previously
1298
* allocated port, hence this loop's construct.
1300
port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
1301
fdinfo[fd].port_range = NULL;
1307
fdinfo[fd].local_port = port_range_alloc_port(s->sport_range);
1308
if (!fdinfo[fd].local_port)
1311
fdinfo[fd].port_range = s->sport_range;
1312
src.sin_port = htons(fdinfo[fd].local_port);
1314
ret = tcpv4_bind_socket(fd, flags, &src, remote);
1315
} while (ret != 0); /* binding NOK */
1318
ret = tcpv4_bind_socket(fd, flags, &s->source_addr, remote);
1322
set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL);
1325
Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1326
s->proxy->id, s->id);
1329
Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
1330
s->proxy->id, s->id);
1335
else if (s->proxy->options & PR_O_BIND_SRC) {
1336
struct sockaddr_in *remote = NULL;
1339
#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
1340
if ((s->proxy->options & PR_O_TPXY_MASK) == PR_O_TPXY_ADDR) {
1341
remote = (struct sockaddr_in *)&s->proxy->tproxy_addr;
1345
#ifdef SO_BINDTODEVICE
1346
/* Note: this might fail if not CAP_NET_RAW */
1347
if (s->proxy->iface_name)
1348
setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
1349
s->proxy->iface_name, s->proxy->iface_len + 1);
1351
ret = tcpv4_bind_socket(fd, flags, &s->proxy->source_addr, remote);
1353
set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL);
1356
Alert("Cannot bind to source address before connect() for %s '%s'. Aborting.\n",
1357
proxy_type_str(s->proxy), s->proxy->id);
1360
Alert("Cannot bind to tproxy source address before connect() for %s '%s'. Aborting.\n",
1361
proxy_type_str(s->proxy), s->proxy->id);
1367
if (s->result == SRV_CHK_UNKNOWN) {
1368
#if defined(TCP_QUICKACK)
1369
/* disabling tcp quick ack now allows
1370
* the request to leave the machine with
1372
* We also want to do this to perform a
1373
* SYN-SYN/ACK-RST sequence when raw TCP
1374
* checks are configured.
1376
if ((s->proxy->options2 & PR_O2_SMARTCON) ||
1377
(!(s->proxy->options & (PR_O_HTTP_CHK|PR_O_SMTP_CHK)) &&
1378
!(s->proxy->options2 & (PR_O2_SSL3_CHK|PR_O2_MYSQL_CHK|PR_O2_LDAP_CHK))))
1379
setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, (char *) &zero, sizeof(zero));
1381
if ((connect(fd, (struct sockaddr *)&sa, sizeof(sa)) != -1) || (errno == EINPROGRESS)) {
1382
/* OK, connection in progress or established */
1384
//fprintf(stderr, "process_chk: 4\n");
1386
s->curfd = fd; /* that's how we know a test is in progress ;-) */
1388
fdtab[fd].owner = t;
1389
fdtab[fd].cb[DIR_RD].f = &event_srv_chk_r;
1390
fdtab[fd].cb[DIR_RD].b = NULL;
1391
fdtab[fd].cb[DIR_WR].f = &event_srv_chk_w;
1392
fdtab[fd].cb[DIR_WR].b = NULL;
1393
fdinfo[fd].peeraddr = (struct sockaddr *)&sa;
1394
fdinfo[fd].peerlen = sizeof(sa);
1395
fdtab[fd].state = FD_STCONN; /* connection in progress */
1396
fdtab[fd].flags = FD_FL_TCP | FD_FL_TCP_NODELAY;
1397
EV_FD_SET(fd, DIR_WR); /* for connect status */
1399
assert (!EV_FD_ISSET(fd, DIR_RD));
1401
//fprintf(stderr, "process_chk: 4+, %lu\n", __tv_to_ms(&s->proxy->timeout.connect));
1402
/* we allow up to min(inter, timeout.connect) for a connection
1403
* to establish but only when timeout.check is set
1404
* as it may be too short for a full check otherwise
1406
t->expire = tick_add(now_ms, MS_TO_TICKS(s->inter));
1408
if (s->proxy->timeout.check && s->proxy->timeout.connect) {
1409
int t_con = tick_add(now_ms, s->proxy->timeout.connect);
1410
t->expire = tick_first(t->expire, t_con);
1414
else if (errno != EALREADY && errno != EISCONN && errno != EAGAIN) {
1418
/* FIXME: is it possible to get ECONNREFUSED/ENETUNREACH with O_NONBLOCK? */
1421
set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
1425
set_server_check_status(s, HCHK_STATUS_SOCKERR, strerror(errno));
1478
set_server_check_status(check, HCHK_STATUS_START, NULL);
1480
check->state |= CHK_ST_INPROGRESS;
1481
check->bi->p = check->bi->data;
1483
check->bo->p = check->bo->data;
1486
ret = connect_chk(t);
1491
/* we allow up to min(inter, timeout.connect) for a connection
1492
* to establish but only when timeout.check is set
1493
* as it may be too short for a full check otherwise
1495
t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
1497
if (s->proxy->timeout.check && s->proxy->timeout.connect) {
1498
int t_con = tick_add(now_ms, s->proxy->timeout.connect);
1499
t->expire = tick_first(t->expire, t_con);
1430
port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
1431
fdinfo[fd].port_range = NULL;
1432
close(fd); /* socket creation error */
1435
if (s->result == SRV_CHK_UNKNOWN) { /* nothing done */
1436
//fprintf(stderr, "process_chk: 6\n");
1437
while (tick_is_expired(t->expire, now_ms))
1438
t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
1439
goto new_chk; /* may be we should initialize a new check */
1442
/* here, we have seen a failure */
1443
if (s->health > s->rise) {
1444
s->health--; /* still good */
1445
s->counters.failed_checks++;
1450
//fprintf(stderr, "process_chk: 7, %lu\n", __tv_to_ms(&s->proxy->timeout.connect));
1503
conn_data_want_recv(conn); /* prepare for reading a possible reply */
1507
case SN_ERR_SRVTO: /* ETIMEDOUT */
1508
case SN_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
1509
conn->flags |= CO_FL_ERROR;
1510
chk_report_conn_err(conn, errno, 0);
1512
case SN_ERR_PRXCOND:
1513
case SN_ERR_RESOURCE:
1514
case SN_ERR_INTERNAL:
1515
conn->flags |= CO_FL_ERROR;
1516
chk_report_conn_err(conn, 0, 0);
1520
/* here, we have seen a synchronous error, no fd was allocated */
1522
check->state &= ~CHK_ST_INPROGRESS;
1523
check_notify_failure(check);
1451
1525
/* we allow up to min(inter, timeout.connect) for a connection
1452
1526
* to establish but only when timeout.check is set
1453
1527
* as it may be too short for a full check otherwise
1458
1532
t_con = tick_add(t->expire, s->proxy->timeout.connect);
1459
t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
1533
t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
1461
1535
if (s->proxy->timeout.check)
1462
1536
t->expire = tick_first(t->expire, t_con);
1467
/* there was a test running */
1468
if ((s->result & (SRV_CHK_ERROR|SRV_CHK_RUNNING)) == SRV_CHK_RUNNING) { /* good server detected */
1469
/* we may have to add/remove this server from the LB group */
1470
if ((s->state & SRV_RUNNING) && (s->proxy->options & PR_O_DISABLE404)) {
1471
if ((s->state & SRV_GOINGDOWN) &&
1472
((s->result & (SRV_CHK_RUNNING|SRV_CHK_DISABLE)) == SRV_CHK_RUNNING))
1473
set_server_enabled(s);
1474
else if (!(s->state & SRV_GOINGDOWN) &&
1475
((s->result & (SRV_CHK_RUNNING | SRV_CHK_DISABLE)) ==
1476
(SRV_CHK_RUNNING | SRV_CHK_DISABLE)))
1477
set_server_disabled(s);
1480
if (!(s->state & SRV_MAINTAIN) && s->health < s->rise + s->fall - 1) {
1481
s->health++; /* was bad, stays for a while */
1484
s->curfd = -1; /* no check running anymore */
1488
if (global.spread_checks > 0) {
1489
rv = srv_getinter(s) * global.spread_checks / 100;
1490
rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
1492
t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(s) + rv));
1495
else if ((s->result & SRV_CHK_ERROR) || tick_is_expired(t->expire, now_ms)) {
1496
if (!(s->result & SRV_CHK_ERROR)) {
1497
if (!EV_FD_ISSET(fd, DIR_RD)) {
1498
set_server_check_status(s, HCHK_STATUS_L4TOUT, NULL);
1500
if (s->proxy->options2 & PR_O2_SSL3_CHK)
1501
set_server_check_status(s, HCHK_STATUS_L6TOUT, NULL);
1502
else /* HTTP, SMTP */
1503
set_server_check_status(s, HCHK_STATUS_L7TOUT, NULL);
1507
/* failure or timeout detected */
1508
if (s->health > s->rise) {
1509
s->health--; /* still good */
1510
s->counters.failed_checks++;
1540
/* there was a test running.
1541
* First, let's check whether there was an uncaught error,
1542
* which can happen on connect timeout or error.
1544
if (s->check.result == CHK_RES_UNKNOWN) {
1545
/* good connection is enough for pure TCP check */
1546
if ((conn->flags & CO_FL_CONNECTED) && !check->type) {
1548
set_server_check_status(check, HCHK_STATUS_L6OK, NULL);
1550
set_server_check_status(check, HCHK_STATUS_L4OK, NULL);
1552
else if ((conn->flags & CO_FL_ERROR) || expired) {
1553
chk_report_conn_err(conn, 0, expired);
1515
/* avoid accumulating TIME_WAIT on timeouts */
1516
setsockopt(fd, SOL_SOCKET, SO_LINGER,
1517
(struct linger *) &nolinger, sizeof(struct linger));
1521
if (global.spread_checks > 0) {
1522
rv = srv_getinter(s) * global.spread_checks / 100;
1523
rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
1525
t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(s) + rv));
1528
/* if result is unknown and there's no timeout, we have to wait again */
1556
goto out_wait; /* timeout not reached, wait again */
1559
/* check complete or aborted */
1561
/* The check was aborted and the connection was not yet closed.
1562
* This can happen upon timeout, or when an external event such
1563
* as a failed response coupled with "observe layer7" caused the
1564
* server state to be suddenly changed.
1567
conn_force_close(conn);
1570
if (check->result == CHK_RES_FAILED) {
1571
/* a failure or timeout detected */
1572
check_notify_failure(check);
1574
else if (check->result == CHK_RES_CONDPASS) {
1575
/* check is OK but asks for stopping mode */
1576
check_notify_stopping(check);
1578
else if (check->result == CHK_RES_PASSED) {
1579
/* a success was detected */
1580
check_notify_success(check);
1582
check->state &= ~CHK_ST_INPROGRESS;
1585
if (global.spread_checks > 0) {
1586
rv = srv_getinter(check) * global.spread_checks / 100;
1587
rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
1589
t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
1530
s->result = SRV_CHK_UNKNOWN;
1593
while (tick_is_expired(t->expire, now_ms))
1594
t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
1599
static int start_check_task(struct check *check, int mininter,
1600
int nbcheck, int srvpos)
1603
/* task for the check */
1604
if ((t = task_new()) == NULL) {
1605
Alert("Starting [%s:%s] check: out of memory.\n",
1606
check->server->proxy->id, check->server->id);
1611
t->process = process_chk;
1614
if (mininter < srv_getinter(check))
1615
mininter = srv_getinter(check);
1617
if (global.max_spread_checks && mininter > global.max_spread_checks)
1618
mininter = global.max_spread_checks;
1620
/* check this every ms */
1621
t->expire = tick_add(now_ms, MS_TO_TICKS(mininter * srvpos / nbcheck));
1535
1629
* Start health-check.
1536
1630
* Returns 0 if OK, -1 if error, and prints the error in this case.
1814
* return the id of a step in a send/expect session
1816
static int tcpcheck_get_step_id(struct server *s)
1818
struct tcpcheck_rule *cur = NULL, *next = NULL;
1821
cur = s->check.last_started_step;
1823
/* no step => first step */
1827
/* increment i until current step */
1828
list_for_each_entry(next, &s->proxy->tcpcheck_rules, list) {
1829
if (next->list.p == &cur->list)
1837
static void tcpcheck_main(struct connection *conn)
1840
struct list *head = NULL;
1841
struct tcpcheck_rule *cur = NULL;
1842
int done = 0, ret = 0;
1844
struct check *check = conn->owner;
1845
struct server *s = check->server;
1846
struct task *t = check->task;
1849
* don't do anything until the connection is established but if we're running
1850
* first step which must be a connect
1852
if (check->current_step && (!(conn->flags & CO_FL_CONNECTED))) {
1853
/* update expire time, should be done by process_chk */
1854
/* we allow up to min(inter, timeout.connect) for a connection
1855
* to establish but only when timeout.check is set
1856
* as it may be too short for a full check otherwise
1858
while (tick_is_expired(t->expire, now_ms)) {
1861
t_con = tick_add(t->expire, s->proxy->timeout.connect);
1862
t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
1864
if (s->proxy->timeout.check)
1865
t->expire = tick_first(t->expire, t_con);
1870
/* here, we know that the connection is established */
1871
if (check->result != CHK_RES_UNKNOWN)
1872
goto out_end_tcpcheck;
1874
/* head is the first element of the doubly linked list */
1875
head = &s->proxy->tcpcheck_rules;
1877
/* no step means first step
1879
if (check->current_step == NULL) {
1880
check->bo->p = check->bo->data;
1882
check->bi->p = check->bi->data;
1884
cur = check->current_step = LIST_ELEM(head->n, struct tcpcheck_rule *, list);
1885
t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
1886
if (s->proxy->timeout.check)
1887
t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
1889
/* keep on processing step */
1891
cur = check->current_step;
1894
if (conn->flags & CO_FL_HANDSHAKE)
1897
/* It's only the rules which will enable send/recv */
1898
__conn_data_stop_both(conn);
1901
/* we have to try to flush the output buffer before reading, at the end,
1902
* or if we're about to send a string that does not fit in the remaining space.
1905
(&cur->list == head ||
1906
check->current_step->action != TCPCHK_ACT_SEND ||
1907
check->current_step->string_len >= buffer_total_space(check->bo))) {
1909
if (conn->xprt->snd_buf(conn, check->bo, 0) <= 0) {
1910
if (conn->flags & CO_FL_ERROR) {
1911
chk_report_conn_err(conn, errno, 0);
1912
__conn_data_stop_both(conn);
1913
goto out_end_tcpcheck;
1919
/* did we reach the end ? If so, let's check that everything was sent */
1920
if (&cur->list == head) {
1926
if (check->current_step->action == TCPCHK_ACT_CONNECT) {
1927
struct protocol *proto;
1928
struct xprt_ops *xprt;
1930
/* mark the step as started */
1931
check->last_started_step = check->current_step;
1932
/* first, shut existing connection */
1933
conn_force_close(conn);
1935
/* prepare new connection */
1936
/* initialization */
1938
conn_attach(conn, check, &check_conn_cb);
1939
conn->target = &s->obj_type;
1941
/* no client address */
1942
clear_addr(&conn->addr.from);
1944
if (is_addr(&s->check_common.addr)) {
1945
/* we'll connect to the check addr specified on the server */
1946
conn->addr.to = s->check_common.addr;
1947
proto = s->check_common.proto;
1950
/* we'll connect to the addr on the server */
1951
conn->addr.to = s->addr;
1956
if (check->current_step->port)
1957
set_host_port(&conn->addr.to, check->current_step->port);
1958
else if (check->port)
1959
set_host_port(&conn->addr.to, check->port);
1962
if (check->current_step->conn_opts & TCPCHK_OPT_SSL) {
1968
#else /* USE_OPENSSL */
1970
#endif /* USE_OPENSSL */
1971
conn_prepare(conn, proto, xprt);
1973
ret = SN_ERR_INTERNAL;
1975
ret = proto->connect(conn, check->type, (check->type) ? 0 : 2);
1976
conn->flags |= CO_FL_WAKE_DATA;
1977
if (check->current_step->conn_opts & TCPCHK_OPT_SEND_PROXY) {
1978
conn->send_proxy_ofs = 1;
1979
conn->flags |= CO_FL_SEND_PROXY;
1982
/* It can return one of :
1983
* - SN_ERR_NONE if everything's OK
1984
* - SN_ERR_SRVTO if there are no more servers
1985
* - SN_ERR_SRVCL if the connection was refused by the server
1986
* - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1987
* - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1988
* - SN_ERR_INTERNAL for any other purely internal errors
1989
* Additionally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1990
* Note that we try to prevent the network stack from sending the ACK during the
1991
* connect() when a pure TCP check is used (without PROXY protocol).
1995
/* we allow up to min(inter, timeout.connect) for a connection
1996
* to establish but only when timeout.check is set
1997
* as it may be too short for a full check otherwise
1999
t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
2001
if (s->proxy->timeout.check && s->proxy->timeout.connect) {
2002
int t_con = tick_add(now_ms, s->proxy->timeout.connect);
2003
t->expire = tick_first(t->expire, t_con);
2006
case SN_ERR_SRVTO: /* ETIMEDOUT */
2007
case SN_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
2008
chunk_printf(&trash, "TCPCHK error establishing connection at step %d: %s",
2009
tcpcheck_get_step_id(s), strerror(errno));
2010
set_server_check_status(check, HCHK_STATUS_L4CON, trash.str);
2011
goto out_end_tcpcheck;
2012
case SN_ERR_PRXCOND:
2013
case SN_ERR_RESOURCE:
2014
case SN_ERR_INTERNAL:
2015
chunk_printf(&trash, "TCPCHK error establishing connection at step %d",
2016
tcpcheck_get_step_id(s));
2017
set_server_check_status(check, HCHK_STATUS_SOCKERR, trash.str);
2018
goto out_end_tcpcheck;
2021
/* allow next rule */
2022
cur = (struct tcpcheck_rule *)cur->list.n;
2023
check->current_step = cur;
2025
/* don't do anything until the connection is established */
2026
if (!(conn->flags & CO_FL_CONNECTED)) {
2027
/* update expire time, should be done by process_chk */
2028
/* we allow up to min(inter, timeout.connect) for a connection
2029
* to establish but only when timeout.check is set
2030
* as it may be too short for a full check otherwise
2032
while (tick_is_expired(t->expire, now_ms)) {
2035
t_con = tick_add(t->expire, s->proxy->timeout.connect);
2036
t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
2038
if (s->proxy->timeout.check)
2039
t->expire = tick_first(t->expire, t_con);
2044
} /* end 'connect' */
2045
else if (check->current_step->action == TCPCHK_ACT_SEND) {
2046
/* mark the step as started */
2047
check->last_started_step = check->current_step;
2049
/* reset the read buffer */
2050
if (*check->bi->data != '\0') {
2051
*check->bi->data = '\0';
2055
if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH)) {
2056
conn->flags |= CO_FL_ERROR;
2057
chk_report_conn_err(conn, 0, 0);
2058
goto out_end_tcpcheck;
2061
if (check->current_step->string_len >= check->bo->size) {
2062
chunk_printf(&trash, "tcp-check send : string too large (%d) for buffer size (%d) at step %d",
2063
check->current_step->string_len, check->bo->size,
2064
tcpcheck_get_step_id(s));
2065
set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2066
goto out_end_tcpcheck;
2069
/* do not try to send if there is no space */
2070
if (check->current_step->string_len >= buffer_total_space(check->bo))
2073
bo_putblk(check->bo, check->current_step->string, check->current_step->string_len);
2074
*check->bo->p = '\0'; /* to make gdb output easier to read */
2076
/* go to next rule and try to send */
2077
cur = (struct tcpcheck_rule *)cur->list.n;
2078
check->current_step = cur;
2080
else if (check->current_step->action == TCPCHK_ACT_EXPECT) {
2081
if (unlikely(check->result == CHK_RES_FAILED))
2082
goto out_end_tcpcheck;
2084
if (conn->xprt->rcv_buf(conn, check->bi, check->bi->size) <= 0) {
2085
if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_DATA_RD_SH)) {
2087
if ((conn->flags & CO_FL_ERROR) && !check->bi->i) {
2088
/* Report network errors only if we got no other data. Otherwise
2089
* we'll let the upper layers decide whether the response is OK
2090
* or not. It is very common that an RST sent by the server is
2091
* reported as an error just after the last data chunk.
2093
chk_report_conn_err(conn, errno, 0);
2094
goto out_end_tcpcheck;
2101
/* mark the step as started */
2102
check->last_started_step = check->current_step;
2105
/* Intermediate or complete response received.
2106
* Terminate string in check->bi->data buffer.
2108
if (check->bi->i < check->bi->size) {
2109
check->bi->data[check->bi->i] = '\0';
2112
check->bi->data[check->bi->i - 1] = '\0';
2113
done = 1; /* buffer full, don't wait for more data */
2116
contentptr = check->bi->data;
2118
/* Check that response body is not empty... */
2119
if (!check->bi->i) {
2123
/* empty response */
2124
chunk_printf(&trash, "TCPCHK got an empty response at step %d",
2125
tcpcheck_get_step_id(s));
2126
set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2128
goto out_end_tcpcheck;
2131
if (!done && (cur->string != NULL) && (check->bi->i < cur->string_len) )
2132
continue; /* try to read more */
2135
if (cur->string != NULL)
2136
ret = my_memmem(contentptr, check->bi->i, cur->string, cur->string_len) != NULL;
2137
else if (cur->expect_regex != NULL)
2138
ret = regex_exec(cur->expect_regex, contentptr);
2141
continue; /* try to read more */
2145
/* matched but we did not want to => ERROR */
2147
/* we were looking for a string */
2148
if (cur->string != NULL) {
2149
chunk_printf(&trash, "TCPCHK matched unwanted content '%s' at step %d",
2150
cur->string, tcpcheck_get_step_id(s));
2153
/* we were looking for a regex */
2154
chunk_printf(&trash, "TCPCHK matched unwanted content (regex) at step %d",
2155
tcpcheck_get_step_id(s));
2157
set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2158
goto out_end_tcpcheck;
2160
/* matched and was supposed to => OK, next step */
2162
cur = (struct tcpcheck_rule*)cur->list.n;
2163
check->current_step = cur;
2164
if (check->current_step->action == TCPCHK_ACT_EXPECT)
2165
goto tcpcheck_expect;
2166
__conn_data_stop_recv(conn);
2171
/* not matched and was not supposed to => OK, next step */
2173
cur = (struct tcpcheck_rule*)cur->list.n;
2174
check->current_step = cur;
2175
if (check->current_step->action == TCPCHK_ACT_EXPECT)
2176
goto tcpcheck_expect;
2177
__conn_data_stop_recv(conn);
2179
/* not matched but was supposed to => ERROR */
2181
/* we were looking for a string */
2182
if (cur->string != NULL) {
2183
chunk_printf(&trash, "TCPCHK did not match content '%s' at step %d",
2184
cur->string, tcpcheck_get_step_id(s));
2187
/* we were looking for a regex */
2188
chunk_printf(&trash, "TCPCHK did not match content (regex) at step %d",
2189
tcpcheck_get_step_id(s));
2191
set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2192
goto out_end_tcpcheck;
2196
} /* end loop over double chained step list */
2198
set_server_check_status(check, HCHK_STATUS_L7OKD, "(tcp-check)");
2199
goto out_end_tcpcheck;
2203
__conn_data_want_send(conn);
2205
if (check->current_step->action == TCPCHK_ACT_EXPECT)
2206
__conn_data_want_recv(conn);
2210
/* collect possible new errors */
2211
if (conn->flags & CO_FL_ERROR)
2212
chk_report_conn_err(conn, 0, 0);
2214
/* cleanup before leaving */
2215
check->current_step = NULL;
2217
if (check->result == CHK_RES_FAILED)
2218
conn->flags |= CO_FL_ERROR;
2220
__conn_data_stop_both(conn);
1722
2227
* Local variables:
1723
2228
* c-indent-level: 8
1724
2229
* c-basic-offset: 8