/*
 * UNIX SOCK_STREAM protocol layer (uxst)
 *
 * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
22
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/param.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/un.h>

#include <common/compat.h>
#include <common/config.h>
#include <common/debug.h>
#include <common/errors.h>
#include <common/memory.h>
#include <common/mini-clist.h>
#include <common/standard.h>
#include <common/time.h>
#include <common/version.h>

#include <types/acl.h>
#include <types/capture.h>
#include <types/client.h>
#include <types/global.h>
#include <types/polling.h>
#include <types/proxy.h>
#include <types/server.h>

#include <proto/acl.h>
#include <proto/backend.h>
#include <proto/buffers.h>
#include <proto/dumpstats.h>
#include <proto/fd.h>
#include <proto/log.h>
#include <proto/protocols.h>
#include <proto/proto_uxst.h>
#include <proto/queue.h>
#include <proto/senddata.h>
#include <proto/session.h>
#include <proto/stream_sock.h>
#include <proto/task.h>
61
#define MAXPATHLEN 128
64
static int uxst_bind_listeners(struct protocol *proto);
65
static int uxst_unbind_listeners(struct protocol *proto);
67
/* Note: must not be declared <const> as its list will be overwritten */
68
static struct protocol proto_unix = {
69
.name = "unix_stream",
70
.sock_domain = PF_UNIX,
71
.sock_type = SOCK_STREAM,
73
.sock_family = AF_UNIX,
74
.sock_addrlen = sizeof(struct sockaddr_un),
75
.l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),/* path len */
76
.read = &stream_sock_read,
77
.write = &stream_sock_write,
78
.bind_all = uxst_bind_listeners,
79
.unbind_all = uxst_unbind_listeners,
80
.enable_all = enable_all_listeners,
81
.disable_all = disable_all_listeners,
82
.listeners = LIST_HEAD_INIT(proto_unix.listeners),
87
/********************************
88
* 1) low-level socket functions
89
********************************/
92
/* This function creates a named PF_UNIX stream socket at address <path>. Note
93
* that the path cannot be NULL nor empty. <uid> and <gid> different of -1 will
94
* be used to change the socket owner. If <mode> is not 0, it will be used to
95
* restrict access to the socket. While it is known not to be portable on every
96
* OS, it's still useful where it works.
97
* It returns the assigned file descriptor, or -1 in the event of an error.
99
static int create_uxst_socket(const char *path, uid_t uid, gid_t gid, mode_t mode)
101
char tempname[MAXPATHLEN];
102
char backname[MAXPATHLEN];
103
struct sockaddr_un addr;
107
/* 1. create socket names */
109
Alert("Invalid name for a UNIX socket. Aborting.\n");
113
ret = snprintf(tempname, MAXPATHLEN, "%s.%d.tmp", path, pid);
114
if (ret < 0 || ret >= MAXPATHLEN) {
115
Alert("name too long for UNIX socket. Aborting.\n");
119
ret = snprintf(backname, MAXPATHLEN, "%s.%d.bak", path, pid);
120
if (ret < 0 || ret >= MAXPATHLEN) {
121
Alert("name too long for UNIX socket. Aborting.\n");
125
/* 2. clean existing orphaned entries */
126
if (unlink(tempname) < 0 && errno != ENOENT) {
127
Alert("error when trying to unlink previous UNIX socket. Aborting.\n");
131
if (unlink(backname) < 0 && errno != ENOENT) {
132
Alert("error when trying to unlink previous UNIX socket. Aborting.\n");
136
/* 3. backup existing socket */
137
if (link(path, backname) < 0 && errno != ENOENT) {
138
Alert("error when trying to preserve previous UNIX socket. Aborting.\n");
142
/* 4. prepare new socket */
143
addr.sun_family = AF_UNIX;
144
strncpy(addr.sun_path, tempname, sizeof(addr.sun_path));
145
addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
147
sock = socket(PF_UNIX, SOCK_STREAM, 0);
149
Alert("cannot create socket for UNIX listener. Aborting.\n");
150
goto err_unlink_back;
153
if (sock >= global.maxsock) {
154
Alert("socket(): not enough free sockets for UNIX listener. Raise -n argument. Aborting.\n");
155
goto err_unlink_temp;
158
if (fcntl(sock, F_SETFL, O_NONBLOCK) == -1) {
159
Alert("cannot make UNIX socket non-blocking. Aborting.\n");
160
goto err_unlink_temp;
163
if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
164
/* note that bind() creates the socket <tempname> on the file system */
165
Alert("cannot bind socket for UNIX listener. Aborting.\n");
166
goto err_unlink_temp;
169
if (((uid != -1 || gid != -1) && (chown(tempname, uid, gid) == -1)) ||
170
(mode != 0 && chmod(tempname, mode) == -1)) {
171
Alert("cannot change UNIX socket ownership. Aborting.\n");
172
goto err_unlink_temp;
175
if (listen(sock, 0) < 0) {
176
Alert("cannot listen to socket for UNIX listener. Aborting.\n");
177
goto err_unlink_temp;
181
* Point of no return: we are ready, we'll switch the sockets. We don't
182
* fear loosing the socket <path> because we have a copy of it in
185
if (rename(tempname, path) < 0) {
186
Alert("cannot switch final and temporary sockets for UNIX listener. Aborting.\n");
191
unlink(backname); /* no need to keep this one either */
196
ret = rename(backname, path);
197
if (ret < 0 && errno == ENOENT)
208
/* Tries to destroy the UNIX stream socket <path>. The socket must not be used
 * anymore. It practises best effort, and no error is returned.
 */
static void destroy_uxst_socket(const char *path)
{
	struct sockaddr_un addr;
	int sock, ret;

	/* We might have been chrooted, so we may not be able to access the
	 * socket. In order to avoid bothering the other end, we connect with a
	 * wrong protocol, namely SOCK_DGRAM. The return code from connect()
	 * is enough to know if the socket is still live or not. If it's live
	 * in mode SOCK_STREAM, we get EPROTOTYPE or anything else but not
	 * ECONNREFUSED. In this case, we do not touch it because it's used
	 * by some other process.
	 */
	sock = socket(PF_UNIX, SOCK_DGRAM, 0);
	if (sock < 0)
		return;	/* best effort: nothing more we can do */

	addr.sun_family = AF_UNIX;
	strncpy(addr.sun_path, path, sizeof(addr.sun_path));
	addr.sun_path[sizeof(addr.sun_path) - 1] = 0;	/* strncpy may not terminate */
	ret = connect(sock, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0 && errno == ECONNREFUSED) {
		/* Connect failed: the socket still exists but is not used
		 * anymore. Let's remove this socket now.
		 */
		unlink(path);
	}
	close(sock);
}
242
/********************************
243
* 2) listener-oriented functions
244
********************************/
247
/* This function creates the UNIX socket associated to the listener. It changes
248
* the state from ASSIGNED to LISTEN. The socket is NOT enabled for polling.
249
* The return value is composed from ERR_NONE, ERR_RETRYABLE and ERR_FATAL.
251
static int uxst_bind_listener(struct listener *listener)
255
if (listener->state != LI_ASSIGNED)
256
return ERR_NONE; /* already bound */
258
fd = create_uxst_socket(((struct sockaddr_un *)&listener->addr)->sun_path,
259
listener->perm.ux.uid,
260
listener->perm.ux.gid,
261
listener->perm.ux.mode);
265
/* the socket is now listening */
267
listener->state = LI_LISTEN;
269
/* the function for the accept() event */
271
fdtab[fd].cb[DIR_RD].f = listener->accept;
272
fdtab[fd].cb[DIR_WR].f = NULL; /* never called */
273
fdtab[fd].cb[DIR_RD].b = fdtab[fd].cb[DIR_WR].b = NULL;
274
fdtab[fd].owner = (struct task *)listener; /* reference the listener instead of a task */
275
fdtab[fd].state = FD_STLISTEN;
276
fdtab[fd].peeraddr = NULL;
277
fdtab[fd].peerlen = 0;
278
fdtab[fd].listener = NULL;
282
/* This function closes the UNIX sockets for the specified listener.
283
* The listener enters the LI_ASSIGNED state. It always returns ERR_NONE.
285
static int uxst_unbind_listener(struct listener *listener)
287
if (listener->state == LI_READY)
288
EV_FD_CLR(listener->fd, DIR_RD);
290
if (listener->state >= LI_LISTEN) {
291
fd_delete(listener->fd);
292
listener->state = LI_ASSIGNED;
293
destroy_uxst_socket(((struct sockaddr_un *)&listener->addr)->sun_path);
298
/* Add a listener to the list of unix stream listeners. The listener's state
299
* is automatically updated from LI_INIT to LI_ASSIGNED. The number of
300
* listeners is updated. This is the function to use to add a new listener.
302
void uxst_add_listener(struct listener *listener)
304
if (listener->state != LI_INIT)
306
listener->state = LI_ASSIGNED;
307
listener->proto = &proto_unix;
308
LIST_ADDQ(&proto_unix.listeners, &listener->proto_list);
309
proto_unix.nb_listeners++;
312
/********************************
313
* 3) protocol-oriented functions
314
********************************/
317
/* This function creates all UNIX sockets bound to the protocol entry <proto>.
318
* It is intended to be used as the protocol's bind_all() function.
319
* The sockets will be registered but not added to any fd_set, in order not to
320
* loose them across the fork(). A call to uxst_enable_listeners() is needed
321
* to complete initialization.
323
* The return value is composed from ERR_NONE, ERR_RETRYABLE and ERR_FATAL.
325
static int uxst_bind_listeners(struct protocol *proto)
327
struct listener *listener;
330
list_for_each_entry(listener, &proto->listeners, proto_list) {
331
err |= uxst_bind_listener(listener);
339
/* This function stops all listening UNIX sockets bound to the protocol
340
* <proto>. It does not detaches them from the protocol.
341
* It always returns ERR_NONE.
343
static int uxst_unbind_listeners(struct protocol *proto)
345
struct listener *listener;
347
list_for_each_entry(listener, &proto->listeners, proto_list)
348
uxst_unbind_listener(listener);
353
/********************************
354
* 4) high-level functions
355
********************************/
359
* This function is called on a read event from a listen socket, corresponding
360
* to an accept. It tries to accept as many connections as possible.
361
* It returns 0. Since we use UNIX sockets on the local system for monitoring
362
* purposes and other related things, we do not need to output as many messages
363
* as with TCP which can fall under attack.
365
int uxst_event_accept(int fd) {
366
struct listener *l = (struct listener *)fdtab[fd].owner;
372
if (global.nbproc > 1)
373
max_accept = 8; /* let other processes catch some connections too */
377
while (max_accept--) {
378
struct sockaddr_storage addr;
379
socklen_t laddr = sizeof(addr);
381
if ((cfd = accept(fd, (struct sockaddr *)&addr, &laddr)) == -1) {
386
return 0; /* nothing more to accept */
388
/* Process reached system FD limit. Check system tunables. */
391
/* Process reached process FD limit. Check 'ulimit-n'. */
395
/* Process reached system memory limit. Check system tunables. */
402
if (l->nbconn >= l->maxconn) {
403
/* too many connections, we shoot this one and return.
404
* FIXME: it would be better to simply switch the listener's
405
* state to LI_FULL and disable the FD. We could re-enable
406
* it upon fd_delete(), but this requires all protocols to
413
if ((s = pool_alloc2(pool2_session)) == NULL) {
414
Alert("out of memory in uxst_event_accept().\n");
419
if ((t = pool_alloc2(pool2_task)) == NULL) {
420
Alert("out of memory in uxst_event_accept().\n");
422
pool_free2(pool2_session, s);
428
/* FIXME: should be checked earlier */
429
if (cfd >= global.maxsock) {
430
Alert("accept(): not enough free sockets. Raise -n argument. Giving up.\n");
432
pool_free2(pool2_task, t);
433
pool_free2(pool2_session, s);
437
if (fcntl(cfd, F_SETFL, O_NONBLOCK) == -1) {
438
Alert("accept(): cannot set the socket in non blocking mode. Giving up\n");
440
pool_free2(pool2_task, t);
441
pool_free2(pool2_session, s);
447
t->state = TASK_IDLE;
448
t->process = l->handler;
455
s->cli_state = CL_STDATA;
456
s->srv_state = SV_STIDLE;
457
s->req = s->rep = NULL; /* will be allocated later */
464
memset(&s->logs, 0, sizeof(s->logs));
465
memset(&s->txn, 0, sizeof(s->txn));
467
s->data_state = DATA_ST_INIT;
468
s->data_source = DATA_SRC_NONE;
469
s->uniq_id = totalconn;
471
if ((s->req = pool_alloc2(pool2_buffer)) == NULL) { /* no memory */
472
close(cfd); /* nothing can be done for this fd without memory */
473
pool_free2(pool2_task, t);
474
pool_free2(pool2_session, s);
478
if ((s->rep = pool_alloc2(pool2_buffer)) == NULL) { /* no memory */
479
pool_free2(pool2_buffer, s->req);
480
close(cfd); /* nothing can be done for this fd without memory */
481
pool_free2(pool2_task, t);
482
pool_free2(pool2_session, s);
488
s->req->rlim += BUFSIZE;
489
s->rep->rlim += BUFSIZE;
492
fdtab[cfd].owner = t;
493
fdtab[cfd].listener = l;
494
fdtab[cfd].state = FD_STREADY;
495
fdtab[cfd].cb[DIR_RD].f = l->proto->read;
496
fdtab[cfd].cb[DIR_RD].b = s->req;
497
fdtab[cfd].cb[DIR_WR].f = l->proto->write;
498
fdtab[cfd].cb[DIR_WR].b = s->rep;
499
fdtab[cfd].peeraddr = (struct sockaddr *)&s->cli_addr;
500
fdtab[cfd].peerlen = sizeof(s->cli_addr);
502
tv_eternity(&s->req->rex);
503
tv_eternity(&s->req->wex);
504
tv_eternity(&s->req->cex);
505
tv_eternity(&s->rep->rex);
506
tv_eternity(&s->rep->wex);
508
tv_eternity(&s->req->wto);
509
tv_eternity(&s->req->cto);
510
tv_eternity(&s->req->rto);
511
tv_eternity(&s->rep->rto);
512
tv_eternity(&s->rep->cto);
513
tv_eternity(&s->rep->wto);
516
s->req->rto = *l->timeout;
519
s->rep->wto = *l->timeout;
521
tv_eternity(&t->expire);
522
if (l->timeout && tv_isset(l->timeout)) {
523
EV_FD_SET(cfd, DIR_RD);
524
tv_add(&s->req->rex, &now, &s->req->rto);
525
t->expire = s->req->rex;
531
l->nbconn++; /* warning! right now, it's up to the handler to decrease this */
532
if (l->nbconn >= l->maxconn) {
533
EV_FD_CLR(l->fd, DIR_RD);
539
//fprintf(stderr, "accepting from %p => %d conn, %d total, task=%p, cfd=%d, maxfd=%d\n", p, actconn, totalconn, t, cfd, maxfd);
540
} /* end of while (p->feconn < p->maxconn) */
541
//fprintf(stderr,"fct %s:%d\n", __FUNCTION__, __LINE__);
546
* manages the client FSM and its socket. It returns 1 if a state has changed
547
* (and a resync may be needed), otherwise 0.
549
static int process_uxst_cli(struct session *t)
551
int s = t->srv_state;
552
int c = t->cli_state;
553
struct buffer *req = t->req;
554
struct buffer *rep = t->rep;
555
//fprintf(stderr,"fct %s:%d\n", __FUNCTION__, __LINE__);
556
if (c == CL_STDATA) {
557
/* FIXME: this error handling is partly buggy because we always report
558
* a 'DATA' phase while we don't know if the server was in IDLE, CONN
559
* or HEADER phase. BTW, it's not logical to expire the client while
560
* we're waiting for the server to connect.
562
/* read or write error */
563
if (rep->flags & BF_WRITE_ERROR || req->flags & BF_READ_ERROR) {
566
fd_delete(t->cli_fd);
567
t->cli_state = CL_STCLOSE;
568
if (!(t->flags & SN_ERR_MASK))
569
t->flags |= SN_ERR_CLICL;
570
if (!(t->flags & SN_FINST_MASK)) {
572
t->flags |= SN_FINST_Q;
573
else if (s == SV_STCONN)
574
t->flags |= SN_FINST_C;
576
t->flags |= SN_FINST_D;
580
/* last read, or end of server write */
581
else if (req->flags & BF_READ_NULL || s == SV_STSHUTW || s == SV_STCLOSE) {
582
EV_FD_CLR(t->cli_fd, DIR_RD);
584
t->cli_state = CL_STSHUTR;
587
/* last server read and buffer empty */
588
else if ((s == SV_STSHUTR || s == SV_STCLOSE) && (rep->l == 0)) {
589
EV_FD_CLR(t->cli_fd, DIR_WR);
591
shutdown(t->cli_fd, SHUT_WR);
592
/* We must ensure that the read part is still alive when switching
594
EV_FD_SET(t->cli_fd, DIR_RD);
595
tv_add_ifset(&req->rex, &now, &req->rto);
596
t->cli_state = CL_STSHUTW;
597
//fprintf(stderr,"%p:%s(%d), c=%d, s=%d\n", t, __FUNCTION__, __LINE__, t->cli_state, t->cli_state);
601
else if (tv_isle(&req->rex, &now)) {
602
EV_FD_CLR(t->cli_fd, DIR_RD);
604
t->cli_state = CL_STSHUTR;
605
if (!(t->flags & SN_ERR_MASK))
606
t->flags |= SN_ERR_CLITO;
607
if (!(t->flags & SN_FINST_MASK)) {
609
t->flags |= SN_FINST_Q;
610
else if (s == SV_STCONN)
611
t->flags |= SN_FINST_C;
613
t->flags |= SN_FINST_D;
618
else if (tv_isle(&rep->wex, &now)) {
619
EV_FD_CLR(t->cli_fd, DIR_WR);
621
shutdown(t->cli_fd, SHUT_WR);
622
/* We must ensure that the read part is still alive when switching
624
EV_FD_SET(t->cli_fd, DIR_RD);
625
tv_add_ifset(&req->rex, &now, &req->rto);
627
t->cli_state = CL_STSHUTW;
628
if (!(t->flags & SN_ERR_MASK))
629
t->flags |= SN_ERR_CLITO;
630
if (!(t->flags & SN_FINST_MASK)) {
632
t->flags |= SN_FINST_Q;
633
else if (s == SV_STCONN)
634
t->flags |= SN_FINST_C;
636
t->flags |= SN_FINST_D;
641
if (req->l >= req->rlim - req->data) {
642
/* no room to read more data */
643
if (EV_FD_COND_C(t->cli_fd, DIR_RD)) {
644
/* stop reading until we get some space */
645
tv_eternity(&req->rex);
648
/* there's still some space in the buffer */
649
if (EV_FD_COND_S(t->cli_fd, DIR_RD)) {
650
if (!tv_isset(&req->rto) ||
651
(t->srv_state < SV_STDATA && tv_isset(&req->wto)))
652
/* If the client has no timeout, or if the server not ready yet, and we
653
* know for sure that it can expire, then it's cleaner to disable the
654
* timeout on the client side so that too low values cannot make the
655
* sessions abort too early.
657
tv_eternity(&req->rex);
659
tv_add(&req->rex, &now, &req->rto);
664
((s < SV_STDATA) /* FIXME: this may be optimized && (rep->w == rep->h)*/)) {
665
if (EV_FD_COND_C(t->cli_fd, DIR_WR)) {
667
tv_eternity(&rep->wex);
670
/* buffer not empty */
671
if (EV_FD_COND_S(t->cli_fd, DIR_WR)) {
672
/* restart writing */
673
if (tv_add_ifset(&rep->wex, &now, &rep->wto)) {
674
/* FIXME: to prevent the client from expiring read timeouts during writes,
679
tv_eternity(&rep->wex);
682
return 0; /* other cases change nothing */
684
else if (c == CL_STSHUTR) {
685
if (rep->flags & BF_WRITE_ERROR) {
687
fd_delete(t->cli_fd);
688
t->cli_state = CL_STCLOSE;
689
if (!(t->flags & SN_ERR_MASK))
690
t->flags |= SN_ERR_CLICL;
691
if (!(t->flags & SN_FINST_MASK)) {
693
t->flags |= SN_FINST_Q;
694
else if (s == SV_STCONN)
695
t->flags |= SN_FINST_C;
697
t->flags |= SN_FINST_D;
701
else if ((s == SV_STSHUTR || s == SV_STCLOSE) && (rep->l == 0)) {
703
fd_delete(t->cli_fd);
704
t->cli_state = CL_STCLOSE;
707
else if (tv_isle(&rep->wex, &now)) {
709
fd_delete(t->cli_fd);
710
t->cli_state = CL_STCLOSE;
711
if (!(t->flags & SN_ERR_MASK))
712
t->flags |= SN_ERR_CLITO;
713
if (!(t->flags & SN_FINST_MASK)) {
715
t->flags |= SN_FINST_Q;
716
else if (s == SV_STCONN)
717
t->flags |= SN_FINST_C;
719
t->flags |= SN_FINST_D;
725
if (EV_FD_COND_C(t->cli_fd, DIR_WR)) {
727
tv_eternity(&rep->wex);
730
/* buffer not empty */
731
if (EV_FD_COND_S(t->cli_fd, DIR_WR)) {
732
/* restart writing */
733
if (!tv_add_ifset(&rep->wex, &now, &rep->wto))
734
tv_eternity(&rep->wex);
739
else if (c == CL_STSHUTW) {
740
if (req->flags & BF_READ_ERROR) {
742
fd_delete(t->cli_fd);
743
t->cli_state = CL_STCLOSE;
744
if (!(t->flags & SN_ERR_MASK))
745
t->flags |= SN_ERR_CLICL;
746
if (!(t->flags & SN_FINST_MASK)) {
748
t->flags |= SN_FINST_Q;
749
else if (s == SV_STCONN)
750
t->flags |= SN_FINST_C;
752
t->flags |= SN_FINST_D;
756
else if (req->flags & BF_READ_NULL || s == SV_STSHUTW || s == SV_STCLOSE) {
758
fd_delete(t->cli_fd);
759
t->cli_state = CL_STCLOSE;
762
else if (tv_isle(&req->rex, &now)) {
764
fd_delete(t->cli_fd);
765
t->cli_state = CL_STCLOSE;
766
if (!(t->flags & SN_ERR_MASK))
767
t->flags |= SN_ERR_CLITO;
768
if (!(t->flags & SN_FINST_MASK)) {
770
t->flags |= SN_FINST_Q;
771
else if (s == SV_STCONN)
772
t->flags |= SN_FINST_C;
774
t->flags |= SN_FINST_D;
778
else if (req->l >= req->rlim - req->data) {
779
/* no room to read more data */
781
/* FIXME-20050705: is it possible for a client to maintain a session
782
* after the timeout by sending more data after it receives a close ?
785
if (EV_FD_COND_C(t->cli_fd, DIR_RD)) {
786
/* stop reading until we get some space */
787
tv_eternity(&req->rex);
788
//fprintf(stderr,"%p:%s(%d), c=%d, s=%d\n", t, __FUNCTION__, __LINE__, t->cli_state, t->cli_state);
791
/* there's still some space in the buffer */
792
if (EV_FD_COND_S(t->cli_fd, DIR_RD)) {
793
if (!tv_add_ifset(&req->rex, &now, &req->rto))
794
tv_eternity(&req->rex);
795
//fprintf(stderr,"%p:%s(%d), c=%d, s=%d\n", t, __FUNCTION__, __LINE__, t->cli_state, t->cli_state);
800
else { /* CL_STCLOSE: nothing to do */
801
if ((global.mode & MODE_DEBUG) && (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))) {
803
len = sprintf(trash, "%08x:%s.clicls[%04x:%04x]\n", t->uniq_id, t->be?t->be->id:"",
804
(unsigned short)t->cli_fd, (unsigned short)t->srv_fd);
805
write(1, trash, len);
813
/* FIXME! This part has not been completely converted yet, and it may
814
* still be very specific to TCPv4 ! Also, it relies on some parameters
815
* such as conn_retries which are not set upon accept().
818
* Manages the server FSM and its socket. It returns 1 if a state has changed
819
* (and a resync may be needed), otherwise 0.
821
static int process_uxst_srv(struct session *t)
823
int s = t->srv_state;
824
int c = t->cli_state;
825
struct buffer *req = t->req;
826
struct buffer *rep = t->rep;
829
if (s == SV_STIDLE) {
830
if (c == CL_STCLOSE || c == CL_STSHUTW ||
832
(t->req->l == 0 || t->be->options & PR_O_ABRT_CLOSE))) { /* give up */
833
tv_eternity(&req->cex);
835
t->logs.t_queue = tv_ms_elapsed(&t->logs.tv_accept, &now);
836
srv_close_with_err(t, SN_ERR_CLICL, t->pend_pos ? SN_FINST_Q : SN_FINST_C);
840
/* FIXME: reimplement the TARPIT check here */
842
/* Right now, we will need to create a connection to the server.
843
* We might already have tried, and got a connection pending, in
844
* which case we will not do anything till it's pending. It's up
845
* to any other session to release it and wake us up again.
848
if (!tv_isle(&req->cex, &now))
851
/* we've been waiting too long here */
852
tv_eternity(&req->cex);
853
t->logs.t_queue = tv_ms_elapsed(&t->logs.tv_accept, &now);
854
srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_Q);
856
t->srv->failed_conns++;
858
t->fe->failed_conns++;
864
/* first, get a connection */
865
if (srv_redispatch_connect(t))
866
return t->srv_state != SV_STIDLE;
868
/* try to (re-)connect to the server, and fail if we expire the
871
if (srv_retryable_connect(t)) {
872
t->logs.t_queue = tv_ms_elapsed(&t->logs.tv_accept, &now);
873
return t->srv_state != SV_STIDLE;
878
else if (s == SV_STCONN) { /* connection in progress */
879
if (c == CL_STCLOSE || c == CL_STSHUTW ||
881
((t->req->l == 0 && !(req->flags & BF_WRITE_STATUS)) ||
882
t->be->options & PR_O_ABRT_CLOSE))) { /* give up */
883
tv_eternity(&req->cex);
884
fd_delete(t->srv_fd);
888
srv_close_with_err(t, SN_ERR_CLICL, SN_FINST_C);
891
if (!(req->flags & BF_WRITE_STATUS) && !tv_isle(&req->cex, &now)) {
892
//fprintf(stderr,"1: c=%d, s=%d, now=%d.%06d, exp=%d.%06d\n", c, s, now.tv_sec, now.tv_usec, req->cex.tv_sec, req->cex.tv_usec);
893
return 0; /* nothing changed */
895
else if (!(req->flags & BF_WRITE_STATUS) || (req->flags & BF_WRITE_ERROR)) {
896
/* timeout, asynchronous connect error or first write error */
897
//fprintf(stderr,"2: c=%d, s=%d\n", c, s);
899
fd_delete(t->srv_fd);
903
if (!(req->flags & BF_WRITE_STATUS))
904
conn_err = SN_ERR_SRVTO; // it was a connect timeout.
906
conn_err = SN_ERR_SRVCL; // it was an asynchronous connect error.
908
/* ensure that we have enough retries left */
909
if (srv_count_retry_down(t, conn_err))
912
if (t->srv && t->conn_retries == 0 && t->be->options & PR_O_REDISP) {
913
/* We're on our last chance, and the REDISP option was specified.
914
* We will ignore cookie and force to balance or use the dispatcher.
916
/* let's try to offer this slot to anybody */
917
if (may_dequeue_tasks(t->srv, t->be))
918
task_wakeup(t->srv->queue_mgt);
921
t->srv->failed_conns++;
922
t->be->failed_conns++;
924
t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
925
t->srv = NULL; /* it's left to the dispatcher to choose a server */
927
/* first, get a connection */
928
if (srv_redispatch_connect(t))
929
return t->srv_state != SV_STIDLE;
933
/* Now we will try to either reconnect to the same server or
934
* connect to another server. If the connection gets queued
935
* because all servers are saturated, then we will go back to
936
* the SV_STIDLE state.
938
if (srv_retryable_connect(t)) {
939
t->logs.t_queue = tv_ms_elapsed(&t->logs.tv_accept, &now);
940
return t->srv_state != SV_STCONN;
943
/* we need to redispatch the connection to another server */
944
if (srv_redispatch_connect(t))
945
return t->srv_state != SV_STCONN;
948
else { /* no error or write 0 */
949
t->logs.t_connect = tv_ms_elapsed(&t->logs.tv_accept, &now);
951
//fprintf(stderr,"3: c=%d, s=%d\n", c, s);
952
if (req->l == 0) /* nothing to write */ {
953
EV_FD_CLR(t->srv_fd, DIR_WR);
954
tv_eternity(&req->wex);
955
} else /* need the right to write */ {
956
EV_FD_SET(t->srv_fd, DIR_WR);
957
if (tv_add_ifset(&req->wex, &now, &req->wto)) {
958
/* FIXME: to prevent the server from expiring read timeouts during writes,
963
tv_eternity(&req->wex);
966
EV_FD_SET(t->srv_fd, DIR_RD);
967
if (!tv_add_ifset(&rep->rex, &now, &rep->rto))
968
tv_eternity(&rep->rex);
970
t->srv_state = SV_STDATA;
973
rep->rlim = rep->data + BUFSIZE; /* no rewrite needed */
975
/* if the user wants to log as soon as possible, without counting
976
bytes from the server, then this is the right moment. */
977
if (t->fe && t->fe->to_log && !(t->logs.logwait & LW_BYTES)) {
978
t->logs.t_close = t->logs.t_connect; /* to get a valid end date */
981
tv_eternity(&req->cex);
985
else if (s == SV_STDATA) {
986
/* read or write error */
987
if (req->flags & BF_WRITE_ERROR || rep->flags & BF_READ_ERROR) {
990
fd_delete(t->srv_fd);
993
t->srv->failed_resp++;
995
t->be->failed_resp++;
996
t->srv_state = SV_STCLOSE;
997
if (!(t->flags & SN_ERR_MASK))
998
t->flags |= SN_ERR_SRVCL;
999
if (!(t->flags & SN_FINST_MASK))
1000
t->flags |= SN_FINST_D;
1001
/* We used to have a free connection slot. Since we'll never use it,
1002
* we have to inform the server that it may be used by another session.
1004
if (may_dequeue_tasks(t->srv, t->be))
1005
task_wakeup(t->srv->queue_mgt);
1009
/* last read, or end of client write */
1010
else if (rep->flags & BF_READ_NULL || c == CL_STSHUTW || c == CL_STCLOSE) {
1011
EV_FD_CLR(t->srv_fd, DIR_RD);
1013
t->srv_state = SV_STSHUTR;
1014
//fprintf(stderr,"%p:%s(%d), c=%d, s=%d\n", t, __FUNCTION__, __LINE__, t->cli_state, t->cli_state);
1017
/* end of client read and no more data to send */
1018
else if ((c == CL_STSHUTR || c == CL_STCLOSE) && (req->l == 0)) {
1019
EV_FD_CLR(t->srv_fd, DIR_WR);
1021
shutdown(t->srv_fd, SHUT_WR);
1022
/* We must ensure that the read part is still alive when switching
1024
EV_FD_SET(t->srv_fd, DIR_RD);
1025
tv_add_ifset(&rep->rex, &now, &rep->rto);
1027
t->srv_state = SV_STSHUTW;
1031
else if (tv_isle(&rep->rex, &now)) {
1032
EV_FD_CLR(t->srv_fd, DIR_RD);
1034
t->srv_state = SV_STSHUTR;
1035
if (!(t->flags & SN_ERR_MASK))
1036
t->flags |= SN_ERR_SRVTO;
1037
if (!(t->flags & SN_FINST_MASK))
1038
t->flags |= SN_FINST_D;
1042
else if (tv_isle(&req->wex, &now)) {
1043
EV_FD_CLR(t->srv_fd, DIR_WR);
1045
shutdown(t->srv_fd, SHUT_WR);
1046
/* We must ensure that the read part is still alive when switching
1048
EV_FD_SET(t->srv_fd, DIR_RD);
1049
tv_add_ifset(&rep->rex, &now, &rep->rto);
1050
t->srv_state = SV_STSHUTW;
1051
if (!(t->flags & SN_ERR_MASK))
1052
t->flags |= SN_ERR_SRVTO;
1053
if (!(t->flags & SN_FINST_MASK))
1054
t->flags |= SN_FINST_D;
1058
/* recompute request time-outs */
1060
if (EV_FD_COND_C(t->srv_fd, DIR_WR)) {
1062
tv_eternity(&req->wex);
1065
else { /* buffer not empty, there are still data to be transferred */
1066
if (EV_FD_COND_S(t->srv_fd, DIR_WR)) {
1067
/* restart writing */
1068
if (tv_add_ifset(&req->wex, &now, &req->wto)) {
1069
/* FIXME: to prevent the server from expiring read timeouts during writes,
1071
rep->rex = req->wex;
1074
tv_eternity(&req->wex);
1078
/* recompute response time-outs */
1079
if (rep->l == BUFSIZE) { /* no room to read more data */
1080
if (EV_FD_COND_C(t->srv_fd, DIR_RD)) {
1081
tv_eternity(&rep->rex);
1085
if (EV_FD_COND_S(t->srv_fd, DIR_RD)) {
1086
if (!tv_add_ifset(&rep->rex, &now, &rep->rto))
1087
tv_eternity(&rep->rex);
1091
return 0; /* other cases change nothing */
1093
else if (s == SV_STSHUTR) {
1094
if (req->flags & BF_WRITE_ERROR) {
1095
//EV_FD_CLR(t->srv_fd, DIR_WR);
1097
fd_delete(t->srv_fd);
1100
t->srv->failed_resp++;
1102
t->be->failed_resp++;
1104
t->srv_state = SV_STCLOSE;
1105
if (!(t->flags & SN_ERR_MASK))
1106
t->flags |= SN_ERR_SRVCL;
1107
if (!(t->flags & SN_FINST_MASK))
1108
t->flags |= SN_FINST_D;
1109
/* We used to have a free connection slot. Since we'll never use it,
1110
* we have to inform the server that it may be used by another session.
1112
if (may_dequeue_tasks(t->srv, t->be))
1113
task_wakeup(t->srv->queue_mgt);
1117
else if ((c == CL_STSHUTR || c == CL_STCLOSE) && (req->l == 0)) {
1118
//EV_FD_CLR(t->srv_fd, DIR_WR);
1120
fd_delete(t->srv_fd);
1124
t->srv_state = SV_STCLOSE;
1125
/* We used to have a free connection slot. Since we'll never use it,
1126
* we have to inform the server that it may be used by another session.
1128
if (may_dequeue_tasks(t->srv, t->be))
1129
task_wakeup(t->srv->queue_mgt);
1133
else if (tv_isle(&req->wex, &now)) {
1134
//EV_FD_CLR(t->srv_fd, DIR_WR);
1136
fd_delete(t->srv_fd);
1140
t->srv_state = SV_STCLOSE;
1141
if (!(t->flags & SN_ERR_MASK))
1142
t->flags |= SN_ERR_SRVTO;
1143
if (!(t->flags & SN_FINST_MASK))
1144
t->flags |= SN_FINST_D;
1145
/* We used to have a free connection slot. Since we'll never use it,
1146
* we have to inform the server that it may be used by another session.
1148
if (may_dequeue_tasks(t->srv, t->be))
1149
task_wakeup(t->srv->queue_mgt);
1153
else if (req->l == 0) {
1154
if (EV_FD_COND_C(t->srv_fd, DIR_WR)) {
1156
tv_eternity(&req->wex);
1159
else { /* buffer not empty */
1160
if (EV_FD_COND_S(t->srv_fd, DIR_WR)) {
1161
/* restart writing */
1162
if (!tv_add_ifset(&req->wex, &now, &req->wto))
1163
tv_eternity(&req->wex);
1168
else if (s == SV_STSHUTW) {
1169
if (rep->flags & BF_READ_ERROR) {
1170
//EV_FD_CLR(t->srv_fd, DIR_RD);
1172
fd_delete(t->srv_fd);
1175
t->srv->failed_resp++;
1177
t->be->failed_resp++;
1179
t->srv_state = SV_STCLOSE;
1180
if (!(t->flags & SN_ERR_MASK))
1181
t->flags |= SN_ERR_SRVCL;
1182
if (!(t->flags & SN_FINST_MASK))
1183
t->flags |= SN_FINST_D;
1184
/* We used to have a free connection slot. Since we'll never use it,
1185
* we have to inform the server that it may be used by another session.
1187
if (may_dequeue_tasks(t->srv, t->be))
1188
task_wakeup(t->srv->queue_mgt);
1192
else if (rep->flags & BF_READ_NULL || c == CL_STSHUTW || c == CL_STCLOSE) {
1193
//EV_FD_CLR(t->srv_fd, DIR_RD);
1195
fd_delete(t->srv_fd);
1199
t->srv_state = SV_STCLOSE;
1200
/* We used to have a free connection slot. Since we'll never use it,
1201
* we have to inform the server that it may be used by another session.
1203
if (may_dequeue_tasks(t->srv, t->be))
1204
task_wakeup(t->srv->queue_mgt);
1208
else if (tv_isle(&rep->rex, &now)) {
1209
//EV_FD_CLR(t->srv_fd, DIR_RD);
1211
fd_delete(t->srv_fd);
1215
t->srv_state = SV_STCLOSE;
1216
if (!(t->flags & SN_ERR_MASK))
1217
t->flags |= SN_ERR_SRVTO;
1218
if (!(t->flags & SN_FINST_MASK))
1219
t->flags |= SN_FINST_D;
1220
/* We used to have a free connection slot. Since we'll never use it,
1221
* we have to inform the server that it may be used by another session.
1223
if (may_dequeue_tasks(t->srv, t->be))
1224
task_wakeup(t->srv->queue_mgt);
1228
else if (rep->l == BUFSIZE) { /* no room to read more data */
1229
if (EV_FD_COND_C(t->srv_fd, DIR_RD)) {
1230
tv_eternity(&rep->rex);
1234
if (EV_FD_COND_S(t->srv_fd, DIR_RD)) {
1235
if (!tv_add_ifset(&rep->rex, &now, &rep->rto))
1236
tv_eternity(&rep->rex);
1241
else { /* SV_STCLOSE : nothing to do */
1242
if ((global.mode & MODE_DEBUG) && (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))) {
1244
len = sprintf(trash, "%08x:%s.srvcls[%04x:%04x]\n",
1245
t->uniq_id, t->be->id, (unsigned short)t->cli_fd, (unsigned short)t->srv_fd);
1246
write(1, trash, len);
1253
/* Processes the client and server jobs of a session task, then
1254
* puts it back to the wait queue in a clean state, or
1255
* cleans up its resources if it must be deleted. Returns
1256
* the time the task accepts to wait, or TIME_ETERNITY for
1259
void process_uxst_session(struct task *t, struct timeval *next)
1261
struct session *s = t->context;
1266
fsm_resync |= process_uxst_cli(s);
1267
if (s->srv_state == SV_STIDLE) {
1268
if (s->cli_state == CL_STCLOSE || s->cli_state == CL_STSHUTW) {
1269
s->srv_state = SV_STCLOSE;
1273
if (s->cli_state == CL_STSHUTR ||
1274
(s->req->l >= s->req->rlim - s->req->data)) {
1275
if (s->req->l == 0) {
1276
s->srv_state = SV_STCLOSE;
1280
/* OK we have some remaining data to process */
1281
/* Just as an exercice, we copy the req into the resp,
1282
* and flush the req.
1284
memcpy(s->rep->data, s->req->data, sizeof(s->rep->data));
1285
s->rep->l = s->req->l;
1286
s->rep->rlim = s->rep->data + BUFSIZE;
1287
s->rep->w = s->rep->data;
1288
s->rep->lr = s->rep->r = s->rep->data + s->rep->l;
1291
s->srv_state = SV_STCLOSE;
1297
} while (fsm_resync);
1299
if (likely(s->cli_state != CL_STCLOSE || s->srv_state != SV_STCLOSE)) {
1301
if ((s->fe->options & PR_O_CONTSTATS) && (s->flags & SN_BE_ASSIGNED))
1302
session_process_counters(s);
1304
s->req->flags &= BF_CLEAR_READ & BF_CLEAR_WRITE;
1305
s->rep->flags &= BF_CLEAR_READ & BF_CLEAR_WRITE;
1307
t->expire = s->req->rex;
1308
tv_min(&t->expire, &s->req->rex, &s->req->wex);
1309
tv_bound(&t->expire, &s->req->cex);
1310
tv_bound(&t->expire, &s->rep->rex);
1311
tv_bound(&t->expire, &s->rep->wex);
1313
/* restore t to its place in the task list */
1317
return; /* nothing more to do */
1322
if (s->be && (s->flags & SN_BE_ASSIGNED))
1326
if (unlikely((global.mode & MODE_DEBUG) &&
1327
(!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)))) {
1329
len = sprintf(trash, "%08x:%s.closed[%04x:%04x]\n",
1330
s->uniq_id, s->be->id,
1331
(unsigned short)s->cli_fd, (unsigned short)s->srv_fd);
1332
write(1, trash, len);
1335
s->logs.t_close = tv_ms_elapsed(&s->logs.tv_accept, &now);
1336
session_process_counters(s);
1338
/* let's do a final log if we need it */
1339
if (s->logs.logwait &&
1340
!(s->flags & SN_MONITOR) &&
1341
(s->req->total || !(s->fe && s->fe->options & PR_O_NULLNOLOG))) {
1345
/* the task MUST not be in the run queue anymore */
1351
#endif /* not converted */
1354
/* Processes data exchanges on the statistics socket. The client processing
1355
* is called and the task is put back in the wait queue or it is cleared.
1356
* In order to ease the transition, we simply simulate the server status
1357
* for now. It only knows states SV_STIDLE, SV_STDATA and SV_STCLOSE. Returns
1358
* in <next> the task's expiration date.
1360
void process_uxst_stats(struct task *t, struct timeval *next)
1362
struct session *s = t->context;
1363
struct listener *listener;
1366
/* we need to be in DATA phase on the "server" side */
1367
if (s->srv_state == SV_STIDLE) {
1368
s->srv_state = SV_STDATA;
1369
s->data_source = DATA_SRC_STATS;
1373
fsm_resync = process_uxst_cli(s);
1374
if (s->srv_state != SV_STDATA)
1377
if (s->cli_state == CL_STCLOSE || s->cli_state == CL_STSHUTW) {
1378
s->srv_state = SV_STCLOSE;
1383
if (s->data_state == DATA_ST_INIT) {
1384
if ((s->req->l >= 10) && (memcmp(s->req->data, "show stat\n", 10) == 0)) {
1385
/* send the stats, and changes the data_state */
1386
if (stats_dump_raw(s, NULL, STAT_SHOW_STAT) != 0) {
1387
s->srv_state = SV_STCLOSE;
1392
if ((s->req->l >= 10) && (memcmp(s->req->data, "show info\n", 10) == 0)) {
1393
/* send the stats, and changes the data_state */
1394
if (stats_dump_raw(s, NULL, STAT_SHOW_INFO) != 0) {
1395
s->srv_state = SV_STCLOSE;
1400
else if (s->cli_state == CL_STSHUTR || (s->req->l >= s->req->rlim - s->req->data)) {
1401
s->srv_state = SV_STCLOSE;
1407
if (s->data_state == DATA_ST_INIT)
1410
/* OK we have some remaining data to process. Just for the
1411
* sake of an exercice, we copy the req into the resp,
1412
* and flush the req. This produces a simple echo function.
1414
if (stats_dump_raw(s, NULL, 0) != 0) {
1415
s->srv_state = SV_STCLOSE;
1419
} while (fsm_resync);
1421
if (likely(s->cli_state != CL_STCLOSE || s->srv_state != SV_STCLOSE)) {
1422
s->req->flags &= BF_CLEAR_READ & BF_CLEAR_WRITE;
1423
s->rep->flags &= BF_CLEAR_READ & BF_CLEAR_WRITE;
1425
t->expire = s->req->rex;
1426
tv_min(&t->expire, &s->req->rex, &s->req->wex);
1427
tv_bound(&t->expire, &s->req->cex);
1428
tv_bound(&t->expire, &s->rep->rex);
1429
tv_bound(&t->expire, &s->rep->wex);
1431
/* restore t to its place in the task list */
1435
return; /* nothing more to do */
1439
listener = fdtab[s->cli_fd].listener;
1442
if (listener->state == LI_FULL &&
1443
listener->nbconn < listener->maxconn) {
1444
/* we should reactivate the listener */
1445
EV_FD_SET(listener->fd, DIR_RD);
1446
listener->state = LI_READY;
1450
/* the task MUST not be in the run queue anymore */
1457
__attribute__((constructor))
1458
static void __uxst_protocol_init(void)
1460
protocol_register(&proto_unix);