/*
 * FD polling functions for Speculative I/O combined with Linux epoll()
 *
 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * This code implements "speculative I/O" under Linux. The principle is to
 * try to perform expected I/O before registering the events in the poller.
 * Each time this succeeds, it saves an expensive epoll_ctl(). It generally
 * succeeds for all reads after an accept(), and for writes after a connect().
 * It also improves performance for streaming connections because even if only
 * one side is polled, the other one may react accordingly depending on the
 * level of the buffer.
 *
 * It presents some drawbacks though. If too many events are set for spec I/O,
 * those ones can starve the polled events. Experiments show that when polled
 * events starve, they quickly turn into spec I/O, making the situation even
 * worse. While we can reduce the number of polled events processed at once,
 * we cannot do this on speculative events because most of them are new ones
 * (avg 2/3 new - 1/3 old from experiments).
 *
 * The solution against this problem relies on those two factors :
 *   1) one FD registered as a spec event cannot be polled at the same time
 *   2) even during very high loads, we will almost never be interested in
 *      simultaneous read and write streaming on the same FD.
 *
 * The first point implies that during starvation, we will not have more than
 * half of our FDs in the poll list, otherwise it means there is less than
 * that in the spec list, implying there is no starvation.
 *
 * The second point implies that we're statistically only interested in half
 * of the maximum number of file descriptors at once, because we are unlikely
 * to have simultaneous reads and writes on the same buffer during long
 * periods.
 *
 * So, if we make it possible to drain maxsock/2/2 events during peak loads,
 * then we can ensure that there will be no starvation effect. This means
 * that we must always allocate maxsock/4 events for the poller.
 */
#include <unistd.h>
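The header comment above describes trying the I/O before registering the event with the poller. A minimal standalone sketch of that pattern, assuming a hypothetical helper `speculative_read()` (not HAProxy's actual API): attempt the `recv()` immediately, and only fall back to `epoll_ctl()` when the socket would block.

```c
#include <errno.h>
#include <stddef.h>
#include <sys/types.h>
#include <sys/epoll.h>
#include <sys/socket.h>

/* Try the I/O first; only register with epoll when it would block.
 * Every successful speculative read saves one epoll_ctl() call and
 * one trip through epoll_wait() for that fd.
 * Returns bytes read (or 0 on EOF), or -1 if the fd was handed to
 * the poller instead.
 */
static ssize_t speculative_read(int epfd, int fd, void *buf, size_t len)
{
	ssize_t ret = recv(fd, buf, len, MSG_DONTWAIT);

	if (ret >= 0)
		return ret;	/* speculation succeeded: no epoll_ctl() needed */

	if (errno == EAGAIN || errno == EWOULDBLOCK) {
		/* nothing ready yet: fall back to the poller */
		struct epoll_event ev;
		ev.events = EPOLLIN;
		ev.data.fd = fd;
		epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);
	}
	return -1;
}
```

This is why reads after an accept() are such a good fit: the client usually sends its request right away, so the first speculative read tends to succeed.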
	 * succeeded. This reduces the number of unsuccessful calls to
	 * epoll_wait() by a factor of about 3, and the total number of calls
	 * However, when we do that after having processed too many events,
	 * events waiting in epoll() starve for too long a time and tend to
	 * become themselves eligible for speculative polling. So we try to
	 * limit this practice to reasonable situations.
	 */
	spec_processed += status;
	if (status >= MIN_RETURN_EVENTS && spec_processed < absmaxevents) {
		/* We have processed at least MIN_RETURN_EVENTS, it's worth
		 * returning now without checking epoll_wait().
		 */
	wait_time = __tv_ms_elapsed(&now, exp) + 1;

	/* now let's wait for real events. We normally use maxpollevents as a
	 * high limit, unless <nbspec> is already big, in which case we need
	 * to compensate for the high number of events processed there.
	 */
	fd = MIN(absmaxevents, spec_processed);
	fd = MAX(global.tune.maxpollevents, fd);
	fd = MIN(maxfd, fd);

	status = epoll_wait(epoll_fd, epoll_events, fd, wait_time);
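The clamping above can be checked in isolation. A small sketch with hypothetical sample values (maxpollevents = 200, absmaxevents = 1000), using MIN/MAX macros equivalent to the ones the poller relies on:

```c
/* Isolated version of the epoll_wait() limit computation: start from the
 * number of speculative events just processed, capped at absmaxevents,
 * but never go below the configured maxpollevents.
 */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

static int wait_limit(int absmaxevents, int spec_processed, int maxpollevents)
{
	int fd = MIN(absmaxevents, spec_processed);
	fd = MAX(maxpollevents, fd);
	return fd;
}
```

So with few speculative events processed the limit stays at maxpollevents (200); once 600 have been processed it rises to 600 to compensate, and it can never exceed absmaxevents (1000), the size of the event array.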
	if (epoll_fd < 0)

	/* See comments at the top of the file about this formula. */
	absmaxevents = MAX(global.tune.maxpollevents, global.maxsock/4);

	epoll_events = (struct epoll_event*)
		calloc(1, sizeof(struct epoll_event) * absmaxevents);

	if (epoll_events == NULL)