~ubuntu-branches/ubuntu/trusty/haproxy/trusty-updates

« back to all changes in this revision

Viewing changes to src/lb_fwrr.c

Committer: Bazaar Package Importer
Author(s): Arnaud Cornet
Date: 2010-04-15 20:00:34 UTC
mfrom: (1.2.6 upstream)
mto: This revision was merged to the branch mainline in revision 11.
Revision ID: james.westby@ubuntu.com-20100415200034-mtlky4sy39tk0dfi

Tags: upstream-1.4.4

Import upstream version 1.4.4

files added:
.gitignore

contrib/base64

contrib/base64/base64rev-gen.c

doc/internals/connection-header.txt

ebtree

ebtree/compiler.h

ebtree/eb32tree.c

ebtree/eb32tree.h

ebtree/eb64tree.c

ebtree/eb64tree.h

ebtree/ebimtree.c

ebtree/ebimtree.h

ebtree/ebistree.c

ebtree/ebistree.h

ebtree/ebmbtree.c

ebtree/ebmbtree.h

ebtree/ebpttree.c

ebtree/ebpttree.h

ebtree/ebsttree.c

ebtree/ebsttree.h

ebtree/ebtree.c

ebtree/ebtree.h

examples/auth.cfg

include/common/compiler.h

include/proto/auth.h

include/proto/lb_chash.h

include/proto/lb_fwlc.h

include/proto/lb_fwrr.h

include/proto/lb_map.h

include/proto/pattern.h

include/proto/stick_table.h

include/types/auth.h

include/types/checks.h

include/types/counters.h

include/types/lb_chash.h

include/types/lb_fwlc.h

include/types/lb_fwrr.h

include/types/lb_map.h

include/types/pattern.h

include/types/stick_table.h

src/auth.c

src/lb_chash.c

src/lb_fwlc.c

src/lb_fwrr.c

src/lb_map.c

src/pattern.c

src/stick_table.c

tests/reset.c

tests/test-connection.cfg

tests/test-sql.cfg

files removed:
doc/internals/ebtree

doc/tcp-splicing.txt

examples/tcp-splicing-sample.cfg

include/common/eb32tree.h

include/common/eb64tree.h

include/common/ebpttree.h

include/common/ebtree.h

src/eb32tree.c

src/eb64tree.c

src/ebpttree.c

src/ebtree.c

files modified:
CHANGELOG

Makefile

Makefile.bsd

Makefile.osx

README

VERDATE

VERSION

contrib/halog/Makefile

contrib/halog/halog.c

doc/configuration.txt

doc/haproxy-en.txt

doc/haproxy-fr.txt

doc/haproxy.1

doc/internals/stream-sock-states.fig

examples/haproxy.spec

include/common/base64.h

include/common/cfgparse.h

include/common/compat.h

include/common/config.h

include/common/defaults.h

include/common/mini-clist.h

include/common/regex.h

include/common/standard.h

include/common/uri_auth.h

include/common/version.h

include/proto/acl.h

include/proto/backend.h

include/proto/buffers.h

include/proto/checks.h

include/proto/dumpstats.h

include/proto/proto_http.h

include/proto/proto_tcp.h

include/proto/proto_uxst.h

include/proto/proxy.h

include/proto/server.h

include/proto/stream_interface.h

include/proto/task.h

include/types/acl.h

include/types/backend.h

include/types/buffers.h

include/types/fd.h

include/types/global.h

include/types/proto_http.h

include/types/protocols.h

include/types/proxy.h

include/types/server.h

include/types/session.h

include/types/stream_interface.h

include/types/task.h

src/acl.c

src/backend.c

src/base64.c

src/buffers.c

src/cfgparse.c

src/checks.c

src/client.c

src/dumpstats.c

src/ev_sepoll.c

src/fd.c

src/haproxy.c

src/proto_http.c

src/proto_tcp.c

src/proto_uxst.c

src/proxy.c

src/queue.c

src/regex.c

src/session.c

src/standard.c

src/stream_interface.c

src/stream_sock.c

src/task.c

src/uri_auth.c

Show diffs side-by-side

added added

removed removed

src/lb_fwrr.c

/*
 * Fast Weighted Round Robin load balancing algorithm.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <common/compat.h>

#include <common/config.h>

#include <common/debug.h>

#include <eb32tree.h>

#include <types/global.h>

#include <types/server.h>

#include <proto/backend.h>

#include <proto/queue.h>

static inline void fwrr_remove_from_tree(struct server *s);

static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);

static inline void fwrr_dequeue_srv(struct server *s);

static void fwrr_get_srv(struct server *s);

static void fwrr_queue_srv(struct server *s);

/* This function updates the server trees according to server <srv>'s new

* state. It should be called when server <srv>'s status changes to down.

* It is not important whether the server was already down or not. It is not

* important either that the new state is completely down (the caller may not

* know all the variables of a server's state).

static void fwrr_set_server_status_down(struct server *srv)

{

struct proxy *p = srv->proxy;

struct fwrr_group *grp;

if (srv->state == srv->prev_state &&

srv->eweight == srv->prev_eweight)

return;

if (srv_is_usable(srv->state, srv->eweight))

goto out_update_state;

if (!srv_is_usable(srv->prev_state, srv->prev_eweight))

/* server was already down */

goto out_update_backend;

grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;

grp->next_weight -= srv->prev_eweight;

if (srv->state & SRV_BACKUP) {

p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;

p->srv_bck--;

if (srv == p->lbprm.fbck) {

/* we lost the first backup server in a single-backup

* configuration, we must search another one.

struct server *srv2 = p->lbprm.fbck;

do {

srv2 = srv2->next;

} while (srv2 &&

!((srv2->state & SRV_BACKUP) &&

srv_is_usable(srv2->state, srv2->eweight)));

p->lbprm.fbck = srv2;

}

} else {

p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;

p->srv_act--;

}

fwrr_dequeue_srv(srv);

fwrr_remove_from_tree(srv);

out_update_backend:

/* check/update tot_used, tot_weight */

update_backend_weight(p);

out_update_state:

srv->prev_state = srv->state;

srv->prev_eweight = srv->eweight;

}

/* This function updates the server trees according to server <srv>'s new

* state. It should be called when server <srv>'s status changes to up.

* It is not important whether the server was already down or not. It is not

* important either that the new state is completely UP (the caller may not

* know all the variables of a server's state). This function will not change

* the weight of a server which was already up.

static void fwrr_set_server_status_up(struct server *srv)

{

struct proxy *p = srv->proxy;

struct fwrr_group *grp;

100

if (srv->state == srv->prev_state &&

101

srv->eweight == srv->prev_eweight)

102

return;

103

104

if (!srv_is_usable(srv->state, srv->eweight))

105

goto out_update_state;

106

107

if (srv_is_usable(srv->prev_state, srv->prev_eweight))

108

/* server was already up */

109

goto out_update_backend;

110

111

grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;

112

grp->next_weight += srv->eweight;

113

114

if (srv->state & SRV_BACKUP) {

115

p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;

116

p->srv_bck++;

117

118

if (!(p->options & PR_O_USE_ALL_BK)) {

119

if (!p->lbprm.fbck) {

120

/* there was no backup server anymore */

121

p->lbprm.fbck = srv;

122

} else {

123

/* we may have restored a backup server prior to fbck,

124

* in which case it should replace it.

125

126

struct server *srv2 = srv;

127

do {

128

srv2 = srv2->next;

129

} while (srv2 && (srv2 != p->lbprm.fbck));

130

if (srv2)

131

p->lbprm.fbck = srv;

132

}

133

}

134

} else {

135

p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;

136

p->srv_act++;

137

}

138

139

/* note that eweight cannot be 0 here */

140

fwrr_get_srv(srv);

141

srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;

142

fwrr_queue_srv(srv);

143

144

out_update_backend:

145

/* check/update tot_used, tot_weight */

146

update_backend_weight(p);

147

out_update_state:

148

srv->prev_state = srv->state;

149

srv->prev_eweight = srv->eweight;

150

}

151

152

/* This function must be called after an update to server <srv>'s effective

153

* weight. It may be called after a state change too.

154

155

static void fwrr_update_server_weight(struct server *srv)

156

{

157

int old_state, new_state;

158

struct proxy *p = srv->proxy;

159

struct fwrr_group *grp;

160

161

if (srv->state == srv->prev_state &&

162

srv->eweight == srv->prev_eweight)

163

return;

164

165

/* If changing the server's weight changes its state, we simply apply

166

* the procedures we already have for status change. If the state

167

* remains down, the server is not in any tree, so it's as easy as

168

* updating its values. If the state remains up with different weights,

169

* there are some computations to perform to find a new place and

170

* possibly a new tree for this server.

171

172

173

old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);

174

new_state = srv_is_usable(srv->state, srv->eweight);

175

176

if (!old_state && !new_state) {

177

srv->prev_state = srv->state;

178

srv->prev_eweight = srv->eweight;

179

return;

180

}

181

else if (!old_state && new_state) {

182

fwrr_set_server_status_up(srv);

183

return;

184

}

185

else if (old_state && !new_state) {

186

fwrr_set_server_status_down(srv);

187

return;

188

}

189

190

grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;

191

grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;

192

193

p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;

194

p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;

195

196

if (srv->lb_tree == grp->init) {

197

fwrr_dequeue_srv(srv);

198

fwrr_queue_by_weight(grp->init, srv);

199

}

200

else if (!srv->lb_tree) {

201

/* FIXME: server was down. This is not possible right now but

202

* may be needed soon for slowstart or graceful shutdown.

203

204

fwrr_dequeue_srv(srv);

205

fwrr_get_srv(srv);

206

srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;

207

fwrr_queue_srv(srv);

208

} else {

209

/* The server is either active or in the next queue. If it's

210

* still in the active queue and it has not consumed all of its

211

* places, let's adjust its next position.

212

213

fwrr_get_srv(srv);

214

215

if (srv->eweight > 0) {

216

int prev_next = srv->npos;

217

int step = grp->next_weight / srv->eweight;

218

219

srv->npos = srv->lpos + step;

220

srv->rweight = 0;

221

222

if (srv->npos > prev_next)

223

srv->npos = prev_next;

224

if (srv->npos < grp->curr_pos + 2)

225

srv->npos = grp->curr_pos + step;

226

} else {

227

/* push it into the next tree */

228

srv->npos = grp->curr_pos + grp->curr_weight;

229

}

230

231

fwrr_dequeue_srv(srv);

232

fwrr_queue_srv(srv);

233

}

234

235

update_backend_weight(p);

236

srv->prev_state = srv->state;

237

srv->prev_eweight = srv->eweight;

238

}

239

240

/* Remove a server from a tree. It must have previously been dequeued. This

241

* function is meant to be called when a server is going down or has its

242

* weight disabled.

243

244

static inline void fwrr_remove_from_tree(struct server *s)

245

{

246

s->lb_tree = NULL;

247

}

248

249

/* Queue a server in the weight tree <root>, assuming the weight is >0.

250

* We want to sort them by inverted weights, because we need to place

251

* heavy servers first in order to get a smooth distribution.

252

253

static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)

254

{

255

s->lb_node.key = SRV_EWGHT_MAX - s->eweight;

256

eb32_insert(root, &s->lb_node);

257

s->lb_tree = root;

258

}

259

260

/* This function is responsible for building the weight trees in case of fast

261

* weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight

262

* ratio. Both active and backup groups are initialized.

263

264

void fwrr_init_server_groups(struct proxy *p)

265

{

266

struct server *srv;

267

struct eb_root init_head = EB_ROOT;

268

269

p->lbprm.set_server_status_up = fwrr_set_server_status_up;

270

p->lbprm.set_server_status_down = fwrr_set_server_status_down;

271

p->lbprm.update_server_eweight = fwrr_update_server_weight;

272

273

p->lbprm.wdiv = BE_WEIGHT_SCALE;

274

for (srv = p->srv; srv; srv = srv->next) {

275

srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;

276

srv->prev_state = srv->state;

277

}

278

279

recount_servers(p);

280

update_backend_weight(p);

281

282

/* prepare the active servers group */

283

p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =

284

p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;

285

p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =

286

p->lbprm.fwrr.act.t1 = init_head;

287

p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;

288

p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;

289

290

/* prepare the backup servers group */

291

p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =

292

p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;

293

p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =

294

p->lbprm.fwrr.bck.t1 = init_head;

295

p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;

296

p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;

297

298

/* queue active and backup servers in two distinct groups */

299

for (srv = p->srv; srv; srv = srv->next) {

300

if (!srv_is_usable(srv->state, srv->eweight))

301

continue;

302

fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?

303

p->lbprm.fwrr.bck.init :

304

p->lbprm.fwrr.act.init,

305

srv);

306

}

307

}

308

309

/* simply removes a server from a weight tree */

310

static inline void fwrr_dequeue_srv(struct server *s)

311

{

312

eb32_delete(&s->lb_node);

313

}

314

315

/* queues a server into the appropriate group and tree depending on its

316

* backup status, and ->npos. If the server is disabled, simply assign

317

* it to the NULL tree.

318

319

static void fwrr_queue_srv(struct server *s)

320

{

321

struct proxy *p = s->proxy;

322

struct fwrr_group *grp;

323

324

grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;

325

326

/* Delay everything which does not fit into the window and everything

327

* which does not fit into the theorical new window.

328

329

if (!srv_is_usable(s->state, s->eweight)) {

330

fwrr_remove_from_tree(s);

331

}

332

else if (s->eweight <= 0 ||

333

s->npos >= 2 * grp->curr_weight ||

334

s->npos >= grp->curr_weight + grp->next_weight) {

335

/* put into next tree, and readjust npos in case we could

336

* finally take this back to current. */

337

s->npos -= grp->curr_weight;

338

fwrr_queue_by_weight(grp->next, s);

339

}

340

else {

341

/* The sorting key is stored in units of s->npos * user_weight

342

* in order to avoid overflows. As stated in backend.h, the

343

* lower the scale, the rougher the weights modulation, and the

344

* higher the scale, the lower the number of servers without

345

* overflow. With this formula, the result is always positive,

346

* so we can use eb3�_insert().

347

348

s->lb_node.key = SRV_UWGHT_RANGE * s->npos +

349

(unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;

350

351

eb32_insert(&grp->curr, &s->lb_node);

352

s->lb_tree = &grp->curr;

353

}

354

}

355

356

/* prepares a server when extracting it from the "init" tree */

357

static inline void fwrr_get_srv_init(struct server *s)

358

{

359

s->npos = s->rweight = 0;

360

}

361

362

/* prepares a server when extracting it from the "next" tree */

363

static inline void fwrr_get_srv_next(struct server *s)

364

{

365

struct fwrr_group *grp = (s->state & SRV_BACKUP) ?

366

&s->proxy->lbprm.fwrr.bck :

367

&s->proxy->lbprm.fwrr.act;

368

369

s->npos += grp->curr_weight;

370

}

371

372

/* prepares a server when it was marked down */

373

static inline void fwrr_get_srv_down(struct server *s)

374

{

375

struct fwrr_group *grp = (s->state & SRV_BACKUP) ?

376

&s->proxy->lbprm.fwrr.bck :

377

&s->proxy->lbprm.fwrr.act;

378

379

s->npos = grp->curr_pos;

380

}

381

382

/* prepares a server when extracting it from its tree */

383

static void fwrr_get_srv(struct server *s)

384

{

385

struct proxy *p = s->proxy;

386

struct fwrr_group *grp = (s->state & SRV_BACKUP) ?

387

&p->lbprm.fwrr.bck :

388

&p->lbprm.fwrr.act;

389

390

if (s->lb_tree == grp->init) {

391

fwrr_get_srv_init(s);

392

}

393

else if (s->lb_tree == grp->next) {

394

fwrr_get_srv_next(s);

395

}

396

else if (s->lb_tree == NULL) {

397

fwrr_get_srv_down(s);

398

}

399

}

400

401

/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty

402

* when this happens, and "next" filled with servers sorted by weights.

403

404

static inline void fwrr_switch_trees(struct fwrr_group *grp)

405

{

406

struct eb_root *swap;

407

swap = grp->init;

408

grp->init = grp->next;

409

grp->next = swap;

410

grp->curr_weight = grp->next_weight;

411

grp->curr_pos = grp->curr_weight;

412

}

413

414

/* return next server from the current tree in FWRR group <grp>, or a server

415

* from the "init" tree if appropriate. If both trees are empty, return NULL.

416

417

static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)

418

{

419

struct eb32_node *node;

420

struct server *s;

421

422

node = eb32_first(&grp->curr);

423

s = eb32_entry(node, struct server, lb_node);

424

425

if (!node || s->npos > grp->curr_pos) {

426

/* either we have no server left, or we have a hole */

427

struct eb32_node *node2;

428

node2 = eb32_first(grp->init);

429

if (node2) {

430

node = node2;

431

s = eb32_entry(node, struct server, lb_node);

432

fwrr_get_srv_init(s);

433

if (s->eweight == 0) /* FIXME: is it possible at all ? */

434

node = NULL;

435

}

436

}

437

if (node)

438

return s;

439

else

440

return NULL;

441

}

442

443

/* Computes next position of server <s> in the group. It is mandatory for <s>

444

* to have a non-zero, positive eweight.

445

446

static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)

447

{

448

if (!s->npos) {

449

/* first time ever for this server */

450

s->lpos = grp->curr_pos;

451

s->npos = grp->curr_pos + grp->next_weight / s->eweight;

452

s->rweight += grp->next_weight % s->eweight;

453

454

if (s->rweight >= s->eweight) {

455

s->rweight -= s->eweight;

456

s->npos++;

457

}

458

} else {

459

s->lpos = s->npos;

460

s->npos += grp->next_weight / s->eweight;

461

s->rweight += grp->next_weight % s->eweight;

462

463

if (s->rweight >= s->eweight) {

464

s->rweight -= s->eweight;

465

s->npos++;

466

}

467

}

468

}

469

470

/* Return next server from the current tree in backend <p>, or a server from

471

* the init tree if appropriate. If both trees are empty, return NULL.

472

* Saturated servers are skipped and requeued.

473

474

struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)

475

{

476

struct server *srv, *full, *avoided;

477

struct fwrr_group *grp;

478

int switched;

479

480

if (p->srv_act)

481

grp = &p->lbprm.fwrr.act;

482

else if (p->lbprm.fbck)

483

return p->lbprm.fbck;

484

else if (p->srv_bck)

485

grp = &p->lbprm.fwrr.bck;

486

else

487

return NULL;

488

489

switched = 0;

490

avoided = NULL;

491

full = NULL; /* NULL-terminated list of saturated servers */

492

while (1) {

493

/* if we see an empty group, let's first try to collect weights

494

* which might have recently changed.

495

496

if (!grp->curr_weight)

497

grp->curr_pos = grp->curr_weight = grp->next_weight;

498

499

/* get first server from the "current" tree. When the end of

500

* the tree is reached, we may have to switch, but only once.

501

502

while (1) {

503

srv = fwrr_get_server_from_group(grp);

504

if (srv)

505

break;

506

if (switched) {

507

if (avoided) {

508

srv = avoided;

509

break;

510

}

511

goto requeue_servers;

512

}

513

switched = 1;

514

fwrr_switch_trees(grp);

515

516

}

517

518

/* OK, we have a server. However, it may be saturated, in which

519

* case we don't want to reconsider it for now. We'll update

520

* its position and dequeue it anyway, so that we can move it

521

* to a better place afterwards.

522

523

fwrr_update_position(grp, srv);

524

fwrr_dequeue_srv(srv);

525

grp->curr_pos++;

526

if (!srv->maxconn || (!srv->nbpend && srv->served < srv_dynamic_maxconn(srv))) {

527

/* make sure it is not the server we are trying to exclude... */

528

if (srv != srvtoavoid || avoided)

529

break;

530

531

avoided = srv; /* ...but remember that is was selected yet avoided */

532

}

533

534

/* the server is saturated or avoided, let's chain it for later reinsertion */

535

srv->next_full = full;

536

full = srv;

537

}

538

539

/* OK, we got the best server, let's update it */

540

fwrr_queue_srv(srv);

541

542

requeue_servers:

543

/* Requeue all extracted servers. If full==srv then it was

544

* avoided (unsucessfully) and chained, omit it now.

545

546

if (unlikely(full != NULL)) {

547

if (switched) {

548

/* the tree has switched, requeue all extracted servers

549

* into "init", because their place was lost, and only

550

* their weight matters.

551

552

do {

553

if (likely(full != srv))

554

fwrr_queue_by_weight(grp->init, full);

555

full = full->next_full;

556

} while (full);

557

} else {

558

/* requeue all extracted servers just as if they were consumed

559

* so that they regain their expected place.

560

561

do {

562

if (likely(full != srv))

563

fwrr_queue_srv(full);

564

full = full->next_full;

565

} while (full);

566

}

567

}

568

return srv;

569

}

570

571

572

/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */

576

Older »