75
75
struct list bal_node; /* In bond_rebalance()'s 'bals' list. */
76
76
struct list entries; /* 'struct bond_entry's assigned here. */
77
77
uint64_t tx_bytes; /* Sum across 'tx_bytes' of entries. */
79
/* BM_STABLE specific bonding info. */
80
uint32_t stb_id; /* ID used for 'stb_slaves' ordering. */
83
80
/* A bond, that is, a set of network devices grouped to improve performance or
104
101
long long int next_rebalance; /* Next rebalancing time. */
105
102
bool send_learning_packets;
107
/* BM_STABLE specific bonding info. */
108
tag_type stb_tag; /* Tag associated with this bond. */
110
104
/* Legacy compatibility. */
111
105
long long int next_fake_iface_update; /* LLONG_MAX if disabled. */
114
108
* where we can't otherwise provide revalidation feedback to the client.
115
109
* That's only unixctl commands now; I hope no other cases will arise. */
116
110
struct tag_set unixctl_tags;
119
static struct hmap all_bonds = HMAP_INITIALIZER(&all_bonds);
115
static struct ovs_rwlock rwlock = OVS_RWLOCK_INITIALIZER;
116
static struct hmap all_bonds__ = HMAP_INITIALIZER(&all_bonds__);
117
static struct hmap *const all_bonds OVS_GUARDED_BY(rwlock) = &all_bonds__;
121
static void bond_entry_reset(struct bond *);
122
static struct bond_slave *bond_slave_lookup(struct bond *, const void *slave_);
119
static void bond_entry_reset(struct bond *) OVS_REQ_WRLOCK(rwlock);
120
static struct bond_slave *bond_slave_lookup(struct bond *, const void *slave_)
121
OVS_REQ_RDLOCK(rwlock);
123
122
static void bond_enable_slave(struct bond_slave *, bool enable,
125
static void bond_link_status_update(struct bond_slave *, struct tag_set *);
126
static void bond_choose_active_slave(struct bond *, struct tag_set *);
123
struct tag_set *) OVS_REQ_WRLOCK(rwlock);
124
static void bond_link_status_update(struct bond_slave *, struct tag_set *)
125
OVS_REQ_WRLOCK(rwlock);
126
static void bond_choose_active_slave(struct bond *, struct tag_set *)
127
OVS_REQ_WRLOCK(rwlock);;
127
128
static unsigned int bond_hash_src(const uint8_t mac[ETH_ADDR_LEN],
128
129
uint16_t vlan, uint32_t basis);
129
130
static unsigned int bond_hash_tcp(const struct flow *, uint16_t vlan,
131
132
static struct bond_entry *lookup_bond_entry(const struct bond *,
132
133
const struct flow *,
134
static tag_type bond_get_active_slave_tag(const struct bond *);
135
OVS_REQ_RDLOCK(rwlock);
136
static tag_type bond_get_active_slave_tag(const struct bond *)
137
OVS_REQ_RDLOCK(rwlock);
135
138
static struct bond_slave *choose_output_slave(const struct bond *,
136
139
const struct flow *,
137
uint16_t vlan, tag_type *tags);
138
static void bond_update_fake_slave_stats(struct bond *);
140
struct flow_wildcards *,
141
uint16_t vlan, tag_type *tags)
142
OVS_REQ_RDLOCK(rwlock);
143
static void bond_update_fake_slave_stats(struct bond *)
144
OVS_REQ_RDLOCK(rwlock);
140
146
/* Attempts to parse 's' as the name of a bond balancing mode. If successful,
141
147
* stores the mode in '*balance' and returns true. Otherwise returns false
187
189
bond = xzalloc(sizeof *bond);
188
190
hmap_init(&bond->slaves);
189
191
bond->no_slaves_tag = tag_create_random();
190
bond->stb_tag = tag_create_random();
191
192
bond->next_fake_iface_update = LLONG_MAX;
193
atomic_init(&bond->ref_cnt, 1);
193
195
bond_reconfigure(bond, s);
203
bond_ref(const struct bond *bond_)
205
struct bond *bond = CONST_CAST(struct bond *, bond_);
209
atomic_add(&bond->ref_cnt, 1, &orig);
210
ovs_assert(orig > 0);
200
215
/* Frees 'bond'. */
202
bond_destroy(struct bond *bond)
217
bond_unref(struct bond *bond)
204
219
struct bond_slave *slave, *next_slave;
210
hmap_remove(&all_bonds, &bond->hmap_node);
226
atomic_sub(&bond->ref_cnt, 1, &orig);
227
ovs_assert(orig > 0);
232
ovs_rwlock_wrlock(&rwlock);
233
hmap_remove(all_bonds, &bond->hmap_node);
234
ovs_rwlock_unlock(&rwlock);
212
236
HMAP_FOR_EACH_SAFE (slave, next_slave, hmap_node, &bond->slaves) {
213
237
hmap_remove(&bond->slaves, &slave->hmap_node);
237
261
bool revalidate = false;
263
ovs_rwlock_wrlock(&rwlock);
239
264
if (!bond->name || strcmp(bond->name, s->name)) {
240
265
if (bond->name) {
241
hmap_remove(&all_bonds, &bond->hmap_node);
266
hmap_remove(all_bonds, &bond->hmap_node);
242
267
free(bond->name);
244
269
bond->name = xstrdup(s->name);
245
hmap_insert(&all_bonds, &bond->hmap_node, hash_string(bond->name, 0));
270
hmap_insert(all_bonds, &bond->hmap_node, hash_string(bond->name, 0));
248
273
bond->updelay = s->up_delay;
256
281
if (bond->balance != s->balance) {
257
282
bond->balance = s->balance;
258
283
revalidate = true;
260
if (bond->balance == BM_STABLE) {
261
VLOG_WARN_ONCE("Stable bond mode is deprecated and may be removed"
262
" in February 2013. Please email"
263
" dev@openvswitch.org with concerns.");
267
286
if (bond->basis != s->basis) {
303
324
* bond. If 'slave_' already exists within 'bond' then this function
304
325
* reconfigures the existing slave.
306
* 'stb_id' is used in BM_STABLE bonds to guarantee consistent slave choices
307
* across restarts and distributed vswitch instances. It should be unique per
308
* slave, and preferably consistent across restarts and reconfigurations.
310
327
* 'netdev' must be the network device that 'slave_' represents. It is owned
311
328
* by the client, so the client must not close it before either unregistering
312
329
* 'slave_' or destroying 'bond'.
315
bond_slave_register(struct bond *bond, void *slave_, uint32_t stb_id,
316
struct netdev *netdev)
332
bond_slave_register(struct bond *bond, void *slave_, struct netdev *netdev)
318
struct bond_slave *slave = bond_slave_lookup(bond, slave_);
334
struct bond_slave *slave;
336
ovs_rwlock_wrlock(&rwlock);
337
slave = bond_slave_lookup(bond, slave_);
321
339
slave = xzalloc(sizeof *slave);
331
349
bond_enable_slave(slave, netdev_get_carrier(netdev), NULL);
334
if (slave->stb_id != stb_id) {
335
slave->stb_id = stb_id;
336
bond->bond_revalidate = true;
339
352
bond_slave_set_netdev__(slave, netdev);
341
354
free(slave->name);
342
355
slave->name = xstrdup(netdev_get_name(netdev));
356
ovs_rwlock_unlock(&rwlock);
345
359
/* Updates the network device to be used with 'slave_' to 'netdev'.
351
365
bond_slave_set_netdev(struct bond *bond, void *slave_, struct netdev *netdev)
353
struct bond_slave *slave = bond_slave_lookup(bond, slave_);
367
struct bond_slave *slave;
369
ovs_rwlock_wrlock(&rwlock);
370
slave = bond_slave_lookup(bond, slave_);
355
372
bond_slave_set_netdev__(slave, netdev);
374
ovs_rwlock_unlock(&rwlock);
359
377
/* Unregisters 'slave_' from 'bond'. If 'bond' does not contain such a slave
364
382
bond_slave_unregister(struct bond *bond, const void *slave_)
366
struct bond_slave *slave = bond_slave_lookup(bond, slave_);
384
struct bond_slave *slave;
387
ovs_rwlock_wrlock(&rwlock);
388
slave = bond_slave_lookup(bond, slave_);
373
393
bond_enable_slave(slave, false, NULL);
406
428
bond_slave_set_may_enable(struct bond *bond, void *slave_, bool may_enable)
430
ovs_rwlock_wrlock(&rwlock);
408
431
bond_slave_lookup(bond, slave_)->may_enable = may_enable;
432
ovs_rwlock_unlock(&rwlock);
411
435
/* Performs periodic maintenance on 'bond'. The caller must provide 'tags' to
418
442
struct bond_slave *slave;
444
ovs_rwlock_wrlock(&rwlock);
420
445
if (bond->lacp_status != lacp_status) {
421
446
bond->lacp_status = lacp_status;
422
447
bond->bond_revalidate = true;
440
465
if (bond->bond_revalidate) {
466
struct bond_slave *slave;
441
468
bond->bond_revalidate = false;
443
469
bond_entry_reset(bond);
444
if (bond->balance != BM_STABLE) {
445
struct bond_slave *slave;
447
HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
448
tag_set_add(tags, slave->tag);
451
tag_set_add(tags, bond->stb_tag);
470
HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
471
tag_set_add(tags, slave->tag);
453
473
tag_set_add(tags, bond->no_slaves_tag);
456
476
/* Invalidate any tags required by unixctl commands. */
457
477
tag_set_union(tags, &bond->unixctl_tags);
458
478
tag_set_init(&bond->unixctl_tags);
479
ovs_rwlock_unlock(&rwlock);
461
482
/* Causes poll_block() to wake up when 'bond' needs something to be done. */
465
486
struct bond_slave *slave;
488
ovs_rwlock_rdlock(&rwlock);
467
489
HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
468
490
if (slave->delay_expires != LLONG_MAX) {
469
491
poll_timer_wait_until(slave->delay_expires);
514
537
bond_should_send_learning_packets(struct bond *bond)
516
bool send = bond->send_learning_packets && may_send_learning_packets(bond);
541
ovs_rwlock_wrlock(&rwlock);
542
send = bond->send_learning_packets && may_send_learning_packets(bond);
517
543
bond->send_learning_packets = false;
544
ovs_rwlock_unlock(&rwlock);
533
560
tag_type tags = 0;
534
561
struct flow flow;
563
ovs_rwlock_rdlock(&rwlock);
536
564
ovs_assert(may_send_learning_packets(bond));
538
565
memset(&flow, 0, sizeof flow);
539
566
memcpy(flow.dl_src, eth_src, ETH_ADDR_LEN);
540
slave = choose_output_slave(bond, &flow, vlan, &tags);
567
slave = choose_output_slave(bond, &flow, NULL, vlan, &tags);
542
569
packet = ofpbuf_new(0);
543
570
compose_rarp(packet, eth_src);
568
596
bond_check_admissibility(struct bond *bond, const void *slave_,
569
597
const uint8_t eth_dst[ETH_ADDR_LEN], tag_type *tags)
571
struct bond_slave *slave = bond_slave_lookup(bond, slave_);
599
enum bond_verdict verdict = BV_DROP;
600
struct bond_slave *slave;
602
ovs_rwlock_rdlock(&rwlock);
603
slave = bond_slave_lookup(bond, slave_);
573
608
/* LACP bonds have very loose admissibility restrictions because we can
574
609
* assume the remote switch is aware of the bond and will "do the right
579
614
* If LACP is configured, but LACP negotiations have been unsuccessful, we
580
615
* drop all incoming traffic. */
581
616
switch (bond->lacp_status) {
582
case LACP_NEGOTIATED: return slave->enabled ? BV_ACCEPT : BV_DROP;
583
case LACP_CONFIGURED: return BV_DROP;
584
case LACP_DISABLED: break;
617
case LACP_NEGOTIATED:
618
verdict = slave->enabled ? BV_ACCEPT : BV_DROP;
620
case LACP_CONFIGURED:
587
626
/* Drop all multicast packets on inactive slaves. */
588
627
if (eth_addr_is_multicast(eth_dst)) {
589
628
*tags |= bond_get_active_slave_tag(bond);
590
if (bond->active_slave != bond_slave_lookup(bond, slave_)) {
629
if (bond->active_slave != slave) {
603
642
VLOG_DBG_RL(&rl, "active-backup bond received packet on backup"
604
643
" slave (%s) destined for " ETH_ADDR_FMT,
605
644
slave->name, ETH_ADDR_ARGS(eth_dst));
611
651
/* TCP balanced bonds require successful LACP negotiation. Based on the
612
652
* above check, LACP is off on this bond. Therefore, we drop all
613
653
* incoming traffic. */
617
657
/* Drop all packets for which we have learned a different input port,
620
660
* the host has moved to another switch. The exception to the
621
661
* exception is if we locked the learning table to avoid reflections on
622
662
* bond slaves. */
623
return BV_DROP_IF_MOVED;
663
verdict = BV_DROP_IF_MOVED;
669
ovs_rwlock_unlock(&rwlock);
632
674
/* Returns the slave (registered on 'bond' by bond_slave_register()) to which
640
682
* packet belongs to (so for an access port it will be the access port's VLAN).
642
684
* Adds a tag to '*tags' that associates the flow with the returned slave.
686
* If 'wc' is non-NULL, bitwise-OR's 'wc' with the set of bits that were
687
* significant in the selection. At some point earlier, 'wc' should
688
* have been initialized (e.g., by flow_wildcards_init_catchall()).
645
691
bond_choose_output_slave(struct bond *bond, const struct flow *flow,
646
uint16_t vlan, tag_type *tags)
692
struct flow_wildcards *wc, uint16_t vlan,
648
struct bond_slave *slave = choose_output_slave(bond, flow, vlan, tags);
695
struct bond_slave *slave;
698
ovs_rwlock_rdlock(&rwlock);
699
slave = choose_output_slave(bond, flow, wc, vlan, tags);
650
*tags |= bond->balance == BM_STABLE ? bond->stb_tag : slave->tag;
653
704
*tags |= bond->no_slaves_tag;
706
ovs_rwlock_unlock(&rwlock);
658
710
/* Rebalancing. */
661
bond_is_balanced(const struct bond *bond)
713
bond_is_balanced(const struct bond *bond) OVS_REQ_RDLOCK(rwlock)
663
715
return bond->rebalance_interval
664
716
&& (bond->balance == BM_SLB || bond->balance == BM_TCP);
669
721
bond_account(struct bond *bond, const struct flow *flow, uint16_t vlan,
670
722
uint64_t n_bytes)
724
ovs_rwlock_wrlock(&rwlock);
672
725
if (bond_is_balanced(bond)) {
673
726
lookup_bond_entry(bond, flow, vlan)->tx_bytes += n_bytes;
728
ovs_rwlock_unlock(&rwlock);
677
731
static struct bond_slave *
678
bond_slave_from_bal_node(struct list *bal)
732
bond_slave_from_bal_node(struct list *bal) OVS_REQ_RDLOCK(rwlock)
680
734
return CONTAINER_OF(bal, struct bond_slave, bal_node);
956
ovs_rwlock_unlock(&rwlock);
902
959
/* Bonding unixctl user interface functions. */
904
961
static struct bond *
905
bond_find(const char *name)
962
bond_find(const char *name) OVS_REQ_RDLOCK(rwlock)
907
964
struct bond *bond;
909
966
HMAP_FOR_EACH_WITH_HASH (bond, hmap_node, hash_string(name, 0),
911
968
if (!strcmp(bond->name, name)) {
939
996
ds_put_cstr(&ds, "bond\ttype\tslaves\n");
941
HMAP_FOR_EACH (bond, hmap_node, &all_bonds) {
998
ovs_rwlock_rdlock(&rwlock);
999
HMAP_FOR_EACH (bond, hmap_node, all_bonds) {
942
1000
const struct bond_slave *slave;
955
1013
ds_put_char(&ds, '\n');
1015
ovs_rwlock_unlock(&rwlock);
957
1016
unixctl_command_reply(conn, ds_cstr(&ds));
958
1017
ds_destroy(&ds);
962
1021
bond_print_details(struct ds *ds, const struct bond *bond)
1022
OVS_REQ_RDLOCK(rwlock)
964
1024
struct shash slave_shash = SHASH_INITIALIZER(&slave_shash);
965
1025
const struct shash_node **sorted_slaves = NULL;
1052
1112
struct ds ds = DS_EMPTY_INITIALIZER;
1114
ovs_rwlock_rdlock(&rwlock);
1054
1115
if (argc > 1) {
1055
1116
const struct bond *bond = bond_find(argv[1]);
1058
1119
unixctl_command_reply_error(conn, "no such bond");
1061
1122
bond_print_details(&ds, bond);
1063
1124
const struct bond *bond;
1065
HMAP_FOR_EACH (bond, hmap_node, &all_bonds) {
1126
HMAP_FOR_EACH (bond, hmap_node, all_bonds) {
1066
1127
bond_print_details(&ds, bond);
1070
1131
unixctl_command_reply(conn, ds_cstr(&ds));
1071
1132
ds_destroy(&ds);
1135
ovs_rwlock_unlock(&rwlock);
1084
1148
struct bond_entry *entry;
1151
ovs_rwlock_wrlock(&rwlock);
1087
1152
bond = bond_find(bond_s);
1089
1154
unixctl_command_reply_error(conn, "no such bond");
1093
1158
if (bond->balance != BM_SLB) {
1094
1159
unixctl_command_reply_error(conn, "not an SLB bond");
1098
1163
if (strspn(hash_s, "0123456789") == strlen(hash_s)) {
1099
1164
hash = atoi(hash_s) & BOND_MASK;
1101
1166
unixctl_command_reply_error(conn, "bad hash");
1105
1170
slave = bond_lookup_slave(bond, slave_s);
1107
1172
unixctl_command_reply_error(conn, "no such slave");
1111
1176
if (!slave->enabled) {
1112
1177
unixctl_command_reply_error(conn, "cannot migrate to disabled slave");
1116
1181
entry = &bond->hash[hash];
1130
1198
struct bond *bond;
1131
1199
struct bond_slave *slave;
1201
ovs_rwlock_wrlock(&rwlock);
1133
1202
bond = bond_find(bond_s);
1135
1204
unixctl_command_reply_error(conn, "no such bond");
1139
1208
slave = bond_lookup_slave(bond, slave_s);
1141
1210
unixctl_command_reply_error(conn, "no such slave");
1145
1214
if (!slave->enabled) {
1146
1215
unixctl_command_reply_error(conn, "cannot make disabled slave active");
1150
1219
if (bond->active_slave != slave) {
1168
1239
struct bond *bond;
1169
1240
struct bond_slave *slave;
1242
ovs_rwlock_wrlock(&rwlock);
1171
1243
bond = bond_find(bond_s);
1173
1245
unixctl_command_reply_error(conn, "no such bond");
1177
1249
slave = bond_lookup_slave(bond, slave_s);
1179
1251
unixctl_command_reply_error(conn, "no such slave");
1183
1255
bond_enable_slave(slave, enable, &bond->unixctl_tags);
1184
1256
unixctl_command_reply(conn, enable ? "enabled" : "disabled");
1259
ovs_rwlock_unlock(&rwlock);
1298
1373
bond_enable_slave(struct bond_slave *slave, bool enable, struct tag_set *tags)
1300
struct bond *bond = slave->bond;
1301
1375
slave->delay_expires = LLONG_MAX;
1302
1376
if (enable != slave->enabled) {
1303
1377
slave->enabled = enable;
1304
1378
if (!slave->enabled) {
1305
VLOG_WARN("interface %s: disabled", slave->name);
1379
VLOG_INFO("interface %s: disabled", slave->name);
1307
1381
tag_set_add(tags, slave->tag);
1310
VLOG_WARN("interface %s: enabled", slave->name);
1384
VLOG_INFO("interface %s: enabled", slave->name);
1311
1385
slave->tag = tag_create_random();
1314
if (bond->balance == BM_STABLE) {
1315
bond->bond_revalidate = true;
1387
1457
return &bond->hash[bond_hash(bond, flow, vlan) & BOND_MASK];
1390
/* This function uses Highest Random Weight hashing to choose an output slave.
1391
* This approach only reassigns a minimal number of flows when slaves are
1392
* enabled or disabled. Unfortunately, it has O(n) performance against the
1393
* number of slaves. There exist algorithms which are O(1), but have slightly
1394
* more complex implementations and require the use of memory. This may need
1395
* to be reimplemented if it becomes a performance bottleneck. */
1396
static struct bond_slave *
1397
choose_stb_slave(const struct bond *bond, uint32_t flow_hash)
1399
struct bond_slave *best, *slave;
1404
HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
1405
if (slave->enabled) {
1408
hash = hash_2words(flow_hash, slave->stb_id);
1409
if (!best || hash > best_hash) {
1419
1460
static struct bond_slave *
1420
1461
choose_output_slave(const struct bond *bond, const struct flow *flow,
1421
uint16_t vlan, tag_type *tags)
1462
struct flow_wildcards *wc, uint16_t vlan, tag_type *tags)
1423
1464
struct bond_entry *e;
1433
1474
return bond->active_slave;
1436
return choose_stb_slave(bond, bond_hash_tcp(flow, vlan, bond->basis));
1439
1477
if (bond->lacp_status != LACP_NEGOTIATED) {
1440
1478
/* Must have LACP negotiations for TCP balanced bonds. */
1482
flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_SYMMETRIC_L4);
1443
1484
/* Fall Through. */
1445
if (!bond_is_balanced(bond)) {
1446
return choose_stb_slave(bond, bond_hash(bond, flow, vlan));
1487
flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_ETH_SRC);
1448
1489
e = lookup_bond_entry(bond, flow, vlan);
1449
1490
if (!e->slave || !e->slave->enabled) {
1513
1554
bond->send_learning_packets = true;
1514
1555
} else if (old_active_slave) {
1515
VLOG_WARN_RL(&rl, "bond %s: all interfaces disabled", bond->name);
1556
VLOG_INFO_RL(&rl, "bond %s: all interfaces disabled", bond->name);