362
532
return dpif_netdev_cast(dpif)->dp;
366
dpif_netdev_enumerate(struct sset *all_dps)
536
PMD_INFO_SHOW_STATS, /* show how cpu cycles are spent */
537
PMD_INFO_CLEAR_STATS /* set the cycles count to 0 */
541
pmd_info_show_stats(struct ds *reply,
542
struct dp_netdev_pmd_thread *pmd,
543
unsigned long long stats[DP_N_STATS],
544
uint64_t cycles[PMD_N_CYCLES])
546
unsigned long long total_packets = 0;
547
uint64_t total_cycles = 0;
550
/* These loops subtract reference values ('*_zero') from the counters.
551
* Since loads and stores are relaxed, it might be possible for a '*_zero'
552
* value to be more recent than the current value we're reading from the
553
* counter. This is not a big problem, since these numbers are not
554
* supposed to be too accurate, but we should at least make sure that
555
* the result is not negative. */
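/* For example (illustrative numbers only): if this thread reads stats[i] == 100
 * while the owner concurrently updates stats_zero[i] to 105, a plain
 * subtraction would underflow; the comparison below avoids that and leaves
 * the displayed value at zero instead. */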
556
for (i = 0; i < DP_N_STATS; i++) {
557
if (stats[i] > pmd->stats_zero[i]) {
558
stats[i] -= pmd->stats_zero[i];
563
if (i != DP_STAT_LOST) {
564
/* Lost packets are already included in DP_STAT_MISS */
565
total_packets += stats[i];
569
for (i = 0; i < PMD_N_CYCLES; i++) {
570
if (cycles[i] > pmd->cycles_zero[i]) {
571
cycles[i] -= pmd->cycles_zero[i];
576
total_cycles += cycles[i];
579
ds_put_cstr(reply, (pmd->core_id == NON_PMD_CORE_ID)
580
? "main thread" : "pmd thread");
582
if (pmd->numa_id != OVS_NUMA_UNSPEC) {
583
ds_put_format(reply, " numa_id %d", pmd->numa_id);
585
if (pmd->core_id != OVS_CORE_UNSPEC && pmd->core_id != NON_PMD_CORE_ID) {
586
ds_put_format(reply, " core_id %u", pmd->core_id);
588
ds_put_cstr(reply, ":\n");
591
"\temc hits:%llu\n\tmegaflow hits:%llu\n"
592
"\tmiss:%llu\n\tlost:%llu\n",
593
stats[DP_STAT_EXACT_HIT], stats[DP_STAT_MASKED_HIT],
594
stats[DP_STAT_MISS], stats[DP_STAT_LOST]);
596
if (total_cycles == 0) {
601
"\tpolling cycles:%"PRIu64" (%.02f%%)\n"
602
"\tprocessing cycles:%"PRIu64" (%.02f%%)\n",
603
cycles[PMD_CYCLES_POLLING],
604
cycles[PMD_CYCLES_POLLING] / (double)total_cycles * 100,
605
cycles[PMD_CYCLES_PROCESSING],
606
cycles[PMD_CYCLES_PROCESSING] / (double)total_cycles * 100);
608
if (total_packets == 0) {
613
"\tavg cycles per packet: %.02f (%"PRIu64"/%llu)\n",
614
total_cycles / (double)total_packets,
615
total_cycles, total_packets);
618
"\tavg processing cycles per packet: "
619
"%.02f (%"PRIu64"/%llu)\n",
620
cycles[PMD_CYCLES_PROCESSING] / (double)total_packets,
621
cycles[PMD_CYCLES_PROCESSING], total_packets);
625
pmd_info_clear_stats(struct ds *reply OVS_UNUSED,
626
struct dp_netdev_pmd_thread *pmd,
627
unsigned long long stats[DP_N_STATS],
628
uint64_t cycles[PMD_N_CYCLES])
632
/* We cannot write 'stats' and 'cycles' (because they're written by other
633
* threads) and we shouldn't change 'stats' (because they're used to count
634
* datapath stats, which must not be cleared here). Instead, we save the
635
* current values and subtract them from the values to be displayed in the
* future. */
637
for (i = 0; i < DP_N_STATS; i++) {
638
pmd->stats_zero[i] = stats[i];
640
for (i = 0; i < PMD_N_CYCLES; i++) {
641
pmd->cycles_zero[i] = cycles[i];
646
dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
649
struct ds reply = DS_EMPTY_INITIALIZER;
650
struct dp_netdev_pmd_thread *pmd;
651
struct dp_netdev *dp = NULL;
652
enum pmd_info_type type = *(enum pmd_info_type *) aux;
654
ovs_mutex_lock(&dp_netdev_mutex);
657
dp = shash_find_data(&dp_netdevs, argv[1]);
658
} else if (shash_count(&dp_netdevs) == 1) {
659
/* There's only one datapath */
660
dp = shash_first(&dp_netdevs)->data;
664
ovs_mutex_unlock(&dp_netdev_mutex);
665
unixctl_command_reply_error(conn,
666
"please specify an existing datapath");
670
CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
671
unsigned long long stats[DP_N_STATS];
672
uint64_t cycles[PMD_N_CYCLES];
675
/* Read current stats and cycle counters */
676
for (i = 0; i < ARRAY_SIZE(stats); i++) {
677
atomic_read_relaxed(&pmd->stats.n[i], &stats[i]);
679
for (i = 0; i < ARRAY_SIZE(cycles); i++) {
680
atomic_read_relaxed(&pmd->cycles.n[i], &cycles[i]);
683
if (type == PMD_INFO_CLEAR_STATS) {
684
pmd_info_clear_stats(&reply, pmd, stats, cycles);
685
} else if (type == PMD_INFO_SHOW_STATS) {
686
pmd_info_show_stats(&reply, pmd, stats, cycles);
690
ovs_mutex_unlock(&dp_netdev_mutex);
692
unixctl_command_reply(conn, ds_cstr(&reply));
697
dpif_netdev_init(void)
699
static enum pmd_info_type show_aux = PMD_INFO_SHOW_STATS,
700
clear_aux = PMD_INFO_CLEAR_STATS;
702
unixctl_command_register("dpif-netdev/pmd-stats-show", "[dp]",
703
0, 1, dpif_netdev_pmd_info,
705
unixctl_command_register("dpif-netdev/pmd-stats-clear", "[dp]",
706
0, 1, dpif_netdev_pmd_info,
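/* For reference, a typical way to reach these handlers (assumed usage, not
 * part of this change):
 *
 *     ovs-appctl dpif-netdev/pmd-stats-show [dp]
 *     ovs-appctl dpif-netdev/pmd-stats-clear [dp]
 *
 * The optional [dp] argument selects the datapath when more than one exists;
 * see dpif_netdev_pmd_info() above. */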
712
dpif_netdev_enumerate(struct sset *all_dps,
713
const struct dpif_class *dpif_class)
368
715
struct shash_node *node;
370
717
ovs_mutex_lock(&dp_netdev_mutex);
371
718
SHASH_FOR_EACH(node, &dp_netdevs) {
719
struct dp_netdev *dp = node->data;
720
if (dpif_class != dp->class) {
721
/* 'dp_netdevs' contains both "netdev" and "dummy" dpifs.
722
* If the class doesn't match, skip this dpif. */
372
725
sset_add(all_dps, node->name);
374
727
ovs_mutex_unlock(&dp_netdev_mutex);
1044
1491
static struct dp_netdev_flow *
1045
dp_netdev_flow_cast(const struct cls_rule *cr)
1492
dp_netdev_flow_cast(const struct dpcls_rule *cr)
1047
1494
return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL;
1497
static bool dp_netdev_flow_ref(struct dp_netdev_flow *flow)
1499
return ovs_refcount_try_ref_rcu(&flow->ref_cnt);
1502
/* netdev_flow_key utilities.
1504
* netdev_flow_key is basically a miniflow. We use these functions
1505
* (netdev_flow_key_clone, netdev_flow_key_equal, ...) instead of the miniflow
1506
* functions (miniflow_clone_inline, miniflow_equal, ...), because:
1508
* - Since we are dealing exclusively with miniflows created by
1509
* miniflow_extract(), if the map is different the miniflow is different.
1510
* Therefore we can be faster by comparing the map and the miniflow in a
* single memcmp().
* - netdev_flow_key's miniflow always has inline values.
* - These functions can be inlined by the compiler.
*
* The following assertions make sure that what we're doing with miniflow is
* safe. */
1518
BUILD_ASSERT_DECL(offsetof(struct miniflow, inline_values)
1519
== sizeof(uint64_t));
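/* Explanatory note: this asserts that the miniflow's map (together with the
 * 'values_inline' flag packed alongside it) occupies exactly the first
 * 8 bytes, immediately followed by the inline values.  That layout is what
 * lets netdev_flow_key_equal() below compare the map and the packed values
 * with a single memcmp() over 'len' bytes starting at '&key->mf'. */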
1521
/* Given the number of bits set in the miniflow map, returns the size of the
1522
* 'netdev_flow_key.mf' */
1523
static inline uint32_t
1524
netdev_flow_key_size(uint32_t flow_u32s)
1526
return offsetof(struct miniflow, inline_values) +
1527
MINIFLOW_VALUES_SIZE(flow_u32s);
1531
netdev_flow_key_equal(const struct netdev_flow_key *a,
1532
const struct netdev_flow_key *b)
1534
/* 'b->len' may not be set yet. */
1535
return a->hash == b->hash && !memcmp(&a->mf, &b->mf, a->len);
1538
/* Used to compare 'netdev_flow_key' in the exact match cache to a miniflow.
1539
* The maps are compared bitwise, so both 'key->mf' and 'mf' must have been
1540
* generated by miniflow_extract. */
1542
netdev_flow_key_equal_mf(const struct netdev_flow_key *key,
1543
const struct miniflow *mf)
1545
return !memcmp(&key->mf, mf, key->len);
1549
netdev_flow_key_clone(struct netdev_flow_key *dst,
1550
const struct netdev_flow_key *src)
1553
offsetof(struct netdev_flow_key, mf) + src->len);
1558
netdev_flow_key_from_flow(struct netdev_flow_key *dst,
1559
const struct flow *src)
1561
struct dp_packet packet;
1562
uint64_t buf_stub[512 / 8];
1564
miniflow_initialize(&dst->mf, dst->buf);
1566
dp_packet_use_stub(&packet, buf_stub, sizeof buf_stub);
1567
pkt_metadata_from_flow(&packet.md, src);
1568
flow_compose(&packet, src);
1569
miniflow_extract(&packet, &dst->mf);
1570
dp_packet_uninit(&packet);
1572
dst->len = netdev_flow_key_size(count_1bits(dst->mf.map));
1573
dst->hash = 0; /* Not computed yet. */
1576
/* Initialize a netdev_flow_key 'mask' from 'match'. */
1578
netdev_flow_mask_init(struct netdev_flow_key *mask,
1579
const struct match *match)
1581
const uint64_t *mask_u64 = (const uint64_t *) &match->wc.masks;
1582
uint64_t *dst = mask->mf.inline_values;
1583
uint64_t map, mask_map = 0;
1587
/* Only check masks that make sense for the flow. */
1588
map = flow_wc_map(&match->flow);
1591
uint64_t rm1bit = rightmost_1bit(map);
1592
int i = raw_ctz(map);
1596
*dst++ = mask_u64[i];
1597
hash = hash_add64(hash, mask_u64[i]);
1602
mask->mf.values_inline = true;
1603
mask->mf.map = mask_map;
1605
hash = hash_add64(hash, mask_map);
1607
n = dst - mask->mf.inline_values;
1609
mask->hash = hash_finish(hash, n * 8);
1610
mask->len = netdev_flow_key_size(n);
1613
/* Initializes 'dst' as a copy of 'flow' masked with 'mask'. */
1615
netdev_flow_key_init_masked(struct netdev_flow_key *dst,
1616
const struct flow *flow,
1617
const struct netdev_flow_key *mask)
1619
uint64_t *dst_u64 = dst->mf.inline_values;
1620
const uint64_t *mask_u64 = mask->mf.inline_values;
1624
dst->len = mask->len;
1625
dst->mf.values_inline = true;
1626
dst->mf.map = mask->mf.map;
1628
FLOW_FOR_EACH_IN_MAP(value, flow, mask->mf.map) {
1629
*dst_u64 = value & *mask_u64++;
1630
hash = hash_add64(hash, *dst_u64++);
1632
dst->hash = hash_finish(hash, (dst_u64 - dst->mf.inline_values) * 8);
1635
/* Iterate through all netdev_flow_key u64 values specified by 'MAP' */
1636
#define NETDEV_FLOW_KEY_FOR_EACH_IN_MAP(VALUE, KEY, MAP) \
1637
for (struct mf_for_each_in_map_aux aux__ \
1638
= { (KEY)->mf.inline_values, (KEY)->mf.map, MAP }; \
1639
mf_get_next_in_map(&aux__, &(VALUE)); \
1642
/* Returns a hash value for the bits of 'key' where there are 1-bits in
* 'mask'. */
1644
static inline uint32_t
1645
netdev_flow_key_hash_in_mask(const struct netdev_flow_key *key,
1646
const struct netdev_flow_key *mask)
1648
const uint64_t *p = mask->mf.inline_values;
1652
NETDEV_FLOW_KEY_FOR_EACH_IN_MAP(key_u64, key, mask->mf.map) {
1653
hash = hash_add64(hash, key_u64 & *p++);
1656
return hash_finish(hash, (p - mask->mf.inline_values) * 8);
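/* Note: for a given key/mask pair this must yield the same value that
 * netdev_flow_key_init_masked() stored in 'dst->hash' at insertion time,
 * which is why both walk the mask's map and feed the masked 64-bit chunks
 * to hash_add64()/hash_finish() in the same order. */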
1660
emc_entry_alive(struct emc_entry *ce)
1662
return ce->flow && !ce->flow->dead;
1666
emc_clear_entry(struct emc_entry *ce)
1669
dp_netdev_flow_unref(ce->flow);
1675
emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
1676
const struct netdev_flow_key *key)
1678
if (ce->flow != flow) {
1680
dp_netdev_flow_unref(ce->flow);
1683
if (dp_netdev_flow_ref(flow)) {
1690
netdev_flow_key_clone(&ce->key, key);
1695
emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
1696
struct dp_netdev_flow *flow)
1698
struct emc_entry *to_be_replaced = NULL;
1699
struct emc_entry *current_entry;
1701
EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
1702
if (netdev_flow_key_equal(&current_entry->key, key)) {
1703
/* We found the entry with the 'mf' miniflow */
1704
emc_change_entry(current_entry, flow, NULL);
1708
/* Replacement policy: put the flow in an empty (not alive) entry, or
1709
* in the first entry where it can be */
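/* More precisely (explanatory note): EMC_FOR_EACH_POS_WITH_HASH probes a
 * small, fixed number of positions derived from 'key->hash'.  Among live
 * candidates, the entry with the smaller key hash is preferred for
 * replacement, which acts as a cheap, roughly random tie-breaker. */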
1711
|| (emc_entry_alive(to_be_replaced)
1712
&& !emc_entry_alive(current_entry))
1713
|| current_entry->key.hash < to_be_replaced->key.hash) {
1714
to_be_replaced = current_entry;
1717
/* We didn't find the miniflow in the cache.
1718
* The 'to_be_replaced' entry is where the new flow will be stored */
1720
emc_change_entry(to_be_replaced, flow, key);
1723
static inline struct dp_netdev_flow *
1724
emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key)
1726
struct emc_entry *current_entry;
1728
EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
1729
if (current_entry->key.hash == key->hash
1730
&& emc_entry_alive(current_entry)
1731
&& netdev_flow_key_equal_mf(&current_entry->key, &key->mf)) {
1733
/* We found the entry with the 'key->mf' miniflow */
1734
return current_entry->flow;
1050
1741
static struct dp_netdev_flow *
1051
dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct miniflow *key)
1052
OVS_EXCLUDED(dp->cls.rwlock)
1742
dp_netdev_pmd_lookup_flow(const struct dp_netdev_pmd_thread *pmd,
1743
const struct netdev_flow_key *key)
1054
1745
struct dp_netdev_flow *netdev_flow;
1055
struct cls_rule *rule;
1746
struct dpcls_rule *rule;
1057
fat_rwlock_rdlock(&dp->cls.rwlock);
1058
rule = classifier_lookup_miniflow_first(&dp->cls, key);
1748
dpcls_lookup(&pmd->cls, key, &rule, 1);
1059
1749
netdev_flow = dp_netdev_flow_cast(rule);
1060
fat_rwlock_unlock(&dp->cls.rwlock);
1062
1751
return netdev_flow;
1065
1754
static struct dp_netdev_flow *
1066
dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow)
1067
OVS_REQ_RDLOCK(dp->cls.rwlock)
1755
dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd,
1756
const ovs_u128 *ufidp, const struct nlattr *key,
1069
1759
struct dp_netdev_flow *netdev_flow;
1071
HMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0),
1073
if (flow_equal(&netdev_flow->flow, flow)) {
1763
/* If a UFID is not provided, determine one based on the key. */
1764
if (!ufidp && key && key_len
1765
&& !dpif_netdev_flow_from_nlattrs(key, key_len, &flow)) {
1766
dpif_flow_hash(pmd->dp->dpif, &flow, sizeof flow, &ufid);
1771
CMAP_FOR_EACH_WITH_HASH (netdev_flow, node, dp_netdev_flow_hash(ufidp),
1773
if (ovs_u128_equals(&netdev_flow->ufid, ufidp)) {
1192
dpif_netdev_flow_get(const struct dpif *dpif,
1193
const struct nlattr *nl_key, size_t nl_key_len,
1194
struct ofpbuf **bufp,
1195
struct nlattr **maskp, size_t *mask_len,
1196
struct nlattr **actionsp, size_t *actions_len,
1197
struct dpif_flow_stats *stats)
1941
dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get)
1199
1943
struct dp_netdev *dp = get_dp_netdev(dpif);
1200
1944
struct dp_netdev_flow *netdev_flow;
1945
struct dp_netdev_pmd_thread *pmd;
1946
unsigned pmd_id = get->pmd_id == PMD_ID_NULL
1947
? NON_PMD_CORE_ID : get->pmd_id;
1204
error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
1950
pmd = dp_netdev_get_pmd(dp, pmd_id);
1209
fat_rwlock_rdlock(&dp->cls.rwlock);
1210
netdev_flow = dp_netdev_find_flow(dp, &key);
1211
fat_rwlock_unlock(&dp->cls.rwlock);
1955
netdev_flow = dp_netdev_pmd_find_flow(pmd, get->ufid, get->key,
1213
1957
if (netdev_flow) {
1215
get_dpif_flow_stats(netdev_flow, stats);
1218
if (maskp || actionsp) {
1219
struct dp_netdev_actions *actions;
1222
actions = dp_netdev_flow_get_actions(netdev_flow);
1223
len += maskp ? sizeof(struct odputil_keybuf) : 0;
1224
len += actionsp ? actions->size : 0;
1226
*bufp = ofpbuf_new(len);
1228
struct flow_wildcards wc;
1230
minimask_expand(&netdev_flow->cr.match.mask, &wc);
1231
odp_flow_key_from_mask(*bufp, &wc.masks, &netdev_flow->flow,
1232
odp_to_u32(wc.masks.in_port.odp_port),
1234
*maskp = ofpbuf_data(*bufp);
1235
*mask_len = ofpbuf_size(*bufp);
1238
struct dp_netdev_actions *actions;
1240
actions = dp_netdev_flow_get_actions(netdev_flow);
1241
*actionsp = ofpbuf_put(*bufp, actions->actions, actions->size);
1242
*actions_len = actions->size;
1958
dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->buffer,
1246
1961
error = ENOENT;
1963
dp_netdev_pmd_unref(pmd);
1969
static struct dp_netdev_flow *
1970
dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
1971
struct match *match, const ovs_u128 *ufid,
1972
const struct nlattr *actions, size_t actions_len)
1973
OVS_REQUIRES(pmd->flow_mutex)
1975
struct dp_netdev_flow *flow;
1976
struct netdev_flow_key mask;
1978
netdev_flow_mask_init(&mask, match);
1979
/* Make sure wc does not have metadata. */
1980
ovs_assert(!(mask.mf.map & (MINIFLOW_MAP(metadata) | MINIFLOW_MAP(regs))));
1982
/* Do not allocate extra space. */
1983
flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
1984
memset(&flow->stats, 0, sizeof flow->stats);
1987
*CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
1988
*CONST_CAST(struct flow *, &flow->flow) = match->flow;
1989
*CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
1990
ovs_refcount_init(&flow->ref_cnt);
1991
ovsrcu_set(&flow->actions, dp_netdev_actions_create(actions, actions_len));
1993
netdev_flow_key_init_masked(&flow->cr.flow, &match->flow, &mask);
1994
dpcls_insert(&pmd->cls, &flow->cr, &mask);
1996
cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow->node),
1997
dp_netdev_flow_hash(&flow->ufid));
1999
if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
2001
struct ds ds = DS_EMPTY_INITIALIZER;
2003
match.flow = flow->flow;
2004
miniflow_expand(&flow->cr.mask->mf, &match.wc.masks);
2006
ds_put_cstr(&ds, "flow_add: ");
2007
odp_format_ufid(ufid, &ds);
2008
ds_put_cstr(&ds, " ");
2009
match_format(&match, &ds, OFP_DEFAULT_PRIORITY);
2010
ds_put_cstr(&ds, ", actions:");
2011
format_odp_actions(&ds, actions, actions_len);
2013
VLOG_DBG_RL(&upcall_rl, "%s", ds_cstr(&ds));
1253
dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *flow,
1254
const struct flow_wildcards *wc,
1255
const struct nlattr *actions,
1257
OVS_REQUIRES(dp->flow_mutex)
2022
dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
2024
struct dp_netdev *dp = get_dp_netdev(dpif);
1259
2025
struct dp_netdev_flow *netdev_flow;
2026
struct netdev_flow_key key;
2027
struct dp_netdev_pmd_thread *pmd;
1260
2028
struct match match;
1262
netdev_flow = xzalloc(sizeof *netdev_flow);
1263
*CONST_CAST(struct flow *, &netdev_flow->flow) = *flow;
1265
ovsthread_stats_init(&netdev_flow->stats);
1267
ovsrcu_set(&netdev_flow->actions,
1268
dp_netdev_actions_create(actions, actions_len));
1270
match_init(&match, flow, wc);
1271
cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr),
1272
&match, NETDEV_RULE_PRIORITY);
1273
fat_rwlock_wrlock(&dp->cls.rwlock);
1274
classifier_insert(&dp->cls,
1275
CONST_CAST(struct cls_rule *, &netdev_flow->cr));
1276
hmap_insert(&dp->flow_table,
1277
CONST_CAST(struct hmap_node *, &netdev_flow->node),
1278
flow_hash(flow, 0));
1279
fat_rwlock_unlock(&dp->cls.rwlock);
1285
clear_stats(struct dp_netdev_flow *netdev_flow)
1287
struct dp_netdev_flow_stats *bucket;
1290
OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
1291
ovs_mutex_lock(&bucket->mutex);
1293
bucket->packet_count = 0;
1294
bucket->byte_count = 0;
1295
bucket->tcp_flags = 0;
1296
ovs_mutex_unlock(&bucket->mutex);
1301
dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
1303
struct dp_netdev *dp = get_dp_netdev(dpif);
1304
struct dp_netdev_flow *netdev_flow;
1306
struct miniflow miniflow;
1307
struct flow_wildcards wc;
2030
unsigned pmd_id = put->pmd_id == PMD_ID_NULL
2031
? NON_PMD_CORE_ID : put->pmd_id;
1310
error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &flow);
2034
error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &match.flow);
1314
2038
error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
1315
2039
put->mask, put->mask_len,
2040
&match.flow, &match.wc.masks);
1320
miniflow_init(&miniflow, &flow);
1322
ovs_mutex_lock(&dp->flow_mutex);
1323
netdev_flow = dp_netdev_lookup_flow(dp, &miniflow);
2045
pmd = dp_netdev_get_pmd(dp, pmd_id);
2050
/* Must produce a netdev_flow_key for lookup.
2051
* This interface is no longer performance critical, since it is not used
2052
* for upcall processing any more. */
2053
netdev_flow_key_from_flow(&key, &match.flow);
2058
dpif_flow_hash(dpif, &match.flow, sizeof match.flow, &ufid);
2061
ovs_mutex_lock(&pmd->flow_mutex);
2062
netdev_flow = dp_netdev_pmd_lookup_flow(pmd, &key);
1324
2063
if (!netdev_flow) {
1325
2064
if (put->flags & DPIF_FP_CREATE) {
1326
if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
2065
if (cmap_count(&pmd->flow_table) < MAX_FLOWS) {
1327
2066
if (put->stats) {
1328
2067
memset(put->stats, 0, sizeof *put->stats);
1330
error = dp_netdev_flow_add(dp, &flow, &wc, put->actions,
2069
dp_netdev_flow_add(pmd, &match, &ufid, put->actions,
1374
2123
struct dp_netdev *dp = get_dp_netdev(dpif);
1375
2124
struct dp_netdev_flow *netdev_flow;
2125
struct dp_netdev_pmd_thread *pmd;
2126
unsigned pmd_id = del->pmd_id == PMD_ID_NULL
2127
? NON_PMD_CORE_ID : del->pmd_id;
1379
error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
2130
pmd = dp_netdev_get_pmd(dp, pmd_id);
1384
ovs_mutex_lock(&dp->flow_mutex);
1385
fat_rwlock_wrlock(&dp->cls.rwlock);
1386
netdev_flow = dp_netdev_find_flow(dp, &key);
2135
ovs_mutex_lock(&pmd->flow_mutex);
2136
netdev_flow = dp_netdev_pmd_find_flow(pmd, del->ufid, del->key,
1387
2138
if (netdev_flow) {
1388
2139
if (del->stats) {
1389
2140
get_dpif_flow_stats(netdev_flow, del->stats);
1391
dp_netdev_remove_flow(dp, netdev_flow);
2142
dp_netdev_pmd_remove_flow(pmd, netdev_flow);
1393
2144
error = ENOENT;
1395
fat_rwlock_unlock(&dp->cls.rwlock);
1396
ovs_mutex_unlock(&dp->flow_mutex);
2146
ovs_mutex_unlock(&pmd->flow_mutex);
2147
dp_netdev_pmd_unref(pmd);
1401
struct dp_netdev_flow_state {
1402
struct odputil_keybuf keybuf;
1403
struct odputil_keybuf maskbuf;
1404
struct dpif_flow_stats stats;
1407
struct dp_netdev_flow_iter {
2152
struct dpif_netdev_flow_dump {
2153
struct dpif_flow_dump up;
2154
struct cmap_position poll_thread_pos;
2155
struct cmap_position flow_pos;
2156
struct dp_netdev_pmd_thread *cur_pmd;
1411
2158
struct ovs_mutex mutex;
1415
dpif_netdev_flow_dump_state_init(void **statep)
1417
struct dp_netdev_flow_state *state;
1419
*statep = state = xmalloc(sizeof *state);
1423
dpif_netdev_flow_dump_state_uninit(void *state_)
1425
struct dp_netdev_flow_state *state = state_;
1431
dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **iterp)
1433
struct dp_netdev_flow_iter *iter;
1435
*iterp = iter = xmalloc(sizeof *iter);
1439
ovs_mutex_init(&iter->mutex);
1443
/* XXX the caller must use 'actions' without quiescing */
1445
dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_,
1446
const struct nlattr **key, size_t *key_len,
1447
const struct nlattr **mask, size_t *mask_len,
1448
const struct nlattr **actions, size_t *actions_len,
1449
const struct dpif_flow_stats **stats)
1451
struct dp_netdev_flow_iter *iter = iter_;
1452
struct dp_netdev_flow_state *state = state_;
1453
struct dp_netdev *dp = get_dp_netdev(dpif);
1454
struct dp_netdev_flow *netdev_flow;
1455
struct flow_wildcards wc;
1458
ovs_mutex_lock(&iter->mutex);
1459
error = iter->status;
1461
struct hmap_node *node;
1463
fat_rwlock_rdlock(&dp->cls.rwlock);
1464
node = hmap_at_position(&dp->flow_table, &iter->bucket, &iter->offset);
1466
netdev_flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
1468
fat_rwlock_unlock(&dp->cls.rwlock);
1470
iter->status = error = EOF;
1473
ovs_mutex_unlock(&iter->mutex);
1478
minimask_expand(&netdev_flow->cr.match.mask, &wc);
1483
ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf);
1484
odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks,
1485
netdev_flow->flow.in_port.odp_port);
1487
*key = ofpbuf_data(&buf);
1488
*key_len = ofpbuf_size(&buf);
1494
ofpbuf_use_stack(&buf, &state->maskbuf, sizeof state->maskbuf);
1495
odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
1496
odp_to_u32(wc.masks.in_port.odp_port),
1499
*mask = ofpbuf_data(&buf);
1500
*mask_len = ofpbuf_size(&buf);
1503
if (actions || stats) {
1505
struct dp_netdev_actions *dp_actions =
1506
dp_netdev_flow_get_actions(netdev_flow);
1508
*actions = dp_actions->actions;
1509
*actions_len = dp_actions->size;
1513
get_dpif_flow_stats(netdev_flow, &state->stats);
1514
*stats = &state->stats;
1522
dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *iter_)
1524
struct dp_netdev_flow_iter *iter = iter_;
1526
ovs_mutex_destroy(&iter->mutex);
2161
static struct dpif_netdev_flow_dump *
2162
dpif_netdev_flow_dump_cast(struct dpif_flow_dump *dump)
2164
return CONTAINER_OF(dump, struct dpif_netdev_flow_dump, up);
2167
static struct dpif_flow_dump *
2168
dpif_netdev_flow_dump_create(const struct dpif *dpif_, bool terse)
2170
struct dpif_netdev_flow_dump *dump;
2172
dump = xzalloc(sizeof *dump);
2173
dpif_flow_dump_init(&dump->up, dpif_);
2174
dump->up.terse = terse;
2175
ovs_mutex_init(&dump->mutex);
2181
dpif_netdev_flow_dump_destroy(struct dpif_flow_dump *dump_)
2183
struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_);
2185
ovs_mutex_destroy(&dump->mutex);
2190
struct dpif_netdev_flow_dump_thread {
2191
struct dpif_flow_dump_thread up;
2192
struct dpif_netdev_flow_dump *dump;
2193
struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
2194
struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
2197
static struct dpif_netdev_flow_dump_thread *
2198
dpif_netdev_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
2200
return CONTAINER_OF(thread, struct dpif_netdev_flow_dump_thread, up);
2203
static struct dpif_flow_dump_thread *
2204
dpif_netdev_flow_dump_thread_create(struct dpif_flow_dump *dump_)
2206
struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_);
2207
struct dpif_netdev_flow_dump_thread *thread;
2209
thread = xmalloc(sizeof *thread);
2210
dpif_flow_dump_thread_init(&thread->up, &dump->up);
2211
thread->dump = dump;
2216
dpif_netdev_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
2218
struct dpif_netdev_flow_dump_thread *thread
2219
= dpif_netdev_flow_dump_thread_cast(thread_);
2225
dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
2226
struct dpif_flow *flows, int max_flows)
2228
struct dpif_netdev_flow_dump_thread *thread
2229
= dpif_netdev_flow_dump_thread_cast(thread_);
2230
struct dpif_netdev_flow_dump *dump = thread->dump;
2231
struct dp_netdev_flow *netdev_flows[FLOW_DUMP_MAX_BATCH];
2235
ovs_mutex_lock(&dump->mutex);
2236
if (!dump->status) {
2237
struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
2238
struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
2239
struct dp_netdev_pmd_thread *pmd = dump->cur_pmd;
2240
int flow_limit = MIN(max_flows, FLOW_DUMP_MAX_BATCH);
2242
/* The first call to dump_next() extracts the first pmd thread.
* If there is no pmd thread, it returns immediately. */
2245
pmd = dp_netdev_pmd_get_next(dp, &dump->poll_thread_pos);
2247
ovs_mutex_unlock(&dump->mutex);
2254
for (n_flows = 0; n_flows < flow_limit; n_flows++) {
2255
struct cmap_node *node;
2257
node = cmap_next_position(&pmd->flow_table, &dump->flow_pos);
2261
netdev_flows[n_flows] = CONTAINER_OF(node,
2262
struct dp_netdev_flow,
2265
/* When finishing dumping the current pmd thread, moves to
2267
if (n_flows < flow_limit) {
2268
memset(&dump->flow_pos, 0, sizeof dump->flow_pos);
2269
dp_netdev_pmd_unref(pmd);
2270
pmd = dp_netdev_pmd_get_next(dp, &dump->poll_thread_pos);
2276
/* Keeps the reference for the next caller. */
2277
dump->cur_pmd = pmd;
2279
/* If the current dump is empty, do not exit the loop, since the
* remaining pmds could have flows to be dumped.  Just dump again
* on the new 'pmd'. */
2284
ovs_mutex_unlock(&dump->mutex);
2286
for (i = 0; i < n_flows; i++) {
2287
struct odputil_keybuf *maskbuf = &thread->maskbuf[i];
2288
struct odputil_keybuf *keybuf = &thread->keybuf[i];
2289
struct dp_netdev_flow *netdev_flow = netdev_flows[i];
2290
struct dpif_flow *f = &flows[i];
2291
struct ofpbuf key, mask;
2293
ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
2294
ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
2295
dp_netdev_flow_to_dpif_flow(netdev_flow, &key, &mask, f,
1532
2303
dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
2304
OVS_NO_THREAD_SAFETY_ANALYSIS
1534
2306
struct dp_netdev *dp = get_dp_netdev(dpif);
1535
struct pkt_metadata *md = &execute->md;
1537
struct miniflow flow;
1538
uint32_t buf[FLOW_U32S];
2307
struct dp_netdev_pmd_thread *pmd;
2308
struct dp_packet *pp;
1541
if (ofpbuf_size(execute->packet) < ETH_HEADER_LEN ||
1542
ofpbuf_size(execute->packet) > UINT16_MAX) {
2310
if (dp_packet_size(execute->packet) < ETH_HEADER_LEN ||
2311
dp_packet_size(execute->packet) > UINT16_MAX) {
1546
/* Extract flow key. */
1547
miniflow_initialize(&key.flow, key.buf);
1548
miniflow_extract(execute->packet, md, &key.flow);
1550
ovs_rwlock_rdlock(&dp->port_rwlock);
1551
dp_netdev_execute_actions(dp, &key.flow, execute->packet, false, md,
1552
execute->actions, execute->actions_len);
1553
ovs_rwlock_unlock(&dp->port_rwlock);
2315
/* Tries finding the 'pmd'. If NULL is returned, that means
2316
* the current thread is a non-pmd thread and should use
2317
* dp_netdev_get_pmd(dp, NON_PMD_CORE_ID). */
2318
pmd = ovsthread_getspecific(dp->per_pmd_key);
2320
pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID);
2323
/* If the current thread is non-pmd thread, acquires
2324
* the 'non_pmd_mutex'. */
2325
if (pmd->core_id == NON_PMD_CORE_ID) {
2326
ovs_mutex_lock(&dp->non_pmd_mutex);
2327
ovs_mutex_lock(&dp->port_mutex);
2330
pp = execute->packet;
2331
dp_netdev_execute_actions(pmd, &pp, 1, false, execute->actions,
2332
execute->actions_len);
2333
if (pmd->core_id == NON_PMD_CORE_ID) {
2334
dp_netdev_pmd_unref(pmd);
2335
ovs_mutex_unlock(&dp->port_mutex);
2336
ovs_mutex_unlock(&dp->non_pmd_mutex);
1559
dp_netdev_destroy_all_queues(struct dp_netdev *dp)
1560
OVS_REQ_WRLOCK(dp->queue_rwlock)
2343
dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
1564
dp_netdev_purge_queues(dp);
1566
for (i = 0; i < dp->n_handlers; i++) {
1567
struct dp_netdev_queue *q = &dp->handler_queues[i];
1569
ovs_mutex_destroy(&q->mutex);
1570
seq_destroy(q->seq);
1572
free(dp->handler_queues);
1573
dp->handler_queues = NULL;
1578
dp_netdev_refresh_queues(struct dp_netdev *dp, uint32_t n_handlers)
1579
OVS_REQ_WRLOCK(dp->queue_rwlock)
1581
if (dp->n_handlers != n_handlers) {
1584
dp_netdev_destroy_all_queues(dp);
1586
dp->n_handlers = n_handlers;
1587
dp->handler_queues = xzalloc(n_handlers * sizeof *dp->handler_queues);
1589
for (i = 0; i < n_handlers; i++) {
1590
struct dp_netdev_queue *q = &dp->handler_queues[i];
1592
ovs_mutex_init(&q->mutex);
1593
q->seq = seq_create();
2347
for (i = 0; i < n_ops; i++) {
2348
struct dpif_op *op = ops[i];
2351
case DPIF_OP_FLOW_PUT:
2352
op->error = dpif_netdev_flow_put(dpif, &op->u.flow_put);
2355
case DPIF_OP_FLOW_DEL:
2356
op->error = dpif_netdev_flow_del(dpif, &op->u.flow_del);
2359
case DPIF_OP_EXECUTE:
2360
op->error = dpif_netdev_execute(dpif, &op->u.execute);
2363
case DPIF_OP_FLOW_GET:
2364
op->error = dpif_netdev_flow_get(dpif, &op->u.flow_get);
1599
dpif_netdev_recv_set(struct dpif *dpif, bool enable)
2370
/* Returns true if the configuration for rx queues or cpu mask is changed. */
2373
pmd_config_changed(const struct dp_netdev *dp, size_t rxqs, const char *cmask)
1601
struct dp_netdev *dp = get_dp_netdev(dpif);
1603
if ((dp->handler_queues != NULL) == enable) {
1607
fat_rwlock_wrlock(&dp->queue_rwlock);
1609
dp_netdev_destroy_all_queues(dp);
2375
if (dp->n_dpdk_rxqs != rxqs) {
1611
dp_netdev_refresh_queues(dp, 1);
2378
if (dp->pmd_cmask != NULL && cmask != NULL) {
2379
return strcmp(dp->pmd_cmask, cmask);
2381
return (dp->pmd_cmask != NULL || cmask != NULL);
1613
fat_rwlock_unlock(&dp->queue_rwlock);
2386
/* Resets pmd threads if the configuration for 'rxq's or cpu mask changes. */
1619
dpif_netdev_handlers_set(struct dpif *dpif, uint32_t n_handlers)
2388
dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask)
1621
2390
struct dp_netdev *dp = get_dp_netdev(dpif);
1623
fat_rwlock_wrlock(&dp->queue_rwlock);
1624
if (dp->handler_queues) {
1625
dp_netdev_refresh_queues(dp, n_handlers);
2392
if (pmd_config_changed(dp, n_rxqs, cmask)) {
2393
struct dp_netdev_port *port;
2395
dp_netdev_destroy_all_pmds(dp);
2397
CMAP_FOR_EACH (port, node, &dp->ports) {
2398
if (netdev_is_pmd(port->netdev)) {
2401
/* Closes the existing 'rxq's. */
2402
for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
2403
netdev_rxq_close(port->rxq[i]);
2404
port->rxq[i] = NULL;
2407
/* Sets the new rx queue config. */
2408
err = netdev_set_multiq(port->netdev,
2409
ovs_numa_get_n_cores() + 1,
2411
if (err && (err != EOPNOTSUPP)) {
2412
VLOG_ERR("Failed to set dpdk interface %s rx_queue to:"
2413
" %u", netdev_get_name(port->netdev),
2418
/* If the set_multiq() above succeeds, reopens the 'rxq's. */
2419
port->rxq = xrealloc(port->rxq, sizeof *port->rxq
2420
* netdev_n_rxq(port->netdev));
2421
for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
2422
netdev_rxq_open(port->netdev, &port->rxq[i], i);
2426
dp->n_dpdk_rxqs = n_rxqs;
2428
/* Reconfigures the cpu mask. */
2429
ovs_numa_set_cpu_mask(cmask);
2430
free(dp->pmd_cmask);
2431
dp->pmd_cmask = cmask ? xstrdup(cmask) : NULL;
2433
/* Restores the non-pmd. */
2434
dp_netdev_set_nonpmd(dp);
2435
/* Restores all pmd threads. */
2436
dp_netdev_reset_pmd_threads(dp);
1627
fat_rwlock_unlock(&dp->queue_rwlock);
1922
2706
port_unref(poll_list[i].port);
2709
dp_netdev_pmd_reload_done(pmd);
1925
2711
free(poll_list);
1930
dp_netdev_set_pmd_threads(struct dp_netdev *dp, int n)
1934
if (n == dp->n_pmd_threads) {
1938
/* Stop existing threads. */
1939
latch_set(&dp->exit_latch);
1940
dp_netdev_reload_pmd_threads(dp);
1941
for (i = 0; i < dp->n_pmd_threads; i++) {
1942
struct pmd_thread *f = &dp->pmd_threads[i];
1944
xpthread_join(f->thread, NULL);
1946
latch_poll(&dp->exit_latch);
1947
free(dp->pmd_threads);
1949
/* Start new threads. */
1950
dp->pmd_threads = xmalloc(n * sizeof *dp->pmd_threads);
1951
dp->n_pmd_threads = n;
1953
for (i = 0; i < n; i++) {
1954
struct pmd_thread *f = &dp->pmd_threads[i];
1958
atomic_store(&f->change_seq, 1);
1960
/* Each thread will distribute all devices rx-queues among
1962
f->thread = ovs_thread_create("pmd", pmd_thread_main, f);
2716
dp_netdev_disable_upcall(struct dp_netdev *dp)
2717
OVS_ACQUIRES(dp->upcall_rwlock)
2719
fat_rwlock_wrlock(&dp->upcall_rwlock);
2723
dpif_netdev_disable_upcall(struct dpif *dpif)
2724
OVS_NO_THREAD_SAFETY_ANALYSIS
2726
struct dp_netdev *dp = get_dp_netdev(dpif);
2727
dp_netdev_disable_upcall(dp);
2731
dp_netdev_enable_upcall(struct dp_netdev *dp)
2732
OVS_RELEASES(dp->upcall_rwlock)
2734
fat_rwlock_unlock(&dp->upcall_rwlock);
2738
dpif_netdev_enable_upcall(struct dpif *dpif)
2739
OVS_NO_THREAD_SAFETY_ANALYSIS
2741
struct dp_netdev *dp = get_dp_netdev(dpif);
2742
dp_netdev_enable_upcall(dp);
2746
dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd)
2748
ovs_mutex_lock(&pmd->cond_mutex);
2749
xpthread_cond_signal(&pmd->cond);
2750
ovs_mutex_unlock(&pmd->cond_mutex);
2753
/* Finds and refs the dp_netdev_pmd_thread on core 'core_id'.  Returns
* the pointer if it succeeds, otherwise NULL.
*
* The caller must unref the returned reference. */
2757
static struct dp_netdev_pmd_thread *
2758
dp_netdev_get_pmd(struct dp_netdev *dp, unsigned core_id)
2760
struct dp_netdev_pmd_thread *pmd;
2761
const struct cmap_node *pnode;
2763
pnode = cmap_find(&dp->poll_threads, hash_int(core_id, 0));
2767
pmd = CONTAINER_OF(pnode, struct dp_netdev_pmd_thread, node);
2769
return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL;
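/* Sketch of the expected usage pattern, mirroring callers elsewhere in this
 * file (e.g. dpif_netdev_flow_get()):
 *
 *     struct dp_netdev_pmd_thread *pmd = dp_netdev_get_pmd(dp, core_id);
 *     if (pmd) {
 *         ...use 'pmd'...
 *         dp_netdev_pmd_unref(pmd);
 *     }
 */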
2772
/* Sets the 'struct dp_netdev_pmd_thread' for non-pmd threads. */
2774
dp_netdev_set_nonpmd(struct dp_netdev *dp)
2776
struct dp_netdev_pmd_thread *non_pmd;
2778
non_pmd = xzalloc(sizeof *non_pmd);
2779
dp_netdev_configure_pmd(non_pmd, dp, 0, NON_PMD_CORE_ID,
2783
/* Caller must have valid pointer to 'pmd'. */
2785
dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd)
2787
return ovs_refcount_try_ref_rcu(&pmd->ref_cnt);
2791
dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd)
2793
if (pmd && ovs_refcount_unref(&pmd->ref_cnt) == 1) {
2794
ovsrcu_postpone(dp_netdev_destroy_pmd, pmd);
2798
/* Given cmap position 'pos', tries to ref the next node.  If try_ref()
* fails, keeps checking the next nodes until reaching the end of the cmap.
*
* The caller must unref the returned reference. */
2802
static struct dp_netdev_pmd_thread *
2803
dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos)
2805
struct dp_netdev_pmd_thread *next;
2808
struct cmap_node *node;
2810
node = cmap_next_position(&dp->poll_threads, pos);
2811
next = node ? CONTAINER_OF(node, struct dp_netdev_pmd_thread, node)
2813
} while (next && !dp_netdev_pmd_try_ref(next));
2819
core_id_to_qid(unsigned core_id)
2821
if (core_id != NON_PMD_CORE_ID) {
2824
return ovs_numa_get_n_cores();
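/* In other words: pmd threads use their core id as the tx queue id, while
 * the single non-pmd thread gets the extra queue (index
 * ovs_numa_get_n_cores()), matching the 'n_cores + 1' queues requested via
 * netdev_set_multiq() in dpif_netdev_pmd_set(). */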
2828
/* Configures the 'pmd' based on the input argument. */
2830
dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
2831
int index, unsigned core_id, int numa_id)
2835
pmd->core_id = core_id;
2836
pmd->tx_qid = core_id_to_qid(core_id);
2837
pmd->numa_id = numa_id;
2839
ovs_refcount_init(&pmd->ref_cnt);
2840
latch_init(&pmd->exit_latch);
2841
atomic_init(&pmd->change_seq, PMD_INITIAL_SEQ);
2842
xpthread_cond_init(&pmd->cond, NULL);
2843
ovs_mutex_init(&pmd->cond_mutex);
2844
ovs_mutex_init(&pmd->flow_mutex);
2845
dpcls_init(&pmd->cls);
2846
cmap_init(&pmd->flow_table);
2847
/* init the 'flow_cache' since there is no
2848
* actual thread created for NON_PMD_CORE_ID. */
2849
if (core_id == NON_PMD_CORE_ID) {
2850
emc_cache_init(&pmd->flow_cache);
2852
cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node),
2853
hash_int(core_id, 0));
2857
dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
2859
dp_netdev_pmd_flow_flush(pmd);
2860
dpcls_destroy(&pmd->cls);
2861
cmap_destroy(&pmd->flow_table);
2862
ovs_mutex_destroy(&pmd->flow_mutex);
2863
latch_destroy(&pmd->exit_latch);
2864
xpthread_cond_destroy(&pmd->cond);
2865
ovs_mutex_destroy(&pmd->cond_mutex);
2869
/* Stops the pmd thread, removes it from the 'dp->poll_threads',
2870
* and unrefs the struct. */
2872
dp_netdev_del_pmd(struct dp_netdev_pmd_thread *pmd)
2874
/* Uninit the 'flow_cache', since there is
* no actual thread to uninit it for NON_PMD_CORE_ID. */
2876
if (pmd->core_id == NON_PMD_CORE_ID) {
2877
emc_cache_uninit(&pmd->flow_cache);
2879
latch_set(&pmd->exit_latch);
2880
dp_netdev_reload_pmd__(pmd);
2881
ovs_numa_unpin_core(pmd->core_id);
2882
xpthread_join(pmd->thread, NULL);
2884
cmap_remove(&pmd->dp->poll_threads, &pmd->node, hash_int(pmd->core_id, 0));
2885
dp_netdev_pmd_unref(pmd);
2888
/* Destroys all pmd threads. */
2890
dp_netdev_destroy_all_pmds(struct dp_netdev *dp)
2892
struct dp_netdev_pmd_thread *pmd;
2894
CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
2895
dp_netdev_del_pmd(pmd);
2899
/* Deletes all pmd threads on numa node 'numa_id'. */
2901
dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id)
2903
struct dp_netdev_pmd_thread *pmd;
2905
CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
2906
if (pmd->numa_id == numa_id) {
2907
dp_netdev_del_pmd(pmd);
2912
/* Checks the numa node id of 'netdev' and starts pmd threads for
2915
dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
2919
if (!ovs_numa_numa_id_is_valid(numa_id)) {
2920
VLOG_ERR("Cannot create pmd threads due to invalid numa id (%d)",
         numa_id);
2925
n_pmds = get_n_pmd_threads_on_numa(dp, numa_id);
2927
/* If there are already pmd threads created for the numa node
* on which 'netdev' is, do nothing.  Otherwise, creates the
* pmd threads for the numa node. */
2931
int can_have, n_unpinned, i;
2933
n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id);
2935
VLOG_ERR("Cannot create pmd threads: no unpinned cores "
         "on numa node");
2940
/* If cpu mask is specified, uses all unpinned cores, otherwise
2941
* tries creating NR_PMD_THREADS pmd threads. */
2942
can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, NR_PMD_THREADS);
2943
for (i = 0; i < can_have; i++) {
2944
struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
2945
unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
2947
dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
2948
/* Each thread will distribute all devices rx-queues among
2950
pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
2952
VLOG_INFO("Created %d pmd threads on numa node %d", can_have, numa_id);
1968
dp_netdev_flow_stats_new_cb(void)
1970
struct dp_netdev_flow_stats *bucket = xzalloc_cacheline(sizeof *bucket);
1971
ovs_mutex_init(&bucket->mutex);
1976
dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
1977
const struct ofpbuf *packet,
1978
const struct miniflow *key)
1980
uint16_t tcp_flags = miniflow_get_tcp_flags(key);
1981
long long int now = time_msec();
1982
struct dp_netdev_flow_stats *bucket;
1984
bucket = ovsthread_stats_bucket_get(&netdev_flow->stats,
1985
dp_netdev_flow_stats_new_cb);
1987
ovs_mutex_lock(&bucket->mutex);
1988
bucket->used = MAX(now, bucket->used);
1989
bucket->packet_count++;
1990
bucket->byte_count += ofpbuf_size(packet);
1991
bucket->tcp_flags |= tcp_flags;
1992
ovs_mutex_unlock(&bucket->mutex);
1996
dp_netdev_stats_new_cb(void)
1998
struct dp_netdev_stats *bucket = xzalloc_cacheline(sizeof *bucket);
1999
ovs_mutex_init(&bucket->mutex);
2004
dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type)
2006
struct dp_netdev_stats *bucket;
2008
bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
2009
ovs_mutex_lock(&bucket->mutex);
2011
ovs_mutex_unlock(&bucket->mutex);
2015
dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet,
2016
struct pkt_metadata *md)
2017
OVS_REQ_RDLOCK(dp->port_rwlock)
2019
struct dp_netdev_flow *netdev_flow;
2021
struct miniflow flow;
2022
uint32_t buf[FLOW_U32S];
2025
if (ofpbuf_size(packet) < ETH_HEADER_LEN) {
2026
ofpbuf_delete(packet);
2957
/* Called after pmd threads config change. Restarts pmd threads with
2958
* new configuration. */
2960
dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
2962
struct dp_netdev_port *port;
2964
CMAP_FOR_EACH (port, node, &dp->ports) {
2965
if (netdev_is_pmd(port->netdev)) {
2966
int numa_id = netdev_get_numa_id(port->netdev);
2968
dp_netdev_set_pmds_on_numa(dp, numa_id);
2974
dpif_netdev_get_datapath_version(void)
2976
return xstrdup("<built-in>");
2980
dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow, int cnt, int size,
2981
uint16_t tcp_flags, long long now)
2985
atomic_store_relaxed(&netdev_flow->stats.used, now);
2986
non_atomic_ullong_add(&netdev_flow->stats.packet_count, cnt);
2987
non_atomic_ullong_add(&netdev_flow->stats.byte_count, size);
2988
atomic_read_relaxed(&netdev_flow->stats.tcp_flags, &flags);
2990
atomic_store_relaxed(&netdev_flow->stats.tcp_flags, flags);
2994
dp_netdev_count_packet(struct dp_netdev_pmd_thread *pmd,
2995
enum dp_stat_type type, int cnt)
2997
non_atomic_ullong_add(&pmd->stats.n[type], cnt);
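/* Explanatory note: these counters are per-pmd and, in practice, only
 * updated from the owning thread's context, so a plain (non-atomic) add is
 * sufficient; readers such as dpif_netdev_pmd_info() use
 * atomic_read_relaxed() and tolerate slightly stale values. */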
3001
dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
3002
struct flow *flow, struct flow_wildcards *wc, ovs_u128 *ufid,
3003
enum dpif_upcall_type type, const struct nlattr *userdata,
3004
struct ofpbuf *actions, struct ofpbuf *put_actions)
3006
struct dp_netdev *dp = pmd->dp;
3008
if (OVS_UNLIKELY(!dp->upcall_cb)) {
3012
if (OVS_UNLIKELY(!VLOG_DROP_DBG(&upcall_rl))) {
3013
struct ds ds = DS_EMPTY_INITIALIZER;
3017
ofpbuf_init(&key, 0);
3018
odp_flow_key_from_flow(&key, flow, &wc->masks, flow->in_port.odp_port,
3020
packet_str = ofp_packet_to_string(dp_packet_data(packet_),
3021
dp_packet_size(packet_));
3023
odp_flow_key_format(key.data, key.size, &ds);
3025
VLOG_DBG("%s: %s upcall:\n%s\n%s", dp->name,
3026
dpif_upcall_type_to_string(type), ds_cstr(&ds), packet_str);
3028
ofpbuf_uninit(&key);
3034
return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
3035
actions, wc, put_actions, dp->upcall_aux);
3038
static inline uint32_t
3039
dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
3040
const struct miniflow *mf)
3042
uint32_t hash, recirc_depth;
3044
hash = dp_packet_get_rss_hash(packet);
3045
if (OVS_UNLIKELY(!hash)) {
3046
hash = miniflow_hash_5tuple(mf, 0);
3047
dp_packet_set_rss_hash(packet, hash);
3050
/* The RSS hash must account for the recirculation depth to avoid
3051
* collisions in the exact match cache */
3052
recirc_depth = *recirc_depth_get_unsafe();
3053
if (OVS_UNLIKELY(recirc_depth)) {
3054
hash = hash_finish(hash, recirc_depth);
3055
dp_packet_set_rss_hash(packet, hash);
3060
struct packet_batch {
3061
unsigned int packet_count;
3062
unsigned int byte_count;
3065
struct dp_netdev_flow *flow;
3067
struct dp_packet *packets[NETDEV_MAX_BURST];
3071
packet_batch_update(struct packet_batch *batch, struct dp_packet *packet,
3072
const struct miniflow *mf)
3074
batch->tcp_flags |= miniflow_get_tcp_flags(mf);
3075
batch->packets[batch->packet_count++] = packet;
3076
batch->byte_count += dp_packet_size(packet);
3080
packet_batch_init(struct packet_batch *batch, struct dp_netdev_flow *flow)
3082
flow->batch = batch;
3085
batch->packet_count = 0;
3086
batch->byte_count = 0;
3087
batch->tcp_flags = 0;
3091
packet_batch_execute(struct packet_batch *batch,
3092
struct dp_netdev_pmd_thread *pmd,
3095
struct dp_netdev_actions *actions;
3096
struct dp_netdev_flow *flow = batch->flow;
3098
dp_netdev_flow_used(flow, batch->packet_count, batch->byte_count,
3099
batch->tcp_flags, now);
3101
actions = dp_netdev_flow_get_actions(flow);
3103
dp_netdev_execute_actions(pmd, batch->packets, batch->packet_count, true,
3104
actions->actions, actions->size);
3108
dp_netdev_queue_batches(struct dp_packet *pkt,
3109
struct dp_netdev_flow *flow, const struct miniflow *mf,
3110
struct packet_batch *batches, size_t *n_batches)
3112
struct packet_batch *batch = flow->batch;
3114
if (OVS_LIKELY(batch)) {
3115
packet_batch_update(batch, pkt, mf);
2029
miniflow_initialize(&key.flow, key.buf);
2030
miniflow_extract(packet, md, &key.flow);
2032
netdev_flow = dp_netdev_lookup_flow(dp, &key.flow);
2034
struct dp_netdev_actions *actions;
2036
dp_netdev_flow_used(netdev_flow, packet, &key.flow);
2038
actions = dp_netdev_flow_get_actions(netdev_flow);
2039
dp_netdev_execute_actions(dp, &key.flow, packet, true, md,
2040
actions->actions, actions->size);
2041
dp_netdev_count_packet(dp, DP_STAT_HIT);
2042
} else if (dp->handler_queues) {
2043
dp_netdev_count_packet(dp, DP_STAT_MISS);
2044
dp_netdev_output_userspace(dp, packet,
2045
miniflow_hash_5tuple(&key.flow, 0)
2047
DPIF_UC_MISS, &key.flow, NULL);
2048
ofpbuf_delete(packet);
3119
batch = &batches[(*n_batches)++];
3120
packet_batch_init(batch, flow);
3121
packet_batch_update(batch, pkt, mf);
3125
dp_packet_swap(struct dp_packet **a, struct dp_packet **b)
3127
struct dp_packet *tmp = *a;
3132
/* Try to process all 'cnt' 'packets' using only the exact match cache
* 'flow_cache'.  If a flow is not found for a packet 'packets[i]', its
* miniflow is copied into 'keys' and the packet pointer is moved to the
* beginning of the 'packets' array.
*
* The function returns the number of packets that need to be processed in the
* 'packets' array (they have been moved to the beginning of the vector). */
3140
static inline size_t
3141
emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets,
3142
size_t cnt, struct netdev_flow_key *keys,
3143
struct packet_batch batches[], size_t *n_batches)
3145
struct emc_cache *flow_cache = &pmd->flow_cache;
3146
struct netdev_flow_key key;
3147
size_t i, notfound_cnt = 0;
3149
miniflow_initialize(&key.mf, key.buf);
3150
for (i = 0; i < cnt; i++) {
3151
struct dp_netdev_flow *flow;
3153
if (OVS_UNLIKELY(dp_packet_size(packets[i]) < ETH_HEADER_LEN)) {
3154
dp_packet_delete(packets[i]);
3159
/* Prefetch next packet data */
3160
OVS_PREFETCH(dp_packet_data(packets[i+1]));
3163
miniflow_extract(packets[i], &key.mf);
3164
key.len = 0; /* Not computed yet. */
3165
key.hash = dpif_netdev_packet_get_rss_hash(packets[i], &key.mf);
3167
flow = emc_lookup(flow_cache, &key);
3168
if (OVS_LIKELY(flow)) {
3169
dp_netdev_queue_batches(packets[i], flow, &key.mf, batches,
3172
if (i != notfound_cnt) {
3173
dp_packet_swap(&packets[i], &packets[notfound_cnt]);
3176
keys[notfound_cnt++] = key;
3180
dp_netdev_count_packet(pmd, DP_STAT_EXACT_HIT, cnt - notfound_cnt);
3182
return notfound_cnt;
3186
fast_path_processing(struct dp_netdev_pmd_thread *pmd,
3187
struct dp_packet **packets, size_t cnt,
3188
struct netdev_flow_key *keys,
3189
struct packet_batch batches[], size_t *n_batches)
3191
#if !defined(__CHECKER__) && !defined(_WIN32)
3192
const size_t PKT_ARRAY_SIZE = cnt;
3194
/* Sparse or MSVC doesn't like variable length array. */
3195
enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
3197
struct dpcls_rule *rules[PKT_ARRAY_SIZE];
3198
struct dp_netdev *dp = pmd->dp;
3199
struct emc_cache *flow_cache = &pmd->flow_cache;
3200
int miss_cnt = 0, lost_cnt = 0;
3204
for (i = 0; i < cnt; i++) {
3205
/* Key length is needed in all the cases, hash computed on demand. */
3206
keys[i].len = netdev_flow_key_size(count_1bits(keys[i].mf.map));
3208
any_miss = !dpcls_lookup(&pmd->cls, keys, rules, cnt);
3209
if (OVS_UNLIKELY(any_miss) && !fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
3210
uint64_t actions_stub[512 / 8], slow_stub[512 / 8];
3211
struct ofpbuf actions, put_actions;
3214
ofpbuf_use_stub(&actions, actions_stub, sizeof actions_stub);
3215
ofpbuf_use_stub(&put_actions, slow_stub, sizeof slow_stub);
3217
for (i = 0; i < cnt; i++) {
3218
struct dp_netdev_flow *netdev_flow;
3219
struct ofpbuf *add_actions;
3223
if (OVS_LIKELY(rules[i])) {
3227
/* It's possible that an earlier slow path execution installed
3228
* a rule covering this flow. In this case, it's a lot cheaper
3229
* to catch it here than execute a miss. */
3230
netdev_flow = dp_netdev_pmd_lookup_flow(pmd, &keys[i]);
3232
rules[i] = &netdev_flow->cr;
3238
miniflow_expand(&keys[i].mf, &match.flow);
3240
ofpbuf_clear(&actions);
3241
ofpbuf_clear(&put_actions);
3243
dpif_flow_hash(dp->dpif, &match.flow, sizeof match.flow, &ufid);
3244
error = dp_netdev_upcall(pmd, packets[i], &match.flow, &match.wc,
3245
&ufid, DPIF_UC_MISS, NULL, &actions,
3247
if (OVS_UNLIKELY(error && error != ENOSPC)) {
3248
dp_packet_delete(packets[i]);
3253
/* We can't allow the packet batching in the next loop to execute
3254
* the actions. Otherwise, if there are any slow path actions,
3255
* we'll send the packet up twice. */
3256
dp_netdev_execute_actions(pmd, &packets[i], 1, true,
3257
actions.data, actions.size);
3259
add_actions = put_actions.size ? &put_actions : &actions;
3260
if (OVS_LIKELY(error != ENOSPC)) {
3261
/* XXX: There's a race window where a flow covering this packet
3262
* could have already been installed since we last did the flow
3263
* lookup before upcall. This could be solved by moving the
3264
* mutex lock outside the loop, but that's an awful long time
3265
* to be locking everyone out of making flow installs. If we
3266
* move to a per-core classifier, it would be reasonable. */
3267
ovs_mutex_lock(&pmd->flow_mutex);
3268
netdev_flow = dp_netdev_pmd_lookup_flow(pmd, &keys[i]);
3269
if (OVS_LIKELY(!netdev_flow)) {
3270
netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
3274
ovs_mutex_unlock(&pmd->flow_mutex);
3276
emc_insert(flow_cache, &keys[i], netdev_flow);
3280
ofpbuf_uninit(&actions);
3281
ofpbuf_uninit(&put_actions);
3282
fat_rwlock_unlock(&dp->upcall_rwlock);
3283
dp_netdev_count_packet(pmd, DP_STAT_LOST, lost_cnt);
3284
} else if (OVS_UNLIKELY(any_miss)) {
3285
for (i = 0; i < cnt; i++) {
3286
if (OVS_UNLIKELY(!rules[i])) {
3287
dp_packet_delete(packets[i]);
3294
for (i = 0; i < cnt; i++) {
3295
struct dp_packet *packet = packets[i];
3296
struct dp_netdev_flow *flow;
3298
if (OVS_UNLIKELY(!rules[i])) {
3302
flow = dp_netdev_flow_cast(rules[i]);
3304
emc_insert(flow_cache, &keys[i], flow);
3305
dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
3308
dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt);
3309
dp_netdev_count_packet(pmd, DP_STAT_MISS, miss_cnt);
3310
dp_netdev_count_packet(pmd, DP_STAT_LOST, lost_cnt);
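/* Accounting note: 'miss' counts packets that did not match in the datapath
 * classifier and needed an upcall; 'lost' counts the subset of those that
 * could not be passed to userspace (upcalls disabled or the upcall failed).
 * This is why pmd_info_show_stats() treats lost packets as already included
 * in DP_STAT_MISS. */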
2053
dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
2054
struct pkt_metadata *md)
2055
OVS_REQ_RDLOCK(dp->port_rwlock)
2057
uint32_t *recirc_depth = recirc_depth_get();
2060
dp_netdev_input(dp, packet, md);
2064
dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
2065
int queue_no, int type, const struct miniflow *key,
2066
const struct nlattr *userdata)
2068
struct dp_netdev_queue *q;
2071
fat_rwlock_rdlock(&dp->queue_rwlock);
2072
q = &dp->handler_queues[queue_no];
2073
ovs_mutex_lock(&q->mutex);
2074
if (q->head - q->tail < MAX_QUEUE_LEN) {
2075
struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
2076
struct dpif_upcall *upcall = &u->upcall;
2077
struct ofpbuf *buf = &u->buf;
2082
upcall->type = type;
2084
/* Allocate buffer big enough for everything. */
2085
buf_size = ODPUTIL_FLOW_KEY_BYTES;
2087
buf_size += NLA_ALIGN(userdata->nla_len);
2089
buf_size += ofpbuf_size(packet);
2090
ofpbuf_init(buf, buf_size);
2093
miniflow_expand(key, &flow);
2094
odp_flow_key_from_flow(buf, &flow, NULL, flow.in_port.odp_port);
2095
upcall->key = ofpbuf_data(buf);
2096
upcall->key_len = ofpbuf_size(buf);
2100
upcall->userdata = ofpbuf_put(buf, userdata,
2101
NLA_ALIGN(userdata->nla_len));
2104
data = ofpbuf_put(buf, ofpbuf_data(packet), ofpbuf_size(packet));
2105
ofpbuf_use_stub(&upcall->packet, data, ofpbuf_size(packet));
2106
ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet));
2112
dp_netdev_count_packet(dp, DP_STAT_LOST);
2115
ovs_mutex_unlock(&q->mutex);
2116
fat_rwlock_unlock(&dp->queue_rwlock);
3314
dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
3315
struct dp_packet **packets, int cnt)
3317
#if !defined(__CHECKER__) && !defined(_WIN32)
3318
const size_t PKT_ARRAY_SIZE = cnt;
3320
/* Sparse or MSVC doesn't like variable length array. */
3321
enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
3323
struct netdev_flow_key keys[PKT_ARRAY_SIZE];
3324
struct packet_batch batches[PKT_ARRAY_SIZE];
3325
long long now = time_msec();
3326
size_t newcnt, n_batches, i;
3329
newcnt = emc_processing(pmd, packets, cnt, keys, batches, &n_batches);
3330
if (OVS_UNLIKELY(newcnt)) {
3331
fast_path_processing(pmd, packets, newcnt, keys, batches, &n_batches);
3334
for (i = 0; i < n_batches; i++) {
3335
batches[i].flow->batch = NULL;
3338
for (i = 0; i < n_batches; i++) {
3339
packet_batch_execute(&batches[i], pmd, now);
2121
3343
struct dp_netdev_execute_aux {
2122
struct dp_netdev *dp;
2123
const struct miniflow *key;
3344
struct dp_netdev_pmd_thread *pmd;
2127
dp_execute_cb(void *aux_, struct ofpbuf *packet,
2128
struct pkt_metadata *md,
3348
dpif_netdev_register_upcall_cb(struct dpif *dpif, upcall_callback *cb,
3351
struct dp_netdev *dp = get_dp_netdev(dpif);
3352
dp->upcall_aux = aux;
3357
dp_netdev_drop_packets(struct dp_packet **packets, int cnt, bool may_steal)
3362
for (i = 0; i < cnt; i++) {
3363
dp_packet_delete(packets[i]);
3369
push_tnl_action(const struct dp_netdev *dp,
3370
const struct nlattr *attr,
3371
struct dp_packet **packets, int cnt)
3373
struct dp_netdev_port *tun_port;
3374
const struct ovs_action_push_tnl *data;
3376
data = nl_attr_get(attr);
3378
tun_port = dp_netdev_lookup_port(dp, u32_to_odp(data->tnl_port));
3382
netdev_push_header(tun_port->netdev, packets, cnt, data);
3388
dp_netdev_clone_pkt_batch(struct dp_packet **dst_pkts,
3389
struct dp_packet **src_pkts, int cnt)
3393
for (i = 0; i < cnt; i++) {
3394
dst_pkts[i] = dp_packet_clone(src_pkts[i]);
3399
dp_execute_cb(void *aux_, struct dp_packet **packets, int cnt,
2129
3400
const struct nlattr *a, bool may_steal)
2130
3401
OVS_NO_THREAD_SAFETY_ANALYSIS
2132
3403
struct dp_netdev_execute_aux *aux = aux_;
3404
uint32_t *depth = recirc_depth_get();
3405
struct dp_netdev_pmd_thread *pmd = aux->pmd;
3406
struct dp_netdev *dp = pmd->dp;
2133
3407
int type = nl_attr_type(a);
2134
3408
struct dp_netdev_port *p;
2135
uint32_t *depth = recirc_depth_get();
2137
3411
switch ((enum ovs_action_attr)type) {
2138
3412
case OVS_ACTION_ATTR_OUTPUT:
2139
p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a)));
2141
netdev_send(p->netdev, packet, may_steal);
2142
} else if (may_steal) {
2143
ofpbuf_delete(packet);
2148
case OVS_ACTION_ATTR_USERSPACE: {
2149
const struct nlattr *userdata;
2151
userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
2153
if (aux->dp->n_handlers > 0) {
2154
dp_netdev_output_userspace(aux->dp, packet,
2155
miniflow_hash_5tuple(aux->key, 0)
2156
% aux->dp->n_handlers,
2157
DPIF_UC_ACTION, aux->key,
2162
ofpbuf_delete(packet);
2167
case OVS_ACTION_ATTR_HASH: {
2168
const struct ovs_action_hash *hash_act;
2171
hash_act = nl_attr_get(a);
2172
if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
2173
/* Hash need not be symmetric, nor does it need to include
2175
hash = miniflow_hash_5tuple(aux->key, hash_act->hash_basis);
2177
hash = 1; /* 0 is not valid */
2181
VLOG_WARN("Unknown hash algorithm specified for the hash action.");
3413
p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a)));
3414
if (OVS_LIKELY(p)) {
3415
netdev_send(p->netdev, pmd->tx_qid, packets, cnt, may_steal);
3420
case OVS_ACTION_ATTR_TUNNEL_PUSH:
3421
if (*depth < MAX_RECIRC_DEPTH) {
3422
struct dp_packet *tnl_pkt[NETDEV_MAX_BURST];
3426
dp_netdev_clone_pkt_batch(tnl_pkt, packets, cnt);
3430
err = push_tnl_action(dp, a, packets, cnt);
3433
dp_netdev_input(pmd, packets, cnt);
3436
dp_netdev_drop_packets(tnl_pkt, cnt, !may_steal);
    case OVS_ACTION_ATTR_TUNNEL_POP:
        if (*depth < MAX_RECIRC_DEPTH) {
            odp_port_t portno = u32_to_odp(nl_attr_get_u32(a));

            p = dp_netdev_lookup_port(dp, portno);

            struct dp_packet *tnl_pkt[NETDEV_MAX_BURST];

            dp_netdev_clone_pkt_batch(tnl_pkt, packets, cnt);

            err = netdev_pop_header(p->netdev, packets, cnt);

            for (i = 0; i < cnt; i++) {
                packets[i]->md.in_port.odp_port = portno;
            }

            dp_netdev_input(pmd, packets, cnt);

            dp_netdev_drop_packets(tnl_pkt, cnt, !may_steal);
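    /* For OVS_ACTION_ATTR_USERSPACE each packet in the batch gets its own
     * upcall: the flow is re-extracted, the registered upcall callback
     * produces a set of actions, and those actions are executed on that
     * single packet. */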
    case OVS_ACTION_ATTR_USERSPACE:
        if (!fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
            const struct nlattr *userdata;
            struct ofpbuf actions;

            userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
            ofpbuf_init(&actions, 0);

            for (i = 0; i < cnt; i++) {
                ofpbuf_clear(&actions);

                flow_extract(packets[i], &flow);
                dpif_flow_hash(dp->dpif, &flow, sizeof flow, &ufid);
                error = dp_netdev_upcall(pmd, packets[i], &flow, NULL, &ufid,
                                         DPIF_UC_ACTION, userdata, &actions,
                                         NULL);
                if (!error || error == ENOSPC) {
                    dp_netdev_execute_actions(pmd, &packets[i], 1, may_steal,
                                              actions.data, actions.size);
                } else if (may_steal) {
                    dp_packet_delete(packets[i]);
                }
            }
            ofpbuf_uninit(&actions);
            fat_rwlock_unlock(&dp->upcall_rwlock);
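    /* OVS_ACTION_ATTR_RECIRC re-injects the batch through dp_netdev_input()
     * with 'md.recirc_id' set, so the packets are classified again; when
     * 'may_steal' is false the batch is cloned first so the caller keeps its
     * own copies.  MAX_RECIRC_DEPTH bounds the recursion. */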
    case OVS_ACTION_ATTR_RECIRC:
        if (*depth < MAX_RECIRC_DEPTH) {
            struct dp_packet *recirc_pkts[NETDEV_MAX_BURST];

            dp_netdev_clone_pkt_batch(recirc_pkts, packets, cnt);
            packets = recirc_pkts;

            for (i = 0; i < cnt; i++) {
                packets[i]->md.recirc_id = nl_attr_get_u32(a);
            }

            dp_netdev_input(pmd, packets, cnt);
        }

        VLOG_WARN("Packet dropped. Max recirculation depth exceeded.");
    case OVS_ACTION_ATTR_PUSH_VLAN:

    dpif_dummy_register__("dummy");

    unixctl_command_register("dpif-dummy/change-port-number",
                             "dp port new-number",
                             3, 3, dpif_dummy_change_port_number, NULL);
    unixctl_command_register("dpif-dummy/delete-port", "dp port",
                             2, 2, dpif_dummy_delete_port, NULL);
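    /* Both commands are invoked through ovs-appctl, matching the usage
     * strings registered above, e.g.:
     *
     *   ovs-appctl dpif-dummy/change-port-number <dp> <port> <new-number>
     *   ovs-appctl dpif-dummy/delete-port <dp> <port>
     */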

/* Datapath Classifier. */

/* A set of rules that all have the same fields wildcarded. */
struct dpcls_subtable {
    /* The fields are only used by writers. */
    struct cmap_node cmap_node OVS_GUARDED; /* Within dpcls 'subtables_map'. */

    /* These fields are accessed by readers. */
    struct cmap rules;           /* Contains "struct dpcls_rule"s. */
    struct netdev_flow_key mask; /* Wildcards for fields (const). */
    /* 'mask' must be the last field, additional space is allocated here. */
};

/* Initializes 'cls' as a classifier that initially contains no classification
 * rules. */
static void
dpcls_init(struct dpcls *cls)
{
    cmap_init(&cls->subtables_map);
    pvector_init(&cls->subtables);
}

static void
dpcls_destroy_subtable(struct dpcls *cls, struct dpcls_subtable *subtable)
{
    pvector_remove(&cls->subtables, subtable);
    cmap_remove(&cls->subtables_map, &subtable->cmap_node,
                subtable->mask.hash);
    cmap_destroy(&subtable->rules);
    ovsrcu_postpone(free, subtable);
}
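
/* Freeing the subtable through ovsrcu_postpone() lets lockless readers that
 * may still be iterating it (dpcls_lookup() running in pmd threads) finish
 * before the memory is reclaimed. */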

/* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
 * caller's responsibility.
 * May only be called after all the readers have been terminated. */
static void
dpcls_destroy(struct dpcls *cls)
{
    struct dpcls_subtable *subtable;

    CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
        dpcls_destroy_subtable(cls, subtable);
    }
    cmap_destroy(&cls->subtables_map);
    pvector_destroy(&cls->subtables);
}

static struct dpcls_subtable *
dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
{
    struct dpcls_subtable *subtable;

    /* Need to add one. */
    subtable = xmalloc(sizeof *subtable
                       - sizeof subtable->mask.mf + mask->len);
    cmap_init(&subtable->rules);
    netdev_flow_key_clone(&subtable->mask, mask);
    cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash);
    pvector_insert(&cls->subtables, subtable, 0);
    pvector_publish(&cls->subtables);

    return subtable;
}
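
/* The xmalloc() above sizes the trailing 'mask' so that only the portion of
 * the miniflow actually used ('mask->len' bytes) is allocated instead of the
 * full inline buffer; this is why 'mask' must be the struct's last member. */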

static inline struct dpcls_subtable *
dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
{
    struct dpcls_subtable *subtable;

    CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, mask->hash,
                             &cls->subtables_map) {
        if (netdev_flow_key_equal(&subtable->mask, mask)) {
            return subtable;
        }
    }
    return dpcls_create_subtable(cls, mask);
}

/* Insert 'rule' into 'cls'. */
static void
dpcls_insert(struct dpcls *cls, struct dpcls_rule *rule,
             const struct netdev_flow_key *mask)
{
    struct dpcls_subtable *subtable = dpcls_find_subtable(cls, mask);

    rule->mask = &subtable->mask;
    cmap_insert(&subtable->rules, &rule->cmap_node, rule->flow.hash);
}
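
/* For dpcls_lookup() to find the rule, 'rule->flow.hash' must have been
 * computed the same way the lookup hashes packets, i.e. over the flow fields
 * selected by this subtable's mask (see netdev_flow_key_hash_in_mask()). */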

/* Removes 'rule' from 'cls', also destructing the 'rule'. */
static void
dpcls_remove(struct dpcls *cls, struct dpcls_rule *rule)
{
    struct dpcls_subtable *subtable;

    ovs_assert(rule->mask);

    INIT_CONTAINER(subtable, rule->mask, mask);

    if (cmap_remove(&subtable->rules, &rule->cmap_node, rule->flow.hash)
        == 0) {
        dpcls_destroy_subtable(cls, subtable);
        pvector_publish(&cls->subtables);
    }
}

/* Returns true if 'target' satisfies 'key' in 'mask', that is, if for each
 * 1-bit in 'mask' the corresponding bits in 'key' and 'target' are the same.
 *
 * Note: 'key' and 'mask' have the same mask, and 'key' is already masked. */
static inline bool
dpcls_rule_matches_key(const struct dpcls_rule *rule,
                       const struct netdev_flow_key *target)
{
    const uint64_t *keyp = rule->flow.mf.inline_values;
    const uint64_t *maskp = rule->mask->mf.inline_values;
    uint64_t target_u64;

    NETDEV_FLOW_KEY_FOR_EACH_IN_MAP(target_u64, target, rule->flow.mf.map) {
        if (OVS_UNLIKELY((target_u64 & *maskp++) != *keyp++)) {
            return false;
        }
    }
    return true;
}

/* For each miniflow in 'keys' performs a classifier lookup writing the result
 * into the corresponding slot in 'rules'.  If a particular entry in 'keys' is
 * NULL it is skipped.
 *
 * This function is optimized for use in the userspace datapath and therefore
 * does not implement a lot of features available in the standard
 * classifier_lookup() function.  Specifically, it does not implement
 * priorities, instead returning any rule which matches the flow.
 *
 * Returns true if all flows found a corresponding rule. */
static bool
dpcls_lookup(const struct dpcls *cls, const struct netdev_flow_key keys[],
             struct dpcls_rule **rules, const size_t cnt)
{
    /* The batch size 16 was experimentally found faster than 8 or 32. */
    typedef uint16_t map_type;
#define MAP_BITS (sizeof(map_type) * CHAR_BIT)

#if !defined(__CHECKER__) && !defined(_WIN32)
    const int N_MAPS = DIV_ROUND_UP(cnt, MAP_BITS);
#else
    enum { N_MAPS = DIV_ROUND_UP(NETDEV_MAX_BURST, MAP_BITS) };
#endif
    map_type maps[N_MAPS];
    struct dpcls_subtable *subtable;

    memset(maps, 0xff, sizeof maps);
    if (cnt % MAP_BITS) {
        maps[N_MAPS - 1] >>= MAP_BITS - cnt % MAP_BITS; /* Clear extra bits. */
    }
    memset(rules, 0, cnt * sizeof *rules);

    PVECTOR_FOR_EACH (subtable, &cls->subtables) {
        const struct netdev_flow_key *mkeys = keys;
        struct dpcls_rule **mrules = rules;
        map_type remains = 0;
        int m;

        BUILD_ASSERT_DECL(sizeof remains == sizeof *maps);

        for (m = 0; m < N_MAPS; m++, mkeys += MAP_BITS, mrules += MAP_BITS) {
            uint32_t hashes[MAP_BITS];
            const struct cmap_node *nodes[MAP_BITS];
            unsigned long map = maps[m];
            int i;

            if (!map) {
                continue; /* Skip empty maps. */
            }

            /* Compute hashes for the remaining keys. */
            ULONG_FOR_EACH_1(i, map) {
                hashes[i] = netdev_flow_key_hash_in_mask(&mkeys[i],
                                                         &subtable->mask);
            }

            map = cmap_find_batch(&subtable->rules, map, hashes, nodes);
            /* Check results. */
            ULONG_FOR_EACH_1(i, map) {
                struct dpcls_rule *rule;

                CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) {
                    if (OVS_LIKELY(dpcls_rule_matches_key(rule, &mkeys[i]))) {
                        mrules[i] = rule;
                        goto next;
                    }
                }
                ULONG_SET0(map, i); /* Did not match. */
            next:
                ;                   /* Keep Sparse happy. */
            }
            maps[m] &= ~map;        /* Clear the found rules. */
            remains |= maps[m];
        }
        if (!remains) {
            return true;            /* All found. */
        }
    }
    return false;                   /* Some misses. */
}
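
/* Illustrative sketch (not part of the original source): how a fast-path
 * caller might use the dpcls API above for a burst of packets.  The helper
 * name and its exact shape are assumptions for illustration only; in the
 * real datapath this work is done by fast_path_processing(). */
static inline int
dpcls_lookup_batch_example(struct dpcls *cls,
                           const struct netdev_flow_key keys[], size_t cnt,
                           struct dpcls_rule *rules[])
{
    int misses = 0;

    /* 'cnt' is expected to be at most NETDEV_MAX_BURST.  One batched lookup
     * covers the whole burst; 'rules[i]' stays NULL for any key that did not
     * match a rule in any subtable. */
    if (!dpcls_lookup(cls, keys, rules, cnt)) {
        size_t i;

        for (i = 0; i < cnt; i++) {
            if (!rules[i]) {
                misses++; /* These packets would take the upcall path. */
            }
        }
    }
    return misses;
}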