143
struct flow_miss *flow_miss; /* This upcall's flow_miss. */
145
/* Raw upcall plus data for keeping track of the memory backing it. */
146
struct dpif_upcall dpif_upcall; /* As returned by dpif_recv() */
147
struct ofpbuf upcall_buf; /* Owns some data in 'dpif_upcall'. */
148
uint64_t upcall_stub[512 / 8]; /* Buffer to reduce need for malloc(). */
154
struct ofproto_dpif *ofproto; /* Parent ofproto. */
155
const struct recirc_id_node *recirc; /* Recirculation context. */
156
bool have_recirc_ref; /* Reference held on recirc ctx? */
158
/* The flow and packet are only required to be constant when using
159
* dpif-netdev. If a modification is absolutely necessary, a const cast
160
* may be used with other datapaths. */
161
const struct flow *flow; /* Parsed representation of the packet. */
162
const ovs_u128 *ufid; /* Unique identifier for 'flow'. */
163
unsigned pmd_id; /* Datapath poll mode driver id. */
164
const struct dp_packet *packet; /* Packet associated with this upcall. */
165
ofp_port_t in_port; /* OpenFlow in port, or OFPP_NONE. */
167
enum dpif_upcall_type type; /* Datapath type of the upcall. */
168
const struct nlattr *userdata; /* Userdata for DPIF_UC_ACTION Upcalls. */
170
bool xout_initialized; /* True if 'xout' must be uninitialized. */
171
struct xlate_out xout; /* Result of xlate_actions(). */
172
struct ofpbuf put_actions; /* Actions 'put' in the fastpath. */
174
struct dpif_ipfix *ipfix; /* IPFIX pointer or NULL. */
175
struct dpif_sflow *sflow; /* SFlow pointer or NULL. */
177
bool vsp_adjusted; /* 'packet' and 'flow' were adjusted for
178
VLAN splinters if true. */
180
struct udpif_key *ukey; /* Revalidator flow cache. */
181
bool ukey_persists; /* Set true to keep 'ukey' beyond the
182
lifetime of this upcall. */
184
uint64_t dump_seq; /* udpif->dump_seq at translation time. */
185
uint64_t reval_seq; /* udpif->reval_seq at translation time. */
187
/* Not used by the upcall callback interface. */
188
const struct nlattr *key; /* Datapath flow key. */
189
size_t key_len; /* Datapath flow key length. */
190
const struct nlattr *out_tun_key; /* Datapath output tunnel key. */
151
193
/* 'udpif_key's are responsible for tracking the little bit of state udpif
152
194
* needs to do flow expiration which can't be pulled directly from the
153
* datapath. They may be created or maintained by any revalidator during
154
* the dump phase, but are owned by a single revalidator, and are destroyed
155
* by that revalidator during the garbage-collection phase.
195
* datapath. They may be created by any handler or revalidator thread at any
196
* time, and read by any revalidator during the dump phase. They are however
197
* each owned by a single revalidator which takes care of destroying them
198
* during the garbage-collection phase.
157
* While some elements of a udpif_key are protected by a mutex, the ukey itself
158
* is not. Therefore it is not safe to destroy a udpif_key except when all
159
* revalidators are in garbage collection phase, or they aren't running. */
200
* The mutex within the ukey protects some members of the ukey. The ukey
201
* itself is protected by RCU and is held within a umap in the parent udpif.
202
* Adding or removing a ukey from a umap is only safe when holding the
203
* corresponding umap lock. */
160
204
struct udpif_key {
161
struct hmap_node hmap_node; /* In parent revalidator 'ukeys' map. */
205
struct cmap_node cmap_node; /* In parent revalidator 'ukeys' map. */
163
207
/* These elements are read only once created, and therefore aren't
164
208
* protected by a mutex. */
165
209
const struct nlattr *key; /* Datapath flow key. */
166
210
size_t key_len; /* Length of 'key'. */
211
const struct nlattr *mask; /* Datapath flow mask. */
212
size_t mask_len; /* Length of 'mask'. */
213
struct ofpbuf *actions; /* Datapath flow actions as nlattrs. */
214
ovs_u128 ufid; /* Unique flow identifier. */
215
bool ufid_present; /* True if 'ufid' is in datapath. */
216
uint32_t hash; /* Pre-computed hash for 'key'. */
217
unsigned pmd_id; /* Datapath poll mode driver id. */
168
219
struct ovs_mutex mutex; /* Guards the following. */
169
220
struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
170
221
long long int created OVS_GUARDED; /* Estimate of creation time. */
171
bool mark OVS_GUARDED; /* For mark and sweep garbage
222
uint64_t dump_seq OVS_GUARDED; /* Tracks udpif->dump_seq. */
223
uint64_t reval_seq OVS_GUARDED; /* Tracks udpif->reval_seq. */
173
224
bool flow_exists OVS_GUARDED; /* Ensures flows are only deleted
176
227
struct xlate_cache *xcache OVS_GUARDED; /* Cache for xlate entries that
177
228
* are affected by this ukey.
178
229
* Used for stats and learning.*/
179
struct odputil_keybuf key_buf; /* Memory for 'key'. */
231
struct odputil_keybuf buf;
235
/* Recirculation IDs with references held by the ukey. */
237
uint32_t recircs[]; /* 'n_recircs' id's for which references are held. */
182
/* Flow miss batching.
184
* Some dpifs implement operations faster when you hand them off in a batch.
185
* To allow batching, "struct flow_miss" queues the dpif-related work needed
186
* for a given flow. Each "struct flow_miss" corresponds to sending one or
187
* more packets, plus possibly installing the flow in the dpif. */
189
struct hmap_node hmap_node;
190
struct ofproto_dpif *ofproto;
193
const struct nlattr *key;
195
enum dpif_upcall_type upcall_type;
196
struct dpif_flow_stats stats;
197
odp_port_t odp_in_port;
199
uint64_t slow_path_buf[128 / 8];
200
struct odputil_keybuf mask_buf;
202
struct xlate_out xout;
240
/* Datapath operation with optional ukey attached. */
242
struct udpif_key *ukey;
243
struct dpif_flow_stats stats; /* Stats for 'op'. */
244
struct dpif_op dop; /* Flow operation. */
207
247
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
208
static struct list all_udpifs = LIST_INITIALIZER(&all_udpifs);
248
static struct ovs_list all_udpifs = OVS_LIST_INITIALIZER(&all_udpifs);
210
static size_t read_upcalls(struct handler *,
211
struct upcall upcalls[FLOW_MISS_MAX_BATCH],
212
struct flow_miss miss_buf[FLOW_MISS_MAX_BATCH],
214
static void handle_upcalls(struct handler *, struct hmap *, struct upcall *,
250
static size_t recv_upcalls(struct handler *);
251
static int process_upcall(struct udpif *, struct upcall *,
252
struct ofpbuf *odp_actions);
253
static void handle_upcalls(struct udpif *, struct upcall *, size_t n_upcalls);
216
254
static void udpif_stop_threads(struct udpif *);
217
255
static void udpif_start_threads(struct udpif *, size_t n_handlers,
218
256
size_t n_revalidators);
515
627
struct handler *handler = arg;
516
628
struct udpif *udpif = handler->udpif;
517
struct hmap misses = HMAP_INITIALIZER(&misses);
519
630
while (!latch_is_set(&handler->udpif->exit_latch)) {
520
struct upcall upcalls[FLOW_MISS_MAX_BATCH];
521
struct flow_miss miss_buf[FLOW_MISS_MAX_BATCH];
522
struct flow_miss *miss;
525
n_upcalls = read_upcalls(handler, upcalls, miss_buf, &misses);
631
if (recv_upcalls(handler)) {
632
poll_immediate_wake();
527
634
dpif_recv_wait(udpif->dpif, handler->handler_id);
528
635
latch_wait(&udpif->exit_latch);
530
poll_immediate_wake();
532
handle_upcalls(handler, &misses, upcalls, n_upcalls);
534
HMAP_FOR_EACH (miss, hmap_node, &misses) {
535
xlate_out_uninit(&miss->xout);
538
for (i = 0; i < n_upcalls; i++) {
539
ofpbuf_uninit(&upcalls[i].dpif_upcall.packet);
540
ofpbuf_uninit(&upcalls[i].upcall_buf);
545
hmap_destroy(&misses);
644
recv_upcalls(struct handler *handler)
646
struct udpif *udpif = handler->udpif;
647
uint64_t recv_stubs[UPCALL_MAX_BATCH][512 / 8];
648
struct ofpbuf recv_bufs[UPCALL_MAX_BATCH];
649
struct dpif_upcall dupcalls[UPCALL_MAX_BATCH];
650
struct upcall upcalls[UPCALL_MAX_BATCH];
651
struct flow flows[UPCALL_MAX_BATCH];
655
while (n_upcalls < UPCALL_MAX_BATCH) {
656
struct ofpbuf *recv_buf = &recv_bufs[n_upcalls];
657
struct dpif_upcall *dupcall = &dupcalls[n_upcalls];
658
struct upcall *upcall = &upcalls[n_upcalls];
659
struct flow *flow = &flows[n_upcalls];
662
ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls],
663
sizeof recv_stubs[n_upcalls]);
664
if (dpif_recv(udpif->dpif, handler->handler_id, dupcall, recv_buf)) {
665
ofpbuf_uninit(recv_buf);
669
if (odp_flow_key_to_flow(dupcall->key, dupcall->key_len, flow)
674
error = upcall_receive(upcall, udpif->backer, &dupcall->packet,
675
dupcall->type, dupcall->userdata, flow,
676
&dupcall->ufid, PMD_ID_NULL);
678
if (error == ENODEV) {
679
/* Received packet on datapath port for which we couldn't
680
* associate an ofproto. This can happen if a port is removed
681
* while traffic is being received. Print a rate-limited
682
* message in case it happens frequently. */
683
dpif_flow_put(udpif->dpif, DPIF_FP_CREATE, dupcall->key,
684
dupcall->key_len, NULL, 0, NULL, 0,
685
&dupcall->ufid, PMD_ID_NULL, NULL);
686
VLOG_INFO_RL(&rl, "received packet on unassociated datapath "
687
"port %"PRIu32, flow->in_port.odp_port);
692
upcall->key = dupcall->key;
693
upcall->key_len = dupcall->key_len;
694
upcall->ufid = &dupcall->ufid;
696
upcall->out_tun_key = dupcall->out_tun_key;
698
if (vsp_adjust_flow(upcall->ofproto, flow, &dupcall->packet)) {
699
upcall->vsp_adjusted = true;
702
pkt_metadata_from_flow(&dupcall->packet.md, flow);
703
flow_extract(&dupcall->packet, flow);
705
error = process_upcall(udpif, upcall, NULL);
714
upcall_uninit(upcall);
716
dp_packet_uninit(&dupcall->packet);
717
ofpbuf_uninit(recv_buf);
721
handle_upcalls(handler->udpif, upcalls, n_upcalls);
722
for (i = 0; i < n_upcalls; i++) {
723
dp_packet_uninit(&dupcalls[i].packet);
724
ofpbuf_uninit(&recv_bufs[i]);
725
upcall_uninit(&upcalls[i]);
551
733
udpif_revalidator(void *arg)
710
895
pid = dpif_port_get_pid(udpif->dpif, port, flow_hash_5tuple(flow, 0));
711
odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, buf);
714
static struct flow_miss *
715
flow_miss_find(struct hmap *todo, const struct ofproto_dpif *ofproto,
716
const struct flow *flow, uint32_t hash)
718
struct flow_miss *miss;
720
HMAP_FOR_EACH_WITH_HASH (miss, hmap_node, hash, todo) {
721
if (miss->ofproto == ofproto && flow_equal(&miss->flow, flow)) {
729
/* Reads and classifies upcalls. Returns the number of upcalls successfully
732
read_upcalls(struct handler *handler,
733
struct upcall upcalls[FLOW_MISS_MAX_BATCH],
734
struct flow_miss miss_buf[FLOW_MISS_MAX_BATCH],
737
struct udpif *udpif = handler->udpif;
740
size_t n_upcalls = 0;
743
* Try reading FLOW_MISS_MAX_BATCH upcalls from dpif.
745
* Extract the flow from each upcall. Construct in 'misses' a hash table
746
* that maps each unique flow to a 'struct flow_miss'.
748
* Most commonly there is a single packet per flow_miss, but there are
749
* several reasons why there might be more than one, e.g.:
751
* - The dpif packet interface does not support TSO (or UFO, etc.), so a
752
* large packet sent to userspace is split into a sequence of smaller
755
* - A stream of quickly arriving packets in an established "slow-pathed"
758
* - Rarely, a stream of quickly arriving packets in a flow not yet
759
* established. (This is rare because most protocols do not send
760
* multiple back-to-back packets before receiving a reply from the
761
* other end of the connection, which gives OVS a chance to set up a
764
for (i = 0; i < FLOW_MISS_MAX_BATCH; i++) {
765
struct upcall *upcall = &upcalls[n_upcalls];
766
struct flow_miss *miss = &miss_buf[n_misses];
767
struct dpif_upcall *dupcall;
768
struct ofpbuf *packet;
769
struct flow_miss *existing_miss;
770
struct ofproto_dpif *ofproto;
771
struct dpif_sflow *sflow;
772
struct dpif_ipfix *ipfix;
774
enum upcall_type type;
775
odp_port_t odp_in_port;
778
ofpbuf_use_stub(&upcall->upcall_buf, upcall->upcall_stub,
779
sizeof upcall->upcall_stub);
780
error = dpif_recv(udpif->dpif, handler->handler_id,
781
&upcall->dpif_upcall, &upcall->upcall_buf);
783
ofpbuf_uninit(&upcall->upcall_buf);
787
dupcall = &upcall->dpif_upcall;
788
packet = &dupcall->packet;
789
error = xlate_receive(udpif->backer, packet, dupcall->key,
790
dupcall->key_len, &flow,
791
&ofproto, &ipfix, &sflow, NULL, &odp_in_port);
793
if (error == ENODEV) {
794
/* Received packet on datapath port for which we couldn't
795
* associate an ofproto. This can happen if a port is removed
796
* while traffic is being received. Print a rate-limited
797
* message in case it happens frequently. Install a drop flow
798
* so that future packets of the flow are inexpensively dropped
800
VLOG_INFO_RL(&rl, "received packet on unassociated datapath "
801
"port %"PRIu32, odp_in_port);
802
dpif_flow_put(udpif->dpif, DPIF_FP_CREATE,
803
dupcall->key, dupcall->key_len, NULL, 0, NULL, 0,
809
type = classify_upcall(upcall);
810
if (type == MISS_UPCALL) {
812
struct pkt_metadata md = pkt_metadata_from_flow(&flow);
814
flow_extract(packet, &md, &miss->flow);
815
hash = flow_hash(&miss->flow, 0);
816
existing_miss = flow_miss_find(misses, ofproto, &miss->flow,
818
if (!existing_miss) {
819
hmap_insert(misses, &miss->hmap_node, hash);
820
miss->ofproto = ofproto;
821
miss->key = dupcall->key;
822
miss->key_len = dupcall->key_len;
823
miss->upcall_type = dupcall->type;
824
miss->stats.n_packets = 0;
825
miss->stats.n_bytes = 0;
826
miss->stats.used = time_msec();
827
miss->stats.tcp_flags = 0;
828
miss->odp_in_port = odp_in_port;
832
miss = existing_miss;
834
miss->stats.tcp_flags |= ntohs(miss->flow.tcp_flags);
835
miss->stats.n_bytes += ofpbuf_size(packet);
836
miss->stats.n_packets++;
838
upcall->flow_miss = miss;
846
union user_action_cookie cookie;
848
memset(&cookie, 0, sizeof cookie);
849
memcpy(&cookie, nl_attr_get(dupcall->userdata),
850
sizeof cookie.sflow);
851
dpif_sflow_received(sflow, packet, &flow, odp_in_port,
857
dpif_ipfix_bridge_sample(ipfix, packet, &flow);
860
case FLOW_SAMPLE_UPCALL:
862
union user_action_cookie cookie;
864
memset(&cookie, 0, sizeof cookie);
865
memcpy(&cookie, nl_attr_get(dupcall->userdata),
866
sizeof cookie.flow_sample);
868
/* The flow reflects exactly the contents of the packet.
869
* Sample the packet using it. */
870
dpif_ipfix_flow_sample(ipfix, packet, &flow,
871
cookie.flow_sample.collector_set_id,
872
cookie.flow_sample.probability,
873
cookie.flow_sample.obs_domain_id,
874
cookie.flow_sample.obs_point_id);
883
dpif_ipfix_unref(ipfix);
884
dpif_sflow_unref(sflow);
887
ofpbuf_uninit(&upcall->dpif_upcall.packet);
888
ofpbuf_uninit(&upcall->upcall_buf);
895
handle_upcalls(struct handler *handler, struct hmap *misses,
896
struct upcall *upcalls, size_t n_upcalls)
898
struct udpif *udpif = handler->udpif;
899
struct dpif_op *opsp[FLOW_MISS_MAX_BATCH * 2];
900
struct dpif_op ops[FLOW_MISS_MAX_BATCH * 2];
901
struct flow_miss *miss;
903
unsigned int flow_limit;
904
bool fail_open, may_put;
906
atomic_read(&udpif->flow_limit, &flow_limit);
896
odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, ODPP_NONE,
900
/* If there is no error, the upcall must be destroyed with upcall_uninit()
901
* before quiescing, as the referred objects are guaranteed to exist only
902
* until the calling thread quiesces. Otherwise, do not call upcall_uninit()
903
* since the 'upcall->put_actions' remains uninitialized. */
905
upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
906
const struct dp_packet *packet, enum dpif_upcall_type type,
907
const struct nlattr *userdata, const struct flow *flow,
908
const ovs_u128 *ufid, const unsigned pmd_id)
912
error = xlate_lookup(backer, flow, &upcall->ofproto, &upcall->ipfix,
913
&upcall->sflow, NULL, &upcall->in_port);
918
upcall->recirc = NULL;
919
upcall->have_recirc_ref = false;
921
upcall->packet = packet;
923
upcall->pmd_id = pmd_id;
925
upcall->userdata = userdata;
926
ofpbuf_init(&upcall->put_actions, 0);
928
upcall->xout_initialized = false;
929
upcall->vsp_adjusted = false;
930
upcall->ukey_persists = false;
936
upcall->out_tun_key = NULL;
942
upcall_xlate(struct udpif *udpif, struct upcall *upcall,
943
struct ofpbuf *odp_actions)
945
struct dpif_flow_stats stats;
949
stats.n_bytes = dp_packet_size(upcall->packet);
950
stats.used = time_msec();
951
stats.tcp_flags = ntohs(upcall->flow->tcp_flags);
953
xlate_in_init(&xin, upcall->ofproto, upcall->flow, upcall->in_port, NULL,
954
stats.tcp_flags, upcall->packet);
955
xin.odp_actions = odp_actions;
957
if (upcall->type == DPIF_UC_MISS) {
958
xin.resubmit_stats = &stats;
961
/* We may install a datapath flow only if we get a reference to the
962
* recirculation context (otherwise we could have recirculation
963
* upcalls using recirculation ID for which no context can be
964
* found). We may still execute the flow's actions even if we
965
* don't install the flow. */
966
upcall->recirc = xin.recirc;
967
upcall->have_recirc_ref = recirc_id_node_try_ref_rcu(xin.recirc);
970
/* For non-miss upcalls, we are either executing actions (one of which
971
* is a userspace action) for an upcall, in which case the stats have
972
* already been taken care of, or there's a flow in the datapath which
973
* this packet was accounted to. Presumably the revalidators will deal
974
* with pushing its stats eventually. */
977
upcall->dump_seq = seq_read(udpif->dump_seq);
978
upcall->reval_seq = seq_read(udpif->reval_seq);
979
xlate_actions(&xin, &upcall->xout);
980
upcall->xout_initialized = true;
982
/* Special case for fail-open mode.
984
* If we are in fail-open mode, but we are connected to a controller too,
985
* then we should send the packet up to the controller in the hope that it
986
* will try to set up a flow and thereby allow us to exit fail-open.
988
* See the top-level comment in fail-open.c for more information.
990
* Copy packets before they are modified by execution. */
991
if (upcall->xout.fail_open) {
992
const struct dp_packet *packet = upcall->packet;
993
struct ofproto_packet_in *pin;
995
pin = xmalloc(sizeof *pin);
996
pin->up.packet = xmemdup(dp_packet_data(packet), dp_packet_size(packet));
997
pin->up.packet_len = dp_packet_size(packet);
998
pin->up.reason = OFPR_NO_MATCH;
999
pin->up.table_id = 0;
1000
pin->up.cookie = OVS_BE64_MAX;
1001
flow_get_metadata(upcall->flow, &pin->up.flow_metadata);
1002
pin->send_len = 0; /* Not used for flow table misses. */
1003
pin->miss_type = OFPROTO_PACKET_IN_NO_MISS;
1004
ofproto_dpif_send_packet_in(upcall->ofproto, pin);
1007
if (!upcall->xout.slow) {
1008
ofpbuf_use_const(&upcall->put_actions,
1009
upcall->xout.odp_actions->data,
1010
upcall->xout.odp_actions->size);
1012
ofpbuf_init(&upcall->put_actions, 0);
1013
compose_slow_path(udpif, &upcall->xout, upcall->flow,
1014
upcall->flow->in_port.odp_port,
1015
&upcall->put_actions);
1018
/* This function is also called for slow-pathed flows. As we are only
1019
* going to create new datapath flows for actual datapath misses, there is
1020
* no point in creating a ukey otherwise. */
1021
if (upcall->type == DPIF_UC_MISS) {
1022
upcall->ukey = ukey_create_from_upcall(upcall);
1027
upcall_uninit(struct upcall *upcall)
1030
if (upcall->xout_initialized) {
1031
xlate_out_uninit(&upcall->xout);
1033
ofpbuf_uninit(&upcall->put_actions);
1035
if (!upcall->ukey_persists) {
1036
ukey_delete__(upcall->ukey);
1038
} else if (upcall->have_recirc_ref) {
1039
/* The reference was transferred to the ukey if one was created. */
1040
recirc_id_node_unref(upcall->recirc);
1046
upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
1047
unsigned pmd_id, enum dpif_upcall_type type,
1048
const struct nlattr *userdata, struct ofpbuf *actions,
1049
struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux)
1051
struct udpif *udpif = aux;
1052
unsigned int flow_limit;
1053
struct upcall upcall;
1057
atomic_read_relaxed(&enable_megaflows, &megaflow);
1058
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
1060
error = upcall_receive(&upcall, udpif->backer, packet, type, userdata,
1061
flow, ufid, pmd_id);
1066
error = process_upcall(udpif, &upcall, actions);
1071
if (upcall.xout.slow && put_actions) {
1072
ofpbuf_put(put_actions, upcall.put_actions.data,
1073
upcall.put_actions.size);
1076
if (OVS_LIKELY(wc)) {
1078
/* XXX: This could be avoided with sufficient API changes. */
1079
*wc = upcall.xout.wc;
1081
flow_wildcards_init_for_packet(wc, flow);
1085
if (udpif_get_n_flows(udpif) >= flow_limit) {
1090
/* Prevent miss flow installation if the key has recirculation ID but we
1091
* were not able to get a reference on it. */
1092
if (type == DPIF_UC_MISS && upcall.recirc && !upcall.have_recirc_ref) {
1097
if (upcall.ukey && !ukey_install(udpif, upcall.ukey)) {
1102
upcall.ukey_persists = true;
1104
upcall_uninit(&upcall);
1109
process_upcall(struct udpif *udpif, struct upcall *upcall,
1110
struct ofpbuf *odp_actions)
1112
const struct nlattr *userdata = upcall->userdata;
1113
const struct dp_packet *packet = upcall->packet;
1114
const struct flow *flow = upcall->flow;
1116
switch (classify_upcall(upcall->type, userdata)) {
1118
upcall_xlate(udpif, upcall, odp_actions);
1122
if (upcall->sflow) {
1123
union user_action_cookie cookie;
1125
memset(&cookie, 0, sizeof cookie);
1126
memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.sflow);
1127
dpif_sflow_received(upcall->sflow, packet, flow,
1128
flow->in_port.odp_port, &cookie);
1133
if (upcall->ipfix) {
1134
union user_action_cookie cookie;
1135
struct flow_tnl output_tunnel_key;
1137
memset(&cookie, 0, sizeof cookie);
1138
memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix);
1140
if (upcall->out_tun_key) {
1141
memset(&output_tunnel_key, 0, sizeof output_tunnel_key);
1142
odp_tun_key_from_attr(upcall->out_tun_key,
1143
&output_tunnel_key);
1145
dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow,
1146
flow->in_port.odp_port,
1147
cookie.ipfix.output_odp_port,
1148
upcall->out_tun_key ?
1149
&output_tunnel_key : NULL);
1153
case FLOW_SAMPLE_UPCALL:
1154
if (upcall->ipfix) {
1155
union user_action_cookie cookie;
1157
memset(&cookie, 0, sizeof cookie);
1158
memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.flow_sample);
1160
/* The flow reflects exactly the contents of the packet.
1161
* Sample the packet using it. */
1162
dpif_ipfix_flow_sample(upcall->ipfix, packet, flow,
1163
cookie.flow_sample.collector_set_id,
1164
cookie.flow_sample.probability,
1165
cookie.flow_sample.obs_domain_id,
1166
cookie.flow_sample.obs_point_id);
1178
handle_upcalls(struct udpif *udpif, struct upcall *upcalls,
1181
struct dpif_op *opsp[UPCALL_MAX_BATCH * 2];
1182
struct ukey_op ops[UPCALL_MAX_BATCH * 2];
1183
unsigned int flow_limit;
1184
size_t n_ops, n_opsp, i;
1188
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
1189
atomic_read_relaxed(&enable_megaflows, &megaflow);
907
1191
may_put = udpif_get_n_flows(udpif) < flow_limit;
909
/* Initialize each 'struct flow_miss's ->xout.
911
* We do this per-flow_miss rather than per-packet because, most commonly,
912
* all the packets in a flow can use the same translation.
914
* We can't do this in the previous loop because we need the TCP flags for
915
* all the packets in each miss. */
917
HMAP_FOR_EACH (miss, hmap_node, misses) {
920
xlate_in_init(&xin, miss->ofproto, &miss->flow, NULL,
921
miss->stats.tcp_flags, NULL);
922
xin.may_learn = true;
924
if (miss->upcall_type == DPIF_UC_MISS) {
925
xin.resubmit_stats = &miss->stats;
927
/* For non-miss upcalls, there's a flow in the datapath which this
928
* packet was accounted to. Presumably the revalidators will deal
929
* with pushing its stats eventually. */
932
xlate_actions(&xin, &miss->xout);
933
fail_open = fail_open || miss->xout.fail_open;
936
/* Now handle the packets individually in order of arrival. In the common
937
* case each packet of a miss can share the same actions, but slow-pathed
938
* packets need to be translated individually:
1193
/* Handle the packets individually in order of arrival.
940
1195
* - For SLOW_CFM, SLOW_LACP, SLOW_STP, and SLOW_BFD, translation is what
941
1196
* processes received packets for these protocols.
949
1204
for (i = 0; i < n_upcalls; i++) {
950
1205
struct upcall *upcall = &upcalls[i];
951
struct flow_miss *miss = upcall->flow_miss;
952
struct ofpbuf *packet = &upcall->dpif_upcall.packet;
954
ovs_be16 flow_vlan_tci;
956
/* Save a copy of flow.vlan_tci in case it is changed to
957
* generate proper mega flow masks for VLAN splinter flows. */
958
flow_vlan_tci = miss->flow.vlan_tci;
960
if (miss->xout.slow) {
963
xlate_in_init(&xin, miss->ofproto, &miss->flow, NULL, 0, packet);
964
xlate_actions_for_side_effects(&xin);
967
if (miss->flow.in_port.ofp_port
968
!= vsp_realdev_to_vlandev(miss->ofproto,
969
miss->flow.in_port.ofp_port,
970
miss->flow.vlan_tci)) {
971
/* This packet was received on a VLAN splinter port. We
972
* added a VLAN to the packet to make the packet resemble
973
* the flow, but the actions were composed assuming that
974
* the packet contained no VLAN. So, we must remove the
975
* VLAN header from the packet before trying to execute the
977
if (ofpbuf_size(&miss->xout.odp_actions)) {
978
eth_pop_vlan(packet);
1206
const struct dp_packet *packet = upcall->packet;
1209
if (upcall->vsp_adjusted) {
1210
/* This packet was received on a VLAN splinter port. We added a
1211
* VLAN to the packet to make the packet resemble the flow, but the
1212
* actions were composed assuming that the packet contained no
1213
* VLAN. So, we must remove the VLAN header from the packet before
1214
* trying to execute the actions. */
1215
if (upcall->xout.odp_actions->size) {
1216
eth_pop_vlan(CONST_CAST(struct dp_packet *, upcall->packet));
981
1219
/* Remove the flow vlan tags inserted by vlan splinter logic
982
1220
* to ensure megaflow masks generated match the data path flow. */
983
miss->flow.vlan_tci = 0;
1221
CONST_CAST(struct flow *, upcall->flow)->vlan_tci = 0;
986
1224
/* Do not install a flow into the datapath if:
988
1226
* - The datapath already has too many flows.
990
* - An earlier iteration of this loop already put the same flow.
992
1228
* - We received this packet via some flow installed in the kernel
996
&& upcall->dpif_upcall.type == DPIF_UC_MISS) {
1002
atomic_read(&enable_megaflows, &megaflow);
1003
ofpbuf_use_stack(&mask, &miss->mask_buf, sizeof miss->mask_buf);
1007
max_mpls = ofproto_dpif_get_max_mpls_depth(miss->ofproto);
1008
odp_flow_key_from_mask(&mask, &miss->xout.wc.masks,
1009
&miss->flow, UINT32_MAX, max_mpls);
1013
op->type = DPIF_OP_FLOW_PUT;
1014
op->u.flow_put.flags = DPIF_FP_CREATE;
1015
op->u.flow_put.key = miss->key;
1016
op->u.flow_put.key_len = miss->key_len;
1017
op->u.flow_put.mask = ofpbuf_data(&mask);
1018
op->u.flow_put.mask_len = ofpbuf_size(&mask);
1019
op->u.flow_put.stats = NULL;
1021
if (!miss->xout.slow) {
1022
op->u.flow_put.actions = ofpbuf_data(&miss->xout.odp_actions);
1023
op->u.flow_put.actions_len = ofpbuf_size(&miss->xout.odp_actions);
1027
ofpbuf_use_stack(&buf, miss->slow_path_buf,
1028
sizeof miss->slow_path_buf);
1029
compose_slow_path(udpif, &miss->xout, &miss->flow,
1030
miss->odp_in_port, &buf);
1031
op->u.flow_put.actions = ofpbuf_data(&buf);
1032
op->u.flow_put.actions_len = ofpbuf_size(&buf);
1037
* The 'miss' may be shared by multiple upcalls. Restore
1038
* the saved flow vlan_tci field before processing the next
1040
miss->flow.vlan_tci = flow_vlan_tci;
1042
if (ofpbuf_size(&miss->xout.odp_actions)) {
1045
op->type = DPIF_OP_EXECUTE;
1046
op->u.execute.packet = packet;
1047
odp_key_to_pkt_metadata(miss->key, miss->key_len,
1049
op->u.execute.actions = ofpbuf_data(&miss->xout.odp_actions);
1050
op->u.execute.actions_len = ofpbuf_size(&miss->xout.odp_actions);
1051
op->u.execute.needs_help = (miss->xout.slow & SLOW_ACTION) != 0;
1055
/* Special case for fail-open mode.
1057
* If we are in fail-open mode, but we are connected to a controller too,
1058
* then we should send the packet up to the controller in the hope that it
1059
* will try to set up a flow and thereby allow us to exit fail-open.
1061
* See the top-level comment in fail-open.c for more information.
1063
* Copy packets before they are modified by execution. */
1065
for (i = 0; i < n_upcalls; i++) {
1066
struct upcall *upcall = &upcalls[i];
1067
struct flow_miss *miss = upcall->flow_miss;
1068
struct ofpbuf *packet = &upcall->dpif_upcall.packet;
1069
struct ofproto_packet_in *pin;
1071
pin = xmalloc(sizeof *pin);
1072
pin->up.packet = xmemdup(ofpbuf_data(packet), ofpbuf_size(packet));
1073
pin->up.packet_len = ofpbuf_size(packet);
1074
pin->up.reason = OFPR_NO_MATCH;
1075
pin->up.table_id = 0;
1076
pin->up.cookie = OVS_BE64_MAX;
1077
flow_get_metadata(&miss->flow, &pin->up.fmd);
1078
pin->send_len = 0; /* Not used for flow table misses. */
1079
pin->miss_type = OFPROTO_PACKET_IN_NO_MISS;
1080
ofproto_dpif_send_packet_in(miss->ofproto, pin);
1084
/* Execute batch. */
1085
for (i = 0; i < n_ops; i++) {
1088
dpif_operate(udpif->dpif, opsp, n_ops);
1091
/* Must be called with udpif->ukeys[hash % udpif->n_revalidators].mutex. */
1231
* - Upcall was a recirculation but we do not have a reference to
1232
* the recirculation ID. */
1233
if (may_put && upcall->type == DPIF_UC_MISS &&
1234
(!upcall->recirc || upcall->have_recirc_ref)) {
1235
struct udpif_key *ukey = upcall->ukey;
1237
upcall->ukey_persists = true;
1241
op->dop.type = DPIF_OP_FLOW_PUT;
1242
op->dop.u.flow_put.flags = DPIF_FP_CREATE;
1243
op->dop.u.flow_put.key = ukey->key;
1244
op->dop.u.flow_put.key_len = ukey->key_len;
1245
op->dop.u.flow_put.mask = ukey->mask;
1246
op->dop.u.flow_put.mask_len = ukey->mask_len;
1247
op->dop.u.flow_put.ufid = upcall->ufid;
1248
op->dop.u.flow_put.stats = NULL;
1249
op->dop.u.flow_put.actions = ukey->actions->data;
1250
op->dop.u.flow_put.actions_len = ukey->actions->size;
1253
if (upcall->xout.odp_actions->size) {
1256
op->dop.type = DPIF_OP_EXECUTE;
1257
op->dop.u.execute.packet = CONST_CAST(struct dp_packet *, packet);
1258
odp_key_to_pkt_metadata(upcall->key, upcall->key_len,
1259
&op->dop.u.execute.packet->md);
1260
op->dop.u.execute.actions = upcall->xout.odp_actions->data;
1261
op->dop.u.execute.actions_len = upcall->xout.odp_actions->size;
1262
op->dop.u.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) != 0;
1263
op->dop.u.execute.probe = false;
1269
* We install ukeys before installing the flows, locking them for exclusive
1270
* access by this thread for the period of installation. This ensures that
1271
* other threads won't attempt to delete the flows as we are creating them.
1274
for (i = 0; i < n_ops; i++) {
1275
struct udpif_key *ukey = ops[i].ukey;
1278
/* If we can't install the ukey, don't install the flow. */
1279
if (!ukey_install_start(udpif, ukey)) {
1280
ukey_delete__(ukey);
1285
opsp[n_opsp++] = &ops[i].dop;
1287
dpif_operate(udpif->dpif, opsp, n_opsp);
1288
for (i = 0; i < n_ops; i++) {
1290
ukey_install_finish(ops[i].ukey, ops[i].dop.error);
1296
get_ufid_hash(const ovs_u128 *ufid)
1298
return ufid->u32[0];
1092
1301
static struct udpif_key *
1093
ukey_lookup__(struct udpif *udpif, const struct nlattr *key, size_t key_len,
1302
ukey_lookup(struct udpif *udpif, const ovs_u128 *ufid)
1096
1304
struct udpif_key *ukey;
1097
struct hmap *hmap = &udpif->ukeys[hash % udpif->n_revalidators].hmap;
1305
int idx = get_ufid_hash(ufid) % N_UMAPS;
1306
struct cmap *cmap = &udpif->ukeys[idx].cmap;
1099
HMAP_FOR_EACH_WITH_HASH (ukey, hmap_node, hash, hmap) {
1100
if (ukey->key_len == key_len && !memcmp(ukey->key, key, key_len)) {
1308
CMAP_FOR_EACH_WITH_HASH (ukey, cmap_node, get_ufid_hash(ufid), cmap) {
1309
if (ovs_u128_equals(&ukey->ufid, ufid)) {
1107
1316
static struct udpif_key *
1108
ukey_lookup(struct udpif *udpif, const struct nlattr *key, size_t key_len,
1111
struct udpif_key *ukey;
1112
uint32_t idx = hash % udpif->n_revalidators;
1114
ovs_mutex_lock(&udpif->ukeys[idx].mutex);
1115
ukey = ukey_lookup__(udpif, key, key_len, hash);
1116
ovs_mutex_unlock(&udpif->ukeys[idx].mutex);
1121
static struct udpif_key *
1122
ukey_create(const struct nlattr *key, size_t key_len, long long int used)
1124
struct udpif_key *ukey = xmalloc(sizeof *ukey);
1317
ukey_create__(const struct nlattr *key, size_t key_len,
1318
const struct nlattr *mask, size_t mask_len,
1319
bool ufid_present, const ovs_u128 *ufid,
1320
const unsigned pmd_id, const struct ofpbuf *actions,
1321
uint64_t dump_seq, uint64_t reval_seq, long long int used,
1322
const struct recirc_id_node *key_recirc, struct xlate_out *xout)
1323
OVS_NO_THREAD_SAFETY_ANALYSIS
1325
unsigned n_recircs = (key_recirc ? 1 : 0) + (xout ? xout->n_recircs : 0);
1326
struct udpif_key *ukey = xmalloc(sizeof *ukey +
1327
n_recircs * sizeof *ukey->recircs);
1329
memcpy(&ukey->keybuf, key, key_len);
1330
ukey->key = &ukey->keybuf.nla;
1331
ukey->key_len = key_len;
1332
memcpy(&ukey->maskbuf, mask, mask_len);
1333
ukey->mask = &ukey->maskbuf.nla;
1334
ukey->mask_len = mask_len;
1335
ukey->ufid_present = ufid_present;
1337
ukey->pmd_id = pmd_id;
1338
ukey->hash = get_ufid_hash(&ukey->ufid);
1339
ukey->actions = ofpbuf_clone(actions);
1125
1341
ovs_mutex_init(&ukey->mutex);
1127
ukey->key = (struct nlattr *) &ukey->key_buf;
1128
memcpy(&ukey->key_buf, key, key_len);
1129
ukey->key_len = key_len;
1131
ovs_mutex_lock(&ukey->mutex);
1133
ukey->flow_exists = true;
1134
ukey->created = used ? used : time_msec();
1342
ukey->dump_seq = dump_seq;
1343
ukey->reval_seq = reval_seq;
1344
ukey->flow_exists = false;
1345
ukey->created = time_msec();
1135
1346
memset(&ukey->stats, 0, sizeof ukey->stats);
1347
ukey->stats.used = used;
1136
1348
ukey->xcache = NULL;
1350
ukey->n_recircs = n_recircs;
1352
ukey->recircs[0] = key_recirc->id;
1354
if (xout && xout->n_recircs) {
1355
const uint32_t *act_recircs = xlate_out_get_recircs(xout);
1357
memcpy(ukey->recircs + (key_recirc ? 1 : 0), act_recircs,
1358
xout->n_recircs * sizeof *ukey->recircs);
1359
xlate_out_take_recircs(xout);
1364
static struct udpif_key *
1365
ukey_create_from_upcall(struct upcall *upcall)
1367
struct odputil_keybuf keystub, maskstub;
1368
struct ofpbuf keybuf, maskbuf;
1369
bool recirc, megaflow;
1371
if (upcall->key_len) {
1372
ofpbuf_use_const(&keybuf, upcall->key, upcall->key_len);
1374
/* dpif-netdev doesn't provide a netlink-formatted flow key in the
1375
* upcall, so convert the upcall's flow here. */
1376
ofpbuf_use_stack(&keybuf, &keystub, sizeof keystub);
1377
odp_flow_key_from_flow(&keybuf, upcall->flow, &upcall->xout.wc.masks,
1378
upcall->flow->in_port.odp_port, true);
1381
atomic_read_relaxed(&enable_megaflows, &megaflow);
1382
recirc = ofproto_dpif_get_enable_recirc(upcall->ofproto);
1383
ofpbuf_use_stack(&maskbuf, &maskstub, sizeof maskstub);
1387
max_mpls = ofproto_dpif_get_max_mpls_depth(upcall->ofproto);
1388
odp_flow_key_from_mask(&maskbuf, &upcall->xout.wc.masks, upcall->flow,
1389
UINT32_MAX, max_mpls, recirc);
1392
return ukey_create__(keybuf.data, keybuf.size, maskbuf.data, maskbuf.size,
1393
true, upcall->ufid, upcall->pmd_id,
1394
&upcall->put_actions, upcall->dump_seq,
1395
upcall->reval_seq, 0,
1396
upcall->have_recirc_ref ? upcall->recirc : NULL,
1401
ukey_create_from_dpif_flow(const struct udpif *udpif,
1402
const struct dpif_flow *flow,
1403
struct udpif_key **ukey)
1405
struct dpif_flow full_flow;
1406
struct ofpbuf actions;
1407
uint64_t dump_seq, reval_seq;
1408
uint64_t stub[DPIF_FLOW_BUFSIZE / 8];
1409
const struct nlattr *a;
1412
if (!flow->key_len || !flow->actions_len) {
1416
/* If the key or actions were not provided by the datapath, fetch the
1418
ofpbuf_use_stack(&buf, &stub, sizeof stub);
1419
err = dpif_flow_get(udpif->dpif, NULL, 0, &flow->ufid,
1420
flow->pmd_id, &buf, &full_flow);
1427
/* Check the flow actions for recirculation action. As recirculation
1428
* relies on OVS userspace internal state, we need to delete all old
1429
* datapath flows with recirculation upon OVS restart. */
1430
NL_ATTR_FOR_EACH_UNSAFE (a, left, flow->actions, flow->actions_len) {
1431
if (nl_attr_type(a) == OVS_ACTION_ATTR_RECIRC) {
1436
dump_seq = seq_read(udpif->dump_seq);
1437
reval_seq = seq_read(udpif->reval_seq);
1438
ofpbuf_use_const(&actions, &flow->actions, flow->actions_len);
1439
*ukey = ukey_create__(flow->key, flow->key_len,
1440
flow->mask, flow->mask_len, flow->ufid_present,
1441
&flow->ufid, flow->pmd_id, &actions, dump_seq,
1442
reval_seq, flow->stats.used, NULL, NULL);
1447
/* Attempts to insert a ukey into the shared ukey maps.
1449
* On success, returns true, installs the ukey and returns it in a locked
1450
* state. Otherwise, returns false. */
1452
ukey_install_start(struct udpif *udpif, struct udpif_key *new_ukey)
1453
OVS_TRY_LOCK(true, new_ukey->mutex)
1456
struct udpif_key *old_ukey;
1458
bool locked = false;
1460
idx = new_ukey->hash % N_UMAPS;
1461
umap = &udpif->ukeys[idx];
1462
ovs_mutex_lock(&umap->mutex);
1463
old_ukey = ukey_lookup(udpif, &new_ukey->ufid);
1465
/* Uncommon case: A ukey is already installed with the same UFID. */
1466
if (old_ukey->key_len == new_ukey->key_len
1467
&& !memcmp(old_ukey->key, new_ukey->key, new_ukey->key_len)) {
1468
COVERAGE_INC(handler_duplicate_upcall);
1470
struct ds ds = DS_EMPTY_INITIALIZER;
1472
odp_format_ufid(&old_ukey->ufid, &ds);
1473
ds_put_cstr(&ds, " ");
1474
odp_flow_key_format(old_ukey->key, old_ukey->key_len, &ds);
1475
ds_put_cstr(&ds, "\n");
1476
odp_format_ufid(&new_ukey->ufid, &ds);
1477
ds_put_cstr(&ds, " ");
1478
odp_flow_key_format(new_ukey->key, new_ukey->key_len, &ds);
1480
VLOG_WARN_RL(&rl, "Conflicting ukey for flows:\n%s", ds_cstr(&ds));
1484
ovs_mutex_lock(&new_ukey->mutex);
1485
cmap_insert(&umap->cmap, &new_ukey->cmap_node, new_ukey->hash);
1488
ovs_mutex_unlock(&umap->mutex);
1494
ukey_install_finish__(struct udpif_key *ukey) OVS_REQUIRES(ukey->mutex)
1496
ukey->flow_exists = true;
1500
ukey_install_finish(struct udpif_key *ukey, int error)
1501
OVS_RELEASES(ukey->mutex)
1504
ukey_install_finish__(ukey);
1137
1506
ovs_mutex_unlock(&ukey->mutex);
1142
/* Checks for a ukey in 'udpif->ukeys' with the same 'ukey->key' and 'hash',
1143
* and inserts 'ukey' if it does not exist.
1145
* Returns true if 'ukey' was inserted into 'udpif->ukeys', false otherwise. */
1147
udpif_insert_ukey(struct udpif *udpif, struct udpif_key *ukey, uint32_t hash)
1149
struct udpif_key *duplicate;
1150
uint32_t idx = hash % udpif->n_revalidators;
1153
ovs_mutex_lock(&udpif->ukeys[idx].mutex);
1154
duplicate = ukey_lookup__(udpif, ukey->key, ukey->key_len, hash);
1158
hmap_insert(&udpif->ukeys[idx].hmap, &ukey->hmap_node, hash);
1161
ovs_mutex_unlock(&udpif->ukeys[idx].mutex);
1512
ukey_install(struct udpif *udpif, struct udpif_key *ukey)
1514
/* The usual way to keep 'ukey->flow_exists' in sync with the datapath is
1515
* to call ukey_install_start(), install the corresponding datapath flow,
1516
* then call ukey_install_finish(). The netdev interface using upcall_cb()
1517
* doesn't provide a function to separately finish the flow installation,
1518
* so we perform the operations together here.
1520
* This is fine currently, as revalidator threads will only delete this
1521
* ukey during revalidator_sweep() and only if the dump_seq is mismatched.
1522
* It is unlikely for a revalidator thread to advance dump_seq and reach
1523
* the next GC phase between ukey creation and flow installation. */
1524
return ukey_install_start(udpif, ukey) && ukey_install_finish(ukey, 0);
1527
/* Searches for a ukey in 'udpif->ukeys' that matches 'flow' and attempts to
1528
* lock the ukey. If the ukey does not exist, create it.
1530
* Returns 0 on success, setting *result to the matching ukey and returning it
1531
* in a locked state. Otherwise, returns an errno and clears *result. EBUSY
1532
* indicates that another thread is handling this flow. Other errors indicate
1533
* an unexpected condition creating a new ukey.
1535
* *error is an output parameter provided to appease the threadsafety analyser,
1536
* and its value matches the return value. */
1538
ukey_acquire(struct udpif *udpif, const struct dpif_flow *flow,
1539
struct udpif_key **result, int *error)
1540
OVS_TRY_LOCK(0, (*result)->mutex)
1542
struct udpif_key *ukey;
1545
ukey = ukey_lookup(udpif, &flow->ufid);
1547
retval = ovs_mutex_trylock(&ukey->mutex);
1549
/* Usually we try to avoid installing flows from revalidator threads,
1550
* because locking on a umap may cause handler threads to block.
1551
* However there are certain cases, like when ovs-vswitchd is
1552
* restarted, where it is desirable to handle flows that exist in the
1553
* datapath gracefully (ie, don't just clear the datapath). */
1556
retval = ukey_create_from_dpif_flow(udpif, flow, &ukey);
1560
install = ukey_install_start(udpif, ukey);
1562
ukey_install_finish__(ukey);
1565
ukey_delete__(ukey);
1167
ukey_delete(struct revalidator *revalidator, struct udpif_key *ukey)
1581
ukey_delete__(struct udpif_key *ukey)
1168
1582
OVS_NO_THREAD_SAFETY_ANALYSIS
1171
hmap_remove(revalidator->ukeys, &ukey->hmap_node);
1585
for (int i = 0; i < ukey->n_recircs; i++) {
1586
recirc_free_id(ukey->recircs[i]);
1588
xlate_cache_delete(ukey->xcache);
1589
ofpbuf_delete(ukey->actions);
1590
ovs_mutex_destroy(&ukey->mutex);
1173
xlate_cache_delete(ukey->xcache);
1174
ovs_mutex_destroy(&ukey->mutex);
1596
ukey_delete(struct umap *umap, struct udpif_key *ukey)
1597
OVS_REQUIRES(umap->mutex)
1599
cmap_remove(&umap->cmap, &ukey->cmap_node, ukey->hash);
1600
ovsrcu_postpone(ukey_delete__, ukey);
1421
push_dump_ops(struct revalidator *revalidator,
1422
struct dump_op *ops, size_t n_ops)
1873
push_ukey_ops(struct udpif *udpif, struct umap *umap,
1874
struct ukey_op *ops, size_t n_ops)
1426
push_dump_ops__(revalidator->udpif, ops, n_ops);
1878
push_ukey_ops__(udpif, ops, n_ops);
1879
ovs_mutex_lock(&umap->mutex);
1427
1880
for (i = 0; i < n_ops; i++) {
1428
ukey_delete(revalidator, ops[i].ukey);
1881
ukey_delete(umap, ops[i].ukey);
1883
ovs_mutex_unlock(&umap->mutex);
1887
log_unexpected_flow(const struct dpif_flow *flow, int error)
1889
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 60);
1890
struct ds ds = DS_EMPTY_INITIALIZER;
1892
ds_put_format(&ds, "Failed to acquire udpif_key corresponding to "
1893
"unexpected flow (%s): ", ovs_strerror(error));
1894
odp_format_ufid(&flow->ufid, &ds);
1895
VLOG_WARN_RL(&rl, "%s", ds_cstr(&ds));
1433
1899
revalidate(struct revalidator *revalidator)
1435
1901
struct udpif *udpif = revalidator->udpif;
1437
struct dump_op ops[REVALIDATE_MAX_BATCH];
1438
const struct nlattr *key, *mask, *actions;
1439
size_t key_len, mask_len, actions_len;
1440
const struct dpif_flow_stats *stats;
1902
struct dpif_flow_dump_thread *dump_thread;
1903
uint64_t dump_seq, reval_seq;
1442
1904
unsigned int flow_limit;
1448
atomic_read(&udpif->flow_limit, &flow_limit);
1450
dpif_flow_dump_state_init(udpif->dpif, &state);
1451
while (dpif_flow_dump_next(&udpif->dump, state, &key, &key_len, &mask,
1452
&mask_len, &actions, &actions_len, &stats)) {
1453
struct udpif_key *ukey;
1454
bool mark, may_destroy;
1455
long long int used, max_idle;
1459
hash = hash_bytes(key, key_len, udpif->secret);
1460
ukey = ukey_lookup(udpif, key, key_len, hash);
1464
ukey = ukey_create(key, key_len, used);
1465
if (!udpif_insert_ukey(udpif, ukey, hash)) {
1466
/* The same ukey has already been created. This means that
1467
* another revalidator is processing this flow
1468
* concurrently, so don't bother processing it. */
1469
COVERAGE_INC(upcall_duplicate_flow);
1470
ukey_delete(NULL, ukey);
1475
if (ovs_mutex_trylock(&ukey->mutex)) {
1476
/* The flow has been dumped, and is being handled by another
1477
* revalidator concurrently. This can occasionally occur if the
1478
* datapath is changed in the middle of a flow dump. Rather than
1479
* perform the same work twice, skip the flow this time. */
1480
COVERAGE_INC(upcall_duplicate_flow);
1484
if (ukey->mark || !ukey->flow_exists) {
1485
/* The flow has already been dumped and handled by another
1486
* revalidator during this flow dump operation. Skip it. */
1487
COVERAGE_INC(upcall_duplicate_flow);
1906
dump_seq = seq_read(udpif->dump_seq);
1907
reval_seq = seq_read(udpif->reval_seq);
1908
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
1909
dump_thread = dpif_flow_dump_thread_create(udpif->dump);
1911
struct ukey_op ops[REVALIDATE_MAX_BATCH];
1914
struct dpif_flow flows[REVALIDATE_MAX_BATCH];
1915
const struct dpif_flow *f;
1918
long long int max_idle;
1923
n_dumped = dpif_flow_dump_next(dump_thread, flows, ARRAY_SIZE(flows));
1930
/* In normal operation we want to keep flows around until they have
1931
* been idle for 'ofproto_max_idle' milliseconds. However:
1933
* - If the number of datapath flows climbs above 'flow_limit',
1934
* drop that down to 100 ms to try to bring the flows down to
1937
* - If the number of datapath flows climbs above twice
1938
* 'flow_limit', delete all the datapath flows as an emergency
1939
* measure. (We reassess this condition for the next batch of
1940
* datapath flows, so we will recover before all the flows are
1942
n_dp_flows = udpif_get_n_flows(udpif);
1943
kill_them_all = n_dp_flows > flow_limit * 2;
1944
max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle;
1946
for (f = flows; f < &flows[n_dumped]; f++) {
1947
long long int used = f->stats.used;
1948
struct udpif_key *ukey;
1949
bool already_dumped, keep;
1952
if (ukey_acquire(udpif, f, &ukey, &error)) {
1953
if (error == EBUSY) {
1954
/* Another thread is processing this flow, so don't bother
1956
COVERAGE_INC(upcall_ukey_contention);
1958
log_unexpected_flow(f, error);
1959
if (error != ENOENT) {
1960
delete_op_init__(udpif, &ops[n_ops++], f);
1966
already_dumped = ukey->dump_seq == dump_seq;
1967
if (already_dumped) {
1968
/* The flow has already been handled during this flow dump
1969
* operation. Skip it. */
1971
COVERAGE_INC(dumped_duplicate_flow);
1973
COVERAGE_INC(dumped_new_flow);
1975
ovs_mutex_unlock(&ukey->mutex);
1980
used = ukey->created;
1982
if (kill_them_all || (used && used < now - max_idle)) {
1985
keep = revalidate_ukey(udpif, ukey, &f->stats, reval_seq);
1987
ukey->dump_seq = dump_seq;
1988
ukey->flow_exists = keep;
1991
delete_op_init(udpif, &ops[n_ops++], ukey);
1488
1993
ovs_mutex_unlock(&ukey->mutex);
1493
used = ukey->created;
1495
n_flows = udpif_get_n_flows(udpif);
1496
max_idle = ofproto_max_idle;
1497
if (n_flows > flow_limit) {
1501
if ((used && used < now - max_idle) || n_flows > flow_limit * 2) {
1504
mark = revalidate_ukey(udpif, ukey, mask, mask_len, actions,
1505
actions_len, stats);
1507
ukey->mark = ukey->flow_exists = mark;
1510
dump_op_init(&ops[n_ops++], key, key_len, ukey);
1512
ovs_mutex_unlock(&ukey->mutex);
1515
may_destroy = dpif_flow_dump_next_may_destroy_keys(&udpif->dump,
1518
/* Only update 'now' immediately before 'buffer' will be updated.
1519
* This gives us the current time relative to the time the datapath
1520
* will write into 'stats'. */
1525
/* Only do a dpif_operate when we've hit our maximum batch, or when our
1526
* memory is about to be clobbered by the next call to
1527
* dpif_flow_dump_next(). */
1528
if (n_ops == REVALIDATE_MAX_BATCH || (n_ops && may_destroy)) {
1529
push_dump_ops__(udpif, ops, n_ops);
1535
push_dump_ops__(udpif, ops, n_ops);
1538
dpif_flow_dump_state_uninit(udpif->dpif, state);
1997
push_ukey_ops__(udpif, ops, n_ops);
2001
dpif_flow_dump_thread_destroy(dump_thread);
1541
/* Called with exclusive access to 'revalidator' and 'ukey'. */
1543
handle_missed_revalidation(struct revalidator *revalidator,
2005
handle_missed_revalidation(struct udpif *udpif, uint64_t reval_seq,
1544
2006
struct udpif_key *ukey)
1545
OVS_NO_THREAD_SAFETY_ANALYSIS
1547
struct udpif *udpif = revalidator->udpif;
1548
struct nlattr *mask, *actions;
1549
size_t mask_len, actions_len;
1550
2008
struct dpif_flow_stats stats;
1554
2011
COVERAGE_INC(revalidate_missed_dp_flow);
1556
if (!dpif_flow_get(udpif->dpif, ukey->key, ukey->key_len, &buf,
1557
&mask, &mask_len, &actions, &actions_len, &stats)) {
1558
keep = revalidate_ukey(udpif, ukey, mask, mask_len, actions,
1559
actions_len, &stats);
2013
memset(&stats, 0, sizeof stats);
2014
ovs_mutex_lock(&ukey->mutex);
2015
keep = revalidate_ukey(udpif, ukey, &stats, reval_seq);
2016
ovs_mutex_unlock(&ukey->mutex);
1567
2022
revalidator_sweep__(struct revalidator *revalidator, bool purge)
1568
OVS_NO_THREAD_SAFETY_ANALYSIS
1570
struct dump_op ops[REVALIDATE_MAX_BATCH];
1571
struct udpif_key *ukey, *next;
1576
/* During garbage collection, this revalidator completely owns its ukeys
1577
* map, and therefore doesn't need to do any locking. */
1578
HMAP_FOR_EACH_SAFE (ukey, next, hmap_node, revalidator->ukeys) {
1579
if (ukey->flow_exists) {
1580
bool missed_flow = !ukey->mark;
1585
&& revalidator->udpif->need_revalidate
1586
&& !handle_missed_revalidation(revalidator, ukey))) {
1587
struct dump_op *op = &ops[n_ops++];
1589
dump_op_init(op, ukey->key, ukey->key_len, ukey);
2024
struct udpif *udpif;
2025
uint64_t dump_seq, reval_seq;
2028
udpif = revalidator->udpif;
2029
dump_seq = seq_read(udpif->dump_seq);
2030
reval_seq = seq_read(udpif->reval_seq);
2031
slice = revalidator - udpif->revalidators;
2032
ovs_assert(slice < udpif->n_revalidators);
2034
for (int i = slice; i < N_UMAPS; i += udpif->n_revalidators) {
2035
struct ukey_op ops[REVALIDATE_MAX_BATCH];
2036
struct udpif_key *ukey;
2037
struct umap *umap = &udpif->ukeys[i];
2040
CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
2041
bool flow_exists, seq_mismatch;
2043
/* Handler threads could be holding a ukey lock while it installs a
2044
* new flow, so don't hang around waiting for access to it. */
2045
if (ovs_mutex_trylock(&ukey->mutex)) {
2048
flow_exists = ukey->flow_exists;
2049
seq_mismatch = (ukey->dump_seq != dump_seq
2050
&& ukey->reval_seq != reval_seq);
2051
ovs_mutex_unlock(&ukey->mutex);
2056
&& !handle_missed_revalidation(udpif, reval_seq,
2058
struct ukey_op *op = &ops[n_ops++];
2060
delete_op_init(udpif, op, ukey);
1590
2061
if (n_ops == REVALIDATE_MAX_BATCH) {
1591
push_dump_ops(revalidator, ops, n_ops);
2062
push_ukey_ops(udpif, umap, ops, n_ops);
2065
} else if (!flow_exists) {
2066
ovs_mutex_lock(&umap->mutex);
2067
ukey_delete(umap, ukey);
2068
ovs_mutex_unlock(&umap->mutex);
1596
ukey_delete(revalidator, ukey);
1601
push_dump_ops(revalidator, ops, n_ops);
2073
push_ukey_ops(udpif, umap, ops, n_ops);