#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/sctp/checksum.h>

#include "datapath.h"
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len);

static int make_writable(struct sk_buff *skb, int write_len)
{
	if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
		return 0;

	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}
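
/* Illustrative sketch, not part of the original file: make_writable()
 * implements copy-on-write for shared skbs, so header edits follow the
 * pattern below ('mangle_headers' is a hypothetical helper).
 *
 *	err = make_writable(skb, ETH_HLEN);
 *	if (unlikely(err))
 *		return err;		// typically -ENOMEM
 *	mangle_headers(skb);		// private copy: safe to modify
 */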

/* Remove VLAN header from packet and update csum accordingly. */
static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
{
	struct vlan_hdr *vhdr;
	int err;

	err = make_writable(skb, VLAN_ETH_HLEN);
	if (unlikely(err))
		return err;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_sub(skb->csum, csum_partial(skb->data
					+ (2 * ETH_ALEN), VLAN_HLEN, 0));

	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
	*current_tci = vhdr->h_vlan_TCI;

	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
	__skb_pull(skb, VLAN_HLEN);

	vlan_set_encap_proto(skb, vhdr);
	skb->mac_header += VLAN_HLEN;
	skb_reset_mac_len(skb);

	return 0;
}

static int pop_vlan(struct sk_buff *skb)
{
	__be16 tci;
	int err;

	if (likely(vlan_tx_tag_present(skb))) {
		skb->vlan_tci = 0;
	} else {
		if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
			     skb->len < VLAN_ETH_HLEN))
			return 0;

		err = __pop_vlan_tci(skb, &tci);
		if (err)
			return err;
	}
	/* Move the next VLAN tag to the HW accel tag. */
	if (likely(skb->protocol != htons(ETH_P_8021Q) ||
		   skb->len < VLAN_ETH_HLEN))
		return 0;

	err = __pop_vlan_tci(skb, &tci);
	if (unlikely(err))
		return err;

	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));
	return 0;
}

static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
{
	if (unlikely(vlan_tx_tag_present(skb))) {
		u16 current_tag;

		/* Push down the current VLAN tag. */
		current_tag = vlan_tx_tag_get(skb);

		if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
			return -ENOMEM;

		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->csum = csum_add(skb->csum, csum_partial(skb->data
					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
	}
	__vlan_hwaccel_put_tag(skb, vlan->vlan_tpid,
			       ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
	return 0;
}

static int set_eth_addr(struct sk_buff *skb,
			const struct ovs_key_ethernet *eth_key)
{
	int err;

	err = make_writable(skb, ETH_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
	ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);

	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	return 0;
}

struct deferred_action {
	struct sk_buff *skb;
	const struct nlattr *actions;

	/* Store pkt_key clone when creating deferred action. */
	struct sw_flow_key pkt_key;
};

#define DEFERRED_ACTION_FIFO_SIZE 10
struct action_fifo {
	int head;
	int tail;
	/* Deferred action fifo queue storage. */
	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};

static struct action_fifo __percpu *action_fifos;

#define EXEC_ACTIONS_LEVEL_LIMIT 4	/* limit used to detect packet
					 * looping by the network stack
					 */
static DEFINE_PER_CPU(int, exec_actions_level);

static void action_fifo_init(struct action_fifo *fifo)
{
	fifo->head = 0;
	fifo->tail = 0;
}

static bool action_fifo_is_empty(const struct action_fifo *fifo)
{
	return (fifo->head == fifo->tail);
}

static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
{
	if (action_fifo_is_empty(fifo))
		return NULL;

	return &fifo->fifo[fifo->tail++];
}

static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
{
	if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
		return NULL;

	return &fifo->fifo[fifo->head++];
}

/* Return queue entry if fifo is not full */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
						    const struct sw_flow_key *key,
						    const struct nlattr *attr)
{
	struct action_fifo *fifo;
	struct deferred_action *da;

	fifo = this_cpu_ptr(action_fifos);
	da = action_fifo_put(fifo);
	if (da) {
		da->skb = skb;
		da->actions = attr;
		da->pkt_key = *key;
	}

	return da;
}
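
/* Illustrative sketch, not part of the original file: entries queued via
 * add_deferred_actions() are consumed strictly in FIFO order, e.g.
 *
 *	struct action_fifo *fifo = this_cpu_ptr(action_fifos);
 *	struct deferred_action *da;
 *
 *	while ((da = action_fifo_get(fifo)) != NULL)
 *		do_execute_actions(dp, da->skb, &da->pkt_key,
 *				   da->actions, nla_len(da->actions));
 */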

static void invalidate_flow_key(struct sw_flow_key *key)
{
	key->eth.type = htons(0);
}

static bool is_flow_key_valid(const struct sw_flow_key *key)
{
	return !!key->eth.type;
}

static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_mpls *mpls)
{
	__be32 *new_mpls_lse;
	struct ethhdr *hdr;

	/* The networking stack does not allow simultaneous tunnel and
	 * MPLS GSO.
	 */
	if (skb_encapsulation(skb))
		return -ENOTSUPP;

	if (skb_cow_head(skb, MPLS_HLEN) < 0)
		return -ENOMEM;

	skb_push(skb, MPLS_HLEN);
	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);
	skb_reset_mac_header(skb);

	new_mpls_lse = (__be32 *)skb_mpls_header(skb);
	*new_mpls_lse = mpls->mpls_lse;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
							     MPLS_HLEN, 0));

	hdr = eth_hdr(skb);
	hdr->h_proto = mpls->mpls_ethertype;

	if (!ovs_skb_get_inner_protocol(skb))
		ovs_skb_set_inner_protocol(skb, skb->protocol);
	skb->protocol = mpls->mpls_ethertype;

	invalidate_flow_key(key);
	return 0;
}

static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		    const __be16 ethertype)
{
	struct ethhdr *hdr;
	int err;

	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_sub(skb->csum,
				     csum_partial(skb_mpls_header(skb),
						  MPLS_HLEN, 0));

	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);

	__skb_pull(skb, MPLS_HLEN);
	skb_reset_mac_header(skb);

	/* skb_mpls_header() is used to locate the ethertype
	 * field correctly in the presence of VLAN tags.
	 */
	hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
	hdr->h_proto = ethertype;
	if (eth_p_mpls(skb->protocol))
		skb->protocol = ethertype;

	invalidate_flow_key(key);
	return 0;
}

/* 'KEY' must not have any bits set outside of the 'MASK'. */
#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK))
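
/* Illustrative sketch, not part of the original file: MASKED() takes the
 * masked bits from KEY and the remaining bits from OLD.  With hypothetical
 * values:
 *
 *	u8 old = 0x40, key = 0x05, mask = 0x0f;
 *	u8 res = MASKED(old, key, mask);	// 0x45: low nibble from key
 */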

static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const __be32 *mpls_lse, const __be32 *mask)
{
	__be32 *stack;
	__be32 lse;
	int err;

	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	stack = (__be32 *)skb_mpls_header(skb);
	lse = MASKED(*stack, *mpls_lse, *mask);
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		__be32 diff[] = { ~(*stack), lse };

		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
					  ~skb->csum);
	}

	*stack = lse;
	flow_key->mpls.top_lse = lse;
	return 0;
}
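
/* Illustrative note, not part of the original file: the diff[] trick in
 * set_mpls() is the incremental checksum update of RFC 1624,
 * HC' = ~(~HC + ~m + m'), folded into one csum_partial() call over the
 * pair { ~old, new }:
 *
 *	__be32 diff[] = { ~(*stack), lse };
 *	skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum);
 */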

static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = skb_vlan_pop(skb);
	if (skb_vlan_tag_present(skb))
		invalidate_flow_key(key);
	else
		key->eth.tci = 0;
	return err;
}

static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_vlan *vlan)
{
	if (skb_vlan_tag_present(skb))
		invalidate_flow_key(key);
	else
		key->eth.tci = vlan->vlan_tci;
	return skb_vlan_push(skb, vlan->vlan_tpid,
			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
}

/* 'src' is already properly masked. */
static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
{
	u16 *dst = (u16 *)dst_;
	const u16 *src = (const u16 *)src_;
	const u16 *mask = (const u16 *)mask_;

	SET_MASKED(dst[0], src[0], mask[0]);
	SET_MASKED(dst[1], src[1], mask[1]);
	SET_MASKED(dst[2], src[2], mask[2]);
}

static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
			const struct ovs_key_ethernet *key,
			const struct ovs_key_ethernet *mask)
{
	int err;

	err = skb_ensure_writable(skb, ETH_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
			       mask->eth_src);
	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
			       mask->eth_dst);

	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
	return 0;
}

static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
			__be32 *addr, __be32 new_addr)
{
	int transport_len = skb->len - skb_transport_offset(skb);

	/* Fix up the L4 checksum that covers the IP pseudo-header. */
	if (nh->protocol == IPPROTO_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
						 *addr, new_addr, 1);
	} else if (nh->protocol == IPPROTO_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace4(&uh->check, skb,
							 *addr, new_addr, 1);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	}

	csum_replace4(&nh->check, *addr, new_addr);
	skb_clear_hash(skb);
	*addr = new_addr;
}

static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
			   const __be32 mask[4], __be32 masked[4])
{
	masked[0] = MASKED(old[0], addr[0], mask[0]);
	masked[1] = MASKED(old[1], addr[1], mask[1]);
	masked[2] = MASKED(old[2], addr[2], mask[2]);
	masked[3] = MASKED(old[3], addr[3], mask[3]);
}

static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
			  __be32 addr[4], const __be32 new_addr[4],
			  bool recalculate_csum)
{
	if (likely(recalculate_csum))
		update_ipv6_checksum(skb, l4_proto, addr, new_addr);

	skb_clear_hash(skb);
	memcpy(addr, new_addr, sizeof(__be32[4]));
}

static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
{
	nh->priority = tc >> 4;
	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
}

static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
{
	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
	nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
	nh->flow_lbl[2] = fl & 0x000000FF;
}

static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
{
	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
	nh->ttl = new_ttl;
}

static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
{
	/* Bits 21-24 are always unmasked, so this retains their values. */
	SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
	SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
	SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
}

static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
		       u8 mask)
{
	new_ttl = MASKED(nh->ttl, new_ttl, mask);

	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
	nh->ttl = new_ttl;
}
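
/* Illustrative note, not part of the original file: the 20-bit IPv6 flow
 * label is split across flow_lbl[]: bits 19:16 live in the low nibble of
 * flow_lbl[0], bits 15:8 in flow_lbl[1], and bits 7:0 in flow_lbl[2].
 * For a hypothetical fl = 0xABCDE:
 *
 *	flow_lbl[0] carries 0xA, flow_lbl[1] = 0xBC, flow_lbl[2] = 0xDE
 */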

static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
{
	struct iphdr *nh;
	int err;

	err = make_writable(skb, skb_network_offset(skb) +
				 sizeof(struct iphdr));
	if (unlikely(err))
		return err;

	nh = ip_hdr(skb);

	if (ipv4_key->ipv4_src != nh->saddr)
		set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);

	if (ipv4_key->ipv4_dst != nh->daddr)
		set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);

	if (ipv4_key->ipv4_tos != nh->tos)
		ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);

	if (ipv4_key->ipv4_ttl != nh->ttl)
		set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);

	return 0;
}

static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv4 *key,
		    const struct ovs_key_ipv4 *mask)
{
	struct iphdr *nh;
	__be32 new_addr;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct iphdr));
	if (unlikely(err))
		return err;

	nh = ip_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (mask->ipv4_src) {
		new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);

		if (unlikely(new_addr != nh->saddr)) {
			set_ip_addr(skb, nh, &nh->saddr, new_addr);
			flow_key->ipv4.addr.src = new_addr;
		}
	}
	if (mask->ipv4_dst) {
		new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);

		if (unlikely(new_addr != nh->daddr)) {
			set_ip_addr(skb, nh, &nh->daddr, new_addr);
			flow_key->ipv4.addr.dst = new_addr;
		}
	}
	if (mask->ipv4_tos) {
		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
		flow_key->ip.tos = nh->tos;
	}
	if (mask->ipv4_ttl) {
		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
		flow_key->ip.ttl = nh->ttl;
	}

	return 0;
}

static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
{
	struct ipv6hdr *nh;
	int err;
	__be32 *saddr;
	__be32 *daddr;

	err = make_writable(skb, skb_network_offset(skb) +
			    sizeof(struct ipv6hdr));
	if (unlikely(err))
		return err;

	nh = ipv6_hdr(skb);
	saddr = (__be32 *)&nh->saddr;
	daddr = (__be32 *)&nh->daddr;

	if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
		set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
			      ipv6_key->ipv6_src, true);

	if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
		unsigned int offset = 0;
		int flags = IP6_FH_F_SKIP_RH;
		bool recalc_csum = true;

		if (ipv6_ext_hdr(nh->nexthdr))
			recalc_csum = ipv6_find_hdr(skb, &offset,
						    NEXTHDR_ROUTING, NULL,
						    &flags) != NEXTHDR_ROUTING;

		set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
			      ipv6_key->ipv6_dst, recalc_csum);
	}

	set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
	set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
	nh->hop_limit = ipv6_key->ipv6_hlimit;

	return 0;
}

static bool is_ipv6_mask_nonzero(const __be32 addr[4])
{
	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
}

static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv6 *key,
		    const struct ovs_key_ipv6 *mask)
{
	struct ipv6hdr *nh;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct ipv6hdr));
	if (unlikely(err))
		return err;

	nh = ipv6_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
		__be32 *saddr = (__be32 *)&nh->saddr;
		__be32 masked[4];

		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);

		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
			set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
				      true);
			memcpy(&flow_key->ipv6.addr.src, masked,
			       sizeof(flow_key->ipv6.addr.src));
		}
	}
	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
		unsigned int offset = 0;
		int flags = IP6_FH_F_SKIP_RH;
		bool recalc_csum = true;
		__be32 *daddr = (__be32 *)&nh->daddr;
		__be32 masked[4];

		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);

		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
			if (ipv6_ext_hdr(nh->nexthdr))
				recalc_csum = (ipv6_find_hdr(skb, &offset,
							     NEXTHDR_ROUTING,
							     NULL, &flags)
					       != NEXTHDR_ROUTING);

			set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
				      recalc_csum);
			memcpy(&flow_key->ipv6.addr.dst, masked,
			       sizeof(flow_key->ipv6.addr.dst));
		}
	}
	if (mask->ipv6_tclass) {
		ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
		flow_key->ip.tos = ipv6_get_dsfield(nh);
	}
	if (mask->ipv6_label) {
		set_ipv6_fl(nh, ntohl(key->ipv6_label),
			    ntohl(mask->ipv6_label));
		flow_key->ipv6.label =
			*(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
	}
	if (mask->ipv6_hlimit) {
		SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
		flow_key->ip.ttl = nh->hop_limit;
	}
	return 0;
}

/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
			__be16 new_port, __sum16 *check)
{
	inet_proto_csum_replace2(check, skb, *port, new_port, 0);
	*port = new_port;
}
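
/* Illustrative sketch, not part of the original file: TCP and UDP
 * checksums cover a pseudo-header, so port rewrites go through
 * inet_proto_csum_replace2() rather than a plain store, e.g.
 *
 *	set_tp_port(skb, &tcp_hdr(skb)->source, new_src,
 *		    &tcp_hdr(skb)->check);
 */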

static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
{
	struct udphdr *uh = udp_hdr(skb);

	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
		set_tp_port(skb, port, new_port, &uh->check);

		if (!uh->check)
			uh->check = CSUM_MANGLED_0;
	} else {
		*port = new_port;
		skb_clear_hash(skb);
	}
}

static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
{
	struct udphdr *uh;
	int err;

	err = make_writable(skb, skb_transport_offset(skb) +
				 sizeof(struct udphdr));
	if (unlikely(err))
		return err;

	uh = udp_hdr(skb);
	if (udp_port_key->udp_src != uh->source)
		set_udp_port(skb, &uh->source, udp_port_key->udp_src);

	if (udp_port_key->udp_dst != uh->dest)
		set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);

	return 0;
}

static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_udp *key,
		   const struct ovs_key_udp *mask)
{
	struct udphdr *uh;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct udphdr));
	if (unlikely(err))
		return err;

	uh = udp_hdr(skb);
	/* Either of the masks is non-zero, so do not bother checking them. */
	src = MASKED(uh->source, key->udp_src, mask->udp_src);
	dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);

	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
		if (likely(src != uh->source)) {
			set_tp_port(skb, &uh->source, src, &uh->check);
			flow_key->tp.src = src;
		}
		if (likely(dst != uh->dest)) {
			set_tp_port(skb, &uh->dest, dst, &uh->check);
			flow_key->tp.dst = dst;
		}

		if (unlikely(!uh->check))
			uh->check = CSUM_MANGLED_0;
	} else {
		uh->source = src;
		uh->dest = dst;
		flow_key->tp.src = src;
		flow_key->tp.dst = dst;
	}

	skb_clear_hash(skb);

	return 0;
}
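
/* Illustrative note, not part of the original file: in UDP over IPv4 a
 * zero checksum field means "no checksum", so a checksum that computes
 * to zero must be transmitted as all-ones:
 *
 *	uh->check = result ?: CSUM_MANGLED_0;	// CSUM_MANGLED_0 == 0xffff
 */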

static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
{
	struct tcphdr *th;
	int err;

	err = make_writable(skb, skb_transport_offset(skb) +
				 sizeof(struct tcphdr));
	if (unlikely(err))
		return err;

	th = tcp_hdr(skb);
	if (tcp_port_key->tcp_src != th->source)
		set_tp_port(skb, &th->source, tcp_port_key->tcp_src,
			    &th->check);

	if (tcp_port_key->tcp_dst != th->dest)
		set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst,
			    &th->check);

	return 0;
}

static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_tcp *key,
		   const struct ovs_key_tcp *mask)
{
	struct tcphdr *th;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct tcphdr));
	if (unlikely(err))
		return err;

	th = tcp_hdr(skb);
	src = MASKED(th->source, key->tcp_src, mask->tcp_src);
	if (likely(src != th->source)) {
		set_tp_port(skb, &th->source, src, &th->check);
		flow_key->tp.src = src;
	}
	dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
	if (likely(dst != th->dest)) {
		set_tp_port(skb, &th->dest, dst, &th->check);
		flow_key->tp.dst = dst;
	}
	skb_clear_hash(skb);

	return 0;
}

static int set_sctp(struct sk_buff *skb,
		    const struct ovs_key_sctp *sctp_port_key)
{
	struct sctphdr *sh;
	int err;
	unsigned int sctphoff = skb_transport_offset(skb);

	err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
	if (unlikely(err))
		return err;

	sh = sctp_hdr(skb);
	if (sctp_port_key->sctp_src != sh->source ||
	    sctp_port_key->sctp_dst != sh->dest) {
		__le32 old_correct_csum, new_csum, old_csum;

		old_csum = sh->checksum;
		old_correct_csum = sctp_compute_cksum(skb, sctphoff);

		sh->source = sctp_port_key->sctp_src;
		sh->dest = sctp_port_key->sctp_dst;

		new_csum = sctp_compute_cksum(skb, sctphoff);

		/* Carry any checksum errors through. */
		sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

		skb_clear_hash(skb);
	}

	return 0;
}

static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_sctp *key,
		    const struct ovs_key_sctp *mask)
{
	unsigned int sctphoff = skb_transport_offset(skb);
	struct sctphdr *sh;
	__le32 old_correct_csum, new_csum, old_csum;
	int err;

	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
	if (unlikely(err))
		return err;

	sh = sctp_hdr(skb);
	old_csum = sh->checksum;
	old_correct_csum = sctp_compute_cksum(skb, sctphoff);

	sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
	sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);

	new_csum = sctp_compute_cksum(skb, sctphoff);

	/* Carry any checksum errors through. */
	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

	skb_clear_hash(skb);

	flow_key->tp.src = sh->source;
	flow_key->tp.dst = sh->dest;

	return 0;
}
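
/* Illustrative note, not part of the original file: the XOR above keeps a
 * pre-existing checksum error intact across the rewrite.  Rearranged:
 *
 *	err_bits = old_csum ^ old_correct_csum;	// zero if packet was valid
 *	sh->checksum = new_csum ^ err_bits;
 */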

static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
	struct vport *vport;

	if (unlikely(!skb))
		return -ENOMEM;

	vport = ovs_vport_rcu(dp, out_port);
	if (unlikely(!vport)) {
		kfree_skb(skb);
		return -ENODEV;
	}

	ovs_vport_send(vport, skb);
	return 0;
}

static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
	struct vport *vport = ovs_vport_rcu(dp, out_port);

	if (likely(vport))
		ovs_vport_send(vport, skb);
	else
		kfree_skb(skb);
}

static int output_userspace(struct datapath *dp, struct sk_buff *skb,
			    const struct nlattr *attr)
{
	struct dp_upcall_info upcall;
	const struct nlattr *a;
	int rem;

	BUG_ON(!OVS_CB(skb)->pkt_key);

	upcall.cmd = OVS_PACKET_CMD_ACTION;
	upcall.key = OVS_CB(skb)->pkt_key;
	upcall.userdata = NULL;
	upcall.portid = 0;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
	     a = nla_next(a, &rem)) {
		switch (nla_type(a)) {
		case OVS_USERSPACE_ATTR_USERDATA:
			upcall.userdata = a;
			break;

		case OVS_USERSPACE_ATTR_PID:
			upcall.portid = nla_get_u32(a);
			break;
		}
	}

	return ovs_dp_upcall(dp, skb, &upcall);
}

static int output_userspace(struct datapath *dp, struct sk_buff *skb,
			    struct sw_flow_key *key, const struct nlattr *attr)
{
	struct ovs_tunnel_info info;
	struct dp_upcall_info upcall;
	const struct nlattr *a;
	int rem;

	upcall.cmd = OVS_PACKET_CMD_ACTION;
	upcall.userdata = NULL;
	upcall.portid = 0;
	upcall.egress_tun_info = NULL;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
	     a = nla_next(a, &rem)) {
		switch (nla_type(a)) {
		case OVS_USERSPACE_ATTR_USERDATA:
			upcall.userdata = a;
			break;

		case OVS_USERSPACE_ATTR_PID:
			upcall.portid = nla_get_u32(a);
			break;

		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
			/* Get out tunnel info. */
			struct vport *vport;

			vport = ovs_vport_rcu(dp, nla_get_u32(a));
			if (vport) {
				int err;

				err = ovs_vport_get_egress_tun_info(vport, skb,
								    &info);
				if (!err)
					upcall.egress_tun_info = &info;
			}
			break;
		}
		}
	}

	return ovs_dp_upcall(dp, skb, key, &upcall);
}

static int execute_set_action(struct sk_buff *skb,
			      const struct nlattr *nested_attr)
{
	int err = 0;

	switch (nla_type(nested_attr)) {
	case OVS_KEY_ATTR_PRIORITY:
		skb->priority = nla_get_u32(nested_attr);
		break;

	case OVS_KEY_ATTR_SKB_MARK:
		skb->mark = nla_get_u32(nested_attr);
		break;

	case OVS_KEY_ATTR_IPV4_TUNNEL:
		OVS_CB(skb)->tun_key = nla_data(nested_attr);
		break;

	case OVS_KEY_ATTR_ETHERNET:
		err = set_eth_addr(skb, nla_data(nested_attr));
		break;

	case OVS_KEY_ATTR_IPV4:
		err = set_ipv4(skb, nla_data(nested_attr));
		break;

	case OVS_KEY_ATTR_IPV6:
		err = set_ipv6(skb, nla_data(nested_attr));
		break;

	case OVS_KEY_ATTR_TCP:
		err = set_tcp(skb, nla_data(nested_attr));
		break;

	case OVS_KEY_ATTR_UDP:
		err = set_udp(skb, nla_data(nested_attr));
		break;

	case OVS_KEY_ATTR_SCTP:
		err = set_sctp(skb, nla_data(nested_attr));
		break;
	}

	return err;
}

static int execute_set_action(struct sk_buff *skb,
			      struct sw_flow_key *flow_key,
			      const struct nlattr *a)
{
	/* Only tunnel set execution is supported without a mask. */
	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
		OVS_CB(skb)->egress_tun_info = nla_data(a);
		return 0;
	}

	return -EINVAL;
}

/* Mask is at the midpoint of the data. */
#define get_mask(a, type) ((const type)nla_data(a) + 1)

static int execute_masked_set_action(struct sk_buff *skb,
				     struct sw_flow_key *flow_key,
				     const struct nlattr *a)
{
	int err = 0;

	switch (nla_type(a)) {
	case OVS_KEY_ATTR_PRIORITY:
		SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *));
		flow_key->phy.priority = skb->priority;
		break;

	case OVS_KEY_ATTR_SKB_MARK:
		SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
		flow_key->phy.skb_mark = skb->mark;
		break;

	case OVS_KEY_ATTR_TUNNEL_INFO:
		/* Masked data not supported for tunnel. */
		err = -EINVAL;
		break;

	case OVS_KEY_ATTR_ETHERNET:
		err = set_eth_addr(skb, flow_key, nla_data(a),
				   get_mask(a, struct ovs_key_ethernet *));
		break;

	case OVS_KEY_ATTR_IPV4:
		err = set_ipv4(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv4 *));
		break;

	case OVS_KEY_ATTR_IPV6:
		err = set_ipv6(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv6 *));
		break;

	case OVS_KEY_ATTR_TCP:
		err = set_tcp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_tcp *));
		break;

	case OVS_KEY_ATTR_UDP:
		err = set_udp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_udp *));
		break;

	case OVS_KEY_ATTR_SCTP:
		err = set_sctp(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_sctp *));
		break;

	case OVS_KEY_ATTR_MPLS:
		err = set_mpls(skb, flow_key, nla_data(a),
			       get_mask(a, __be32 *));
		break;
	}

	return err;
}
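
/* Illustrative sketch, not part of the original file: a masked set action
 * stores the key and its mask back to back in a single attribute, so
 * nla_len() is twice the key size and get_mask() lands on the second half:
 *
 *	const struct ovs_key_ipv4 *key = nla_data(a);
 *	const struct ovs_key_ipv4 *mask = get_mask(a, struct ovs_key_ipv4 *);
 */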

/* We limit the number of times that we pass into execute_actions()
 * to avoid blowing out the stack in the event that we have a loop.
 *
 * Each loop adds some (estimated) cost to the kernel stack.
 * The loop terminates when the max cost is exceeded.
 */
#define RECIRC_STACK_COST 1
#define DEFAULT_STACK_COST 4
/* Allow up to 4 regular services, and up to 3 recirculations */
#define MAX_STACK_COST (DEFAULT_STACK_COST * 4 + RECIRC_STACK_COST * 3)

struct loop_counter {
	u8 stack_cost;		/* loop stack cost. */
	bool looping;		/* Loop detected? */
};

static DEFINE_PER_CPU(struct loop_counter, loop_counters);

static int loop_suppress(struct datapath *dp, struct sw_flow_actions *actions)
{
	if (net_ratelimit())
		pr_warn("%s: flow loop detected, dropping\n",
			ovs_dp_name(dp));
	actions->actions_len = 0;
	return -ELOOP;
}

static void process_deferred_actions(struct datapath *dp)
{
	struct action_fifo *fifo = this_cpu_ptr(action_fifos);

	/* Do not touch the FIFO if there are no deferred actions. */
	if (action_fifo_is_empty(fifo))
		return;

	/* Finish executing all deferred actions. */
	do {
		struct deferred_action *da = action_fifo_get(fifo);
		struct sk_buff *skb = da->skb;
		struct sw_flow_key *key = &da->pkt_key;
		const struct nlattr *actions = da->actions;

		if (actions)
			do_execute_actions(dp, skb, key, actions,
					   nla_len(actions));
		else
			ovs_dp_process_packet(skb, key);
	} while (!action_fifo_is_empty(fifo));

	/* Reset FIFO for the next packet. */
	action_fifo_init(fifo);
}

/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, bool recirc)
{
	struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
	const u8 stack_cost = recirc ? RECIRC_STACK_COST : DEFAULT_STACK_COST;
	struct loop_counter *loop;
	int error;

	/* Check whether we've looped too much. */
	loop = &__get_cpu_var(loop_counters);
	loop->stack_cost += stack_cost;
	if (unlikely(loop->stack_cost > MAX_STACK_COST))
		loop->looping = true;
	if (unlikely(loop->looping)) {
		error = loop_suppress(dp, acts);
		kfree_skb(skb);
		goto out_loop;
	}

	OVS_CB(skb)->tun_key = NULL;
	error = do_execute_actions(dp, skb, acts->actions, acts->actions_len);

	/* Check whether sub-actions looped too much. */
	if (unlikely(loop->looping))
		error = loop_suppress(dp, acts);

out_loop:
	/* Decrement loop stack cost. */
	loop->stack_cost -= stack_cost;
	if (!loop->stack_cost)
		loop->looping = false;

	return error;
}

/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
			const struct sw_flow_actions *acts,
			struct sw_flow_key *key)
{
	int level = this_cpu_read(exec_actions_level);
	int err;

	if (unlikely(level >= EXEC_ACTIONS_LEVEL_LIMIT)) {
		if (net_ratelimit())
			pr_warn("%s: packet loop detected, dropping.\n",
				ovs_dp_name(dp));
		kfree_skb(skb);
		return -ELOOP;
	}

	this_cpu_inc(exec_actions_level);
	err = do_execute_actions(dp, skb, key,
				 acts->actions, acts->actions_len);

	if (!level)
		process_deferred_actions(dp);

	this_cpu_dec(exec_actions_level);

	/* This return status currently does not reflect the errors
	 * encountered during deferred actions execution. Probably needs to
	 * be fixed in the future.
	 */
	return err;
}
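
/* Illustrative sketch, not part of the original file: recirculation defers
 * work instead of recursing, so the call depth stays bounded:
 *
 *	ovs_execute_actions()			// level 0
 *	  do_execute_actions()
 *	    recirc -> add_deferred_actions()	// queued, no recursion
 *	  process_deferred_actions()		// drains the per-CPU FIFO
 *	    do_execute_actions()		// nested levels capped at
 *						// EXEC_ACTIONS_LEVEL_LIMIT
 */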

int action_fifos_init(void)
{
	action_fifos = alloc_percpu(struct action_fifo);
	if (!action_fifos)
		return -ENOMEM;

	return 0;
}

void action_fifos_exit(void)
{
	free_percpu(action_fifos);
}