/*
 * Copyright (c) 2007-2012 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "checksum.h"
#include "datapath.h"
#include "flow.h"
#include "genl_exec.h"
#include "vlan.h"
#include "tunnel.h"
#include "vport-internal_dev.h"

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
    LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
#error Kernels before 2.6.18 or after 3.10 are not supported by this version of Open vSwitch.
#endif

#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
static void rehash_flow_table(struct work_struct *work);
static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);

int ovs_net_id __read_mostly;

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters, etc.) are protected by genl_mutex.  The RTNL lock nests inside
 * genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact with
 * each other.
 */

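/* A minimal sketch of the nesting described above, assuming the caller is a
 * Generic Netlink doit handler (so genl_mutex is already held by the genl
 * framework) and needs to modify device state:
 *
 *	rtnl_lock();            /- RTNL nests inside genl_mutex -/
 *	...add or remove vports, change datapath state...
 *	rtnl_unlock();
 *
 * Readers instead bracket their lookups with rcu_read_lock() and
 * rcu_read_unlock(), as get_dp() below does.
 */
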
static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
			     const struct dp_upcall_info *);
static int queue_userspace_packet(struct net *, int dp_ifindex,
				  struct sk_buff *,
				  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
	struct datapath *dp = NULL;
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, dp_ifindex);
	if (dev) {
		struct vport *vport = ovs_internal_dev_get_vport(dev);
		if (vport)
			dp = vport->dp;
	}
	rcu_read_unlock();

	return dp;
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *ovs_dp_name(const struct datapath *dp)
{
	struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
	return vport->ops->get_name(vport);
}

static int get_dpifindex(struct datapath *dp)
{
	struct vport *local;
	int ifindex;

	rcu_read_lock();

	local = ovs_vport_rcu(dp, OVSP_LOCAL);
	if (local)
		ifindex = local->ops->get_ifindex(local);
	else
		ifindex = 0;

	rcu_read_unlock();

	return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
	free_percpu(dp->stats_percpu);
	release_net(ovs_dp_get_net(dp));
	kfree(dp->ports);
	kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
	hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}

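/* Vports are kept in a small hash table keyed by port number: the bucket is
 * chosen by masking port_no with (DP_VPORT_HASH_BUCKETS - 1), which assumes
 * DP_VPORT_HASH_BUCKETS is a power of two, and collisions are resolved by the
 * per-bucket hlist walked above under RCU.
 */
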
/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = ovs_vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;
		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

		hlist_add_head_rcu(&vport->dp_hash_node, head);
	}
	return vport;
}

/* Called with RTNL lock. */
void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_RTNL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct dp_stats_percpu *stats;
	u64 *stats_counter;
	int error;

	stats = this_cpu_ptr(dp->stats_percpu);

	if (!OVS_CB(skb)->flow) {
		struct sw_flow_key key;
		int key_len;

		/* Extract flow from 'skb' into 'key'. */
		error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
		if (unlikely(error)) {
			kfree_skb(skb);
			return;
		}

		/* Look up flow. */
		flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table),
					   &key, key_len);
		if (unlikely(!flow)) {
			struct dp_upcall_info upcall;

			upcall.cmd = OVS_PACKET_CMD_MISS;
			upcall.key = &key;
			upcall.userdata = NULL;
			upcall.portid = p->upcall_portid;
			ovs_dp_upcall(dp, skb, &upcall);
			consume_skb(skb);
			stats_counter = &stats->n_missed;
			goto out;
		}

		OVS_CB(skb)->flow = flow;
	}

	stats_counter = &stats->n_hit;
	ovs_flow_used(OVS_CB(skb)->flow, skb);
	ovs_execute_actions(dp, skb);

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->sync);
	(*stats_counter)++;
	u64_stats_update_end(&stats->sync);
}

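/* In short, the receive fast path above is: extract a flow key from the
 * packet, look it up in the datapath's flow table under RCU, and either
 * execute the cached actions on a hit or hand the packet to userspace with an
 * OVS_PACKET_CMD_MISS upcall on a miss, bumping the per-CPU n_hit or n_missed
 * counter inside a u64_stats sequence so the 64-bit counters stay consistent
 * on 32-bit machines.
 */
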
static struct genl_family dp_packet_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	 SET_NETNSOK
};

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct dp_upcall_info *upcall_info)
{
	struct dp_stats_percpu *stats;
	int dp_ifindex;
	int err;

	if (upcall_info->portid == 0) {
		err = -ENOTCONN;
		goto err;
	}

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex) {
		err = -ENODEV;
		goto err;
	}

	forward_ip_summed(skb, true);

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
	else
		err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
	if (err)
		goto err;

	return 0;

err:
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->sync);
	stats->n_lost++;
	u64_stats_update_end(&stats->sync);

	return err;
}

static int queue_gso_packets(struct net *net, int dp_ifindex,
			     struct sk_buff *skb,
			     const struct dp_upcall_info *upcall_info)
{
	unsigned short gso_type = skb_shinfo(skb)->gso_type;
	struct dp_upcall_info later_info;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	int err;

	segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
	if (IS_ERR(segs))
		return PTR_ERR(segs);

	/* Queue all of the segments. */
	skb = segs;
	do {
		err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
		if (err)
			break;

		if (skb == segs && gso_type & SKB_GSO_UDP) {
			/* The initial flow key extracted by ovs_flow_extract()
			 * in this case is for a first fragment, so we need to
			 * properly mark later fragments.
			 */
			later_key = *upcall_info->key;
			later_key.ip.frag = OVS_FRAG_TYPE_LATER;

			later_info = *upcall_info;
			later_info.key = &later_key;
			upcall_info = &later_info;
		}
	} while ((skb = skb->next));

	/* Free all of the segments. */
	skb = segs;
	do {
		nskb = skb->next;
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	} while ((skb = nskb));
	return err;
}

static int queue_userspace_packet(struct net *net, int dp_ifindex,
				  struct sk_buff *skb,
				  const struct dp_upcall_info *upcall_info)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb; /* to be queued to userspace */
	struct nlattr *nla;
	unsigned int len;
	int err;

	if (vlan_tx_tag_present(skb)) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		err = vlan_deaccel_tag(nskb);
		if (err)
			return err;

		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	len = sizeof(struct ovs_header);
	len += nla_total_size(skb->len);
	len += nla_total_size(FLOW_BUFSIZE);
	if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
		len += nla_total_size(8);

	user_skb = genlmsg_new(len, GFP_ATOMIC);
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	upcall->dp_ifindex = dp_ifindex;

	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
	ovs_flow_to_nlattrs(upcall_info->key, user_skb);
	nla_nest_end(user_skb, nla);

	if (upcall_info->userdata)
		nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
			    nla_get_u64(upcall_info->userdata));

	nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);

	skb_copy_and_csum_dev(skb, nla_data(nla));

	genlmsg_end(user_skb, upcall);
	err = genlmsg_unicast(net, user_skb, upcall_info->portid);

out:
	kfree_skb(nskb);
	return err;
}

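/* A rough picture of the Netlink message queue_userspace_packet() builds,
 * in the attribute order emitted above:
 *
 *	struct ovs_header { dp_ifindex }
 *	OVS_PACKET_ATTR_KEY       nested flow key attributes
 *	OVS_PACKET_ATTR_USERDATA  optional u64, present only when the upcall
 *	                          carries userdata (OVS_PACKET_CMD_ACTION)
 *	OVS_PACKET_ATTR_PACKET    the packet contents themselves
 *
 * The message is unicast to the Netlink port in upcall_info->portid.
 */
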
/* Called with genl_mutex. */
410
static int flush_flows(struct datapath *dp)
411
{
412
	struct flow_table *old_table;
413
	struct flow_table *new_table;
414
415
	old_table = genl_dereference(dp->table);
416
	new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
417
	if (!new_table)
418
		return -ENOMEM;
419
420
	rcu_assign_pointer(dp->table, new_table);
421
422
	ovs_flow_tbl_deferred_destroy(old_table);
423
	return 0;
424
}
425
426
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
427
{
428
429
	struct sw_flow_actions *acts;
430
	int new_acts_size;
431
	int req_size = NLA_ALIGN(attr_len);
432
	int next_offset = offsetof(struct sw_flow_actions, actions) +
433
					(*sfa)->actions_len;
434
435
	if (req_size <= (ksize(*sfa) - next_offset))
436
		goto out;
437
438
	new_acts_size = ksize(*sfa) * 2;
439
440
	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
441
		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
442
			return ERR_PTR(-EMSGSIZE);
443
		new_acts_size = MAX_ACTIONS_BUFSIZE;
444
	}
445
446
	acts = ovs_flow_actions_alloc(new_acts_size);
447
	if (IS_ERR(acts))
448
		return (void *)acts;
449
450
	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
451
	acts->actions_len = (*sfa)->actions_len;
452
	kfree(*sfa);
453
	*sfa = acts;
454
455
out:
456
	(*sfa)->actions_len += req_size;
457
	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
458
}
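
/* reserve_sfa_size() grows the copied action buffer geometrically: when the
 * next attribute does not fit in the ksize() of the current allocation, the
 * buffer size is doubled (capped at MAX_ACTIONS_BUFSIZE) and the existing
 * actions are copied across before the new attribute's space is handed back
 * to the caller.
 */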
459
460
static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
461
{
462
	struct nlattr *a;
463
464
	a = reserve_sfa_size(sfa, nla_attr_size(len));
465
	if (IS_ERR(a))
466
		return PTR_ERR(a);
467
468
	a->nla_type = attrtype;
469
	a->nla_len = nla_attr_size(len);
470
471
	if (data)
472
		memcpy(nla_data(a), data, len);
473
	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
474
475
	return 0;
476
}
477
478
static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype)
479
{
480
	int used = (*sfa)->actions_len;
481
	int err;
482
483
	err = add_action(sfa, attrtype, NULL, 0);
484
	if (err)
485
		return err;
486
487
	return used;
488
}
489
490
static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset)
491
{
492
	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset);
493
494
	a->nla_len = sfa->actions_len - st_offset;
495
}
496
497
static int validate_and_copy_actions(const struct nlattr *attr,
498
				const struct sw_flow_key *key, int depth,
499
				struct sw_flow_actions **sfa);
500
501
static int validate_and_copy_sample(const struct nlattr *attr,
502
			   const struct sw_flow_key *key, int depth,
503
			   struct sw_flow_actions **sfa)
504
{
505
	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
506
	const struct nlattr *probability, *actions;
507
	const struct nlattr *a;
508
	int rem, start, err, st_acts;
509
510
	memset(attrs, 0, sizeof(attrs));
511
	nla_for_each_nested(a, attr, rem) {
512
		int type = nla_type(a);
513
		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
514
			return -EINVAL;
515
		attrs[type] = a;
516
	}
517
	if (rem)
518
		return -EINVAL;
519
520
	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
521
	if (!probability || nla_len(probability) != sizeof(u32))
522
		return -EINVAL;
523
524
	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
525
	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
526
		return -EINVAL;
527
528
	/* validation done, copy sample action. */
529
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
530
	if (start < 0)
531
		return start;
532
	err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32));
533
	if (err)
534
		return err;
535
	st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
536
	if (st_acts < 0)
537
		return st_acts;
538
539
	err = validate_and_copy_actions(actions, key, depth + 1, sfa);
540
	if (err)
541
		return err;
542
543
	add_nested_action_end(*sfa, st_acts);
544
	add_nested_action_end(*sfa, start);
545
546
	return 0;
547
}
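
/* A sample action is copied as a nested attribute in three steps, mirroring
 * the code above: open an OVS_ACTION_ATTR_SAMPLE nest, emit
 * OVS_SAMPLE_ATTR_PROBABILITY, then recursively validate and copy the nested
 * OVS_SAMPLE_ATTR_ACTIONS list; add_nested_action_end() later patches the
 * nest lengths once the inner contents are in place.
 */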
548
549
static int validate_tp_port(const struct sw_flow_key *flow_key)
550
{
551
	if (flow_key->eth.type == htons(ETH_P_IP)) {
552
		if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
553
			return 0;
554
	} else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
555
		if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
556
			return 0;
557
	}
558
559
	return -EINVAL;
560
}
561
562
static int validate_and_copy_set_tun(const struct nlattr *attr,
563
				     struct sw_flow_actions **sfa)
564
{
565
	struct ovs_key_ipv4_tunnel tun_key;
566
	int err, start;
567
568
	err = ipv4_tun_from_nlattr(nla_data(attr), &tun_key);
569
	if (err)
570
		return err;
571
572
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
573
	if (start < 0)
574
		return start;
575
576
	err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key));
577
	add_nested_action_end(*sfa, start);
578
579
	return err;
580
}
581
582
static int validate_set(const struct nlattr *a,
583
			const struct sw_flow_key *flow_key,
584
			struct sw_flow_actions **sfa,
585
			bool *set_tun)
586
{
587
	const struct nlattr *ovs_key = nla_data(a);
588
	int key_type = nla_type(ovs_key);
589
590
	/* There can be only one key in an action */
591
	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
592
		return -EINVAL;
593
594
	if (key_type > OVS_KEY_ATTR_MAX ||
595
	    (ovs_key_lens[key_type] != nla_len(ovs_key) &&
596
	     ovs_key_lens[key_type] != -1))
597
		return -EINVAL;
598
599
	switch (key_type) {
600
	const struct ovs_key_ipv4 *ipv4_key;
601
	const struct ovs_key_ipv6 *ipv6_key;
602
	int err;
603
604
	case OVS_KEY_ATTR_PRIORITY:
605
	case OVS_KEY_ATTR_ETHERNET:
606
		break;
607
608
	case OVS_KEY_ATTR_SKB_MARK:
609
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) && !defined(CONFIG_NETFILTER)
610
		if (nla_get_u32(ovs_key) != 0)
611
			return -EINVAL;
612
#endif
613
		break;
614
615
	case OVS_KEY_ATTR_TUNNEL:
616
		*set_tun = true;
617
		err = validate_and_copy_set_tun(a, sfa);
618
		if (err)
619
			return err;
620
		break;
621
622
	case OVS_KEY_ATTR_IPV4:
623
		if (flow_key->eth.type != htons(ETH_P_IP))
624
			return -EINVAL;
625
626
		if (!flow_key->ip.proto)
627
			return -EINVAL;
628
629
		ipv4_key = nla_data(ovs_key);
630
		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
631
			return -EINVAL;
632
633
		if (ipv4_key->ipv4_frag != flow_key->ip.frag)
634
			return -EINVAL;
635
636
		break;
637
638
	case OVS_KEY_ATTR_IPV6:
639
		if (flow_key->eth.type != htons(ETH_P_IPV6))
640
			return -EINVAL;
641
642
		if (!flow_key->ip.proto)
643
			return -EINVAL;
644
645
		ipv6_key = nla_data(ovs_key);
646
		if (ipv6_key->ipv6_proto != flow_key->ip.proto)
647
			return -EINVAL;
648
649
		if (ipv6_key->ipv6_frag != flow_key->ip.frag)
650
			return -EINVAL;
651
652
		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
653
			return -EINVAL;
654
655
		break;
656
657
	case OVS_KEY_ATTR_TCP:
658
		if (flow_key->ip.proto != IPPROTO_TCP)
659
			return -EINVAL;
660
661
		return validate_tp_port(flow_key);
662
663
	case OVS_KEY_ATTR_UDP:
664
		if (flow_key->ip.proto != IPPROTO_UDP)
665
			return -EINVAL;
666
667
		return validate_tp_port(flow_key);
668
669
	default:
670
		return -EINVAL;
671
	}
672
673
	return 0;
674
}
675
676
static int validate_userspace(const struct nlattr *attr)
677
{
678
	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] =	{
679
		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
680
		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
681
	};
682
	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
683
	int error;
684
685
	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
686
				 attr, userspace_policy);
687
	if (error)
688
		return error;
689
690
	if (!a[OVS_USERSPACE_ATTR_PID] ||
691
	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
692
		return -EINVAL;
693
694
	return 0;
695
}
696
697
static int copy_action(const struct nlattr *from,
698
		      struct sw_flow_actions **sfa)
699
{
700
	int totlen = NLA_ALIGN(from->nla_len);
701
	struct nlattr *to;
702
703
	to = reserve_sfa_size(sfa, from->nla_len);
704
	if (IS_ERR(to))
705
		return PTR_ERR(to);
706
707
	memcpy(to, from, totlen);
708
	return 0;
709
}
710
711
static int validate_and_copy_actions(const struct nlattr *attr,
712
				const struct sw_flow_key *key,
713
				int depth,
714
				struct sw_flow_actions **sfa)
715
{
716
	const struct nlattr *a;
717
	int rem, err;
718
719
	if (depth >= SAMPLE_ACTION_DEPTH)
720
		return -EOVERFLOW;
721
722
	nla_for_each_nested(a, attr, rem) {
723
		/* Expected argument lengths, (u32)-1 for variable length. */
724
		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
725
			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
726
			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
727
			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
728
			[OVS_ACTION_ATTR_POP_VLAN] = 0,
729
			[OVS_ACTION_ATTR_SET] = (u32)-1,
730
			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
731
		};
732
		const struct ovs_action_push_vlan *vlan;
733
		int type = nla_type(a);
734
		bool skip_copy;
735
736
		if (type > OVS_ACTION_ATTR_MAX ||
737
		    (action_lens[type] != nla_len(a) &&
738
		     action_lens[type] != (u32)-1))
739
			return -EINVAL;
740
741
		skip_copy = false;
742
		switch (type) {
743
		case OVS_ACTION_ATTR_UNSPEC:
744
			return -EINVAL;
745
746
		case OVS_ACTION_ATTR_USERSPACE:
747
			err = validate_userspace(a);
748
			if (err)
749
				return err;
750
			break;
751
752
		case OVS_ACTION_ATTR_OUTPUT:
753
			if (nla_get_u32(a) >= DP_MAX_PORTS)
754
				return -EINVAL;
755
			break;
756
757
758
		case OVS_ACTION_ATTR_POP_VLAN:
759
			break;
760
761
		case OVS_ACTION_ATTR_PUSH_VLAN:
762
			vlan = nla_data(a);
763
			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
764
				return -EINVAL;
765
			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
766
				return -EINVAL;
767
			break;
768
769
		case OVS_ACTION_ATTR_SET:
770
			err = validate_set(a, key, sfa, &skip_copy);
771
			if (err)
772
				return err;
773
			break;
774
775
		case OVS_ACTION_ATTR_SAMPLE:
776
			err = validate_and_copy_sample(a, key, depth, sfa);
777
			if (err)
778
				return err;
779
			skip_copy = true;
780
			break;
781
782
		default:
783
			return -EINVAL;
784
		}
785
		if (!skip_copy) {
786
			err = copy_action(a, sfa);
787
			if (err)
788
				return err;
789
		}
790
	}
791
792
	if (rem > 0)
793
		return -EINVAL;
794
795
	return 0;
796
}
797
798
static void clear_stats(struct sw_flow *flow)
799
{
800
	flow->used = 0;
801
	flow->tcp_flags = 0;
802
	flow->packet_count = 0;
803
	flow->byte_count = 0;
804
}
805
806
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
807
{
808
	struct ovs_header *ovs_header = info->userhdr;
809
	struct nlattr **a = info->attrs;
810
	struct sw_flow_actions *acts;
811
	struct sk_buff *packet;
812
	struct sw_flow *flow;
813
	struct datapath *dp;
814
	struct ethhdr *eth;
815
	int len;
816
	int err;
817
	int key_len;
818
819
	err = -EINVAL;
820
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
821
	    !a[OVS_PACKET_ATTR_ACTIONS] ||
822
	    nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
823
		goto err;
824
825
	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
826
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
827
	err = -ENOMEM;
828
	if (!packet)
829
		goto err;
830
	skb_reserve(packet, NET_IP_ALIGN);
831
832
	memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
833
834
	skb_reset_mac_header(packet);
835
	eth = eth_hdr(packet);
836
837
	/* Normally, setting the skb 'protocol' field would be handled by a
838
	 * call to eth_type_trans(), but it assumes there's a sending
839
	 * device, which we may not have. */
840
	if (ntohs(eth->h_proto) >= 1536)
841
		packet->protocol = eth->h_proto;
842
	else
843
		packet->protocol = htons(ETH_P_802_2);
844
845
	/* Build an sw_flow for sending this packet. */
846
	flow = ovs_flow_alloc();
847
	err = PTR_ERR(flow);
848
	if (IS_ERR(flow))
849
		goto err_kfree_skb;
850
851
	err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
852
	if (err)
853
		goto err_flow_free;
854
855
	err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]);
856
	if (err)
857
		goto err_flow_free;
858
	acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
859
	err = PTR_ERR(acts);
860
	if (IS_ERR(acts))
861
		goto err_flow_free;
862
863
	err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
864
	rcu_assign_pointer(flow->sf_acts, acts);
865
	if (err)
866
		goto err_flow_free;
867
868
	OVS_CB(packet)->flow = flow;
869
	packet->priority = flow->key.phy.priority;
870
	skb_set_mark(packet, flow->key.phy.skb_mark);
871
872
	rcu_read_lock();
873
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
874
	err = -ENODEV;
875
	if (!dp)
876
		goto err_unlock;
877
878
	local_bh_disable();
879
	err = ovs_execute_actions(dp, packet);
880
	local_bh_enable();
881
	rcu_read_unlock();
882
883
	ovs_flow_free(flow);
884
	return err;
885
886
err_unlock:
887
	rcu_read_unlock();
888
err_flow_free:
889
	ovs_flow_free(flow);
890
err_kfree_skb:
891
	kfree_skb(packet);
892
err:
893
	return err;
894
}
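
/* A minimal sketch of what a userspace caller has to supply for
 * OVS_PACKET_CMD_EXECUTE, based on the checks above (values are
 * illustrative only):
 *
 *	struct ovs_header { dp_ifindex = <datapath ifindex> }
 *	OVS_PACKET_ATTR_PACKET   full Ethernet frame, at least ETH_HLEN bytes
 *	OVS_PACKET_ATTR_KEY      nested metadata for the flow key
 *	OVS_PACKET_ATTR_ACTIONS  nested actions to execute on the frame
 *
 * All three attributes are mandatory; the frame is then run through
 * ovs_execute_actions() much as if it had arrived on a port.
 */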
895
896
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
897
	[OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
898
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
899
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
900
};
901
902
static struct genl_ops dp_packet_genl_ops[] = {
903
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
904
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
905
	  .policy = packet_policy,
906
	  .doit = ovs_packet_cmd_execute
907
	}
908
};
909
910
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
911
{
912
	int i;
913
	struct flow_table *table = genl_dereference(dp->table);
914
915
	stats->n_flows = ovs_flow_tbl_count(table);
916
917
	stats->n_hit = stats->n_missed = stats->n_lost = 0;
918
	for_each_possible_cpu(i) {
919
		const struct dp_stats_percpu *percpu_stats;
920
		struct dp_stats_percpu local_stats;
921
		unsigned int start;
922
923
		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
924
925
		do {
926
			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
927
			local_stats = *percpu_stats;
928
		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
929
930
		stats->n_hit += local_stats.n_hit;
931
		stats->n_missed += local_stats.n_missed;
932
		stats->n_lost += local_stats.n_lost;
933
	}
934
}
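
/* get_dp_stats() folds the per-CPU counters into one struct ovs_dp_stats.
 * Each CPU's block is snapshotted under its u64_stats sequence counter
 * (u64_stats_fetch_begin_bh/retry_bh), so a concurrent writer on that CPU
 * just causes the read to be retried rather than producing a torn 64-bit
 * value.
 */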
935
936
static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
937
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
938
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
939
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
940
};
941
942
static struct genl_family dp_flow_genl_family = {
943
	.id = GENL_ID_GENERATE,
944
	.hdrsize = sizeof(struct ovs_header),
945
	.name = OVS_FLOW_FAMILY,
946
	.version = OVS_FLOW_VERSION,
947
	.maxattr = OVS_FLOW_ATTR_MAX,
948
	 SET_NETNSOK
949
};
950
951
static struct genl_multicast_group ovs_dp_flow_multicast_group = {
952
	.name = OVS_FLOW_MCGROUP
953
};
954
955
static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb);
956
static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
957
{
958
	const struct nlattr *a;
959
	struct nlattr *start;
960
	int err = 0, rem;
961
962
	start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
963
	if (!start)
964
		return -EMSGSIZE;
965
966
	nla_for_each_nested(a, attr, rem) {
967
		int type = nla_type(a);
968
		struct nlattr *st_sample;
969
970
		switch (type) {
971
		case OVS_SAMPLE_ATTR_PROBABILITY:
972
			if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a)))
973
				return -EMSGSIZE;
974
			break;
975
		case OVS_SAMPLE_ATTR_ACTIONS:
976
			st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
977
			if (!st_sample)
978
				return -EMSGSIZE;
979
			err = actions_to_attr(nla_data(a), nla_len(a), skb);
980
			if (err)
981
				return err;
982
			nla_nest_end(skb, st_sample);
983
			break;
984
		}
985
	}
986
987
	nla_nest_end(skb, start);
988
	return err;
989
}
990
991
static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
992
{
993
	const struct nlattr *ovs_key = nla_data(a);
994
	int key_type = nla_type(ovs_key);
995
	struct nlattr *start;
996
	int err;
997
998
	switch (key_type) {
999
	case OVS_KEY_ATTR_IPV4_TUNNEL:
1000
		start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
1001
		if (!start)
1002
			return -EMSGSIZE;
1003
1004
		err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key));
1005
		if (err)
1006
			return err;
1007
		nla_nest_end(skb, start);
1008
		break;
1009
	default:
1010
		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
1011
			return -EMSGSIZE;
1012
		break;
1013
	}
1014
1015
	return 0;
1016
}
1017
1018
static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
1019
{
1020
	const struct nlattr *a;
1021
	int rem, err;
1022
1023
	nla_for_each_attr(a, attr, len, rem) {
1024
		int type = nla_type(a);
1025
1026
		switch (type) {
1027
		case OVS_ACTION_ATTR_SET:
1028
			err = set_action_to_attr(a, skb);
1029
			if (err)
1030
				return err;
1031
			break;
1032
1033
		case OVS_ACTION_ATTR_SAMPLE:
1034
			err = sample_action_to_attr(a, skb);
1035
			if (err)
1036
				return err;
1037
			break;
1038
		default:
1039
			if (nla_put(skb, type, nla_len(a), nla_data(a)))
1040
				return -EMSGSIZE;
1041
			break;
1042
		}
1043
	}
1044
1045
	return 0;
1046
}
1047
1048
/* Called with genl_lock. */
1049
static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
1050
				  struct sk_buff *skb, u32 portid,
1051
				  u32 seq, u32 flags, u8 cmd)
1052
{
1053
	const int skb_orig_len = skb->len;
1054
	const struct sw_flow_actions *sf_acts;
1055
	struct nlattr *start;
1056
	struct ovs_flow_stats stats;
1057
	struct ovs_header *ovs_header;
1058
	struct nlattr *nla;
1059
	unsigned long used;
1060
	u8 tcp_flags;
1061
	int err;
1062
1063
	sf_acts = rcu_dereference_protected(flow->sf_acts,
1064
					    lockdep_genl_is_held());
1065
1066
	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
1067
	if (!ovs_header)
1068
		return -EMSGSIZE;
1069
1070
	ovs_header->dp_ifindex = get_dpifindex(dp);
1071
1072
	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
1073
	if (!nla)
1074
		goto nla_put_failure;
1075
	err = ovs_flow_to_nlattrs(&flow->key, skb);
1076
	if (err)
1077
		goto error;
1078
	nla_nest_end(skb, nla);
1079
1080
	spin_lock_bh(&flow->lock);
1081
	used = flow->used;
1082
	stats.n_packets = flow->packet_count;
1083
	stats.n_bytes = flow->byte_count;
1084
	tcp_flags = flow->tcp_flags;
1085
	spin_unlock_bh(&flow->lock);
1086
1087
	if (used &&
1088
	    nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
1089
		goto nla_put_failure;
1090
1091
	if (stats.n_packets &&
1092
	    nla_put(skb, OVS_FLOW_ATTR_STATS,
1093
		    sizeof(struct ovs_flow_stats), &stats))
1094
		goto nla_put_failure;
1095
1096
	if (tcp_flags &&
1097
	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
1098
		goto nla_put_failure;
1099
1100
	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
1101
	 * this is the first flow to be dumped into 'skb'.  This is unusual for
1102
	 * Netlink but individual action lists can be longer than
1103
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
1104
	 * The userspace caller can always fetch the actions separately if it
1105
	 * really wants them.  (Most userspace callers in fact don't care.)
1106
	 *
1107
	 * This can only fail for dump operations because the skb is always
1108
	 * properly sized for single flows.
1109
	 */
1110
	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
1111
	if (start) {
1112
		err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
1113
		if (!err)
1114
			nla_nest_end(skb, start);
1115
		else {
1116
			if (skb_orig_len)
1117
				goto error;
1118
1119
			nla_nest_cancel(skb, start);
1120
		}
1121
	} else if (skb_orig_len)
1122
		goto nla_put_failure;
1123
1124
	return genlmsg_end(skb, ovs_header);
1125
1126
nla_put_failure:
1127
	err = -EMSGSIZE;
1128
error:
1129
	genlmsg_cancel(skb, ovs_header);
1130
	return err;
1131
}
1132
1133
static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
1134
{
1135
	const struct sw_flow_actions *sf_acts;
1136
	int len;
1137
1138
	sf_acts = rcu_dereference_protected(flow->sf_acts,
1139
					    lockdep_genl_is_held());
1140
1141
	/* OVS_FLOW_ATTR_KEY */
1142
	len = nla_total_size(FLOW_BUFSIZE);
1143
	/* OVS_FLOW_ATTR_ACTIONS */
1144
	len += nla_total_size(sf_acts->actions_len);
1145
	/* OVS_FLOW_ATTR_STATS */
1146
	len += nla_total_size(sizeof(struct ovs_flow_stats));
1147
	/* OVS_FLOW_ATTR_TCP_FLAGS */
1148
	len += nla_total_size(1);
1149
	/* OVS_FLOW_ATTR_USED */
1150
	len += nla_total_size(8);
1151
1152
	len += NLMSG_ALIGN(sizeof(struct ovs_header));
1153
1154
	return genlmsg_new(len, GFP_KERNEL);
1155
}
1156
1157
static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
1158
					       struct datapath *dp,
1159
					       u32 portid, u32 seq, u8 cmd)
1160
{
1161
	struct sk_buff *skb;
1162
	int retval;
1163
1164
	skb = ovs_flow_cmd_alloc_info(flow);
1165
	if (!skb)
1166
		return ERR_PTR(-ENOMEM);
1167
1168
	retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
1169
	BUG_ON(retval < 0);
1170
	return skb;
1171
}
1172
1173
static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1174
{
1175
	struct nlattr **a = info->attrs;
1176
	struct ovs_header *ovs_header = info->userhdr;
1177
	struct sw_flow_key key;
1178
	struct sw_flow *flow;
1179
	struct sk_buff *reply;
1180
	struct datapath *dp;
1181
	struct flow_table *table;
1182
	struct sw_flow_actions *acts = NULL;
1183
	int error;
1184
	int key_len;
1185
1186
	/* Extract key. */
1187
	error = -EINVAL;
1188
	if (!a[OVS_FLOW_ATTR_KEY])
1189
		goto error;
1190
	error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1191
	if (error)
1192
		goto error;
1193
1194
	/* Validate actions. */
1195
	if (a[OVS_FLOW_ATTR_ACTIONS]) {
1196
		acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
1197
		error = PTR_ERR(acts);
1198
		if (IS_ERR(acts))
1199
			goto error;
1200
1201
		error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0, &acts);
1202
		if (error)
1203
			goto err_kfree;
1204
	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
1205
		error = -EINVAL;
1206
		goto error;
1207
	}
1208
1209
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1210
	error = -ENODEV;
1211
	if (!dp)
1212
		goto err_kfree;
1213
1214
	table = genl_dereference(dp->table);
1215
	flow = ovs_flow_tbl_lookup(table, &key, key_len);
1216
	if (!flow) {
1217
		/* Bail out if we're not allowed to create a new flow. */
1218
		error = -ENOENT;
1219
		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
1220
			goto err_kfree;
1221
1222
		/* Expand table, if necessary, to make room. */
1223
		if (ovs_flow_tbl_need_to_expand(table)) {
1224
			struct flow_table *new_table;
1225
1226
			new_table = ovs_flow_tbl_expand(table);
1227
			if (!IS_ERR(new_table)) {
1228
				rcu_assign_pointer(dp->table, new_table);
1229
				ovs_flow_tbl_deferred_destroy(table);
1230
				table = genl_dereference(dp->table);
1231
			}
1232
		}
1233
1234
		/* Allocate flow. */
1235
		flow = ovs_flow_alloc();
1236
		if (IS_ERR(flow)) {
1237
			error = PTR_ERR(flow);
1238
			goto err_kfree;
1239
		}
1240
		clear_stats(flow);
1241
1242
		rcu_assign_pointer(flow->sf_acts, acts);
1243
1244
		/* Put flow in bucket. */
1245
		ovs_flow_tbl_insert(table, flow, &key, key_len);
1246
1247
		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1248
						info->snd_seq,
1249
						OVS_FLOW_CMD_NEW);
1250
	} else {
1251
		/* We found a matching flow. */
1252
		struct sw_flow_actions *old_acts;
1253
1254
		/* Bail out if we're not allowed to modify an existing flow.
1255
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1256
		 * because Generic Netlink treats the latter as a dump
1257
		 * request.  We also accept NLM_F_EXCL in case that bug ever
1258
		 * gets fixed.
1259
		 */
1260
		error = -EEXIST;
1261
		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1262
		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1263
			goto err_kfree;
1264
1265
		/* Update actions. */
1266
		old_acts = rcu_dereference_protected(flow->sf_acts,
1267
						     lockdep_genl_is_held());
1268
		rcu_assign_pointer(flow->sf_acts, acts);
1269
		ovs_flow_deferred_free_acts(old_acts);
1270
1271
		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1272
					       info->snd_seq, OVS_FLOW_CMD_NEW);
1273
1274
		/* Clear stats. */
1275
		if (a[OVS_FLOW_ATTR_CLEAR]) {
1276
			spin_lock_bh(&flow->lock);
1277
			clear_stats(flow);
1278
			spin_unlock_bh(&flow->lock);
1279
		}
1280
	}
1281
1282
	if (!IS_ERR(reply))
1283
		genl_notify(reply, genl_info_net(info), info->snd_portid,
1284
			   ovs_dp_flow_multicast_group.id, info->nlhdr,
1285
			   GFP_KERNEL);
1286
	else
1287
		netlink_set_err(GENL_SOCK(sock_net(skb->sk)), 0,
1288
				ovs_dp_flow_multicast_group.id,	PTR_ERR(reply));
1289
	return 0;
1290
1291
err_kfree:
1292
	kfree(acts);
1293
error:
1294
	return error;
1295
}
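
/* ovs_flow_cmd_new_or_set() backs both OVS_FLOW_CMD_NEW and OVS_FLOW_CMD_SET:
 * if no flow matches the key, a new one is inserted (expanding the flow table
 * first when it is getting full); if one does match, its actions are swapped
 * in with RCU and the old actions freed after a grace period, with
 * OVS_FLOW_ATTR_CLEAR optionally resetting the statistics.  Either way the
 * result is announced on the flow multicast group.
 */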
1296
1297
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1298
{
1299
	struct nlattr **a = info->attrs;
1300
	struct ovs_header *ovs_header = info->userhdr;
1301
	struct sw_flow_key key;
1302
	struct sk_buff *reply;
1303
	struct sw_flow *flow;
1304
	struct datapath *dp;
1305
	struct flow_table *table;
1306
	int err;
1307
	int key_len;
1308
1309
	if (!a[OVS_FLOW_ATTR_KEY])
1310
		return -EINVAL;
1311
	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1312
	if (err)
1313
		return err;
1314
1315
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1316
	if (!dp)
1317
		return -ENODEV;
1318
1319
	table = genl_dereference(dp->table);
1320
	flow = ovs_flow_tbl_lookup(table, &key, key_len);
1321
	if (!flow)
1322
		return -ENOENT;
1323
1324
	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1325
					info->snd_seq, OVS_FLOW_CMD_NEW);
1326
	if (IS_ERR(reply))
1327
		return PTR_ERR(reply);
1328
1329
	return genlmsg_reply(reply, info);
1330
}
1331
1332
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1333
{
1334
	struct nlattr **a = info->attrs;
1335
	struct ovs_header *ovs_header = info->userhdr;
1336
	struct sw_flow_key key;
1337
	struct sk_buff *reply;
1338
	struct sw_flow *flow;
1339
	struct datapath *dp;
1340
	struct flow_table *table;
1341
	int err;
1342
	int key_len;
1343
1344
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1345
	if (!dp)
1346
		return -ENODEV;
1347
1348
	if (!a[OVS_FLOW_ATTR_KEY])
1349
		return flush_flows(dp);
1350
1351
	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1352
	if (err)
1353
		return err;
1354
1355
	table = genl_dereference(dp->table);
1356
	flow = ovs_flow_tbl_lookup(table, &key, key_len);
1357
	if (!flow)
1358
		return -ENOENT;
1359
1360
	reply = ovs_flow_cmd_alloc_info(flow);
1361
	if (!reply)
1362
		return -ENOMEM;
1363
1364
	ovs_flow_tbl_remove(table, flow);
1365
1366
	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
1367
				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
1368
	BUG_ON(err < 0);
1369
1370
	ovs_flow_deferred_free(flow);
1371
1372
	genl_notify(reply, genl_info_net(info), info->snd_portid,
1373
		    ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1374
	return 0;
1375
}
1376
1377
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1378
{
1379
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1380
	struct datapath *dp;
1381
	struct flow_table *table;
1382
1383
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1384
	if (!dp)
1385
		return -ENODEV;
1386
1387
	table = genl_dereference(dp->table);
1388
1389
	for (;;) {
1390
		struct sw_flow *flow;
1391
		u32 bucket, obj;
1392
1393
		bucket = cb->args[0];
1394
		obj = cb->args[1];
1395
		flow = ovs_flow_tbl_next(table, &bucket, &obj);
1396
		if (!flow)
1397
			break;
1398
1399
		if (ovs_flow_cmd_fill_info(flow, dp, skb,
1400
					   NETLINK_CB(cb->skb).portid,
1401
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1402
					   OVS_FLOW_CMD_NEW) < 0)
1403
			break;
1404
1405
		cb->args[0] = bucket;
1406
		cb->args[1] = obj;
1407
	}
1408
	return skb->len;
1409
}
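
/* Flow dumps are resumable: the table bucket and the offset within it are
 * stashed in cb->args[0] and cb->args[1] after each flow is emitted, so the
 * next invocation of the dump callback continues from the first flow that
 * did not fit in the previous skb.
 */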
1410
1411
static struct genl_ops dp_flow_genl_ops[] = {
1412
	{ .cmd = OVS_FLOW_CMD_NEW,
1413
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1414
	  .policy = flow_policy,
1415
	  .doit = ovs_flow_cmd_new_or_set
1416
	},
1417
	{ .cmd = OVS_FLOW_CMD_DEL,
1418
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1419
	  .policy = flow_policy,
1420
	  .doit = ovs_flow_cmd_del
1421
	},
1422
	{ .cmd = OVS_FLOW_CMD_GET,
1423
	  .flags = 0,		    /* OK for unprivileged users. */
1424
	  .policy = flow_policy,
1425
	  .doit = ovs_flow_cmd_get,
1426
	  .dumpit = ovs_flow_cmd_dump
1427
	},
1428
	{ .cmd = OVS_FLOW_CMD_SET,
1429
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1430
	  .policy = flow_policy,
1431
	  .doit = ovs_flow_cmd_new_or_set,
1432
	},
1433
};
1434
1435
static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1436
#ifdef HAVE_NLA_NUL_STRING
1437
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1438
#endif
1439
	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1440
};
1441
1442
static struct genl_family dp_datapath_genl_family = {
1443
	.id = GENL_ID_GENERATE,
1444
	.hdrsize = sizeof(struct ovs_header),
1445
	.name = OVS_DATAPATH_FAMILY,
1446
	.version = OVS_DATAPATH_VERSION,
1447
	.maxattr = OVS_DP_ATTR_MAX,
1448
	 SET_NETNSOK
1449
};
1450
1451
static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
1452
	.name = OVS_DATAPATH_MCGROUP
1453
};
1454
1455
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1456
				u32 portid, u32 seq, u32 flags, u8 cmd)
1457
{
1458
	struct ovs_header *ovs_header;
1459
	struct ovs_dp_stats dp_stats;
1460
	int err;
1461
1462
	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1463
				   flags, cmd);
1464
	if (!ovs_header)
1465
		goto error;
1466
1467
	ovs_header->dp_ifindex = get_dpifindex(dp);
1468
1469
	rcu_read_lock();
1470
	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1471
	rcu_read_unlock();
1472
	if (err)
1473
		goto nla_put_failure;
1474
1475
	get_dp_stats(dp, &dp_stats);
1476
	if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
1477
		goto nla_put_failure;
1478
1479
	return genlmsg_end(skb, ovs_header);
1480
1481
nla_put_failure:
1482
	genlmsg_cancel(skb, ovs_header);
1483
error:
1484
	return -EMSGSIZE;
1485
}
1486
1487
static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1488
					     u32 seq, u8 cmd)
1489
{
1490
	struct sk_buff *skb;
1491
	int retval;
1492
1493
	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1494
	if (!skb)
1495
		return ERR_PTR(-ENOMEM);
1496
1497
	retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
1498
	if (retval < 0) {
1499
		kfree_skb(skb);
1500
		return ERR_PTR(retval);
1501
	}
1502
	return skb;
1503
}
1504
1505
static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1506
{
1507
	return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1);
1508
}
1509
1510
/* Called with genl_mutex and optionally with RTNL lock also. */
1511
static struct datapath *lookup_datapath(struct net *net,
1512
					struct ovs_header *ovs_header,
1513
					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1514
{
1515
	struct datapath *dp;
1516
1517
	if (!a[OVS_DP_ATTR_NAME])
1518
		dp = get_dp(net, ovs_header->dp_ifindex);
1519
	else {
1520
		struct vport *vport;
1521
1522
		rcu_read_lock();
1523
		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1524
		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1525
		rcu_read_unlock();
1526
	}
1527
	return dp ? dp : ERR_PTR(-ENODEV);
1528
}
1529
1530
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1531
{
1532
	struct nlattr **a = info->attrs;
1533
	struct vport_parms parms;
1534
	struct sk_buff *reply;
1535
	struct datapath *dp;
1536
	struct vport *vport;
1537
	struct ovs_net *ovs_net;
1538
	int err, i;
1539
1540
	err = -EINVAL;
1541
	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1542
		goto err;
1543
1544
	err = ovs_dp_cmd_validate(a);
1545
	if (err)
1546
		goto err;
1547
1548
	rtnl_lock();
1549
1550
	err = -ENOMEM;
1551
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1552
	if (dp == NULL)
1553
		goto err_unlock_rtnl;
1554
1555
	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1556
1557
	/* Allocate table. */
1558
	err = -ENOMEM;
1559
	rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
1560
	if (!dp->table)
1561
		goto err_free_dp;
1562
1563
	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1564
	if (!dp->stats_percpu) {
1565
		err = -ENOMEM;
1566
		goto err_destroy_table;
1567
	}
1568
1569
	dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1570
			    GFP_KERNEL);
1571
	if (!dp->ports) {
1572
		err = -ENOMEM;
1573
		goto err_destroy_percpu;
1574
	}
1575
1576
	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1577
		INIT_HLIST_HEAD(&dp->ports[i]);
1578
1579
	/* Set up our datapath device. */
1580
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1581
	parms.type = OVS_VPORT_TYPE_INTERNAL;
1582
	parms.options = NULL;
1583
	parms.dp = dp;
1584
	parms.port_no = OVSP_LOCAL;
1585
	parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
1586
1587
	vport = new_vport(&parms);
1588
	if (IS_ERR(vport)) {
1589
		err = PTR_ERR(vport);
1590
		if (err == -EBUSY)
1591
			err = -EEXIST;
1592
1593
		goto err_destroy_ports_array;
1594
	}
1595
1596
	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1597
				      info->snd_seq, OVS_DP_CMD_NEW);
1598
	err = PTR_ERR(reply);
1599
	if (IS_ERR(reply))
1600
		goto err_destroy_local_port;
1601
1602
	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1603
	list_add_tail(&dp->list_node, &ovs_net->dps);
1604
1605
	rtnl_unlock();
1606
1607
	genl_notify(reply, genl_info_net(info), info->snd_portid,
1608
		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
1609
		    GFP_KERNEL);
1610
	return 0;
1611
1612
err_destroy_local_port:
1613
	ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
1614
err_destroy_ports_array:
1615
	kfree(dp->ports);
1616
err_destroy_percpu:
1617
	free_percpu(dp->stats_percpu);
1618
err_destroy_table:
1619
	ovs_flow_tbl_destroy(genl_dereference(dp->table));
1620
err_free_dp:
1621
	release_net(ovs_dp_get_net(dp));
1622
	kfree(dp);
1623
err_unlock_rtnl:
1624
	rtnl_unlock();
1625
err:
1626
	return err;
1627
}
1628
1629
/* Called with genl_mutex. */
1630
static void __dp_destroy(struct datapath *dp)
1631
{
1632
	int i;
1633
1634
	rtnl_lock();
1635
1636
	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1637
		struct vport *vport;
1638
		struct hlist_node *n;
1639
1640
		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1641
			if (vport->port_no != OVSP_LOCAL)
1642
				ovs_dp_detach_port(vport);
1643
	}
1644
1645
	list_del(&dp->list_node);
1646
	ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
1647
1648
	/* rtnl_unlock() will wait until all the references to devices that
1649
	 * are pending unregistration have been dropped.  We do it here to
1650
	 * ensure that any internal devices (which contain DP pointers) are
1651
	 * fully destroyed before freeing the datapath.
1652
	 */
1653
	rtnl_unlock();
1654
1655
	call_rcu(&dp->rcu, destroy_dp_rcu);
1656
}
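
/* Teardown order matters here: every non-local vport is detached first, then
 * the OVSP_LOCAL port, and only after rtnl_unlock() has let pending device
 * unregistration finish is destroy_dp_rcu() scheduled, so the flow table,
 * per-CPU stats and port array are only freed once existing RCU readers are
 * done with them.
 */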
1657
1658
static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1659
{
1660
	struct sk_buff *reply;
1661
	struct datapath *dp;
1662
	int err;
1663
1664
	err = ovs_dp_cmd_validate(info->attrs);
1665
	if (err)
1666
		return err;
1667
1668
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1669
	err = PTR_ERR(dp);
1670
	if (IS_ERR(dp))
1671
		return err;
1672
1673
	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1674
				      info->snd_seq, OVS_DP_CMD_DEL);
1675
	err = PTR_ERR(reply);
1676
	if (IS_ERR(reply))
1677
		return err;
1678
1679
	__dp_destroy(dp);
1680
1681
	genl_notify(reply, genl_info_net(info), info->snd_portid,
1682
		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
1683
		    GFP_KERNEL);
1684
1685
	return 0;
1686
}
1687
1688
static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1689
{
1690
	struct sk_buff *reply;
1691
	struct datapath *dp;
1692
	int err;
1693
1694
	err = ovs_dp_cmd_validate(info->attrs);
1695
	if (err)
1696
		return err;
1697
1698
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1699
	if (IS_ERR(dp))
1700
		return PTR_ERR(dp);
1701
1702
	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1703
				      info->snd_seq, OVS_DP_CMD_NEW);
1704
	if (IS_ERR(reply)) {
1705
		err = PTR_ERR(reply);
1706
		netlink_set_err(GENL_SOCK(sock_net(skb->sk)), 0,
1707
				ovs_dp_datapath_multicast_group.id, err);
1708
		return 0;
1709
	}
1710
1711
	genl_notify(reply, genl_info_net(info), info->snd_portid,
1712
		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
1713
		    GFP_KERNEL);
1714
1715
	return 0;
1716
}
1717
1718
static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1719
{
1720
	struct sk_buff *reply;
1721
	struct datapath *dp;
1722
	int err;
1723
1724
	err = ovs_dp_cmd_validate(info->attrs);
1725
	if (err)
1726
		return err;
1727
1728
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1729
	if (IS_ERR(dp))
1730
		return PTR_ERR(dp);
1731
1732
	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1733
				      info->snd_seq, OVS_DP_CMD_NEW);
1734
	if (IS_ERR(reply))
1735
		return PTR_ERR(reply);
1736
1737
	return genlmsg_reply(reply, info);
1738
}
1739
1740
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1741
{
1742
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1743
	struct datapath *dp;
1744
	int skip = cb->args[0];
1745
	int i = 0;
1746
1747
	list_for_each_entry(dp, &ovs_net->dps, list_node) {
1748
		if (i >= skip &&
1749
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1750
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1751
					 OVS_DP_CMD_NEW) < 0)
1752
			break;
1753
		i++;
1754
	}
1755
1756
	cb->args[0] = i;
1757
1758
	return skb->len;
1759
}
1760
1761
static struct genl_ops dp_datapath_genl_ops[] = {
1762
	{ .cmd = OVS_DP_CMD_NEW,
1763
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1764
	  .policy = datapath_policy,
1765
	  .doit = ovs_dp_cmd_new
1766
	},
1767
	{ .cmd = OVS_DP_CMD_DEL,
1768
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1769
	  .policy = datapath_policy,
1770
	  .doit = ovs_dp_cmd_del
1771
	},
1772
	{ .cmd = OVS_DP_CMD_GET,
1773
	  .flags = 0,		    /* OK for unprivileged users. */
1774
	  .policy = datapath_policy,
1775
	  .doit = ovs_dp_cmd_get,
1776
	  .dumpit = ovs_dp_cmd_dump
1777
	},
1778
	{ .cmd = OVS_DP_CMD_SET,
1779
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1780
	  .policy = datapath_policy,
1781
	  .doit = ovs_dp_cmd_set,
1782
	},
1783
};
1784
1785
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
1786
#ifdef HAVE_NLA_NUL_STRING
1787
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1788
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
1789
#else
1790
	[OVS_VPORT_ATTR_STATS] = { .minlen = sizeof(struct ovs_vport_stats) },
1791
#endif
1792
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1793
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1794
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1795
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1796
};
1797
1798
static struct genl_family dp_vport_genl_family = {
1799
	.id = GENL_ID_GENERATE,
1800
	.hdrsize = sizeof(struct ovs_header),
1801
	.name = OVS_VPORT_FAMILY,
1802
	.version = OVS_VPORT_VERSION,
1803
	.maxattr = OVS_VPORT_ATTR_MAX,
1804
	 SET_NETNSOK
1805
};
1806
1807
struct genl_multicast_group ovs_dp_vport_multicast_group = {
1808
	.name = OVS_VPORT_MCGROUP
1809
};
1810
1811
/* Called with RTNL lock or RCU read lock. */
1812
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1813
				   u32 portid, u32 seq, u32 flags, u8 cmd)
1814
{
1815
	struct ovs_header *ovs_header;
1816
	struct ovs_vport_stats vport_stats;
1817
	int err;
1818
1819
	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1820
				 flags, cmd);
1821
	if (!ovs_header)
1822
		return -EMSGSIZE;
1823
1824
	ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1825
1826
	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1827
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1828
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
1829
	    nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
1830
		goto nla_put_failure;
1831
1832
	ovs_vport_get_stats(vport, &vport_stats);
1833
	if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1834
		    &vport_stats))
1835
		goto nla_put_failure;
1836
1837
	err = ovs_vport_get_options(vport, skb);
1838
	if (err == -EMSGSIZE)
1839
		goto error;
1840
1841
	return genlmsg_end(skb, ovs_header);
1842
1843
nla_put_failure:
1844
	err = -EMSGSIZE;
1845
error:
1846
	genlmsg_cancel(skb, ovs_header);
1847
	return err;
1848
}
1849
1850
/* Called with RTNL lock or RCU read lock. */
1851
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1852
					 u32 seq, u8 cmd)
1853
{
1854
	struct sk_buff *skb;
1855
	int retval;
1856
1857
	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1858
	if (!skb)
1859
		return ERR_PTR(-ENOMEM);
1860
1861
	retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1862
	if (retval < 0) {
1863
		kfree_skb(skb);
1864
		return ERR_PTR(retval);
1865
	}
1866
	return skb;
1867
}
1868
1869
static int ovs_vport_cmd_validate(struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1870
{
1871
	return CHECK_NUL_STRING(a[OVS_VPORT_ATTR_NAME], IFNAMSIZ - 1);
1872
}
1873
1874
/* Called with RTNL lock or RCU read lock. */
1875
static struct vport *lookup_vport(struct net *net,
1876
				  struct ovs_header *ovs_header,
1877
				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1878
{
1879
	struct datapath *dp;
1880
	struct vport *vport;
1881
1882
	if (a[OVS_VPORT_ATTR_NAME]) {
1883
		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1884
		if (!vport)
1885
			return ERR_PTR(-ENODEV);
1886
		if (ovs_header->dp_ifindex &&
1887
		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1888
			return ERR_PTR(-ENODEV);
1889
		return vport;
1890
	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1891
		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1892
1893
		if (port_no >= DP_MAX_PORTS)
1894
			return ERR_PTR(-EFBIG);
1895
1896
		dp = get_dp(net, ovs_header->dp_ifindex);
1897
		if (!dp)
1898
			return ERR_PTR(-ENODEV);
1899
1900
		vport = ovs_vport_rtnl_rcu(dp, port_no);
1901
		if (!vport)
1902
			return ERR_PTR(-ENODEV);
1903
		return vport;
1904
	} else
1905
		return ERR_PTR(-EINVAL);
1906
}
1907
1908
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	u32 port_no;
	int err;

	err = -EINVAL;
	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
		goto exit;

	err = ovs_vport_cmd_validate(a);
	if (err)
		goto exit;

	rtnl_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock;

	if (a[OVS_VPORT_ATTR_PORT_NO]) {
		port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		err = -EFBIG;
		if (port_no >= DP_MAX_PORTS)
			goto exit_unlock;

		vport = ovs_vport_rtnl(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock;
	} else {
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock;
			}
			vport = ovs_vport_rtnl(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
	parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock;

	err = 0;
	if (a[OVS_VPORT_ATTR_STATS])
		ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));

	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
					 OVS_VPORT_CMD_NEW);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		ovs_dp_detach_port(vport);
		goto exit_unlock;
	}
	genl_notify(reply, genl_info_net(info), info->snd_portid,
		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
	rtnl_unlock();
exit:
	return err;
}

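/* Handles OVS_VPORT_CMD_SET: updates an existing vport's options, stats and
 * upcall port under RTNL; the vport's type cannot be changed. */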
static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	err = ovs_vport_cmd_validate(a);
	if (err)
		goto exit;

	rtnl_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock;

	err = 0;
	if (a[OVS_VPORT_ATTR_TYPE] &&
	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
		err = -EINVAL;

	if (!err && a[OVS_VPORT_ATTR_OPTIONS])
		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
	if (err)
		goto exit_unlock;

	if (a[OVS_VPORT_ATTR_STATS])
		ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));

	if (a[OVS_VPORT_ATTR_UPCALL_PID])
		vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);

	reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
					 info->snd_seq, OVS_VPORT_CMD_NEW);
	if (IS_ERR(reply)) {
		netlink_set_err(GENL_SOCK(sock_net(skb->sk)), 0,
				ovs_dp_vport_multicast_group.id, PTR_ERR(reply));
		goto exit_unlock;
	}

	genl_notify(reply, genl_info_net(info), info->snd_portid,
		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
	rtnl_unlock();
exit:
	return err;
}

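/* Handles OVS_VPORT_CMD_DEL: detaches a vport from its datapath.  The local
 * port (OVSP_LOCAL) cannot be removed this way. */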
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	err = ovs_vport_cmd_validate(a);
	if (err)
		goto exit;

	rtnl_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock;

	if (vport->port_no == OVSP_LOCAL) {
		err = -EINVAL;
		goto exit_unlock;
	}

	reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
					 info->snd_seq, OVS_VPORT_CMD_DEL);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto exit_unlock;

	err = 0;
	ovs_dp_detach_port(vport);

	genl_notify(reply, genl_info_net(info), info->snd_portid,
		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
	rtnl_unlock();
exit:
	return err;
}

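/* Handles OVS_VPORT_CMD_GET: looks up a single vport under RCU and replies
 * with its current attributes. */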
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	err = ovs_vport_cmd_validate(a);
	if (err)
		goto exit;

	rcu_read_lock();
	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock;

	reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
					 info->snd_seq, OVS_VPORT_CMD_NEW);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto exit_unlock;

	rcu_read_unlock();

	return genlmsg_reply(reply, info);

exit_unlock:
	rcu_read_unlock();
exit:
	return err;
}

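/* Dump callback for OVS_VPORT_CMD_GET: walks the datapath's vport hash table,
 * resuming from the bucket and offset saved in cb->args[]. */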
static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
	int bucket = cb->args[0], skip = cb->args[1];
	int i, j = 0;

	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	rcu_read_lock();
	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;

		j = 0;
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
			if (j >= skip &&
			    ovs_vport_cmd_fill_info(vport, skb,
						    NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    NLM_F_MULTI,
						    OVS_VPORT_CMD_NEW) < 0)
				goto out;

			j++;
		}
		skip = 0;
	}
out:
	rcu_read_unlock();

	cb->args[0] = i;
	cb->args[1] = j;

	return skb->len;
}

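/* Generic Netlink operations for the vport family. */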
static struct genl_ops dp_vport_genl_ops[] = {
	{ .cmd = OVS_VPORT_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_new
	},
	{ .cmd = OVS_VPORT_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_del
	},
	{ .cmd = OVS_VPORT_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_set,
	},
};

struct genl_family_and_ops {
	struct genl_family *family;
	struct genl_ops *ops;
	int n_ops;
	struct genl_multicast_group *group;
};

static const struct genl_family_and_ops dp_genl_families[] = {
	{ &dp_datapath_genl_family,
	  dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
	  &ovs_dp_datapath_multicast_group },
	{ &dp_vport_genl_family,
	  dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
	  &ovs_dp_vport_multicast_group },
	{ &dp_flow_genl_family,
	  dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
	  &ovs_dp_flow_multicast_group },
	{ &dp_packet_genl_family,
	  dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
	  NULL },
};

static void dp_unregister_genl(int n_families)
{
	int i;

	for (i = 0; i < n_families; i++)
		genl_unregister_family(dp_genl_families[i].family);
}

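/* Registers each Generic Netlink family in dp_genl_families[] along with its
 * multicast group, unregistering everything again on failure. */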
static int dp_register_genl(void)
{
	int n_registered;
	int err;
	int i;

	n_registered = 0;
	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
		const struct genl_family_and_ops *f = &dp_genl_families[i];

		err = genl_register_family_with_ops(f->family, f->ops,
						    f->n_ops);
		if (err)
			goto error;
		n_registered++;

		if (f->group) {
			err = genl_register_mc_group(f->family, f->group);
			if (err)
				goto error;
		}
	}

	return 0;

error:
	dp_unregister_genl(n_registered);
	return err;
}

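/* Replaces the flow table of every datapath in every network namespace with a
 * freshly rehashed copy; invoked via genl_exec() from the periodic rehash
 * work below. */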
static int __rehash_flow_table(void *dummy)
{
	struct datapath *dp;
	struct net *net;

	rtnl_lock();
	for_each_net(net) {
		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

		list_for_each_entry(dp, &ovs_net->dps, list_node) {
			struct flow_table *old_table = genl_dereference(dp->table);
			struct flow_table *new_table;

			new_table = ovs_flow_tbl_rehash(old_table);
			if (!IS_ERR(new_table)) {
				rcu_assign_pointer(dp->table, new_table);
				ovs_flow_tbl_deferred_destroy(old_table);
			}
		}
	}
	rtnl_unlock();
	return 0;
}

static void rehash_flow_table(struct work_struct *work)
{
	genl_exec(__rehash_flow_table, NULL);
	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
}

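/* Destroys every datapath in one namespace; invoked via genl_exec() from the
 * pernet exit hook below. */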
static int dp_destroy_all(void *data)
{
	struct datapath *dp, *dp_next;
	struct ovs_net *ovs_net = data;

	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
		__dp_destroy(dp);

	return 0;
}

static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

	INIT_LIST_HEAD(&ovs_net->dps);
	return 0;
}

static void __net_exit ovs_exit_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

	genl_exec(dp_destroy_all, ovs_net);
}

static struct pernet_operations ovs_net_ops = {
	.init = ovs_init_net,
	.exit = ovs_exit_net,
	.id   = &ovs_net_id,
	.size = sizeof(struct ovs_net),
};

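/* Module entry point: initializes the flow and vport subsystems, registers
 * the pernet operations, netdevice notifier and Generic Netlink families,
 * and kicks off the periodic flow-table rehash. */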
static int __init dp_init(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
		VERSION);

	err = genl_exec_init();
	if (err)
		goto error;

	err = ovs_workqueues_init();
	if (err)
		goto error_genl_exec;

	err = ovs_flow_init();
	if (err)
		goto error_wq;

	err = ovs_vport_init();
	if (err)
		goto error_flow_exit;

	err = register_pernet_device(&ovs_net_ops);
	if (err)
		goto error_vport_exit;

	err = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (err)
		goto error_netns_exit;

	err = dp_register_genl();
	if (err < 0)
		goto error_unreg_notifier;

	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);

	return 0;

error_unreg_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
	unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
	ovs_vport_exit();
error_flow_exit:
	ovs_flow_exit();
error_wq:
	ovs_workqueues_exit();
error_genl_exec:
	genl_exec_exit();
error:
	return err;
}

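/* Module exit: cancels the rehash work and unwinds dp_init() in reverse
 * order. */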
static void dp_cleanup(void)
{
	cancel_delayed_work_sync(&rehash_flow_wq);
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
	unregister_pernet_device(&ovs_net_ops);
	rcu_barrier();
	ovs_vport_exit();
	ovs_flow_exit();
	ovs_workqueues_exit();
	genl_exec_exit();
}

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");
MODULE_VERSION(VERSION);