31
31
#include <net/icmp.h>
32
32
#include <net/ip.h>
33
33
#include <net/udp.h>
34
#include <net/ip_tunnels.h>
36
#include <net/rtnetlink.h>
37
#include <net/route.h>
38
#include <net/dsfield.h>
39
#include <net/inet_ecn.h>
40
#include <net/net_namespace.h>
41
#include <net/netns/generic.h>
42
#include <net/vxlan.h>
35
44
#include "datapath.h"
36
45
#include "tunnel.h"
39
#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
42
* struct vxlanhdr - VXLAN header
43
* @vx_flags: Must have the exact value %VXLAN_FLAGS.
44
* @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed.
51
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
53
static inline int vxlan_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
48
#define OVS_VXLAN_RCV_PRIORITY 8
59
51
* struct vxlan_port - Keeps track of open UDP ports
60
* @list: list element.
61
* @vport: vport for the tunnel.
62
* @socket: The socket created for this port number.
52
* @vh: vxlan_handler created for the port.
64
55
struct vxlan_port {
65
struct list_head list;
67
struct socket *vxlan_rcv_socket;
56
struct vxlan_handler *vh;
71
static LIST_HEAD(vxlan_ports);
73
static struct vxlan_port *vxlan_find_port(struct net *net, __be16 port)
75
struct vxlan_port *vxlan_port;
77
list_for_each_entry_rcu(vxlan_port, &vxlan_ports, list) {
78
struct tnl_vport *tnl_vport = tnl_vport_priv(vxlan_port->vport);
80
if (tnl_vport->dst_port == port &&
81
net_eq(sock_net(vxlan_port->vxlan_rcv_socket->sk), net))
88
static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
90
return (struct vxlanhdr *)(udp_hdr(skb) + 1);
93
static void vxlan_build_header(const struct vport *vport,
97
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
98
struct udphdr *udph = udp_hdr(skb);
99
struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
100
const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
102
udph->dest = tnl_vport->dst_port;
103
udph->source = htons(ovs_tnl_get_src_port(skb));
105
udph->len = htons(skb->len - skb_transport_offset(skb));
107
vxh->vx_flags = htonl(VXLAN_FLAGS);
108
vxh->vx_vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
60
static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
62
return vport_priv(vport);
111
65
/* Called with rcu_read_lock and BH disabled. */
112
static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
66
static int vxlan_rcv(struct vxlan_handler *vh, struct sk_buff *skb, __be32 vx_vni)
114
struct vxlan_port *vxlan_vport;
115
struct vxlanhdr *vxh;
68
struct vport *vport = vh->data;
116
69
struct iphdr *iph;
117
70
struct ovs_key_ipv4_tunnel tun_key;
120
vxlan_vport = vxlan_find_port(dev_net(skb->dev), udp_hdr(skb)->dest);
121
if (unlikely(!vxlan_vport))
124
if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN)))
127
vxh = vxlan_hdr(skb);
128
if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) ||
129
vxh->vx_vni & htonl(0xff)))
132
__skb_pull(skb, VXLAN_HLEN);
133
skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + ETH_HLEN);
135
key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8);
137
73
/* Save outer tunnel values */
138
74
iph = ip_hdr(skb);
139
tnl_tun_key_init(&tun_key, iph, key, OVS_TNL_F_KEY);
140
OVS_CB(skb)->tun_key = &tun_key;
142
ovs_tnl_rcv(vxlan_vport->vport, skb);
151
/* Random value. Irrelevant as long as it's not 0 since we set the handler. */
152
#define UDP_ENCAP_VXLAN 1
153
static int vxlan_socket_init(struct vxlan_port *vxlan_port, struct net *net)
156
struct sockaddr_in sin;
157
struct tnl_vport *tnl_vport = tnl_vport_priv(vxlan_port->vport);
159
err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
160
&vxlan_port->vxlan_rcv_socket);
164
/* release net ref. */
165
sk_change_net(vxlan_port->vxlan_rcv_socket->sk, net);
167
sin.sin_family = AF_INET;
168
sin.sin_addr.s_addr = htonl(INADDR_ANY);
169
sin.sin_port = tnl_vport->dst_port;
171
err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin,
172
sizeof(struct sockaddr_in));
176
udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN;
177
udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv;
184
sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk);
186
pr_warn("cannot register vxlan protocol handler\n");
190
static void free_port_rcu(struct rcu_head *rcu)
192
struct vxlan_port *vxlan_port = container_of(rcu,
193
struct vxlan_port, rcu);
198
static void vxlan_tunnel_release(struct vxlan_port *vxlan_port)
203
list_del_rcu(&vxlan_port->list);
205
sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk);
206
call_rcu(&vxlan_port->rcu, free_port_rcu);
209
static int vxlan_tunnel_setup(struct net *net, struct vport *vport,
210
struct nlattr *options)
75
key = cpu_to_be64(ntohl(vx_vni) >> 8);
76
tnl_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
78
ovs_vport_receive(vport, skb, &tun_key);
82
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
84
struct vxlan_port *vxlan_port = vxlan_vport(vport);
85
__be16 dst_port = inet_sport(vxlan_port->vh->vs->sock->sk);
87
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
92
static void vxlan_tnl_destroy(struct vport *vport)
94
struct vxlan_port *vxlan_port = vxlan_vport(vport);
96
vxlan_handler_put(vxlan_port->vh);
98
ovs_vport_deferred_free(vport);
101
static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
103
struct net *net = ovs_dp_get_net(parms->dp);
104
struct nlattr *options = parms->options;
212
105
struct vxlan_port *vxlan_port;
213
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
106
struct vxlan_handler *vh;
214
108
struct nlattr *a;
223
116
a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
224
117
if (a && nla_len(a) == sizeof(u16)) {
225
118
dst_port = nla_get_u16(a);
227
120
/* Require destination port from userspace. */
232
/* Verify if we already have a socket created for this port */
233
vxlan_port = vxlan_find_port(net, htons(dst_port));
239
/* Add a new socket for this port */
240
vxlan_port = kzalloc(sizeof(struct vxlan_port), GFP_KERNEL);
246
tnl_vport->dst_port = htons(dst_port);
247
vxlan_port->vport = vport;
248
list_add_tail_rcu(&vxlan_port->list, &vxlan_ports);
250
err = vxlan_socket_init(vxlan_port, net);
257
list_del_rcu(&vxlan_port->list);
263
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
265
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
267
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(tnl_vport->dst_port)))
272
static const struct tnl_ops ovs_vxlan_tnl_ops = {
273
.ipproto = IPPROTO_UDP,
274
.hdr_len = vxlan_hdr_len,
275
.build_header = vxlan_build_header,
278
static void vxlan_tnl_destroy(struct vport *vport)
280
struct vxlan_port *vxlan_port;
281
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
283
vxlan_port = vxlan_find_port(ovs_dp_get_net(vport->dp),
284
tnl_vport->dst_port);
286
vxlan_tunnel_release(vxlan_port);
287
ovs_tnl_destroy(vport);
290
static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
295
vport = ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops);
125
vport = ovs_vport_alloc(sizeof(struct vxlan_port),
126
&ovs_vxlan_vport_ops, parms);
296
127
if (IS_ERR(vport))
299
err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), vport,
302
ovs_tnl_destroy(vport);
130
vxlan_port = vxlan_vport(vport);
131
strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
133
vh = vxlan_handler_add(net, htons(dst_port), vxlan_rcv,
134
vport, OVS_VXLAN_RCV_PRIORITY, true);
136
ovs_vport_free(vport);
147
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
149
struct vxlan_port *vxlan_port = vxlan_vport(vport);
150
__be16 dst_port = inet_sport(vxlan_port->vh->vs->sock->sk);
151
struct net *net = ovs_dp_get_net(vport->dp);
160
if (unlikely(!OVS_CB(skb)->tun_key)) {
165
forward_ip_summed(skb, true);
168
saddr = OVS_CB(skb)->tun_key->ipv4_src;
169
rt = find_route(ovs_dp_get_net(vport->dp),
171
OVS_CB(skb)->tun_key->ipv4_dst,
173
OVS_CB(skb)->tun_key->ipv4_tos,
180
df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
185
inet_get_local_port_range(&port_min, &port_max);
186
src_port = vxlan_src_port(port_min, port_max, skb);
188
err = vxlan_xmit_skb(net, vxlan_port->vh, rt, skb,
189
saddr, OVS_CB(skb)->tun_key->ipv4_dst,
190
OVS_CB(skb)->tun_key->ipv4_tos,
191
OVS_CB(skb)->tun_key->ipv4_ttl, df,
193
htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
200
static const char *vxlan_get_name(const struct vport *vport)
202
struct vxlan_port *vxlan_port = vxlan_vport(vport);
203
return vxlan_port->name;
309
206
const struct vport_ops ovs_vxlan_vport_ops = {
310
207
.type = OVS_VPORT_TYPE_VXLAN,
311
.flags = VPORT_F_TUN_ID,
312
208
.create = vxlan_tnl_create,
313
209
.destroy = vxlan_tnl_destroy,
314
.get_name = ovs_tnl_get_name,
210
.get_name = vxlan_get_name,
315
211
.get_options = vxlan_get_options,
316
.send = ovs_tnl_send,
212
.send = vxlan_tnl_send,
319
215
#warning VXLAN tunneling will not be available on kernels before 2.6.26