1
/*********************************************************
2
* Copyright (C) 2007 VMware, Inc. All rights reserved.
4
* This program is free software; you can redistribute it and/or modify it
5
* under the terms of the GNU General Public License as published by the
6
* Free Software Foundation version 2 and no later version.
8
* This program is distributed in the hope that it will be useful, but
9
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
10
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13
* You should have received a copy of the GNU General Public License along
14
* with this program; if not, write to the Free Software Foundation, Inc.,
15
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
*********************************************************/
22
* Linux socket module for the VMCI Sockets protocol family.
27
* Implementation notes:
29
* - There are two kinds of sockets: those created by user action (such as
30
* calling socket(2)) and those created by incoming connection request
33
* - There are two "global" tables, one for bound sockets (sockets that have
34
* specified an address that they are responsible for) and one for connected
35
* sockets (sockets that have established a connection with another socket).
36
* These tables are "global" in that all sockets on the system are placed
38
* - Note, though, that the bound table contains an extra entry for a list of
39
* unbound sockets and SOCK_DGRAM sockets will always remain in that list.
40
* The bound table is used solely for lookup of sockets when packets are
41
* received and that's not necessary for SOCK_DGRAM sockets since we create
42
* a datagram handle for each and need not perform a lookup. Keeping
43
* SOCK_DGRAM sockets out of the bound hash buckets will reduce the chance
44
* of collisions when looking for SOCK_STREAM sockets and prevents us from
45
* having to check the socket type in the hash table lookups.
47
* - Sockets created by user action will either be "client" sockets that
48
* initiate a connection or "server" sockets that listen for connections; we
49
* do not support simultaneous connects (two "client" sockets connecting).
51
* - "Server" sockets are referred to as listener sockets throughout this
52
* implementation because they are in the SS_LISTEN state. When a connection
53
* request is received (the second kind of socket mentioned above), we create
54
* a new socket and refer to it as a pending socket. These pending sockets
55
* are placed on the pending connection list of the listener socket. When
56
* future packets are received for the address the listener socket is bound
57
* to, we check if the source of the packet is from one that has an existing
58
* pending connection. If it does, we process the packet for the pending
59
* socket. When that socket reaches the connected state, it is removed from
60
* the listener socket's pending list and enqueued in the listener socket's
61
* accept queue. Callers of accept(2) will accept connected sockets from the
62
* listener socket's accept queue. If the socket cannot be accepted for some
63
* reason then it is marked rejected. Once the connection is accepted, it is
64
* owned by the user process and the responsibility for cleanup falls with
67
* - It is possible that these pending sockets will never reach the connected
68
* state; in fact, we may never receive another packet after the connection
69
* request. Because of this, we must schedule a cleanup function to run in
70
* the future, after some amount of time passes where a connection should
71
* have been established. This function ensures that the socket is off all
72
* lists so it cannot be retrieved, then drops all references to the socket
73
* so it is cleaned up (sock_put() -> sk_free() -> our sk_destruct
74
* implementation). Note this function will also clean up rejected sockets,
75
* those that reach the connected state but leave it before they have been
78
* - Sockets created by user action will be cleaned up when the user
79
* process calls close(2), causing our release implementation to be called.
80
* Our release implementation will perform some cleanup then drop the
81
* last reference so our sk_destruct implementation is invoked. Our
82
* sk_destruct implementation will perform additional cleanup that's common
83
* for both types of sockets.
85
* - A socket's reference count is what ensures that the structure won't be
86
* freed. Each entry in a list (such as the "global" bound and connected
87
* tables and the listener socket's pending list and connected queue) ensures
88
* a reference. When we defer work until process context and pass a socket
89
* as our argument, we must ensure the reference count is increased to ensure
90
* the socket isn't freed before the function is run; the deferred function
91
* will then drop the reference.
95
#include "driver-config.h"
97
#include <linux/kmod.h>
98
#include <linux/socket.h>
99
#include <linux/net.h>
100
#include <linux/skbuff.h>
101
#include <linux/miscdevice.h>
102
#include <linux/poll.h>
103
#include <linux/smp.h>
104
#include <linux/smp_lock.h>
106
#if defined(__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
107
# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
108
# include <asm/ioctl32.h>
110
# include <linux/ioctl32.h>
112
/* Use weak: not all kernels export sys_ioctl for use by modules */
113
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 66)
114
asmlinkage __attribute__((weak)) long
115
sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
117
asmlinkage __attribute__((weak)) int
118
sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
122
#include "compat_module.h"
123
#include "compat_kernel.h"
124
#include "compat_init.h"
125
#include "compat_sock.h"
126
#include "compat_wait.h"
127
#include "compat_version.h"
128
#include "compat_workqueue.h"
129
#include "compat_list.h"
130
#if defined(HAVE_COMPAT_IOCTL) || defined(HAVE_UNLOCKED_IOCTL)
131
# include "compat_semaphore.h"
136
#include "vsockCommon.h"
137
#include "vsockPacket.h"
138
#include "vsockVmci.h"
140
#include "vmci_defs.h"
141
#include "vmci_call_defs.h"
142
#include "vmci_iocontrols.h"
144
# include "vmciGuestKernelAPI.h"
146
# include "vmciDatagram.h"
149
#include "af_vsock.h"
151
#include "vsock_version.h"
152
#include "driverLog.h"
155
/*
 * VSOCK_INVALID_FAMILY is the sentinel kept in the family fields of the
 * protocol family and proto_ops tables until an address family has been
 * registered.  NPROTO lies just outside the valid range, so it can never
 * collide with a real family value.
 *
 * VSOCK_AF_IS_REGISTERED() is true when val lies in [0, NPROTO).  Note that
 * val is evaluated twice, so do not pass an expression with side effects.
 */
#define VSOCK_INVALID_FAMILY        NPROTO
#define VSOCK_AF_IS_REGISTERED(val) ((val) >= 0 && (val) < NPROTO)
158
/* Some kernel versions don't define __user. Define it ourselves if so. */
168
int VSockVmci_GetAFValue(void);
170
/* Internal functions. */
171
static int VSockVmciRecvDgramCB(void *data, VMCIDatagram *dg);
173
static int VSockVmciRecvStreamCB(void *data, VMCIDatagram *dg);
174
static void VSockVmciPeerAttachCB(VMCIId subId,
175
VMCI_EventData *ed, void *clientData);
176
static void VSockVmciPeerDetachCB(VMCIId subId,
177
VMCI_EventData *ed, void *clientData);
178
static int VSockVmciSendControlPktBH(struct sockaddr_vm *src,
179
struct sockaddr_vm *dst,
180
VSockPacketType type,
183
VSockWaitingInfo *wait,
185
static int VSockVmciSendControlPkt(struct sock *sk, VSockPacketType type,
186
uint64 size, uint64 mode,
187
VSockWaitingInfo *wait, VMCIHandle handle);
188
static void VSockVmciRecvPktWork(compat_work_arg work);
189
static int VSockVmciRecvListen(struct sock *sk, VSockPacket *pkt);
190
static int VSockVmciRecvConnectingServer(struct sock *sk,
191
struct sock *pending, VSockPacket *pkt);
192
static int VSockVmciRecvConnectingClient(struct sock *sk, VSockPacket *pkt);
193
static int VSockVmciRecvConnectingClientNegotiate(struct sock *sk,
195
static int VSockVmciRecvConnected(struct sock *sk, VSockPacket *pkt);
197
static int __VSockVmciBind(struct sock *sk, struct sockaddr_vm *addr);
198
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14)
199
static struct sock *__VSockVmciCreate(struct socket *sock, unsigned int priority);
200
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
201
static struct sock *__VSockVmciCreate(struct socket *sock, gfp_t priority);
203
static struct sock *__VSockVmciCreate(struct net *net,
204
struct socket *sock, gfp_t priority);
206
static int VSockVmciRegisterAddressFamily(void);
207
static void VSockVmciUnregisterAddressFamily(void);
210
/* Socket operations. */
211
static void VSockVmciSkDestruct(struct sock *sk);
212
static int VSockVmciQueueRcvSkb(struct sock *sk, struct sk_buff *skb);
213
static int VSockVmciRelease(struct socket *sock);
214
static int VSockVmciBind(struct socket *sock,
215
struct sockaddr *addr, int addrLen);
216
static int VSockVmciDgramConnect(struct socket *sock,
217
struct sockaddr *addr, int addrLen, int flags);
219
static int VSockVmciStreamConnect(struct socket *sock,
220
struct sockaddr *addr, int addrLen, int flags);
221
static int VSockVmciAccept(struct socket *sock, struct socket *newsock, int flags);
223
static int VSockVmciGetname(struct socket *sock,
224
struct sockaddr *addr, int *addrLen, int peer);
225
static unsigned int VSockVmciPoll(struct file *file,
226
struct socket *sock, poll_table *wait);
228
static int VSockVmciListen(struct socket *sock, int backlog);
230
static int VSockVmciShutdown(struct socket *sock, int mode);
233
static int VSockVmciStreamSetsockopt(struct socket *sock, int level, int optname,
234
char __user *optval, int optlen);
235
static int VSockVmciStreamGetsockopt(struct socket *sock, int level, int optname,
236
char __user *optval, int __user * optlen);
239
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 43)
240
static int VSockVmciDgramSendmsg(struct socket *sock, struct msghdr *msg,
241
int len, struct scm_cookie *scm);
242
static int VSockVmciDgramRecvmsg(struct socket *sock, struct msghdr *msg,
243
int len, int flags, struct scm_cookie *scm);
245
static int VSockVmciStreamSendmsg(struct socket *sock, struct msghdr *msg,
246
int len, struct scm_cookie *scm);
247
static int VSockVmciStreamRecvmsg(struct socket *sock, struct msghdr *msg,
248
int len, int flags, struct scm_cookie *scm);
250
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 65)
251
static int VSockVmciDgramSendmsg(struct kiocb *kiocb, struct socket *sock,
252
struct msghdr *msg, int len,
253
struct scm_cookie *scm);
254
static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, struct socket *sock,
255
struct msghdr *msg, int len,
256
int flags, struct scm_cookie *scm);
258
static int VSockVmciStreamSendmsg(struct kiocb *kiocb, struct socket *sock,
259
struct msghdr *msg, int len,
260
struct scm_cookie *scm);
261
static int VSockVmciStreamRecvmsg(struct kiocb *kiocb, struct socket *sock,
262
struct msghdr *msg, int len,
263
int flags, struct scm_cookie *scm);
265
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 2)
266
static int VSockVmciDgramSendmsg(struct kiocb *kiocb,
267
struct socket *sock, struct msghdr *msg, int len);
268
static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, struct socket *sock,
269
struct msghdr *msg, int len, int flags);
271
static int VSockVmciStreamSendmsg(struct kiocb *kiocb,
272
struct socket *sock, struct msghdr *msg, int len);
273
static int VSockVmciStreamRecvmsg(struct kiocb *kiocb, struct socket *sock,
274
struct msghdr *msg, int len, int flags);
277
static int VSockVmciDgramSendmsg(struct kiocb *kiocb,
278
struct socket *sock, struct msghdr *msg, size_t len);
279
static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, struct socket *sock,
280
struct msghdr *msg, size_t len, int flags);
282
static int VSockVmciStreamSendmsg(struct kiocb *kiocb,
283
struct socket *sock, struct msghdr *msg, size_t len);
284
static int VSockVmciStreamRecvmsg(struct kiocb *kiocb, struct socket *sock,
285
struct msghdr *msg, size_t len, int flags);
289
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
290
static int VSockVmciCreate(struct socket *sock, int protocol);
292
static int VSockVmciCreate(struct net *net, struct socket *sock, int protocol);
298
int VSockVmciDevOpen(struct inode *inode, struct file *file);
299
int VSockVmciDevRelease(struct inode *inode, struct file *file);
300
static int VSockVmciDevIoctl(struct inode *inode, struct file *filp,
301
u_int iocmd, unsigned long ioarg);
302
#if defined(HAVE_COMPAT_IOCTL) || defined(HAVE_UNLOCKED_IOCTL)
303
static long VSockVmciDevUnlockedIoctl(struct file *filp,
304
u_int iocmd, unsigned long ioarg);
311
/* Protocol family. We only use this for builds against 2.6.9 and later. */
312
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 9)
313
static struct proto vsockVmciProto = {
315
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 10)
316
/* Added in 2.6.10. */
317
.owner = THIS_MODULE,
320
* Before 2.6.9, each address family created its own slab (by calling
321
* kmem_cache_create() directly). From 2.6.9 until 2.6.11, these address
322
* families instead called sk_alloc_slab() and the allocated slab was
323
* assigned to the slab variable in the proto struct and was created of size
324
* slab_obj_size. As of 2.6.12 and later, this slab allocation was moved
325
* into proto_register() and only done if you specified a non-zero value for
326
* the second argument (alloc_slab); the size of the slab element was
327
* changed to obj_size.
329
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9)
330
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
331
.slab_obj_size = sizeof (VSockVmciSock),
333
.obj_size = sizeof (VSockVmciSock),
338
static struct net_proto_family vsockVmciFamilyOps = {
339
.family = VSOCK_INVALID_FAMILY,
340
.create = VSockVmciCreate,
341
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 69)
342
.owner = THIS_MODULE,
346
/* Socket operations, split for DGRAM and STREAM sockets. */
347
static struct proto_ops vsockVmciDgramOps = {
348
.family = VSOCK_INVALID_FAMILY,
349
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 69)
350
.owner = THIS_MODULE,
352
.release = VSockVmciRelease,
353
.bind = VSockVmciBind,
354
.connect = VSockVmciDgramConnect,
355
.socketpair = sock_no_socketpair,
356
.accept = sock_no_accept,
357
.getname = VSockVmciGetname,
358
.poll = VSockVmciPoll,
359
.ioctl = sock_no_ioctl,
360
.listen = sock_no_listen,
361
.shutdown = VSockVmciShutdown,
362
.setsockopt = sock_no_setsockopt,
363
.getsockopt = sock_no_getsockopt,
364
.sendmsg = VSockVmciDgramSendmsg,
365
.recvmsg = VSockVmciDgramRecvmsg,
366
.mmap = sock_no_mmap,
367
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 4)
368
.sendpage = sock_no_sendpage,
373
static struct proto_ops vsockVmciStreamOps = {
374
.family = VSOCK_INVALID_FAMILY,
375
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 69)
376
.owner = THIS_MODULE,
378
.release = VSockVmciRelease,
379
.bind = VSockVmciBind,
380
.connect = VSockVmciStreamConnect,
381
.socketpair = sock_no_socketpair,
382
.accept = VSockVmciAccept,
383
.getname = VSockVmciGetname,
384
.poll = VSockVmciPoll,
385
.ioctl = sock_no_ioctl,
386
.listen = VSockVmciListen,
387
.shutdown = VSockVmciShutdown,
388
.setsockopt = VSockVmciStreamSetsockopt,
389
.getsockopt = VSockVmciStreamGetsockopt,
390
.sendmsg = VSockVmciStreamSendmsg,
391
.recvmsg = VSockVmciStreamRecvmsg,
392
.mmap = sock_no_mmap,
393
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 4)
394
.sendpage = sock_no_sendpage,
399
static struct file_operations vsockVmciDeviceOps = {
400
.ioctl = VSockVmciDevIoctl,
401
#ifdef HAVE_UNLOCKED_IOCTL
402
.unlocked_ioctl = VSockVmciDevUnlockedIoctl,
404
#ifdef HAVE_COMPAT_IOCTL
405
.compat_ioctl = VSockVmciDevUnlockedIoctl,
407
.open = VSockVmciDevOpen,
408
.release = VSockVmciDevRelease,
411
static struct miscdevice vsockVmciDevice = {
413
.minor = MISC_DYNAMIC_MINOR,
414
.fops = &vsockVmciDeviceOps,
417
typedef struct VSockRecvPktInfo {
423
static spinlock_t registrationLock = SPIN_LOCK_UNLOCKED;
424
static int devOpenCount = 0;
425
static int vsockVmciSocketCount = 0;
427
static VMCIHandle vmciStreamHandle = { VMCI_INVALID_ID, VMCI_INVALID_ID };
428
static Bool vmciDevicePresent = FALSE;
429
static VMCIId qpResumedSubId = VMCI_INVALID_ID;
432
/* Comment this out to compare with old protocol. */
433
#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1
435
/* Comment this out to turn off datagram counting. */
436
//#define VSOCK_CONTROL_PACKET_COUNT 1
437
#ifdef VSOCK_CONTROL_PACKET_COUNT
438
uint64 controlPacketCount[VSOCK_PACKET_TYPE_MAX];
441
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9)
442
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 5)
443
kmem_cache_t *vsockCachep;
447
#define VSOCK_MAX_DGRAM_RESENDS 10
450
/*
 * Queue pair size limits, in bytes.
 *
 * 64k is hopefully a reasonable default, but we should do some real
 * benchmarks.  There are also some issues with resource limits on ESX.
 */
#define VSOCK_DEFAULT_QP_SIZE_MIN   128
#define VSOCK_DEFAULT_QP_SIZE       65536
#define VSOCK_DEFAULT_QP_SIZE_MAX   262144
457
/*
 * Helpers for sending each kind of control packet.
 *
 * The *_BH variants go through VSockVmciSendControlPktBH() and take explicit
 * addresses; they are what the stream receive callback uses.  Because they
 * send replies, callers pass the received packet's destination as _dst and
 * its source as _src, and the macro forwards them in that order (so the
 * packet's destination becomes the reply's source).  The other variants go
 * through VSockVmciSendControlPkt() and take the socket itself.
 *
 * The two RESET macros evaluate to 0 without sending anything when _pkt is
 * itself a RST, so that two endpoints never keep answering RSTs with RSTs.
 * Their expansions are fully parenthesized so that they behave as a single
 * value inside a larger expression (e.g. "VSOCK_SEND_RESET(sk, pkt) < 0").
 */
#define VSOCK_SEND_RESET_BH(_dst, _src, _pkt)                              \
   (((_pkt)->type == VSOCK_PACKET_TYPE_RST) ?                              \
       0 :                                                                 \
       VSockVmciSendControlPktBH(_dst, _src, VSOCK_PACKET_TYPE_RST, 0,     \
                                 0, NULL, VMCI_INVALID_HANDLE))
#define VSOCK_SEND_INVALID_BH(_dst, _src)                                  \
   VSockVmciSendControlPktBH(_dst, _src, VSOCK_PACKET_TYPE_INVALID, 0,     \
                             0, NULL, VMCI_INVALID_HANDLE)
#define VSOCK_SEND_WROTE_BH(_dst, _src)                                    \
   VSockVmciSendControlPktBH(_dst, _src, VSOCK_PACKET_TYPE_WROTE, 0,       \
                             0, NULL, VMCI_INVALID_HANDLE)
#define VSOCK_SEND_READ_BH(_dst, _src)                                     \
   VSockVmciSendControlPktBH(_dst, _src, VSOCK_PACKET_TYPE_READ, 0,        \
                             0, NULL, VMCI_INVALID_HANDLE)
#define VSOCK_SEND_RESET(_sk, _pkt)                                        \
   (((_pkt)->type == VSOCK_PACKET_TYPE_RST) ?                              \
       0 :                                                                 \
       VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_RST,                 \
                               0, 0, NULL, VMCI_INVALID_HANDLE))
#define VSOCK_SEND_NEGOTIATE(_sk, _size)                                   \
   VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_NEGOTIATE,               \
                           _size, 0, NULL, VMCI_INVALID_HANDLE)
#define VSOCK_SEND_QP_OFFER(_sk, _handle)                                  \
   VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_OFFER,                   \
                           0, 0, NULL, _handle)
#define VSOCK_SEND_CONN_REQUEST(_sk, _size)                                \
   VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_REQUEST,                 \
                           _size, 0, NULL, VMCI_INVALID_HANDLE)
#define VSOCK_SEND_ATTACH(_sk, _handle)                                    \
   VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_ATTACH,                  \
                           0, 0, NULL, _handle)
#define VSOCK_SEND_WROTE(_sk)                                              \
   VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_WROTE,                   \
                           0, 0, NULL, VMCI_INVALID_HANDLE)
#define VSOCK_SEND_READ(_sk)                                               \
   VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_READ,                    \
                           0, 0, NULL, VMCI_INVALID_HANDLE)
#define VSOCK_SEND_SHUTDOWN(_sk, _mode)                                    \
   VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_SHUTDOWN,                \
                           0, _mode, NULL, VMCI_INVALID_HANDLE)
#define VSOCK_SEND_WAITING_WRITE(_sk, _waitInfo)                           \
   VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_WAITING_WRITE,           \
                           0, 0, _waitInfo, VMCI_INVALID_HANDLE)
#define VSOCK_SEND_WAITING_READ(_sk, _waitInfo)                            \
   VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_WAITING_READ,            \
                           0, 0, _waitInfo, VMCI_INVALID_HANDLE)
506
# define LOG_PACKET(_pkt) VSockVmciLogPkt(__FUNCTION__, __LINE__, _pkt)
508
# define LOG_PACKET(_pkt)
513
*----------------------------------------------------------------------------
515
* VSockVmci_GetAFValue --
517
* Returns the address family value being used.
520
* The address family on success, a negative error on failure.
525
*----------------------------------------------------------------------------
529
VSockVmci_GetAFValue(void)
533
spin_lock(®istrationLock);
535
afvalue = vsockVmciFamilyOps.family;
536
if (!VSOCK_AF_IS_REGISTERED(afvalue)) {
537
afvalue = VSockVmciRegisterAddressFamily();
540
spin_unlock(®istrationLock);
546
*----------------------------------------------------------------------------
548
* VSockVmciTestUnregister --
550
* Tests if it's necessary to unregister the socket family, and does so.
552
* Note that this assumes the registration lock is held.
560
*----------------------------------------------------------------------------
564
VSockVmciTestUnregister(void)
566
if (devOpenCount <= 0 && vsockVmciSocketCount <= 0) {
567
if (VSOCK_AF_IS_REGISTERED(vsockVmciFamilyOps.family)) {
568
VSockVmciUnregisterAddressFamily();
581
*----------------------------------------------------------------------------
583
* VSockVmciNotifyWaitingWrite --
585
* Determines if the conditions have been met to notify a waiting writer.
588
* TRUE if a notification should be sent, FALSE otherwise.
593
*----------------------------------------------------------------------------
597
VSockVmciNotifyWaitingWrite(VSockVmciSock *vsk) // IN
599
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
600
if (!vsk->peerWaitingWrite) {
605
* For now we ignore the wait information and just see if there is any room
606
* to write anything. Note that improving this function to be more
607
* intelligent will not require a protocol change and will retain
608
* compatibility between endpoints with mixed versions of this function.
610
return VMCIQueue_FreeSpace(vsk->consumeQ,
611
vsk->produceQ, vsk->consumeSize) > 0;
619
*----------------------------------------------------------------------------
621
* VSockVmciNotifyWaitingRead --
623
* Determines if the conditions have been met to notify a waiting reader.
626
* TRUE if a notification should be sent, FALSE otherwise.
631
*----------------------------------------------------------------------------
635
VSockVmciNotifyWaitingRead(VSockVmciSock *vsk) // IN
637
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
638
if (!vsk->peerWaitingRead) {
643
* For now we ignore the wait information and just see if there is any data
644
* to read. Note that improving this function to be more intelligent will
645
* not require a protocol change and will retain compatibility between
646
* endpoints with mixed versions of this function.
648
return VMCIQueue_BufReady(vsk->produceQ,
649
vsk->consumeQ, vsk->produceSize) > 0;
657
*----------------------------------------------------------------------------
659
* VSockVmciHandleWaitingWrite --
661
* Handles an incoming waiting write message.
667
* May send a notification to the peer, may update socket's wait info
670
*----------------------------------------------------------------------------
674
VSockVmciHandleWaitingWrite(struct sock *sk, // IN
675
VSockPacket *pkt, // IN
676
Bool bottomHalf, // IN
677
struct sockaddr_vm *dst, // IN
678
struct sockaddr_vm *src) // IN
680
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
685
vsk->peerWaitingWrite = TRUE;
686
memcpy(&vsk->peerWaitingWriteInfo, &pkt->u.wait,
687
sizeof vsk->peerWaitingWriteInfo);
689
if (VSockVmciNotifyWaitingWrite(vsk)) {
693
sent = VSOCK_SEND_READ_BH(dst, src) > 0;
695
sent = VSOCK_SEND_READ(sk) > 0;
699
vsk->peerWaitingWrite = FALSE;
707
*----------------------------------------------------------------------------
709
* VSockVmciHandleWaitingRead --
711
* Handles an incoming waiting read message.
717
* May send a notification to the peer, may update socket's wait info
720
*----------------------------------------------------------------------------
724
VSockVmciHandleWaitingRead(struct sock *sk, // IN
725
VSockPacket *pkt, // IN
726
Bool bottomHalf, // IN
727
struct sockaddr_vm *dst, // IN
728
struct sockaddr_vm *src) // IN
730
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
735
vsk->peerWaitingRead = TRUE;
736
memcpy(&vsk->peerWaitingReadInfo, &pkt->u.wait,
737
sizeof vsk->peerWaitingReadInfo);
739
if (VSockVmciNotifyWaitingRead(vsk)) {
743
sent = VSOCK_SEND_WROTE_BH(dst, src) > 0;
745
sent = VSOCK_SEND_WROTE(sk) > 0;
749
vsk->peerWaitingRead = FALSE;
758
*----------------------------------------------------------------------------
760
* VSockVmciRecvDgramCB --
762
* VMCI Datagram receive callback. This function is used specifically for
763
* SOCK_DGRAM sockets.
765
* This is invoked as part of a tasklet that's scheduled when the VMCI
766
* interrupt fires. This is run in bottom-half context and if it ever needs
767
* to sleep it should defer that work to a work queue.
770
* Zero on success, negative error code on failure.
773
* An sk_buff is created and queued with this socket.
775
*----------------------------------------------------------------------------
779
VSockVmciRecvDgramCB(void *data, // IN
780
VMCIDatagram *dg) // IN
787
ASSERT(dg->payloadSize <= VMCI_MAX_DG_PAYLOAD_SIZE);
789
sk = (struct sock *)data;
792
/* XXX Figure out why sk->compat_sk_socket can be NULL. */
793
ASSERT(sk->compat_sk_socket ? sk->compat_sk_socket->type == SOCK_DGRAM : 1);
795
size = VMCI_DG_SIZE(dg);
798
* Attach the packet to the socket's receive queue as an sk_buff.
800
skb = alloc_skb(size, GFP_ATOMIC);
802
/* compat_sk_receive_skb() will do a sock_put(), so hold here. */
805
memcpy(skb->data, dg, size);
806
compat_sk_receive_skb(sk, skb, 0);
815
*----------------------------------------------------------------------------
817
* VSockVmciRecvStreamCB --
819
* VMCI stream receive callback for control datagrams. This function is
820
* used specifically for SOCK_STREAM sockets.
822
* This is invoked as part of a tasklet that's scheduled when the VMCI
823
* interrupt fires. This is run in bottom-half context but it defers most
824
* of its work to the packet handling work queue.
827
* Zero on success, negative error code on failure.
832
*----------------------------------------------------------------------------
836
VSockVmciRecvStreamCB(void *data, // IN
837
VMCIDatagram *dg) // IN
840
struct sockaddr_vm dst;
841
struct sockaddr_vm src;
847
ASSERT(dg->payloadSize <= VMCI_MAX_DG_PAYLOAD_SIZE);
854
* Ignore incoming packets from contexts without sockets, or resources that
855
* aren't vsock implementations.
857
if (!VSockAddr_SocketContext(VMCI_HANDLE_TO_CONTEXT_ID(dg->src)) ||
858
VSOCK_PACKET_RID != VMCI_HANDLE_TO_RESOURCE_ID(dg->src)) {
859
return VMCI_ERROR_NO_ACCESS;
862
if (VMCI_DG_SIZE(dg) < sizeof *pkt) {
863
/* Drop datagrams that do not contain full VSock packets. */
864
return VMCI_ERROR_INVALID_ARGS;
867
pkt = (VSockPacket *)dg;
872
* Find the socket that should handle this packet. First we look for
873
* a connected socket and if there is none we look for a socket bound to
874
* the destination address.
876
* Note that we don't initialize the family member of the src and dst
877
* sockaddr_vm since we don't want to call VSockVmci_GetAFValue() and
878
* possibly register the address family.
880
VSockAddr_InitNoFamily(&src,
881
VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src),
884
VSockAddr_InitNoFamily(&dst,
885
VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.dst),
888
sk = VSockVmciFindConnectedSocket(&src, &dst);
890
sk = VSockVmciFindBoundSocket(&dst);
893
* We could not find a socket for this specified address. If this
894
* packet is a RST, we just drop it. If it is another packet, we send
895
* a RST. Note that we do not send a RST reply to RSTs so that we do
896
* not continually send RSTs between two endpoints.
898
* Note that since this is a reply, dst is src and src is dst.
900
if (VSOCK_SEND_RESET_BH(&dst, &src, pkt) < 0) {
901
Log("unable to send reset.\n");
903
err = VMCI_ERROR_NOT_FOUND;
909
* If the received packet type is beyond all types known to this
910
* implementation, reply with an invalid message. Hopefully this will help
911
* when implementing backwards compatibility in the future.
913
if (pkt->type >= VSOCK_PACKET_TYPE_MAX) {
914
if (VSOCK_SEND_INVALID_BH(&dst, &src) < 0) {
915
Warning("unable to send reply for invalid packet.\n");
916
err = VMCI_ERROR_INVALID_ARGS;
922
* We do most everything in a work queue, but let's fast path the
923
* notification of reads and writes to help data transfer performance. We
924
* can only do this if there is no process context code executing for this
925
* socket since that may change the state.
929
if (!compat_sock_owned_by_user(sk) && sk->compat_sk_state == SS_CONNECTED) {
931
case VSOCK_PACKET_TYPE_WROTE:
932
sk->compat_sk_data_ready(sk, 0);
935
case VSOCK_PACKET_TYPE_READ:
936
sk->compat_sk_write_space(sk);
939
case VSOCK_PACKET_TYPE_WAITING_WRITE:
940
VSockVmciHandleWaitingWrite(sk, pkt, TRUE, &dst, &src);
944
case VSOCK_PACKET_TYPE_WAITING_READ:
945
VSockVmciHandleWaitingRead(sk, pkt, TRUE, &dst, &src);
954
VSockRecvPktInfo *recvPktInfo;
956
recvPktInfo = kmalloc(sizeof *recvPktInfo, GFP_ATOMIC);
958
if (VSOCK_SEND_RESET_BH(&dst, &src, pkt) < 0) {
959
Warning("unable to send reset\n");
961
err = VMCI_ERROR_NO_MEM;
965
recvPktInfo->sk = sk;
966
memcpy(&recvPktInfo->pkt, pkt, sizeof recvPktInfo->pkt);
967
COMPAT_INIT_WORK(&recvPktInfo->work, VSockVmciRecvPktWork, recvPktInfo);
969
compat_schedule_work(&recvPktInfo->work);
971
* Clear sk so that the reference count incremented by one of the Find
972
* functions above is not decremented below. We need that reference
973
* count for the packet handler we've scheduled to run.
987
*----------------------------------------------------------------------------
989
* VSockVmciPeerAttachCB --
991
* Invoked when a peer attaches to a queue pair.
993
* Right now this does not do anything.
999
* May modify socket state and signal socket.
1001
*----------------------------------------------------------------------------
1005
VSockVmciPeerAttachCB(VMCIId subId, // IN
1006
VMCI_EventData *eData, // IN
1007
void *clientData) // IN
1010
VMCIEventPayload_QP *ePayload;
1016
sk = (struct sock *)clientData;
1017
ePayload = VMCIEventDataPayload(eData);
1024
* XXX This is lame, we should provide a way to lookup sockets by qpHandle.
1026
if (VMCI_HANDLE_EQUAL(vsk->qpHandle, ePayload->handle)) {
1028
* XXX This doesn't do anything, but in the future we may want to set
1029
* a flag here to verify the attach really did occur and we weren't just
1030
* sent a datagram claiming it was.
1041
*----------------------------------------------------------------------------
1043
* VSockVmciHandleDetach --
1045
* Perform the work necessary when the peer has detached.
1047
* Note that this assumes the socket lock is held.
1053
* The socket's and its peer's shutdown mask will be set appropriately,
1054
* and any callers waiting on this socket will be awoken.
1056
*----------------------------------------------------------------------------
1060
VSockVmciHandleDetach(struct sock *sk) // IN
1067
if (!VMCI_HANDLE_INVALID(vsk->qpHandle)) {
1068
ASSERT(vsk->produceQ);
1069
ASSERT(vsk->consumeQ);
1071
/* On a detach the peer will not be sending or receiving anymore. */
1072
vsk->peerShutdown = SHUTDOWN_MASK;
1075
* We should not be sending anymore since the peer won't be there to
1076
* receive, but we can still receive if there is data left in our consume
1079
sk->compat_sk_shutdown |= SEND_SHUTDOWN;
1080
if (VMCIQueue_BufReady(vsk->consumeQ,
1081
vsk->produceQ, vsk->consumeSize) <= 0) {
1082
sk->compat_sk_shutdown |= RCV_SHUTDOWN;
1083
sk->compat_sk_state = SS_UNCONNECTED;
1085
sk->compat_sk_state_change(sk);
1091
*----------------------------------------------------------------------------
1093
* VSockVmciPeerDetachCB --
1095
* Invoked when a peer detaches from a queue pair.
1101
* May modify socket state and signal socket.
1103
*----------------------------------------------------------------------------
1107
VSockVmciPeerDetachCB(VMCIId subId, // IN
1108
VMCI_EventData *eData, // IN
1109
void *clientData) // IN
1112
VMCIEventPayload_QP *ePayload;
1118
sk = (struct sock *)clientData;
1119
ePayload = VMCIEventDataPayload(eData);
1121
if (VMCI_HANDLE_INVALID(ePayload->handle)) {
1126
* XXX This is lame, we should provide a way to lookup sockets by qpHandle.
1130
if (VMCI_HANDLE_EQUAL(vsk->qpHandle, ePayload->handle)) {
1131
VSockVmciHandleDetach(sk);
1139
*----------------------------------------------------------------------------
1141
* VSockVmciQPResumedCB --
1143
* Invoked when a VM is resumed. We must mark all connected stream sockets
1150
* May modify socket state and signal socket.
1152
*----------------------------------------------------------------------------
1156
VSockVmciQPResumedCB(VMCIId subId, // IN
1157
VMCI_EventData *eData, // IN
1158
void *clientData) // IN
1162
spin_lock_bh(&vsockTableLock);
1165
* XXX This loop should probably be provided by util.{h,c}, but that's for
1168
for (i = 0; i < ARRAYSIZE(vsockConnectedTable); i++) {
1171
list_for_each_entry(vsk, &vsockConnectedTable[i], connectedTable) {
1172
struct sock *sk = sk_vsock(vsk);
1175
* XXX Technically this is racy but the resulting outcome from such
1176
* a race is relatively harmless. My next change will be a fix to
1179
VSockVmciHandleDetach(sk);
1183
spin_unlock_bh(&vsockTableLock);
1188
*----------------------------------------------------------------------------
1190
* VSockVmciPendingWork --
1192
* Releases the resources for a pending socket if it has not reached the
1193
* connected state and been accepted by a user process.
1199
* The socket may be removed from the connected list and all its resources
1202
*----------------------------------------------------------------------------
1206
VSockVmciPendingWork(compat_delayed_work_arg work) // IN
1209
struct sock *listener;
1213
vsk = COMPAT_DELAYED_WORK_GET_DATA(work, VSockVmciSock, dwork);
1217
listener = vsk->listener;
1222
lock_sock(listener);
1226
* The socket should be on the pending list or the accept queue, but not
1227
* both. It's also possible that the socket isn't on either.
1229
ASSERT( ( VSockVmciIsPending(sk) && !VSockVmciInAcceptQueue(sk))
1230
|| (!VSockVmciIsPending(sk) && VSockVmciInAcceptQueue(sk))
1231
|| (!VSockVmciIsPending(sk) && !VSockVmciInAcceptQueue(sk)));
1233
if (VSockVmciIsPending(sk)) {
1234
VSockVmciRemovePending(listener, sk);
1235
} else if (!vsk->rejected) {
1237
* We are not on the pending list and accept() did not reject us, so we
1238
* must have been accepted by our user process. We just need to drop our
1239
* references to the sockets and be on our way.
1245
listener->compat_sk_ack_backlog--;
1248
* We need to remove ourself from the global connected sockets list so
1249
* incoming packets can't find this socket, and to reduce the reference
1252
if (VSockVmciInConnectedTable(sk)) {
1253
VSockVmciRemoveConnected(sk);
1256
sk->compat_sk_state = SS_FREE;
1260
release_sock(listener);
1270
*----------------------------------------------------------------------------
1272
* VSockVmciRecvPktWork --
1274
* Handles an incoming control packet for the provided socket. This is the
1275
* state machine for our stream sockets.
1281
* May set state and wakeup threads waiting for socket state to change.
1283
*----------------------------------------------------------------------------
1287
VSockVmciRecvPktWork(compat_work_arg work) // IN
1290
VSockRecvPktInfo *recvPktInfo;
1295
recvPktInfo = COMPAT_WORK_GET_DATA(work, VSockRecvPktInfo);
1296
ASSERT(recvPktInfo);
1299
sk = recvPktInfo->sk;
1300
pkt = &recvPktInfo->pkt;
1305
ASSERT(pkt->type < VSOCK_PACKET_TYPE_MAX);
1309
switch (sk->compat_sk_state) {
1311
err = VSockVmciRecvListen(sk, pkt);
1313
case SS_UNCONNECTED:
1314
Log("packet received for socket in unconnected state; dropping.\n");
1318
* Processing of pending connections for servers goes through the
1319
* listening socket, so see VSockVmciRecvListen() for that path.
1321
err = VSockVmciRecvConnectingClient(sk, pkt);
1324
err = VSockVmciRecvConnected(sk, pkt);
1326
case SS_DISCONNECTING:
1327
Log("packet receieved for socket in disconnecting state; dropping.\n");
1330
Log("packet receieved for socket in free state; dropping.\n");
1333
Log("socket is in invalid state; dropping packet.\n");
1341
* Release reference obtained in the stream callback when we fetched this
1342
* socket out of the bound or connected list.
1349
*----------------------------------------------------------------------------
1351
* VSockVmciRecvListen --
1353
* Receives packets for sockets in the listen state.
1355
* Note that this assumes the socket lock is held.
1358
* Zero on success, negative error code on failure.
1361
* A new socket may be created and a negotiate control packet is sent.
1363
*----------------------------------------------------------------------------
1367
VSockVmciRecvListen(struct sock *sk, // IN
1368
VSockPacket *pkt) // IN
1371
struct sock *pending;
1372
VSockVmciSock *vpending;
1378
ASSERT(sk->compat_sk_state == SS_LISTEN);
1384
* Because we are in the listen state, we could be receiving a packet for
1385
* ourself or any previous connection requests that we received. If it's
1386
* the latter, we try to find a socket in our list of pending connections
1387
* and, if we do, call the appropriate handler for the state that that
1388
* socket is in. Otherwise we try to service the connection request.
1390
pending = VSockVmciGetPending(sk, pkt);
1393
switch (pending->compat_sk_state) {
1395
err = VSockVmciRecvConnectingServer(sk, pending, pkt);
1398
VSOCK_SEND_RESET(pending, pkt);
1403
VSockVmciRemovePending(sk, pending);
1406
release_sock(pending);
1407
VSockVmciReleasePending(pending);
1413
* The listen state only accepts connection requests. Reply with a reset
1414
* unless we received a reset.
1416
if (pkt->type != VSOCK_PACKET_TYPE_REQUEST ||
1418
VSOCK_SEND_RESET(sk, pkt);
1423
* If this socket can't accommodate this connection request, we send
1424
* a reset. Otherwise we create and initialize a child socket and reply
1425
* with a connection negotiation.
1427
if (sk->compat_sk_ack_backlog >= sk->compat_sk_max_ack_backlog) {
1428
VSOCK_SEND_RESET(sk, pkt);
1429
return -ECONNREFUSED;
1432
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
1433
pending = __VSockVmciCreate(NULL, GFP_KERNEL);
1435
pending = __VSockVmciCreate(compat_sock_net(sk), NULL, GFP_KERNEL);
1438
VSOCK_SEND_RESET(sk, pkt);
1442
vpending = vsock_sk(pending);
1444
ASSERT(vsk->localAddr.svm_port == pkt->dstPort);
1446
VSockAddr_Init(&vpending->localAddr,
1447
VMCI_GetContextID(),
1449
VSockAddr_Init(&vpending->remoteAddr,
1450
VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src),
1454
* If the proposed size fits within our min/max, accept
1455
* it. Otherwise propose our own size.
1457
if (pkt->u.size >= vsk->queuePairMinSize &&
1458
pkt->u.size <= vsk->queuePairMaxSize) {
1459
qpSize = pkt->u.size;
1461
qpSize = vsk->queuePairSize;
1464
err = VSOCK_SEND_NEGOTIATE(pending, qpSize);
1466
VSOCK_SEND_RESET(sk, pkt);
1468
err = VSockVmci_ErrorToVSockError(err);
1472
VSockVmciAddPending(sk, pending);
1473
sk->compat_sk_ack_backlog++;
1475
pending->compat_sk_state = SS_CONNECTING;
1476
/*
 * Record the size we actually negotiated (qpSize), not the size the peer
 * first proposed. When the proposal falls outside our min/max we counter
 * with queuePairSize in the negotiate packet, and the client allocates the
 * queue pair with that negotiated value; attaching later with the rejected
 * pkt->u.size would ask for a different size than the pair the client made.
 */
vpending->produceSize = vpending->consumeSize = qpSize;
1479
* We might never receive another message for this socket and it's not
1480
* connected to any process, so we have to ensure it gets cleaned up
1481
* ourself. Our delayed work function will take care of that. Note that we
1482
* do not ever cancel this function since we have few guarantees about its
1483
* state when calling cancel_delayed_work(). Instead we hold a reference on
1484
* the socket for that function and make it capable of handling cases where
1485
* it needs to do nothing but release that reference.
1487
vpending->listener = sk;
1490
COMPAT_INIT_DELAYED_WORK(&vpending->dwork, VSockVmciPendingWork, vpending);
1491
compat_schedule_delayed_work(&vpending->dwork, HZ);
1499
*----------------------------------------------------------------------------
1501
* VSockVmciRecvConnectingServer --
1503
* Receives packets for sockets in the connecting state on the server side.
1505
* Connecting sockets on the server side can only receive queue pair offer
1506
* packets. All others should be treated as cause for closing the
1509
* Note that this assumes the socket lock is held for both sk and pending.
1512
* Zero on success, negative error code on failure.
1515
* A queue pair may be created, an attach control packet may be sent, the
1516
* socket may transition to the connected state, and a pending caller in
1517
* accept() may be woken up.
1519
*----------------------------------------------------------------------------
1523
VSockVmciRecvConnectingServer(struct sock *listener, // IN: the listening socket
1524
struct sock *pending, // IN: the pending connection
1525
VSockPacket *pkt) // IN: current packet
1527
VSockVmciSock *vpending;
1529
VMCIQueue *produceQ;
1530
VMCIQueue *consumeQ;
1539
ASSERT(listener->compat_sk_state == SS_LISTEN);
1540
ASSERT(pending->compat_sk_state == SS_CONNECTING);
1542
vpending = vsock_sk(pending);
1543
detachSubId = VMCI_INVALID_ID;
1545
switch (pkt->type) {
1546
case VSOCK_PACKET_TYPE_OFFER:
1547
if (VMCI_HANDLE_INVALID(pkt->u.handle)) {
1548
VSOCK_SEND_RESET(pending, pkt);
1555
/* Close and cleanup the connection. */
1556
VSOCK_SEND_RESET(pending, pkt);
1558
err = pkt->type == VSOCK_PACKET_TYPE_RST ?
1564
ASSERT(pkt->type == VSOCK_PACKET_TYPE_OFFER);
1567
* In order to complete the connection we need to attach to the offered
1568
* queue pair and send an attach notification. We also subscribe to the
1569
* detach event so we know when our peer goes away, and we do that before
1570
* attaching so we don't miss an event. If all this succeeds, we update our
1571
* state and wakeup anything waiting in accept() for a connection.
1575
* We don't care about attach since we ensure the other side has attached by
1576
* specifying the ATTACH_ONLY flag below.
1578
err = VMCIEvent_Subscribe(VMCI_EVENT_QP_PEER_DETACH,
1579
VSockVmciPeerDetachCB,
1582
if (err < VMCI_SUCCESS) {
1583
VSOCK_SEND_RESET(pending, pkt);
1584
err = VSockVmci_ErrorToVSockError(err);
1589
vpending->detachSubId = detachSubId;
1591
/* Now attach to the queue pair the client created. */
1592
handle = pkt->u.handle;
1593
isLocal = vpending->remoteAddr.svm_cid == vpending->localAddr.svm_cid;
1594
flags = VMCI_QPFLAG_ATTACH_ONLY;
1595
flags |= isLocal ? VMCI_QPFLAG_LOCAL : 0;
1597
err = VMCIQueuePair_Alloc(&handle,
1598
&produceQ, vpending->produceSize,
1599
&consumeQ, vpending->consumeSize,
1600
VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src),
1603
/* We cannot complete this connection: send a reset and close. */
1604
Log("Could not attach to queue pair with %d\n", err);
1605
VSOCK_SEND_RESET(pending, pkt);
1606
err = VSockVmci_ErrorToVSockError(err);
1611
VMCIQueue_Init(handle, produceQ);
1613
ASSERT(VMCI_HANDLE_EQUAL(handle, pkt->u.handle));
1614
vpending->qpHandle = handle;
1615
vpending->produceQ = produceQ;
1616
vpending->consumeQ = consumeQ;
1618
/* Notify our peer of our attach. */
1619
err = VSOCK_SEND_ATTACH(pending, handle);
1621
Log("Could not send attach\n");
1622
VSOCK_SEND_RESET(pending, pkt);
1623
err = VSockVmci_ErrorToVSockError(err);
1629
* We have a connection. Add our connection to the connected list so it no
1630
* longer goes through the listening socket, move it from the listener's
1631
* pending list to the accept queue so callers of accept() can find it.
1632
* Note that enqueueing the socket increments the reference count, so even
1633
* if a reset comes before the connection is accepted, the socket will be
1634
* valid until it is removed from the queue.
1636
pending->compat_sk_state = SS_CONNECTED;
1638
VSockVmciInsertConnected(vsockConnectedSocketsVsk(vpending), pending);
1640
VSockVmciRemovePending(listener, pending);
1641
VSockVmciEnqueueAccept(listener, pending);
1644
* Callers of accept() will be waiting on the listening socket, not the
1647
listener->compat_sk_state_change(listener);
1652
pending->compat_sk_err = skerr;
1653
pending->compat_sk_state = SS_UNCONNECTED;
1655
* As long as we drop our reference, all necessary cleanup will happen when
1656
* the cleanup function drops its reference and our destruct implementation
1657
* is called. Note that since the listen handler will remove pending from
1658
* the pending list upon our failure, the cleanup function won't drop the
1659
* additional reference, which is why we do it here.
1668
*----------------------------------------------------------------------------
1670
* VSockVmciRecvConnectingClient --
1672
* Receives packets for sockets in the connecting state on the client side.
1674
* Connecting sockets on the client side should only receive attach packets.
1675
* All others should be treated as cause for closing the connection.
1677
* Note that this assumes the socket lock is held for sk.
1680
* Zero on success, negative error code on failure.
1683
* The socket may transition to the connected state and wakeup the pending
1684
* caller of connect().
1686
*----------------------------------------------------------------------------
1690
VSockVmciRecvConnectingClient(struct sock *sk, // IN: socket
1691
VSockPacket *pkt) // IN: current packet
1699
ASSERT(sk->compat_sk_state == SS_CONNECTING);
1703
switch (pkt->type) {
1704
case VSOCK_PACKET_TYPE_ATTACH:
1705
if (VMCI_HANDLE_INVALID(pkt->u.handle) ||
1706
!VMCI_HANDLE_EQUAL(pkt->u.handle, vsk->qpHandle)) {
1713
* Signify the socket is connected and wakeup the waiter in connect().
1714
* Also place the socket in the connected table for accounting (it can
1715
* already be found since it's in the bound table).
1717
sk->compat_sk_state = SS_CONNECTED;
1718
sk->compat_sk_socket->state = SS_CONNECTED;
1719
VSockVmciInsertConnected(vsockConnectedSocketsVsk(vsk), sk);
1720
sk->compat_sk_state_change(sk);
1722
case VSOCK_PACKET_TYPE_NEGOTIATE:
1723
if (pkt->u.size == 0 ||
1724
VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src) != vsk->remoteAddr.svm_cid ||
1725
pkt->srcPort != vsk->remoteAddr.svm_port ||
1726
!VMCI_HANDLE_INVALID(vsk->qpHandle) ||
1729
vsk->produceSize != 0 ||
1730
vsk->consumeSize != 0 ||
1731
vsk->attachSubId != VMCI_INVALID_ID ||
1732
vsk->detachSubId != VMCI_INVALID_ID) {
1738
err = VSockVmciRecvConnectingClientNegotiate(sk, pkt);
1745
case VSOCK_PACKET_TYPE_RST:
1750
/* Close and cleanup the connection. */
1756
ASSERT(pkt->type == VSOCK_PACKET_TYPE_ATTACH ||
1757
pkt->type == VSOCK_PACKET_TYPE_NEGOTIATE);
1762
VSOCK_SEND_RESET(sk, pkt);
1764
sk->compat_sk_state = SS_UNCONNECTED;
1765
sk->compat_sk_err = skerr;
1766
sk->compat_sk_error_report(sk);
1772
*----------------------------------------------------------------------------
1774
* VSockVmciRecvConnectingClientNegotiate --
1776
* Handles a negotiate packet for a client in the connecting state.
1778
* Note that this assumes the socket lock is held for sk.
1781
* Zero on success, negative error code on failure.
1784
* The socket may transition to the connected state and wakeup the pending
1785
* caller of connect().
1787
*----------------------------------------------------------------------------
1791
VSockVmciRecvConnectingClientNegotiate(struct sock *sk, // IN: socket
1792
VSockPacket *pkt) // IN: current packet
1797
VMCIQueue *produceQ;
1798
VMCIQueue *consumeQ;
1804
handle = VMCI_INVALID_HANDLE;
1805
attachSubId = VMCI_INVALID_ID;
1806
detachSubId = VMCI_INVALID_ID;
1810
ASSERT(pkt->u.size > 0);
1811
ASSERT(vsk->remoteAddr.svm_cid == VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src));
1812
ASSERT(vsk->remoteAddr.svm_port == pkt->srcPort);
1813
ASSERT(VMCI_HANDLE_INVALID(vsk->qpHandle));
1814
ASSERT(vsk->produceQ == NULL);
1815
ASSERT(vsk->consumeQ == NULL);
1816
ASSERT(vsk->produceSize == 0);
1817
ASSERT(vsk->consumeSize == 0);
1818
ASSERT(vsk->attachSubId == VMCI_INVALID_ID);
1819
ASSERT(vsk->detachSubId == VMCI_INVALID_ID);
1821
/* Verify that we're OK with the proposed queue pair size */
1822
if (pkt->u.size < vsk->queuePairMinSize ||
1823
pkt->u.size > vsk->queuePairMaxSize) {
1829
* Subscribe to attach and detach events first.
1831
* XXX We attach once for each queue pair created for now so it is easy
1832
* to find the socket (it's provided), but later we should only subscribe
1833
* once and add a way to lookup sockets by queue pair handle.
1835
err = VMCIEvent_Subscribe(VMCI_EVENT_QP_PEER_ATTACH,
1836
VSockVmciPeerAttachCB,
1839
if (err < VMCI_SUCCESS) {
1840
err = VSockVmci_ErrorToVSockError(err);
1844
err = VMCIEvent_Subscribe(VMCI_EVENT_QP_PEER_DETACH,
1845
VSockVmciPeerDetachCB,
1848
if (err < VMCI_SUCCESS) {
1849
err = VSockVmci_ErrorToVSockError(err);
1853
/* Make VMCI select the handle for us. */
1854
handle = VMCI_INVALID_HANDLE;
1855
isLocal = vsk->remoteAddr.svm_cid == vsk->localAddr.svm_cid;
1857
err = VMCIQueuePair_Alloc(&handle,
1858
&produceQ, pkt->u.size,
1859
&consumeQ, pkt->u.size,
1860
vsk->remoteAddr.svm_cid,
1861
isLocal ? VMCI_QPFLAG_LOCAL : 0);
1862
if (err < VMCI_SUCCESS) {
1863
err = VSockVmci_ErrorToVSockError(err);
1867
VMCIQueue_Init(handle, produceQ);
1869
err = VSOCK_SEND_QP_OFFER(sk, handle);
1871
err = VSockVmci_ErrorToVSockError(err);
1875
vsk->qpHandle = handle;
1876
vsk->produceQ = produceQ;
1877
vsk->consumeQ = consumeQ;
1878
vsk->produceSize = vsk->consumeSize = pkt->u.size;
1879
vsk->attachSubId = attachSubId;
1880
vsk->detachSubId = detachSubId;
1885
if (attachSubId != VMCI_INVALID_ID) {
1886
VMCIEvent_Unsubscribe(attachSubId);
1887
ASSERT(vsk->attachSubId == VMCI_INVALID_ID);
1890
if (detachSubId != VMCI_INVALID_ID) {
1891
VMCIEvent_Unsubscribe(detachSubId);
1892
ASSERT(vsk->detachSubId == VMCI_INVALID_ID);
1895
if (!VMCI_HANDLE_INVALID(handle)) {
1896
VMCIQueuePair_Detach(handle);
1897
ASSERT(VMCI_HANDLE_INVALID(vsk->qpHandle));
1905
*----------------------------------------------------------------------------
1907
* VSockVmciRecvConnected --
1909
* Receives packets for sockets in the connected state.
1911
* Connected sockets should only ever receive detach, wrote, read, or reset
1912
* control messages. Others are treated as errors that are ignored.
1914
* Wrote and read signify that the peer has produced or consumed,
1917
* Detach messages signify that the connection is being closed cleanly and
1918
* reset messages signify that the connection is being closed in error.
1920
* Note that this assumes the socket lock is held.
1923
* Zero on success, negative error code on failure.
1926
* The peer's shutdown mode may be recorded, the socket may transition to
1927
* the disconnecting state, and waiters on the socket may be signaled.
1930
*----------------------------------------------------------------------------
1934
VSockVmciRecvConnected(struct sock *sk, // IN
1935
VSockPacket *pkt) // IN
1939
ASSERT(sk->compat_sk_state == SS_CONNECTED);
1942
* In cases where we are closing the connection, it's sufficient to mark
1943
* the state change (and maybe error) and wake up any waiting threads.
1944
* Since this is a connected socket, it's owned by a user process and will
1945
* be cleaned up when the failure is passed back on the current or next
1946
* system call. Our system call implementations must therefore check for
1947
* error and state changes on entry and when being awoken.
1949
switch (pkt->type) {
1950
case VSOCK_PACKET_TYPE_SHUTDOWN:
1952
VSockVmciSock *vsk = vsock_sk(sk);
1954
vsk->peerShutdown |= pkt->u.mode;
1955
sk->compat_sk_state_change(sk);
1959
case VSOCK_PACKET_TYPE_RST:
1960
sk->compat_sk_state = SS_DISCONNECTING;
1961
sk->compat_sk_shutdown = SHUTDOWN_MASK;
1962
sk->compat_sk_err = ECONNRESET;
1963
sk->compat_sk_error_report(sk);
1966
case VSOCK_PACKET_TYPE_WROTE:
1967
sk->compat_sk_data_ready(sk, 0);
1970
case VSOCK_PACKET_TYPE_READ:
1971
sk->compat_sk_write_space(sk);
1974
case VSOCK_PACKET_TYPE_WAITING_WRITE:
1975
VSockVmciHandleWaitingWrite(sk, pkt, FALSE, NULL, NULL);
1978
case VSOCK_PACKET_TYPE_WAITING_READ:
1979
VSockVmciHandleWaitingRead(sk, pkt, FALSE, NULL, NULL);
1991
*----------------------------------------------------------------------------
1993
* VSockVmciSendControlPktBH --
1995
* Sends a control packet from bottom-half context.
1998
* Size of datagram sent on success, negative error code otherwise. Note
1999
* that we return a VMCI error message since that's what callers will need
2005
*----------------------------------------------------------------------------
2009
VSockVmciSendControlPktBH(struct sockaddr_vm *src, // IN
2010
struct sockaddr_vm *dst, // IN
2011
VSockPacketType type, // IN
2014
VSockWaitingInfo *wait, // IN
2015
VMCIHandle handle) // IN
2018
* Note that it is safe to use a single packet across all CPUs since two
2019
* tasklets of the same type are guaranteed to not ever run simultaneously.
2020
* If that ever changes, or VMCI stops using tasklets, we can use per-cpu
2023
static VSockPacket pkt;
2025
VSockPacket_Init(&pkt, src, dst, type, size, mode, wait, handle);
2028
#ifdef VSOCK_CONTROL_PACKET_COUNT
2029
controlPacketCount[pkt.type]++;
2031
return VMCIDatagram_Send(&pkt.dg);
2036
*----------------------------------------------------------------------------
2038
* VSockVmciSendControlPkt --
2040
* Sends a control packet.
2043
* Size of datagram sent on success, negative error on failure.
2048
*----------------------------------------------------------------------------
2052
VSockVmciSendControlPkt(struct sock *sk, // IN
2053
VSockPacketType type, // IN
2056
VSockWaitingInfo *wait, // IN
2057
VMCIHandle handle) // IN
2065
* New sockets for connection establishment won't have socket structures
2066
* yet; if one exists, ensure it is of the proper type.
2068
ASSERT(sk->compat_sk_socket ?
2069
sk->compat_sk_socket->type == SOCK_STREAM :
2074
if (!VSockAddr_Bound(&vsk->localAddr)) {
2078
if (!VSockAddr_Bound(&vsk->remoteAddr)) {
2082
pkt = kmalloc(sizeof *pkt, GFP_KERNEL);
2087
VSockPacket_Init(pkt, &vsk->localAddr, &vsk->remoteAddr,
2088
type, size, mode, wait, handle);
2091
err = VMCIDatagram_Send(&pkt->dg);
2094
return VSockVmci_ErrorToVSockError(err);
2097
#ifdef VSOCK_CONTROL_PACKET_COUNT
2098
controlPacketCount[pkt->type]++;
2107
*----------------------------------------------------------------------------
2109
* __VSockVmciBind --
2111
* Common functionality needed to bind the specified address to the
2112
* VSocket. If VMADDR_CID_ANY or VMADDR_PORT_ANY are specified, the context
2113
* ID or port are selected automatically.
2116
* Zero on success, negative error code on failure.
2119
* On success, a new datagram handle is created.
2121
*----------------------------------------------------------------------------
2125
__VSockVmciBind(struct sock *sk, // IN/OUT
2126
struct sockaddr_vm *addr) // IN
2128
static unsigned int port = LAST_RESERVED_PORT + 1;
2129
struct sockaddr_vm newAddr;
2135
ASSERT(sk->compat_sk_socket);
2140
/* First ensure this socket isn't already bound. */
2141
if (VSockAddr_Bound(&vsk->localAddr)) {
2146
* Now bind to the provided address or select appropriate values if none are
2147
* provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that like AF_INET
2148
* prevents binding to a non-local IP address (in most cases), we only allow
2149
* binding to the local CID.
2151
VSockAddr_Init(&newAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
2153
cid = VMCI_GetContextID();
2154
if (addr->svm_cid != cid &&
2155
addr->svm_cid != VMADDR_CID_ANY) {
2156
return -EADDRNOTAVAIL;
2159
newAddr.svm_cid = cid;
2161
switch (sk->compat_sk_socket->type) {
2163
spin_lock_bh(&vsockTableLock);
2165
if (addr->svm_port == VMADDR_PORT_ANY) {
2169
for (i = 0; i < MAX_PORT_RETRIES; i++) {
2170
if (port <= LAST_RESERVED_PORT) {
2171
port = LAST_RESERVED_PORT + 1;
2174
newAddr.svm_port = port++;
2176
if (!__VSockVmciFindBoundSocket(&newAddr)) {
2183
err = -EADDRNOTAVAIL;
2187
/* If port is in reserved range, ensure caller has necessary privileges. */
2188
if (addr->svm_port <= LAST_RESERVED_PORT &&
2189
!capable(CAP_NET_BIND_SERVICE)) {
2194
newAddr.svm_port = addr->svm_port;
2195
if (__VSockVmciFindBoundSocket(&newAddr)) {
2203
/* VMCI will select a resource ID for us if we provide VMCI_INVALID_ID. */
2204
newAddr.svm_port = addr->svm_port == VMADDR_PORT_ANY ?
2208
if (newAddr.svm_port <= LAST_RESERVED_PORT &&
2209
!capable(CAP_NET_BIND_SERVICE)) {
2214
err = VMCIDatagram_CreateHnd(newAddr.svm_port, 0,
2215
VSockVmciRecvDgramCB, sk,
2217
if (err != VMCI_SUCCESS ||
2218
vsk->dgHandle.context == VMCI_INVALID_ID ||
2219
vsk->dgHandle.resource == VMCI_INVALID_ID) {
2220
err = VSockVmci_ErrorToVSockError(err);
2224
newAddr.svm_port = VMCI_HANDLE_TO_RESOURCE_ID(vsk->dgHandle);
2231
VSockAddr_Init(&vsk->localAddr, newAddr.svm_cid, newAddr.svm_port);
2234
* Remove stream sockets from the unbound list and add them to the hash
2235
* table for easy lookup by its address. The unbound list is simply an
2236
* extra entry at the end of the hash table, a trick used by AF_UNIX.
2238
if (sk->compat_sk_socket->type == SOCK_STREAM) {
2239
__VSockVmciRemoveBound(sk);
2240
__VSockVmciInsertBound(vsockBoundSockets(&vsk->localAddr), sk);
2246
if (sk->compat_sk_socket->type == SOCK_STREAM) {
2247
spin_unlock_bh(&vsockTableLock);
2255
*----------------------------------------------------------------------------
2257
* VSockVmciSendWaitingWrite --
2259
* Sends a waiting write notification to this socket's peer.
2262
* TRUE if the datagram is sent successfully, FALSE otherwise.
2265
* Our peer will notify us when there is room to write in to our produce
2268
*----------------------------------------------------------------------------
2273
VSockVmciSendWaitingWrite(struct sock *sk, // IN
2274
uint64 roomNeeded) // IN
2276
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
2278
VSockWaitingInfo waitingInfo;
2287
VMCIQueue_GetPointers(vsk->produceQ, vsk->consumeQ, &tail, &head);
2288
roomLeft = vsk->produceSize - tail;
2289
if (roomNeeded + 1 >= roomLeft) {
2290
/* Wraps around to current generation. */
2291
waitingInfo.offset = roomNeeded + 1 - roomLeft;
2292
waitingInfo.generation = vsk->produceQGeneration;
2294
waitingInfo.offset = tail + roomNeeded + 1;
2295
waitingInfo.generation = vsk->produceQGeneration - 1;
2298
return VSOCK_SEND_WAITING_WRITE(sk, &waitingInfo) > 0;
2306
*----------------------------------------------------------------------------
2308
* VSockVmciSendWaitingRead --
2310
* Sends a waiting read notification to this socket's peer.
2313
* TRUE if the datagram is sent successfully, FALSE otherwise.
2316
* Our peer will notify us when there is data to read from our consume
2319
*----------------------------------------------------------------------------
2323
VSockVmciSendWaitingRead(struct sock *sk, // IN
2324
uint64 roomNeeded) // IN
2326
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
2328
VSockWaitingInfo waitingInfo;
2337
VMCIQueue_GetPointers(vsk->consumeQ, vsk->produceQ, &tail, &head);
2338
roomLeft = vsk->consumeSize - head;
2339
if (roomNeeded >= roomLeft) {
2340
waitingInfo.offset = roomNeeded - roomLeft;
2341
waitingInfo.generation = vsk->consumeQGeneration + 1;
2343
waitingInfo.offset = head + roomNeeded;
2344
waitingInfo.generation = vsk->consumeQGeneration;
2347
return VSOCK_SEND_WAITING_READ(sk, &waitingInfo) > 0;
2356
*----------------------------------------------------------------------------
2358
* __VSockVmciCreate --
2360
* Does the work to create the sock structure.
2363
* sock structure on success, NULL on failure.
2366
* Allocated sk is added to the unbound sockets list iff it is owned by
2369
*----------------------------------------------------------------------------
2372
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14)
2373
static struct sock *
2374
__VSockVmciCreate(struct socket *sock, // IN: Owning socket, may be NULL
2375
unsigned int priority) // IN: Allocation flags
2376
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
2377
static struct sock *
2378
__VSockVmciCreate(struct socket *sock, // IN: Owning socket, may be NULL
2379
gfp_t priority) // IN: Allocation flags
2381
static struct sock *
2382
__VSockVmciCreate(struct net *net, // IN: Network namespace
2383
struct socket *sock, // IN: Owning socket, may be NULL
2384
gfp_t priority) // IN: Allocation flags
2393
* Before 2.5.5, sk_alloc() always used its own cache and protocol-specific
2394
* data was contained in the protinfo union. We cannot use those other
2395
* structures so we allocate our own structure and attach it to the
2396
* user_data pointer that we don't otherwise need. We must be sure to free
2397
* it later in our destruct routine.
2399
* From 2.5.5 until 2.6.8, sk_alloc() offered to use a cache that the
2400
* caller provided. After this, the cache was moved into the proto
2401
* structure, but you still had to specify the size and cache yourself until
2402
* 2.6.12. Most recently (in 2.6.24), sk_alloc() was changed to expect the
2403
* network namespace, and the option to zero the sock was dropped.
2406
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5)
2407
sk = sk_alloc(vsockVmciFamilyOps.family, priority, 1);
2408
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9)
2409
sk = sk_alloc(vsockVmciFamilyOps.family, priority,
2410
sizeof (VSockVmciSock), vsockCachep);
2411
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
2412
sk = sk_alloc(vsockVmciFamilyOps.family, priority,
2413
vsockVmciProto.slab_obj_size, vsockVmciProto.slab);
2414
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
2415
sk = sk_alloc(vsockVmciFamilyOps.family, priority, &vsockVmciProto, 1);
2417
sk = sk_alloc(net, vsockVmciFamilyOps.family, priority, &vsockVmciProto);
2423
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5)
2424
vsock_sk(sk) = kmalloc(sizeof *vsk, priority);
2425
if (!vsock_sk(sk)) {
2429
sk_vsock(vsock_sk(sk)) = sk;
2433
* If we go this far, we know the socket family is registered, so there's no
2434
* need to register it now.
2436
spin_lock(®istrationLock);
2437
vsockVmciSocketCount++;
2438
spin_unlock(®istrationLock);
2440
sock_init_data(sock, sk);
2443
VSockAddr_Init(&vsk->localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
2444
VSockAddr_Init(&vsk->remoteAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
2446
sk->compat_sk_destruct = VSockVmciSkDestruct;
2447
sk->compat_sk_backlog_rcv = VSockVmciQueueRcvSkb;
2448
sk->compat_sk_state = SS_UNCONNECTED;
2450
INIT_LIST_HEAD(&vsk->boundTable);
2451
INIT_LIST_HEAD(&vsk->connectedTable);
2452
vsk->dgHandle = VMCI_INVALID_HANDLE;
2454
vsk->qpHandle = VMCI_INVALID_HANDLE;
2455
vsk->produceQ = vsk->consumeQ = NULL;
2456
vsk->produceQGeneration = vsk->consumeQGeneration = 0;
2457
vsk->produceSize = vsk->consumeSize = 0;
2458
vsk->queuePairSize = VSOCK_DEFAULT_QP_SIZE;
2459
vsk->queuePairMinSize = VSOCK_DEFAULT_QP_SIZE_MIN;
2460
vsk->queuePairMaxSize = VSOCK_DEFAULT_QP_SIZE_MAX;
2461
vsk->peerWaitingRead = vsk->peerWaitingWrite = FALSE;
2462
memset(&vsk->peerWaitingReadInfo, 0, sizeof vsk->peerWaitingReadInfo);
2463
memset(&vsk->peerWaitingWriteInfo, 0, sizeof vsk->peerWaitingWriteInfo);
2464
vsk->listener = NULL;
2465
INIT_LIST_HEAD(&vsk->pendingLinks);
2466
INIT_LIST_HEAD(&vsk->acceptQueue);
2467
vsk->rejected = FALSE;
2468
vsk->attachSubId = vsk->detachSubId = VMCI_INVALID_ID;
2469
vsk->peerShutdown = 0;
2473
VSockVmciInsertBound(vsockUnboundSockets, sk);
2481
*----------------------------------------------------------------------------
2483
* __VSockVmciRelease --
2485
* Releases the provided socket.
2491
* Any pending sockets are also released.
2493
*----------------------------------------------------------------------------
2497
__VSockVmciRelease(struct sock *sk) // IN
2500
struct sk_buff *skb;
2501
struct sock *pending;
2502
struct VSockVmciSock *vsk;
2505
pending = NULL; /* Compiler warning. */
2507
if (VSockVmciInBoundTable(sk)) {
2508
VSockVmciRemoveBound(sk);
2511
if (VSockVmciInConnectedTable(sk)) {
2512
VSockVmciRemoveConnected(sk);
2515
if (!VMCI_HANDLE_INVALID(vsk->dgHandle)) {
2516
VMCIDatagram_DestroyHnd(vsk->dgHandle);
2517
vsk->dgHandle = VMCI_INVALID_HANDLE;
2522
sk->compat_sk_shutdown = SHUTDOWN_MASK;
2524
while ((skb = skb_dequeue(&sk->compat_sk_receive_queue))) {
2528
/* Clean up any sockets that never were accepted. */
2530
while ((pending = VSockVmciDequeueAccept(sk)) != NULL) {
2531
__VSockVmciRelease(pending);
2547
*----------------------------------------------------------------------------
2549
* VSockVmciSkDestruct --
2551
* Destroys the provided socket. This is called by sk_free(), which is
2552
* invoked when the reference count of the socket drops to zero.
2558
* Socket count is decremented.
2560
*----------------------------------------------------------------------------
2564
/*
 * Final teardown of a socket, in the order visible below:
 *   - unsubscribes the attach and detach VMCI event subscriptions, if set;
 *   - detaches the queue pair and clears the cached queue pointers and sizes;
 *   - asserts the socket is no longer in the bound/connected tables, the
 *     pending list or an accept queue;
 *   - resets the local and remote addresses without touching the family;
 *   - on kernels older than 2.5.5, releases the VSockVmciSock with kfree();
 *   - decrements vsockVmciSocketCount and calls VSockVmciTestUnregister()
 *     while holding registrationLock;
 *   - with VSOCK_CONTROL_PACKET_COUNT defined, logs every control packet
 *     counter.
 */
VSockVmciSkDestruct(struct sock *sk) // IN
2571
if (vsk->attachSubId != VMCI_INVALID_ID) {
2572
VMCIEvent_Unsubscribe(vsk->attachSubId);
2573
vsk->attachSubId = VMCI_INVALID_ID;
2576
if (vsk->detachSubId != VMCI_INVALID_ID) {
2577
VMCIEvent_Unsubscribe(vsk->detachSubId);
2578
vsk->detachSubId = VMCI_INVALID_ID;
2581
if (!VMCI_HANDLE_INVALID(vsk->qpHandle)) {
2582
VMCIQueuePair_Detach(vsk->qpHandle);
2583
vsk->qpHandle = VMCI_INVALID_HANDLE;
2584
vsk->produceQ = vsk->consumeQ = NULL;
2585
vsk->produceSize = vsk->consumeSize = 0;
2590
* Each list entry holds a reference on the socket, so we should not even be
2591
* here if the socket is in one of our lists. If we are we have a stray
2592
* sock_put() that needs to go away.
2594
ASSERT(!VSockVmciInBoundTable(sk));
2595
ASSERT(!VSockVmciInConnectedTable(sk));
2597
ASSERT(!VSockVmciIsPending(sk));
2598
ASSERT(!VSockVmciInAcceptQueue(sk));
2602
* When clearing these addresses, there's no need to set the family and
2603
* possibly register the address family with the kernel.
2605
VSockAddr_InitNoFamily(&vsk->localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
2606
VSockAddr_InitNoFamily(&vsk->remoteAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
2608
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5)
2609
ASSERT(vsock_sk(sk) == vsk);
2610
kfree(vsock_sk(sk));
2613
spin_lock(&registrationLock);
2614
vsockVmciSocketCount--;
2615
VSockVmciTestUnregister();
2616
spin_unlock(&registrationLock);
2618
#ifdef VSOCK_CONTROL_PACKET_COUNT
2621
for (index = 0; index < ARRAYSIZE(controlPacketCount); index++) {
2622
Warning("Control packet count: Type = %u, Count = %"FMT64"u\n",
2623
index, controlPacketCount[index]);
2631
*----------------------------------------------------------------------------
2633
* VSockVmciQueueRcvSkb --
2635
* Receives skb on the socket's receive queue.
2638
* Zero on success, negative error code on failure.
2643
*----------------------------------------------------------------------------
2647
/*
 * Thin wrapper around sock_queue_rcv_skb(): queues skb on sk and keeps the
 * kernel's status in err.
 * NOTE(review): what happens to skb when err is non-zero is not visible in
 * this excerpt -- confirm before relying on ownership semantics.
 */
VSockVmciQueueRcvSkb(struct sock *sk, // IN
2648
struct sk_buff *skb) // IN
2652
err = sock_queue_rcv_skb(sk, skb);
2662
*----------------------------------------------------------------------------
2664
* VSockVmciRegisterProto --
2666
* Registers the vmci sockets protocol family.
2669
* Zero on success, error code on failure.
2674
*----------------------------------------------------------------------------
2678
/*
 * Kernel-version-specific setup of the socket allocation backing:
 *   - before 2.5.5: nothing to do;
 *   - 2.5.5 up to (not including) 2.6.9: creates the dedicated cache
 *     vsockCachep, sized for VSockVmciSock;
 *   - 2.6.9 up to (not including) 2.6.12: sk_alloc_slab() on vsockVmciProto,
 *     with sk_alloc_slab_error() presumably on the failure path;
 *   - otherwise: proto_register() with alloc_slab = 1.
 * NOTE(review): the last branch is presumably the #else arm; the directive
 * and the error checks are not visible in this excerpt.
 */
VSockVmciRegisterProto(void)
2685
* Before 2.6.9, each address family created their own slab (by calling
2686
* kmem_cache_create() directly). From 2.6.9 until 2.6.11, these address
2687
* families instead called sk_alloc_slab() and the allocated slab was
2688
* assigned to the slab variable in the proto struct and was created of size
2689
* slab_obj_size. As of 2.6.12 and later, this slab allocation was moved
2690
* into proto_register() and only done if you specified a non-zero value for
2691
* the second argument (alloc_slab); the size of the slab element was
2692
* changed to obj_size.
2694
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5)
2695
/* Simply here for clarity and so else case at end implies > rest. */
2696
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9)
2697
vsockCachep = kmem_cache_create("vsock", sizeof (VSockVmciSock),
2698
0, SLAB_HWCACHE_ALIGN, NULL, NULL);
2702
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
2703
err = sk_alloc_slab(&vsockVmciProto, "vsock");
2705
sk_alloc_slab_error(&vsockVmciProto);
2708
/* Specify 1 as the second argument so the slab is created for us. */
2709
err = proto_register(&vsockVmciProto, 1);
2717
*----------------------------------------------------------------------------
2719
* VSockVmciUnregisterProto --
2721
* Unregisters the vmci sockets protocol family.
2729
*----------------------------------------------------------------------------
2733
/*
 * Counterpart of VSockVmciRegisterProto(): depending on the kernel version
 * it destroys vsockCachep, frees the proto slab with sk_free_slab(), or
 * calls proto_unregister().  When VSOCK_CONTROL_PACKET_COUNT is defined it
 * also zeroes every entry of controlPacketCount.
 */
VSockVmciUnregisterProto(void)
2735
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5)
2736
/* Simply here for clarity and so else case at end implies > rest. */
2737
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9)
2738
kmem_cache_destroy(vsockCachep);
2739
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
2740
sk_free_slab(&vsockVmciProto);
2742
proto_unregister(&vsockVmciProto);
2745
#ifdef VSOCK_CONTROL_PACKET_COUNT
2748
for (index = 0; index < ARRAYSIZE(controlPacketCount); index++) {
2749
controlPacketCount[index] = 0;
2757
*----------------------------------------------------------------------------
2759
* VSockVmciRegisterAddressFamily --
2761
* Registers our socket address family with the kernel.
2763
* Note that this assumes the registration lock is held.
2766
* The address family value on success, negative error code on failure.
2769
* Callers of socket operations with the returned value, on success, will
2770
* be able to use our socket implementation.
2772
*----------------------------------------------------------------------------
2776
/*
 * Brings up the VMCI Sockets address family.  Visible steps:
 *   1. caches VMCI_DeviceGet() in vmciDevicePresent and logs when the device
 *      is absent;
 *   2. creates the control-packet datagram handle vmciStreamHandle, with
 *      VSockVmciRecvStreamCB as its receive callback, and warns when the
 *      call fails or yields an invalid context/resource id;
 *   3. subscribes VSockVmciQPResumedCB to VMCI_EVENT_QP_RESUMED;
 *   4. walks candidate family numbers from NPROTO - 1 downwards, calling
 *      sock_register() for each and copying the number into the family,
 *      dgram and stream ops structures;
 *   5. returns vsockVmciFamilyOps.family.
 * The statements after the return unsubscribe the QP-resumed event and
 * destroy the datagram handle.
 * NOTE(review): those trailing statements are presumably the error-unwind
 * path; its label, the early returns and the loop exit condition are not
 * visible in this excerpt.
 */
VSockVmciRegisterAddressFamily(void)
2783
* We don't call into the vmci module or register our socket family if the
2784
* vmci device isn't present.
2786
vmciDevicePresent = VMCI_DeviceGet();
2787
if (!vmciDevicePresent) {
2788
Log("Could not register VMCI Sockets because VMCI device is not present.\n");
2793
* Create the datagram handle that we will use to send and receive all
2794
* VSocket control messages for this context.
2796
err = VMCIDatagram_CreateHnd(VSOCK_PACKET_RID, 0,
2797
VSockVmciRecvStreamCB, NULL, &vmciStreamHandle);
2798
if (err != VMCI_SUCCESS ||
2799
vmciStreamHandle.context == VMCI_INVALID_ID ||
2800
vmciStreamHandle.resource == VMCI_INVALID_ID) {
2801
Warning("Unable to create datagram handle. (%d)\n", err);
2805
err = VMCIEvent_Subscribe(VMCI_EVENT_QP_RESUMED,
2806
VSockVmciQPResumedCB,
2809
if (err < VMCI_SUCCESS) {
2810
Warning("Unable to subscribe to QP resumed event. (%d)\n", err);
2812
qpResumedSubId = VMCI_INVALID_ID;
2818
* Linux will not allocate an address family to code that is not part of the
2819
* kernel proper, so until that time comes we need a workaround. Here we
2820
* loop through the allowed values and claim the first one that's not
2821
* currently used. Users will then make an ioctl(2) into our module to
2822
* retrieve this value before calling socket(2).
2824
* This is undesirable, but it's better than having users' programs break
2825
* when a hard-coded, currently-available value gets assigned to someone
2826
* else in the future.
2828
for (i = NPROTO - 1; i >= 0; i--) {
2829
vsockVmciFamilyOps.family = i;
2830
err = sock_register(&vsockVmciFamilyOps);
2832
Warning("Could not register address family %d.\n", i);
2833
vsockVmciFamilyOps.family = VSOCK_INVALID_FAMILY;
2835
vsockVmciDgramOps.family = i;
2837
vsockVmciStreamOps.family = i;
2847
return vsockVmciFamilyOps.family;
2851
if (qpResumedSubId != VMCI_INVALID_ID) {
2852
VMCIEvent_Unsubscribe(qpResumedSubId);
2853
qpResumedSubId = VMCI_INVALID_ID;
2855
VMCIDatagram_DestroyHnd(vmciStreamHandle);
2862
*----------------------------------------------------------------------------
2864
* VSockVmciUnregisterAddressFamily --
2866
* Unregisters the address family with the kernel.
2868
* Note that this assumes the registration lock is held.
2874
* Our socket implementation is no longer accessible.
2876
*----------------------------------------------------------------------------
2880
/*
 * Undoes VSockVmciRegisterAddressFamily().  Does nothing when
 * vmciDevicePresent is false.  Otherwise it destroys vmciStreamHandle
 * (warning if that fails), unsubscribes qpResumedSubId, calls
 * sock_unregister() on the registered family, and finally resets the family
 * field of the family, dgram and stream ops structures to
 * VSOCK_INVALID_FAMILY.
 */
VSockVmciUnregisterAddressFamily(void)
2883
if (!vmciDevicePresent) {
2884
/* Nothing was registered. */
2888
if (!VMCI_HANDLE_INVALID(vmciStreamHandle)) {
2889
if (VMCIDatagram_DestroyHnd(vmciStreamHandle) != VMCI_SUCCESS) {
2890
Warning("Could not destroy VMCI datagram handle.\n");
2894
if (qpResumedSubId != VMCI_INVALID_ID) {
2895
VMCIEvent_Unsubscribe(qpResumedSubId);
2896
qpResumedSubId = VMCI_INVALID_ID;
2900
if (vsockVmciFamilyOps.family != VSOCK_INVALID_FAMILY) {
2901
sock_unregister(vsockVmciFamilyOps.family);
2904
vsockVmciDgramOps.family = vsockVmciFamilyOps.family = VSOCK_INVALID_FAMILY;
2906
vsockVmciStreamOps.family = vsockVmciFamilyOps.family;
2913
* Socket operations.
2917
*----------------------------------------------------------------------------
2919
* VSockVmciRelease --
2921
* Releases the provided socket by freeing the contents of its queue. This
2922
* is called when a user process calls close(2) on the socket.
2925
* Zero on success, negative error code on failure.
2930
*----------------------------------------------------------------------------
2934
/*
 * close(2) entry point: hands sock->sk to __VSockVmciRelease() for the
 * actual teardown and then marks the struct socket as SS_FREE.
 */
VSockVmciRelease(struct socket *sock) // IN
2936
__VSockVmciRelease(sock->sk);
2938
sock->state = SS_FREE;
2945
*----------------------------------------------------------------------------
2949
* Binds the provided address to the provided socket.
2952
* Zero on success, negative error code on failure.
2957
*----------------------------------------------------------------------------
2961
/*
 * bind(2) entry point: validates addr with VSockAddr_Cast() and passes the
 * resulting sockaddr_vm to __VSockVmciBind(), keeping its status in err.
 * NOTE(review): the error returned for a bad address and any socket locking
 * are not visible in this excerpt.
 */
VSockVmciBind(struct socket *sock, // IN
2962
struct sockaddr *addr, // IN
2967
struct sockaddr_vm *vmciAddr;
2971
if (VSockAddr_Cast(addr, addrLen, &vmciAddr) != 0) {
2976
err = __VSockVmciBind(sk, vmciAddr);
2984
*----------------------------------------------------------------------------
2986
* VSockVmciDgramConnect --
2988
* Connects a datagram socket. This can be called multiple times to change
2989
* the socket's association and can be called with a sockaddr whose family
2990
* is set to AF_UNSPEC to dissolve any existing association.
2993
* Zero on success, negative error code on failure.
2998
*----------------------------------------------------------------------------
3002
/*
 * connect(2) for datagram sockets.
 *   - An AF_UNSPEC address dissolves the association: the remote address is
 *     reset to (VMADDR_CID_ANY, VMADDR_PORT_ANY) and sock->state becomes
 *     SS_UNCONNECTED.
 *   - Any other address that fails VSockAddr_Cast() takes the error branch
 *     (body not visible here).
 *   - Otherwise the socket is autobound if it has no local address, the peer
 *     is copied into vsk->remoteAddr and sock->state becomes SS_CONNECTED.
 */
VSockVmciDgramConnect(struct socket *sock, // IN
3003
struct sockaddr *addr, // IN
3010
struct sockaddr_vm *remoteAddr;
3015
err = VSockAddr_Cast(addr, addrLen, &remoteAddr);
/*
 * NOTE(review): remoteAddr is dereferenced here after the cast has already
 * failed, so this relies on VSockAddr_Cast() storing the out pointer even on
 * the -EAFNOSUPPORT path -- confirm against its implementation.
 */
3016
if (err == -EAFNOSUPPORT && remoteAddr->svm_family == AF_UNSPEC) {
3018
VSockAddr_Init(&vsk->remoteAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3019
sock->state = SS_UNCONNECTED;
3022
} else if (err != 0) {
3029
if (!VSockAddr_Bound(&vsk->localAddr)) {
3030
struct sockaddr_vm localAddr;
3032
VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3033
if ((err = __VSockVmciBind(sk, &localAddr))) {
3038
memcpy(&vsk->remoteAddr, remoteAddr, sizeof vsk->remoteAddr);
3039
sock->state = SS_CONNECTED;
3049
*----------------------------------------------------------------------------
3051
* VSockVmciStreamConnect --
3053
* Connects a stream socket.
3056
* Zero on success, negative error code on failure.
3061
*----------------------------------------------------------------------------
3065
/*
 * connect(2) for stream sockets.
 *
 * After dispatching on sock->state, a fresh connect validates addr, rejects
 * destinations for which VSockAddr_SocketContext() is false, records the
 * peer in vsk->remoteAddr, autobinds the local address when needed, moves sk
 * to SS_CONNECTING and sends the connection request sized by
 * vsk->queuePairSize; if that step fails sk goes back to SS_UNCONNECTED.
 * sock->state is then set to SS_CONNECTING.
 *
 * The function then waits, interruptibly, until sk reaches SS_CONNECTED or
 * sk_err is set.  The wait budget comes from sock_sndtimeo() with the
 * O_NONBLOCK bit of flags.  A pending signal yields sock_intr_errno(); a set
 * sk_err is returned negated.
 *
 * The last two assignments put both sk and sock back to SS_UNCONNECTED.
 * NOTE(review): those are presumably the failure epilogue; the labels and
 * the exact error codes of each branch are not visible in this excerpt.
 */
VSockVmciStreamConnect(struct socket *sock, // IN
3066
struct sockaddr *addr, // IN
3073
struct sockaddr_vm *remoteAddr;
3075
COMPAT_DEFINE_WAIT(wait);
3083
/* XXX AF_UNSPEC should make us disconnect like AF_INET. */
3085
switch (sock->state) {
3089
case SS_DISCONNECTING:
3095
* This continues on so we can move sock into the SS_CONNECTED state once
3096
* the connection has completed (at which point err will be set to zero
3097
* also). Otherwise, we will either wait for the connection or return
3098
* -EALREADY should this be a non-blocking call.
3103
ASSERT(sk->compat_sk_state == SS_FREE ||
3104
sk->compat_sk_state == SS_UNCONNECTED);
3105
if (VSockAddr_Cast(addr, addrLen, &remoteAddr) != 0) {
3110
/* The hypervisor and well-known contexts do not have socket endpoints. */
3111
if (!VSockAddr_SocketContext(remoteAddr->svm_cid)) {
3116
/* Set the remote address that we are connecting to. */
3117
memcpy(&vsk->remoteAddr, remoteAddr, sizeof vsk->remoteAddr);
3119
/* Autobind this socket to the local address if necessary. */
3120
if (!VSockAddr_Bound(&vsk->localAddr)) {
3121
struct sockaddr_vm localAddr;
3123
VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3124
if ((err = __VSockVmciBind(sk, &localAddr))) {
3129
sk->compat_sk_state = SS_CONNECTING;
3131
err = VSOCK_SEND_CONN_REQUEST(sk, vsk->queuePairSize);
3133
sk->compat_sk_state = SS_UNCONNECTED;
3138
* Mark sock as connecting and set the error code to in progress in case
3139
* this is a non-blocking connect.
3141
sock->state = SS_CONNECTING;
3146
* The receive path will handle all communication until we are able to enter
3147
* the connected state. Here we wait for the connection to be completed or
3148
* a notification of an error.
3150
timeout = sock_sndtimeo(sk, flags & O_NONBLOCK);
3151
compat_init_prepare_to_wait(sk->compat_sk_sleep, &wait, TASK_INTERRUPTIBLE);
3153
while (sk->compat_sk_state != SS_CONNECTED && sk->compat_sk_err == 0) {
3156
* If we're not going to block, skip ahead to preserve error code set
3163
timeout = schedule_timeout(timeout);
3166
if (signal_pending(current)) {
3167
err = sock_intr_errno(timeout);
3169
} else if (timeout == 0) {
3174
compat_cont_prepare_to_wait(sk->compat_sk_sleep, &wait, TASK_INTERRUPTIBLE);
3177
if (sk->compat_sk_err) {
3178
err = -sk->compat_sk_err;
3181
ASSERT(sk->compat_sk_state == SS_CONNECTED);
3186
compat_finish_wait(sk->compat_sk_sleep, &wait, TASK_RUNNING);
3192
sk->compat_sk_state = SS_UNCONNECTED;
3193
sock->state = SS_UNCONNECTED;
3199
*----------------------------------------------------------------------------
3201
* VSockVmciAccept --
3203
* Accepts next available connection request for this socket.
3206
* Zero on success, negative error code on failure.
3211
*----------------------------------------------------------------------------
3215
/*
 * accept(2) entry point.
 *
 * With the listener locked, the socket must be SOCK_STREAM and in SS_LISTEN.
 * The function then loops until VSockVmciDequeueAccept() yields a child or
 * the listener has sk_err set, dropping the listener lock around each
 * schedule_timeout() sleep.  A pending signal yields sock_intr_errno().
 *
 * Once a child is dequeued the ack backlog is decremented and the child is
 * locked.  If the listener has an error the child is flagged rejected and
 * the reference is dropped; otherwise newsock becomes SS_CONNECTED and the
 * child is grafted onto it with sock_graft().  In both cases the child lock
 * is released and one reference is put.
 */
VSockVmciAccept(struct socket *sock, // IN
3216
struct socket *newsock, // IN/OUT
3219
struct sock *listener;
3221
struct sock *connected;
3222
VSockVmciSock *vconnected;
3224
COMPAT_DEFINE_WAIT(wait);
3227
listener = sock->sk;
3229
lock_sock(listener);
3231
if (sock->type != SOCK_STREAM) {
3236
if (listener->compat_sk_state != SS_LISTEN) {
3242
* Wait for children sockets to appear; these are the new sockets created
3243
* upon connection establishment.
/*
 * NOTE(review): the wait budget is taken from the *send* timeout; accept()
 * implementations in other address families use the receive timeout
 * (sock_rcvtimeo) -- confirm whether SO_SNDTIMEO is really intended here.
 */
3245
timeout = sock_sndtimeo(listener, flags & O_NONBLOCK);
3246
compat_init_prepare_to_wait(listener->compat_sk_sleep, &wait, TASK_INTERRUPTIBLE);
3248
while ((connected = VSockVmciDequeueAccept(listener)) == NULL &&
3249
listener->compat_sk_err == 0) {
3250
release_sock(listener);
3251
timeout = schedule_timeout(timeout);
3252
lock_sock(listener);
3254
if (signal_pending(current)) {
3255
err = sock_intr_errno(timeout);
3257
} else if (timeout == 0) {
3262
compat_cont_prepare_to_wait(listener->compat_sk_sleep, &wait, TASK_INTERRUPTIBLE);
3265
if (listener->compat_sk_err) {
3266
err = -listener->compat_sk_err;
3270
listener->compat_sk_ack_backlog--;
3272
lock_sock(connected);
3273
vconnected = vsock_sk(connected);
3276
* If the listener socket has received an error, then we should reject
3277
* this socket and return. Note that we simply mark the socket rejected,
3278
* drop our reference, and let the cleanup function handle the cleanup;
3279
* the fact that we found it in the listener's accept queue guarantees
3280
* that the cleanup function hasn't run yet.
3283
vconnected->rejected = TRUE;
3284
release_sock(connected);
3285
sock_put(connected);
3289
newsock->state = SS_CONNECTED;
3290
sock_graft(connected, newsock);
3291
release_sock(connected);
3292
sock_put(connected);
3296
compat_finish_wait(listener->compat_sk_sleep, &wait, TASK_RUNNING);
3298
release_sock(listener);
3305
*----------------------------------------------------------------------------
3307
* VSockVmciGetname --
3309
* Provides the local or remote address for the socket.
3312
* Zero on success, negative error code otherwise.
3317
*----------------------------------------------------------------------------
3321
/*
 * getsockname(2)/getpeername(2) backend.  Selects either vsk->remoteAddr or
 * vsk->localAddr, copies the whole sockaddr_vm into addr and stores its size
 * in *addrLen.  The remote address is only handed out past the
 * SS_CONNECTED check.
 * The compile-time assertion guards the hardcoded 128-byte size of the
 * caller's buffer.
 * NOTE(review): the parameter that selects peer vs. local and the error
 * returned for an unconnected socket are not visible in this excerpt.
 */
VSockVmciGetname(struct socket *sock, // IN
3322
struct sockaddr *addr, // OUT
3323
int *addrLen, // OUT
3329
struct sockaddr_vm *vmciAddr;
3338
if (sock->state != SS_CONNECTED) {
3342
vmciAddr = &vsk->remoteAddr;
3344
vmciAddr = &vsk->localAddr;
3353
* sys_getsockname() and sys_getpeername() pass us a MAX_SOCK_ADDR-sized
3354
* buffer and don't set addrLen. Unfortunately that macro is defined in
3355
* socket.c instead of .h, so we hardcode its value here.
3357
ASSERT_ON_COMPILE(sizeof *vmciAddr <= 128);
3358
memcpy(addr, vmciAddr, sizeof *vmciAddr);
3359
*addrLen = sizeof *vmciAddr;
3368
*----------------------------------------------------------------------------
3372
* Waits on file for activity then provides mask indicating state of socket.
3375
* Mask of flags containing socket state.
3380
*----------------------------------------------------------------------------
3384
/*
 * poll(2)/select(2) backend.  Registers the caller on the socket's wait
 * queue with poll_wait() and then builds the event mask:
 *   - sk_err, a full shutdown (SHUTDOWN_MASK) and, on 2.6.17+, RCV_SHUTDOWN
 *     each get their own check (the bits they add are not visible here);
 *   - SOCK_DGRAM: readable when the receive queue is non-empty or receive is
 *     shut down; writable unless send is shut down;
 *   - SOCK_STREAM: readable when a listening socket has queued connections,
 *     when an attached queue pair has data ready, or when any shutdown bit
 *     is set; writable when connected, not send-shutdown and the produce
 *     queue has free space.
 * For connected stream sockets it also sends the waiting-write and
 * waiting-read notifications to the peer on every call.
 */
VSockVmciPoll(struct file *file, // IN
3385
struct socket *sock, // IN
3386
poll_table *wait) // IN
3393
poll_wait(file, sk->compat_sk_sleep, wait);
3396
if (sk->compat_sk_err) {
3400
if (sk->compat_sk_shutdown == SHUTDOWN_MASK) {
3404
/* POLLRDHUP wasn't added until 2.6.17. */
3405
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 17)
3406
if (sk->compat_sk_shutdown & RCV_SHUTDOWN) {
3411
if (sock->type == SOCK_DGRAM) {
3413
* For datagram sockets we can read if there is something in the queue
3414
* and write as long as the socket isn't shutdown for sending.
3416
if (!skb_queue_empty(&sk->compat_sk_receive_queue) ||
3417
(sk->compat_sk_shutdown & RCV_SHUTDOWN)) {
3418
mask |= POLLIN | POLLRDNORM;
3421
if (!(sk->compat_sk_shutdown & SEND_SHUTDOWN)) {
3422
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
3425
} else if (sock->type == SOCK_STREAM) {
3433
* Listening sockets that have connections in their accept queue and
3434
* connected sockets that have consumable data can be read. Sockets
3435
* whose connections have been close, reset, or terminated should also be
3436
* considered read, and we check the shutdown flag for that.
3438
if ((sk->compat_sk_state == SS_LISTEN &&
3439
!VSockVmciIsAcceptQueueEmpty(sk)) ||
3440
(!VMCI_HANDLE_INVALID(vsk->qpHandle) &&
3441
!(sk->compat_sk_shutdown & RCV_SHUTDOWN) &&
3442
VMCIQueue_BufReady(vsk->consumeQ,
3443
vsk->produceQ, vsk->consumeSize)) ||
3444
sk->compat_sk_shutdown) {
3445
mask |= POLLIN | POLLRDNORM;
3449
* Connected sockets that can produce data can be written.
3451
if (sk->compat_sk_state == SS_CONNECTED &&
3452
!(sk->compat_sk_shutdown & SEND_SHUTDOWN) &&
3453
VMCIQueue_FreeSpace(vsk->produceQ,
3454
vsk->consumeQ, vsk->produceSize) > 0) {
3455
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
3459
* Connected sockets also need to notify their peer that they are
3460
* waiting. Optimally these calls would happen in the code that decides
3461
* whether the caller will wait or not, but that's core kernel code and
3462
* this is the best we can do. If the caller doesn't sleep, the worst
3463
* that happens is a few extra datagrams are sent.
3465
if (sk->compat_sk_state == SS_CONNECTED) {
3466
VSockVmciSendWaitingWrite(sk, 1);
3467
VSockVmciSendWaitingRead(sk, 1);
3480
*----------------------------------------------------------------------------
3482
* VSockVmciListen --
3484
* Signify that this socket is listening for connection requests.
3487
* Zero on success, negative error code on failure.
3492
*----------------------------------------------------------------------------
3496
/*
 * listen(2) entry point.  Three preconditions are checked: the socket is
 * SOCK_STREAM, sock->state is SS_UNCONNECTED, and the local address is
 * bound.  When they hold, backlog is stored as the maximum ack backlog and
 * sk moves to SS_LISTEN.
 * NOTE(review): the error code returned by each failed check is not visible
 * in this excerpt.
 */
VSockVmciListen(struct socket *sock, // IN
3507
if (sock->type != SOCK_STREAM) {
3512
if (sock->state != SS_UNCONNECTED) {
3519
if (!VSockAddr_Bound(&vsk->localAddr)) {
3524
sk->compat_sk_max_ack_backlog = backlog;
3525
sk->compat_sk_state = SS_LISTEN;
3537
*----------------------------------------------------------------------------
3539
* VSockVmciShutdown --
3541
* Shuts down the provided socket in the provided method.
3544
* Zero on success, negative error code on failure.
3549
*----------------------------------------------------------------------------
3553
/*
 * shutdown(2) entry point.  The user-level "how" value is translated to the
 * kernel's RCV_SHUTDOWN/SEND_SHUTDOWN bits (see the comment below), and a
 * mode that is zero or has bits outside SHUTDOWN_MASK is rejected.  A socket
 * whose sock->state is SS_UNCONNECTED takes a separate branch (body not
 * visible here); otherwise sock->state becomes SS_DISCONNECTING.
 *
 * The requested bits are OR-ed into sk_shutdown, waiters are woken through
 * sk_state_change(), and for SOCK_STREAM sockets the peer is told via
 * VSOCK_SEND_SHUTDOWN().
 * NOTE(review): the increment of mode itself and the guards around the
 * visible statements are not part of this excerpt.
 */
VSockVmciShutdown(struct socket *sock, // IN
3559
* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses
3560
* RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode here
3561
* like the other address families do. Note also that the increment makes
3562
* SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), which is what we
3567
if ((mode & ~SHUTDOWN_MASK) || !mode) {
3571
if (sock->state == SS_UNCONNECTED) {
3576
sock->state = SS_DISCONNECTING;
3578
/* Receive and send shutdowns are treated alike. */
3579
mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
3582
sk->compat_sk_shutdown |= mode;
3583
sk->compat_sk_state_change(sk);
3588
if (sk->compat_sk_type == SOCK_STREAM && mode) {
3589
VSOCK_SEND_SHUTDOWN(sk, mode);
3598
*----------------------------------------------------------------------------
3600
* VSockVmciDgramSendmsg --
3605
* Number of bytes sent on success, negative error code on failure.
3610
*----------------------------------------------------------------------------
3613
/*
 * sendmsg(2) for datagram sockets.
 *
 * The signature depends on the kernel version; every variant below is the
 * head of the same function definition, so none of them may end in a
 * semicolon.
 *
 * Flow: MSG_OOB and payloads larger than VMCI_MAX_DG_PAYLOAD_SIZE are
 * checked first; the socket is autobound if it has no local address; the
 * destination comes from msg->msg_name when it casts cleanly, otherwise
 * from vsk->remoteAddr when sock->state is SS_CONNECTED.  A destination CID
 * of VMADDR_CID_ANY is rewritten to the local context id.  A VMCIDatagram
 * of len + header bytes is allocated, filled from the caller's iovec,
 * addressed and handed to VMCIDatagram_Send(); VMCI errors are translated
 * with VSockVmci_ErrorToVSockError().
 *
 * NOTE(review): the error codes of the early checks, the allocation-failure
 * check and the buffer release are not visible in this excerpt.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 43)
3615
VSockVmciDgramSendmsg(struct socket *sock, // IN: socket to send on
3616
struct msghdr *msg, // IN: message to send
3617
int len, // IN: length of message
3618
struct scm_cookie *scm) // UNUSED
3619
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 65)
3621
VSockVmciDgramSendmsg(struct kiocb *kiocb, // UNUSED
3622
struct socket *sock, // IN: socket to send on
3623
struct msghdr *msg, // IN: message to send
3624
int len, // IN: length of message
3625
struct scm_cookie *scm) // UNUSED
3626
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 2)
3628
VSockVmciDgramSendmsg(struct kiocb *kiocb, // UNUSED
3629
struct socket *sock, // IN: socket to send on
3630
struct msghdr *msg, // IN: message to send
3631
int len) // IN: length of message
3634
VSockVmciDgramSendmsg(struct kiocb *kiocb, // UNUSED
3635
struct socket *sock, // IN: socket to send on
3636
struct msghdr *msg, // IN: message to send
3637
size_t len) // IN: length of message
3643
struct sockaddr_vm *remoteAddr;
3646
if (msg->msg_flags & MSG_OOB) {
3650
if (len > VMCI_MAX_DG_PAYLOAD_SIZE) {
3654
/* For now, MSG_DONTWAIT is always assumed... */
3661
if (!VSockAddr_Bound(&vsk->localAddr)) {
3662
struct sockaddr_vm localAddr;
3664
VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3665
if ((err = __VSockVmciBind(sk, &localAddr))) {
3671
* If the provided message contains an address, use that. Otherwise fall
3672
* back on the socket's remote handle (if it has been connected).
3674
if (msg->msg_name &&
3675
VSockAddr_Cast(msg->msg_name, msg->msg_namelen, &remoteAddr) == 0) {
3676
/* Ensure this address is of the right type and is a valid destination. */
3677
// XXXAB Temporary to handle test program
3678
if (remoteAddr->svm_cid == VMADDR_CID_ANY) {
3679
remoteAddr->svm_cid = VMCI_GetContextID();
3682
if (!VSockAddr_Bound(remoteAddr)) {
3686
} else if (sock->state == SS_CONNECTED) {
3687
remoteAddr = &vsk->remoteAddr;
3688
// XXXAB Temporary to handle test program
3689
if (remoteAddr->svm_cid == VMADDR_CID_ANY) {
3690
remoteAddr->svm_cid = VMCI_GetContextID();
3693
/* XXX Should connect() or this function ensure remoteAddr is bound? */
3694
if (!VSockAddr_Bound(&vsk->remoteAddr)) {
3704
* Allocate a buffer for the user's message and our packet header.
3706
dg = kmalloc(len + sizeof *dg, GFP_KERNEL);
3712
/* NOTE(review): the result of memcpy_fromiovec() is not checked here. */
memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), msg->msg_iov, len);
3714
dg->dst = VMCI_MAKE_HANDLE(remoteAddr->svm_cid, remoteAddr->svm_port);
3715
dg->src = VMCI_MAKE_HANDLE(vsk->localAddr.svm_cid, vsk->localAddr.svm_port);
3716
dg->payloadSize = len;
3718
err = VMCIDatagram_Send(dg);
3721
err = VSockVmci_ErrorToVSockError(err);
3726
* err is the number of bytes sent on success. We need to subtract the
3727
* VSock-specific header portions of what we've sent.
3738
*----------------------------------------------------------------------------
3740
* VSockVmciStreamSetsockopt --
3742
* Set a socket option on a stream socket
3745
* 0 on success, negative error code on failure.
3750
*----------------------------------------------------------------------------
3754
/*
 * setsockopt(2) for stream sockets.  Only options at the level returned by
 * VSockVmci_GetAFValue() are accepted; anything else gets -ENOPROTOOPT.
 * The value is read from user space into val (optlen must cover sizeof val)
 * and applied as follows, keeping min <= size <= max:
 *   - SO_VMCI_BUFFER_SIZE:     checked against [queuePairMinSize,
 *                              queuePairMaxSize];
 *   - SO_VMCI_BUFFER_MAX_SIZE: checked against being below queuePairSize;
 *   - SO_VMCI_BUFFER_MIN_SIZE: checked against being above queuePairSize.
 * The invariant is asserted both before and after the update.
 * NOTE(review): the error codes of the range checks and the default case
 * are not visible in this excerpt.
 */
VSockVmciStreamSetsockopt(struct socket *sock, // IN/OUT
3757
char __user *optval, // IN
3765
if (level != VSockVmci_GetAFValue()) {
3766
return -ENOPROTOOPT;
3769
if (optlen < sizeof val) {
3773
if (copy_from_user(&val, optval, sizeof val) != 0) {
3781
ASSERT(vsk->queuePairMinSize <= vsk->queuePairSize &&
3782
vsk->queuePairSize <= vsk->queuePairMaxSize);
3787
case SO_VMCI_BUFFER_SIZE:
3788
if (val < vsk->queuePairMinSize || val > vsk->queuePairMaxSize) {
3792
vsk->queuePairSize = val;
3795
case SO_VMCI_BUFFER_MAX_SIZE:
3796
if (val < vsk->queuePairSize) {
3800
vsk->queuePairMaxSize = val;
3803
case SO_VMCI_BUFFER_MIN_SIZE:
3804
if (val > vsk->queuePairSize) {
3808
vsk->queuePairMinSize = val;
3818
ASSERT(vsk->queuePairMinSize <= vsk->queuePairSize &&
3819
vsk->queuePairSize <= vsk->queuePairMaxSize);
3829
*----------------------------------------------------------------------------
3831
* VSockVmciStreamGetsockopt --
3833
* Get a socket option for a stream socket
3836
* 0 on success, negative error code on failure.
3841
*----------------------------------------------------------------------------
3845
/*
 * getsockopt(2) for stream sockets.  Only options at the level returned by
 * VSockVmci_GetAFValue() are accepted.  The caller's buffer length is read
 * from optlen and must be at least sizeof val.  The requested queue pair
 * size (current, maximum or minimum) is written to optval as a uint64, and
 * len is written back to optlen.  Unknown options yield -ENOPROTOOPT.
 * NOTE(review): whether len is clamped to sizeof val before being written
 * back is not visible in this excerpt -- confirm.
 */
VSockVmciStreamGetsockopt(struct socket *sock, // IN
3848
char __user *optval, // OUT
3849
int __user * optlen) // IN/OUT
3857
if (level != VSockVmci_GetAFValue()) {
3858
return -ENOPROTOOPT;
3861
if ((err = get_user(len, optlen)) != 0) {
3864
if (len < sizeof val) {
3875
case SO_VMCI_BUFFER_SIZE:
3876
val = vsk->queuePairSize;
3879
case SO_VMCI_BUFFER_MAX_SIZE:
3880
val = vsk->queuePairMaxSize;
3883
case SO_VMCI_BUFFER_MIN_SIZE:
3884
val = vsk->queuePairMinSize;
3888
return -ENOPROTOOPT;
3891
if ((err = put_user(val, (uint64 __user *)optval)) != 0) {
3894
if ((err = put_user(len, optlen)) != 0) {
3904
*----------------------------------------------------------------------------
3906
* VSockVmciStreamSendmsg --
3908
* Sends a message on the socket.
3911
* Number of bytes sent on success, negative error code on failure.
3916
*----------------------------------------------------------------------------
3919
/*
 * sendmsg(2) for stream sockets.
 *
 * The signature depends on the kernel version; every variant below is the
 * head of the same function definition, so none of them may end in a
 * semicolon.
 *
 * Preconditions checked up front: no MSG_OOB, no destination address in
 * msg, send side not shut down, socket connected with a bound local
 * address, and a bound remote address (-EDESTADDRREQ otherwise).
 *
 * Main loop, until len bytes are written: while the produce queue has no
 * free space (and there is no error or shutdown) the peer is told we are
 * waiting to write and the task sleeps interruptibly within the send
 * timeout.  Data is then enqueued with VMCIQueue_EnqueueV(), which may
 * accept fewer bytes than requested, and totalWritten is advanced.  If
 * VSockVmciNotifyWaitingRead() says so, a "wrote" notification is sent,
 * retried up to VSOCK_MAX_DGRAM_RESENDS times.
 *
 * With VMX86_TOOLS and VSOCK_OPTIMIZATION_WAITING_NOTIFY defined, the queue
 * pointers are sampled before the enqueue so that a wrap-around can bump
 * produceQGeneration.
 *
 * NOTE(review): the return statement, the error codes of most checks and
 * the retry-loop body are not visible in this excerpt.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 43)
3921
VSockVmciStreamSendmsg(struct socket *sock, // IN: socket to send on
3922
struct msghdr *msg, // IN: message to send
3923
int len, // IN: length of message
3924
struct scm_cookie *scm) // UNUSED
3925
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 65)
3927
VSockVmciStreamSendmsg(struct kiocb *kiocb, // UNUSED
3928
struct socket *sock, // IN: socket to send on
3929
struct msghdr *msg, // IN: message to send
3930
int len, // IN: length of message
3931
struct scm_cookie *scm) // UNUSED
3932
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 2)
3934
VSockVmciStreamSendmsg(struct kiocb *kiocb, // UNUSED
3935
struct socket *sock, // IN: socket to send on
3936
struct msghdr *msg, // IN: message to send
3937
int len) // IN: length of message
3940
VSockVmciStreamSendmsg(struct kiocb *kiocb, // UNUSED
3941
struct socket *sock, // IN: socket to send on
3942
struct msghdr *msg, // IN: message to send
3943
size_t len) // IN: length of message
3948
ssize_t totalWritten;
3951
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
3955
COMPAT_DEFINE_WAIT(wait);
3962
if (msg->msg_flags & MSG_OOB) {
3968
/* Callers should not provide a destination with stream sockets. */
3969
if (msg->msg_namelen) {
3970
err = sk->compat_sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
3974
if (sk->compat_sk_shutdown & SEND_SHUTDOWN) {
3979
if (sk->compat_sk_state != SS_CONNECTED ||
3980
!VSockAddr_Bound(&vsk->localAddr)) {
3985
if (!VSockAddr_Bound(&vsk->remoteAddr)) {
3986
err = -EDESTADDRREQ;
3991
* Wait for room in the produce queue to enqueue our user's data.
3993
timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
3994
compat_init_prepare_to_wait(sk->compat_sk_sleep, &wait, TASK_INTERRUPTIBLE);
3996
while (totalWritten < len) {
3998
unsigned int retries;
4004
while (VMCIQueue_FreeSpace(vsk->produceQ,
4005
vsk->consumeQ, vsk->produceSize) == 0 &&
4006
sk->compat_sk_err == 0 &&
4007
!(sk->compat_sk_shutdown & SEND_SHUTDOWN) &&
4008
!(vsk->peerShutdown & RCV_SHUTDOWN)) {
4010
/* Don't wait for non-blocking sockets. */
4016
/* Notify our peer that we are waiting for room to write. */
4017
if (!VSockVmciSendWaitingWrite(sk, 1)) {
4018
err = -EHOSTUNREACH;
4023
timeout = schedule_timeout(timeout);
4025
if (signal_pending(current)) {
4026
err = sock_intr_errno(timeout);
4028
} else if (timeout == 0) {
4033
compat_cont_prepare_to_wait(sk->compat_sk_sleep,
4034
&wait, TASK_INTERRUPTIBLE);
4038
* These checks occur both as part of and after the loop conditional
4039
* since we need to check before and after sleeping.
4041
if (sk->compat_sk_err) {
4042
err = -sk->compat_sk_err;
4044
} else if ((sk->compat_sk_shutdown & SEND_SHUTDOWN) ||
4045
(vsk->peerShutdown & RCV_SHUTDOWN)) {
4051
* Note that enqueue will only write as many bytes as are free in the
4052
* produce queue, so we don't need to ensure len is smaller than the queue
4053
* size. It is the caller's responsibility to check how many bytes we were
4056
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4057
VMCIQueue_GetPointers(vsk->produceQ, vsk->consumeQ,
4058
&produceTail, &consumeHead);
4061
written = VMCIQueue_EnqueueV(vsk->produceQ, vsk->consumeQ,
4062
vsk->produceSize, msg->msg_iov,
4063
len - totalWritten);
4069
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4071
* Detect a wrap-around to maintain queue generation. Note that this is
4072
* safe since we hold the socket lock across the two queue pair
4075
if (written >= vsk->produceSize - produceTail) {
4076
vsk->produceQGeneration++;
4080
totalWritten += written;
4082
if (VSockVmciNotifyWaitingRead(vsk)) {
4084
* Notify the peer that we have written, retrying the send on failure up to
4085
* our maximum value. See the XXX comment for the corresponding piece of
4086
* code in StreamRecvmsg() for potential improvements.
4088
while (!(vsk->peerShutdown & RCV_SHUTDOWN) &&
4090
retries < VSOCK_MAX_DGRAM_RESENDS) {
4091
err = VSOCK_SEND_WROTE(sk);
4099
if (retries >= VSOCK_MAX_DGRAM_RESENDS) {
4100
Warning("unable to send wrote notification to peer for socket %p.\n", sk);
4103
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4104
vsk->peerWaitingRead = FALSE;
4110
ASSERT(totalWritten <= INT_MAX);
4113
if (totalWritten > 0) {
4116
compat_finish_wait(sk->compat_sk_sleep, &wait, TASK_RUNNING);
4125
*----------------------------------------------------------------------------
4127
* VSockVmciDgramRecvmsg --
4129
* Receives a datagram and places it in the caller's msg.
4132
* The size of the payload on success, negative value on failure.
4137
*----------------------------------------------------------------------------
4140
/*
 * recvmsg(2) for datagram sockets.  The signature depends on the kernel
 * version; all four variants head the same function body.
 *
 * Flow: MSG_OOB and MSG_ERRQUEUE get an up-front check; the head sk_buff is
 * taken from the receive queue with skb_recv_datagram(), honouring
 * MSG_DONTWAIT; the VMCIDatagram header at skb->data supplies payloadSize,
 * which must equal skb->len minus the header size.  A payload larger than
 * the caller's buffer sets MSG_TRUNC.  The payload (past the header) is
 * copied into the caller's iovec, msg_namelen is reset and then the
 * sender's address, derived from dg->src, is written to msg->msg_name.  The
 * sk_buff is released with skb_free_datagram().
 *
 * NOTE(review): the guard for a NULL msg->msg_name, the truncation of
 * payloadLen and the error codes are not visible in this excerpt.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 43)
4142
VSockVmciDgramRecvmsg(struct socket *sock, // IN: socket to receive from
4143
struct msghdr *msg, // IN/OUT: message to receive into
4144
int len, // IN: length of receive buffer
4145
int flags, // IN: receive flags
4146
struct scm_cookie *scm) // UNUSED
4147
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 65)
4149
VSockVmciDgramRecvmsg(struct kiocb *kiocb, // UNUSED
4150
struct socket *sock, // IN: socket to receive from
4151
struct msghdr *msg, // IN/OUT: message to receive into
4152
int len, // IN: length of receive buffer
4153
int flags, // IN: receive flags
4154
struct scm_cookie *scm) // UNUSED
4155
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 2)
4157
VSockVmciDgramRecvmsg(struct kiocb *kiocb, // UNUSED
4158
struct socket *sock, // IN: socket to receive from
4159
struct msghdr *msg, // IN/OUT: message to receive into
4160
int len, // IN: length of receive buffer
4161
int flags) // IN: receive flags
4164
VSockVmciDgramRecvmsg(struct kiocb *kiocb, // UNUSED
4165
struct socket *sock, // IN: socket to receive from
4166
struct msghdr *msg, // IN/OUT: message to receive into
4167
size_t len, // IN: length of receive buffer
4168
int flags) // IN: receive flags
4176
struct sk_buff *skb;
4177
struct sockaddr_vm *vmciAddr;
4182
noblock = flags & MSG_DONTWAIT;
4183
vmciAddr = (struct sockaddr_vm *)msg->msg_name;
4185
if (flags & MSG_OOB || flags & MSG_ERRQUEUE) {
4189
/* Retrieve the head sk_buff from the socket's receive queue. */
4190
skb = skb_recv_datagram(sk, flags, noblock, &err);
4199
dg = (VMCIDatagram *)skb->data;
4201
/* err is 0, meaning we read zero bytes. */
4205
payloadLen = dg->payloadSize;
4206
/* Ensure the sk_buff matches the payload size claimed in the packet. */
4207
if (payloadLen != skb->len - sizeof *dg) {
4212
if (payloadLen > len) {
4214
msg->msg_flags |= MSG_TRUNC;
4217
/* Place the datagram payload in the user's iovec. */
4218
err = skb_copy_datagram_iovec(skb, sizeof *dg, msg->msg_iov, payloadLen);
4223
msg->msg_namelen = 0;
4225
/* Provide the address of the sender. */
4226
VSockAddr_Init(vmciAddr,
4227
VMCI_HANDLE_TO_CONTEXT_ID(dg->src),
4228
VMCI_HANDLE_TO_RESOURCE_ID(dg->src));
4229
msg->msg_namelen = sizeof *vmciAddr;
4234
skb_free_datagram(sk, skb);
4241
*----------------------------------------------------------------------------
4243
* VSockVmciStreamRecvmsg --
4245
* Receives data from a connected stream socket and places it in the caller's msg.
4248
* The size of the payload on success, negative value on failure.
4253
*----------------------------------------------------------------------------
4256
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 43)
4258
VSockVmciStreamRecvmsg(struct socket *sock, // IN: socket to receive from
4259
struct msghdr *msg, // IN/OUT: message to receive into
4260
int len, // IN: length of receive buffer
4261
int flags, // IN: receive flags
4262
struct scm_cookie *scm) // UNUSED
4263
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 65)
4265
VSockVmciStreamRecvmsg(struct kiocb *kiocb, // UNUSED
4266
struct socket *sock, // IN: socket to receive from
4267
struct msghdr *msg, // IN/OUT: message to receive into
4268
int len, // IN: length of receive buffer
4269
int flags, // IN: receive flags
4270
struct scm_cookie *scm) // UNUSED
4271
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 2)
4273
VSockVmciStreamRecvmsg(struct kiocb *kiocb, // UNUSED
4274
struct socket *sock, // IN: socket to receive from
4275
struct msghdr *msg, // IN/OUT: message to receive into
4276
int len, // IN: length of receive buffer
4277
int flags) // IN: receive flags
4280
VSockVmciStreamRecvmsg(struct kiocb *kiocb, // UNUSED
4281
struct socket *sock, // IN: socket to receive from
4282
struct msghdr *msg, // IN/OUT: message to receive into
4283
size_t len, // IN: length of receive buffer
4284
int flags) // IN: receive flags
4295
unsigned int retries;
4296
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4300
COMPAT_DEFINE_WAIT(wait);
4310
if (sk->compat_sk_state != SS_CONNECTED) {
4315
if (flags & MSG_OOB) {
4320
if (sk->compat_sk_shutdown & RCV_SHUTDOWN) {
4326
* We must not copy less than target bytes into the user's buffer before
4327
* returning successfully, so we wait for the consume queue to have that
4328
* much data to consume before dequeueing. Note that this makes it
4329
* impossible to handle cases where target is greater than the queue size.
4331
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
4332
if (target >= vsk->consumeSize) {
4336
timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
4339
compat_init_prepare_to_wait(sk->compat_sk_sleep, &wait, TASK_INTERRUPTIBLE);
4341
while ((ready = VMCIQueue_BufReady(vsk->consumeQ,
4343
vsk->consumeSize)) < target &&
4344
sk->compat_sk_err == 0 &&
4345
!(sk->compat_sk_shutdown & RCV_SHUTDOWN) &&
4346
!(vsk->peerShutdown & SEND_SHUTDOWN)) {
4350
* Invalid queue pair content. XXX This should be changed to
4351
* a connection reset in a later change.
4358
/* Don't wait for non-blocking sockets. */
4364
/* Notify our peer that we are waiting for data to read. */
4365
if (!VSockVmciSendWaitingRead(sk, target)) {
4366
err = -EHOSTUNREACH;
4371
timeout = schedule_timeout(timeout);
4374
if (signal_pending(current)) {
4375
err = sock_intr_errno(timeout);
4377
} else if (timeout == 0) {
4382
compat_cont_prepare_to_wait(sk->compat_sk_sleep, &wait, TASK_INTERRUPTIBLE);
4385
if (sk->compat_sk_err) {
4386
err = -sk->compat_sk_err;
4388
} else if (sk->compat_sk_shutdown & RCV_SHUTDOWN) {
4391
} else if ((vsk->peerShutdown & SEND_SHUTDOWN) &&
4392
VMCIQueue_BufReady(vsk->consumeQ,
4393
vsk->produceQ, vsk->consumeSize) < target) {
4399
* Now consume up to len bytes from the queue. Note that since we have the
4400
* socket locked we should copy at least ready bytes.
4402
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4403
VMCIQueue_GetPointers(vsk->consumeQ, vsk->produceQ,
4404
&produceTail, &consumeHead);
4407
copied = VMCIQueue_DequeueV(vsk->produceQ, vsk->consumeQ,
4408
vsk->consumeSize, msg->msg_iov, len);
4414
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4416
* Detect a wrap-around to maintain queue generation. Note that this is
4417
* safe since we hold the socket lock across the two queue pair
4420
if (copied >= vsk->consumeSize - consumeHead) {
4421
vsk->consumeQGeneration++;
4425
ASSERT(copied >= target);
4428
* If the other side has shutdown for sending and there is nothing more to
4429
* read, then set our socket's RCV_SHUTDOWN flag and modify the socket
4432
if (vsk->peerShutdown & SEND_SHUTDOWN) {
4433
if (VMCIQueue_BufReady(vsk->consumeQ,
4434
vsk->produceQ, vsk->consumeSize) <= 0) {
4435
sk->compat_sk_shutdown |= RCV_SHUTDOWN;
4436
sk->compat_sk_state = SS_UNCONNECTED;
4437
sk->compat_sk_state_change(sk);
4441
if (VSockVmciNotifyWaitingWrite(vsk)) {
4443
* Notify the peer that we have read, retrying the send on failure up to our
4444
* maximum value. XXX For now we just log the failure, but later we should
4445
* schedule a work item to handle the resend until it succeeds. That would
4446
* require keeping track of work items in the vsk and cleaning them up upon
4449
while (!(vsk->peerShutdown & RCV_SHUTDOWN) &&
4451
retries < VSOCK_MAX_DGRAM_RESENDS) {
4452
err = VSOCK_SEND_READ(sk);
4460
if (retries >= VSOCK_MAX_DGRAM_RESENDS) {
4461
Warning("unable to send read notification to peer for socket %p.\n", sk);
4464
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4465
vsk->peerWaitingWrite = FALSE;
4470
ASSERT(copied <= INT_MAX);
4474
compat_finish_wait(sk->compat_sk_sleep, &wait, TASK_RUNNING);
4483
* Protocol operation.
4487
*----------------------------------------------------------------------------
4489
* VSockVmciCreate --
4491
* Creates a VSocket socket.
4494
* Zero on success, negative error code on failure.
4497
* Socket count is incremented.
4499
*----------------------------------------------------------------------------
4502
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
4504
VSockVmciCreate(struct socket *sock, // IN
4508
VSockVmciCreate(struct net *net, // IN
4509
struct socket *sock, // IN
4518
return -EPROTONOSUPPORT;
4521
switch (sock->type) {
4523
sock->ops = &vsockVmciDgramOps;
4527
* Queue pairs are /currently/ only supported within guests, so stream
4528
* sockets are only supported within guests.
4531
sock->ops = &vsockVmciStreamOps;
4535
return -ESOCKTNOSUPPORT;
4538
sock->state = SS_UNCONNECTED;
4540
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
4541
return __VSockVmciCreate(sock, GFP_KERNEL) ? 0 : -ENOMEM;
4543
return __VSockVmciCreate(net, sock, GFP_KERNEL) ? 0 : -ENOMEM;
4549
*----------------------------------------------------------------------------
4551
* VSockVmciIoctl32Handler --
4553
* Handler for 32-bit ioctl(2) on 64-bit.
4556
* Same as VSockVmciDevIoctl().
4561
*----------------------------------------------------------------------------
4565
#ifndef HAVE_COMPAT_IOCTL
4567
VSockVmciIoctl32Handler(unsigned int fd, // IN
4568
unsigned int iocmd, // IN
4569
unsigned long ioarg, // IN/OUT
4570
struct file * filp) // IN
4573
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 26) || \
4574
(LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 3))
4578
if (filp && filp->f_op && filp->f_op->ioctl == VSockVmciDevIoctl) {
4579
ret = VSockVmciDevIoctl(filp->f_dentry->d_inode, filp, iocmd, ioarg);
4581
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 26) || \
4582
(LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 3))
4587
#endif /* !HAVE_COMPAT_IOCTL */
4591
*----------------------------------------------------------------------------
4593
* register_ioctl32_handlers --
4595
* Registers the ioctl conversion handler.
4598
* Zero on success, error code otherwise.
4603
*----------------------------------------------------------------------------
4607
register_ioctl32_handlers(void)
4609
#ifndef HAVE_COMPAT_IOCTL
4612
for (i = IOCTL_VMCI_SOCKETS_FIRST; i < IOCTL_VMCI_SOCKETS_LAST; i++) {
4613
int retval = register_ioctl32_conversion(i, VSockVmciIoctl32Handler);
4615
Warning("Fail to register ioctl32 conversion for cmd %d\n", i);
4620
#endif /* !HAVE_COMPAT_IOCTL */
4626
*----------------------------------------------------------------------------
4628
* unregister_ioctl32_handlers --
4630
* Unregisters the ioctl conversion handler.
4638
*----------------------------------------------------------------------------
4642
unregister_ioctl32_handlers(void)
4644
#ifndef HAVE_COMPAT_IOCTL
4647
for (i = IOCTL_VMCI_SOCKETS_FIRST; i < IOCTL_VMCI_SOCKETS_LAST; i++) {
4648
int retval = unregister_ioctl32_conversion(i);
4650
Warning("Fail to unregister ioctl32 conversion for cmd %d\n", i);
4654
#endif /* !HAVE_COMPAT_IOCTL */
4656
#else /* VM_X86_64 */
4657
/*
 * Fallback used in the #else branch of the VM_X86_64 conditional: there is
 * nothing to register, so the call expands to the constant 0, which is the
 * success value documented for register_ioctl32_handlers().
 */
#define register_ioctl32_handlers() (0)
4658
/*
 * Matching no-op for unregistration. The do { } while (0) form lets the
 * macro be used as a single statement followed by a semicolon.
 */
#define unregister_ioctl32_handlers() do { } while (0)
4659
#endif /* VM_X86_64 */
4663
* Device operations.
4668
*----------------------------------------------------------------------------
4670
* VSockVmciDevOpen --
4672
* Invoked when the device is opened. Simply maintains a count of open
4676
* Zero on success, negative value otherwise.
4681
*----------------------------------------------------------------------------
4685
VSockVmciDevOpen(struct inode *inode, // IN
4686
struct file *file) // IN
4688
spin_lock(&registrationLock);
4690
spin_unlock(&registrationLock);
4696
*----------------------------------------------------------------------------
4698
* VSockVmciDevRelease --
4700
* Invoked when the device is closed. Updates the open instance count and
4701
* unregisters the socket family if this is the last user.
4704
* Zero on success, negative value otherwise.
4709
*----------------------------------------------------------------------------
4713
VSockVmciDevRelease(struct inode *inode, // IN
4714
struct file *file) // IN
4716
spin_lock(&registrationLock);
4718
VSockVmciTestUnregister();
4719
spin_unlock(&registrationLock);
4725
*----------------------------------------------------------------------------
4727
* VSockVmciDevIoctl --
4732
* Zero on success, negative error code otherwise.
4737
*----------------------------------------------------------------------------
4741
VSockVmciDevIoctl(struct inode *inode, // IN
4742
struct file *filp, // IN
4744
unsigned long ioarg) // IN/OUT
4751
case IOCTL_VMCI_SOCKETS_GET_AF_VALUE: {
4754
family = VSockVmci_GetAFValue();
4756
Warning("AF_VSOCK is not registered\n");
4758
if (copy_to_user((void *)ioarg, &family, sizeof family) != 0) {
4764
case IOCTL_VMCI_SOCKETS_GET_LOCAL_CID: {
4765
VMCIId cid = VMCI_GetContextID();
4766
if (copy_to_user((void *)ioarg, &cid, sizeof cid) != 0) {
4773
Warning("Unknown ioctl %d\n", iocmd);
4781
#if defined(HAVE_COMPAT_IOCTL) || defined(HAVE_UNLOCKED_IOCTL)
4783
*-----------------------------------------------------------------------------
4785
* VSockVmciDevUnlockedIoctl --
4787
* Wrapper for VSockVmciDevIoctl() supporting the compat_ioctl and
4788
* unlocked_ioctl methods that have signatures different from the
4789
* old ioctl. Used as compat_ioctl method for 32bit apps running
4790
* on 64bit kernel and for unlocked_ioctl on systems supporting
4791
* those. VSockVmciDevIoctl() may safely be called without holding
4795
* Same as VSockVmciDevIoctl().
4800
*-----------------------------------------------------------------------------
4804
VSockVmciDevUnlockedIoctl(struct file *filp, // IN
4806
unsigned long ioarg) // IN/OUT
4808
return VSockVmciDevIoctl(NULL, filp, iocmd, ioarg);
4813
* Module operations.
4817
*----------------------------------------------------------------------------
4821
* Initialization routine for the VSockets module.
4824
* Zero on success, error code on failure.
4827
* The VSocket protocol family and socket operations are registered.
4829
*----------------------------------------------------------------------------
4837
DriverLog_Init("VSock");
4839
request_module("vmci");
4841
err = misc_register(&vsockVmciDevice);
4846
err = register_ioctl32_handlers();
4848
misc_deregister(&vsockVmciDevice);
4852
err = VSockVmciRegisterProto();
4854
Warning("Cannot register vsock protocol.\n");
4855
unregister_ioctl32_handlers();
4856
misc_deregister(&vsockVmciDevice);
4860
VSockVmciInitTables();
4866
*----------------------------------------------------------------------------
4868
* VSockVmciExit --
4870
* VSockets module exit routine.
4876
* Unregisters VSocket protocol family and socket operations.
4878
*----------------------------------------------------------------------------
4884
unregister_ioctl32_handlers();
4885
misc_deregister(&vsockVmciDevice);
4886
spin_lock(&registrationLock);
4887
VSockVmciUnregisterAddressFamily();
4888
spin_unlock(&registrationLock);
4890
VSockVmciUnregisterProto();
4894
/* Hook the module's initialization and exit routines into the kernel. */
module_init(VSockVmciInit);
4895
module_exit(VSockVmciExit);
4897
/* Module metadata: author, description, version string and license. */
MODULE_AUTHOR("VMware, Inc.");
4898
MODULE_DESCRIPTION("VMware Virtual Socket Family");
4899
MODULE_VERSION(VSOCK_DRIVER_VERSION_STRING);
4900
MODULE_LICENSE("GPL v2");