218
213
struct sockaddr *addr, int addrLen);
219
214
static int VSockVmciDgramConnect(struct socket *sock,
220
215
struct sockaddr *addr, int addrLen, int flags);
222
216
static int VSockVmciStreamConnect(struct socket *sock,
223
217
struct sockaddr *addr, int addrLen, int flags);
224
218
static int VSockVmciAccept(struct socket *sock, struct socket *newsock, int flags);
226
219
static int VSockVmciGetname(struct socket *sock,
227
220
struct sockaddr *addr, int *addrLen, int peer);
228
221
static unsigned int VSockVmciPoll(struct file *file,
229
222
struct socket *sock, poll_table *wait);
231
223
static int VSockVmciListen(struct socket *sock, int backlog);
233
224
static int VSockVmciShutdown(struct socket *sock, int mode);
236
226
static int VSockVmciStreamSetsockopt(struct socket *sock, int level, int optname,
237
227
char __user *optval, int optlen);
238
228
static int VSockVmciStreamGetsockopt(struct socket *sock, int level, int optname,
239
229
char __user *optval, int __user * optlen);
242
231
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 43)
243
232
static int VSockVmciDgramSendmsg(struct socket *sock, struct msghdr *msg,
244
233
int len, struct scm_cookie *scm);
245
234
static int VSockVmciDgramRecvmsg(struct socket *sock, struct msghdr *msg,
246
235
int len, int flags, struct scm_cookie *scm);
248
236
static int VSockVmciStreamSendmsg(struct socket *sock, struct msghdr *msg,
249
237
int len, struct scm_cookie *scm);
250
238
static int VSockVmciStreamRecvmsg(struct socket *sock, struct msghdr *msg,
251
239
int len, int flags, struct scm_cookie *scm);
253
240
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 65)
254
241
static int VSockVmciDgramSendmsg(struct kiocb *kiocb, struct socket *sock,
255
242
struct msghdr *msg, int len,
257
244
static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, struct socket *sock,
258
245
struct msghdr *msg, int len,
259
246
int flags, struct scm_cookie *scm);
261
247
static int VSockVmciStreamSendmsg(struct kiocb *kiocb, struct socket *sock,
262
248
struct msghdr *msg, int len,
263
249
struct scm_cookie *scm);
264
250
static int VSockVmciStreamRecvmsg(struct kiocb *kiocb, struct socket *sock,
265
251
struct msghdr *msg, int len,
266
252
int flags, struct scm_cookie *scm);
268
253
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 2)
269
254
static int VSockVmciDgramSendmsg(struct kiocb *kiocb,
270
255
struct socket *sock, struct msghdr *msg, int len);
271
256
static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, struct socket *sock,
272
257
struct msghdr *msg, int len, int flags);
274
258
static int VSockVmciStreamSendmsg(struct kiocb *kiocb,
275
259
struct socket *sock, struct msghdr *msg, int len);
276
260
static int VSockVmciStreamRecvmsg(struct kiocb *kiocb, struct socket *sock,
277
261
struct msghdr *msg, int len, int flags);
280
263
static int VSockVmciDgramSendmsg(struct kiocb *kiocb,
281
264
struct socket *sock, struct msghdr *msg, size_t len);
282
265
static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, struct socket *sock,
283
266
struct msghdr *msg, size_t len, int flags);
285
267
static int VSockVmciStreamSendmsg(struct kiocb *kiocb,
286
268
struct socket *sock, struct msghdr *msg, size_t len);
287
269
static int VSockVmciStreamRecvmsg(struct kiocb *kiocb, struct socket *sock,
288
270
struct msghdr *msg, size_t len, int flags);
292
273
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
427
406
static DECLARE_MUTEX(registrationMutex);
428
407
static int devOpenCount = 0;
429
408
static int vsockVmciSocketCount = 0;
409
static int vsockVmciKernClientCount = 0;
430
410
#ifdef VMX86_TOOLS
411
static Bool vmciDevicePresent = FALSE;
431
413
static VMCIHandle vmciStreamHandle = { VMCI_INVALID_ID, VMCI_INVALID_ID };
432
static Bool vmciDevicePresent = FALSE;
433
414
static VMCIId qpResumedSubId = VMCI_INVALID_ID;
436
416
/* Comment this out to compare with old protocol. */
437
417
#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1
438
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
418
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
439
419
/* Comment this out to remove flow control for "new" protocol */
440
420
# define VSOCK_OPTIMIZATION_FLOW_CONTROL 1
443
/* Comment this out to turn off datagram counting. */
444
//#define VSOCK_CONTROL_PACKET_COUNT 1
445
#ifdef VSOCK_CONTROL_PACKET_COUNT
424
* Define VSOCK_GATHER_STATISTICS to turn on statistics gathering.
425
* Currently this consists of 2 types of stats:
426
* 1. The number of control datagram messages sent.
427
* 2. The level of queuepair fullness (in 10% buckets) whenever data is
428
* about to be enqueued or dequeued from the queuepair.
430
//#define VSOCK_GATHER_STATISTICS 1
431
#ifdef VSOCK_GATHER_STATISTICS
432
#define VSOCK_NUM_QUEUE_LEVEL_BUCKETS 10
446
433
uint64 controlPacketCount[VSOCK_PACKET_TYPE_MAX];
434
uint64 consumeQueueLevel[VSOCK_NUM_QUEUE_LEVEL_BUCKETS];
435
uint64 produceQueueLevel[VSOCK_NUM_QUEUE_LEVEL_BUCKETS];
449
438
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9)
521
510
*----------------------------------------------------------------------------
523
* VSockVmci_GetAFValue --
512
* VMCISock_GetAFValue --
514
* Kernel interface that allows external kernel modules to get the current
515
* VMCI Sockets address family.
516
* This version of the function is exported to kernel clients and should not
520
* The address family on success, a negative error on failure.
525
*----------------------------------------------------------------------------
529
VMCISock_GetAFValue(void)
533
down(&registrationMutex);
536
* Kernel clients are required to explicitly register themselves before they
537
* can use VMCI Sockets.
539
if (vsockVmciKernClientCount <= 0) {
544
afvalue = VSockVmciGetAFValue();
547
up(&registrationMutex);
554
*----------------------------------------------------------------------------
556
* VMCISock_KernelRegister --
558
* Allows a kernel client to register with VMCI Sockets. Must be called
559
* before VMCISock_GetAFValue within a kernel module. Note that we don't
560
* actually register the address family until the first time the module
569
*----------------------------------------------------------------------------
573
VMCISock_KernelRegister(void)
575
down(&registrationMutex);
576
vsockVmciKernClientCount++;
577
up(&registrationMutex);
582
*----------------------------------------------------------------------------
584
* VMCISock_KernelDeregister --
586
* Allows a kernel client to unregister with VMCI Sockets. Every call
587
* to VMCISock_KernelRegister must be matched with a call to
588
* VMCISock_KernelDeregister.
596
*----------------------------------------------------------------------------
600
VMCISock_KernelDeregister(void)
602
down(&registrationMutex);
603
vsockVmciKernClientCount--;
604
VSockVmciTestUnregister();
605
up(&registrationMutex);
610
*----------------------------------------------------------------------------
612
* VSockVmciGetAFValue --
525
614
* Returns the address family value being used.
615
* Note: The registration mutex must be held when calling this function.
528
618
* The address family on success, a negative error on failure.
533
623
*----------------------------------------------------------------------------
537
VSockVmci_GetAFValue(void)
627
VSockVmciGetAFValue(void)
541
down(&registrationMutex);
543
631
afvalue = vsockVmciFamilyOps.family;
544
632
if (!VSOCK_AF_IS_REGISTERED(afvalue)) {
545
633
afvalue = VSockVmciRegisterAddressFamily();
640
*----------------------------------------------------------------------------
642
* VSockVmci_GetAFValue --
644
* Returns the address family value being used.
647
* The address family on success, a negative error on failure.
652
*----------------------------------------------------------------------------
656
VSockVmci_GetAFValue(void)
660
down(&registrationMutex);
661
afvalue = VSockVmciGetAFValue();
548
662
up(&registrationMutex);
554
674
*----------------------------------------------------------------------------
556
676
* VSockVmciTestUnregister --
568
688
*----------------------------------------------------------------------------
572
692
VSockVmciTestUnregister(void)
574
if (devOpenCount <= 0 && vsockVmciSocketCount <= 0) {
694
if (devOpenCount <= 0 && vsockVmciSocketCount <= 0 &&
695
vsockVmciKernClientCount <= 0) {
575
696
if (VSOCK_AF_IS_REGISTERED(vsockVmciFamilyOps.family)) {
576
697
VSockVmciUnregisterAddressFamily();
702
#ifdef VSOCK_GATHER_STATISTICS
704
*----------------------------------------------------------------------------
706
* VSockVmciUpdateQueueBucketCount --
708
* Given a queue, determine how much data is enqueued and add that to
709
* the specified queue level statistic bucket.
717
*----------------------------------------------------------------------------
721
VSockVmciUpdateQueueBucketCount(VMCIQueue *mainQueue, // IN
722
VMCIQueue *otherQueue, // IN
723
uint64 mainQueueSize, // IN
724
uint64 queueLevel[]) // IN
727
uint32 remainder = 0;
728
uint64 dataReady = VMCIQueue_BufReady(mainQueue,
732
* We can't do 64 / 64 = 64 bit divides on linux because it requires a libgcc
733
* which is not linked into the kernel module. Since this code is only used by
734
* developers we just limit the mainQueueSize to be at most MAX_UINT32 for now.
736
ASSERT(mainQueueSize <= MAX_UINT32);
737
Div643264(dataReady * 10, mainQueueSize, &bucket, &remainder);
738
ASSERT(bucket < VSOCK_NUM_QUEUE_LEVEL_BUCKETS);
739
++queueLevel[bucket];
589
744
*----------------------------------------------------------------------------
1452
1596
case SS_CONNECTED:
1453
1597
err = VSockVmciRecvConnected(sk, pkt);
1455
case SS_DISCONNECTING:
1456
Log("packet receieved for socket in disconnecting state; dropping.\n");
1459
Log("packet receieved for socket in free state; dropping.\n");
1462
Log("socket is in invalid state; dropping packet.\n");
1601
* Because this function does not run in the same context as
1602
* VSockVmciRecvStreamCB it is possible that the socket
1603
* has closed. We need to let the other side know or it could
1604
* be sitting in a connect and hang forever. Send a reset to prevent
1607
VSOCK_SEND_RESET(sk, pkt);
2091
2238
case VSOCK_PACKET_TYPE_RST:
2092
sk->compat_sk_state = SS_DISCONNECTING;
2093
sk->compat_sk_shutdown = SHUTDOWN_MASK;
2094
sk->compat_sk_err = ECONNRESET;
2095
sk->compat_sk_error_report(sk);
2241
* It is possible that we sent our peer a message (e.g.,
2242
* a WAITING_READ) right before we got notified that the peer
2243
* had detached. If that happens then we can get a RST pkt back
2244
* from our peer even though there is data available for us
2245
* to read. In that case, don't shutdown the socket completely
2246
* but instead allow the local client to finish reading data
2247
* off the queuepair. Always treat a RST pkt in connected mode
2248
* like a clean shutdown.
2250
compat_sock_set_done(sk);
2251
vsk->peerShutdown = SHUTDOWN_MASK;
2252
sk->compat_sk_shutdown |= SEND_SHUTDOWN;
2253
if (VSockVmciStreamHasData(vsk) <= 0) {
2254
sk->compat_sk_state = SS_DISCONNECTING;
2255
sk->compat_sk_shutdown = SHUTDOWN_MASK;
2257
sk->compat_sk_state_change(sk);
2098
2260
case VSOCK_PACKET_TYPE_WROTE:
2857
3008
VSockVmciTestUnregister();
2858
3009
up(&registrationMutex);
2860
#ifdef VSOCK_CONTROL_PACKET_COUNT
3011
#ifdef VSOCK_GATHER_STATISTICS
2863
3014
for (index = 0; index < ARRAYSIZE(controlPacketCount); index++) {
2864
3015
Warning("Control packet count: Type = %u, Count = %"FMT64"u\n",
2865
3016
index, controlPacketCount[index]);
3019
for (index = 0; index < ARRAYSIZE(consumeQueueLevel); index++) {
3020
Warning("Consume Bucket: %u Count: %"FMT64"u\n",
3021
index, consumeQueueLevel[index]);
3024
for (index = 0; index < ARRAYSIZE(produceQueueLevel); index++) {
3025
Warning("Produce Bucket: %u Count: %"FMT64"u\n",
3026
index, produceQueueLevel[index]);
2984
3144
proto_unregister(&vsockVmciProto);
2987
#ifdef VSOCK_CONTROL_PACKET_COUNT
3147
#ifdef VSOCK_GATHER_STATISTICS
2990
3150
for (index = 0; index < ARRAYSIZE(controlPacketCount); index++) {
2991
3151
controlPacketCount[index] = 0;
3154
for (index = 0; index < ARRAYSIZE(consumeQueueLevel); index++) {
3155
consumeQueueLevel[index] = 0;
3158
for (index = 0; index < ARRAYSIZE(produceQueueLevel); index++) {
3159
produceQueueLevel[index] = 0;
3137
3303
VMCIEvent_Unsubscribe(qpResumedSubId);
3138
3304
qpResumedSubId = VMCI_INVALID_ID;
3142
3307
if (vsockVmciFamilyOps.family != VSOCK_INVALID_FAMILY) {
3143
3308
sock_unregister(vsockVmciFamilyOps.family);
3146
3311
vsockVmciDgramOps.family = vsockVmciFamilyOps.family = VSOCK_INVALID_FAMILY;
3148
3312
vsockVmciStreamOps.family = vsockVmciFamilyOps.family;
3318
*----------------------------------------------------------------------------
3320
* VSockVmciStreamHasData --
3322
* Gets the amount of data available for a given stream socket's consume
3325
* Note that this assumes the socket lock is held.
3328
* The amount of data available or a VMCI error code on failure.
3333
*----------------------------------------------------------------------------
3337
VSockVmciStreamHasData(VSockVmciSock *vsk) // IN
3341
return VMCIQueue_BufReady(vsk->consumeQ,
3342
vsk->produceQ, vsk->consumeSize);
3347
*----------------------------------------------------------------------------
3349
* VSockVmciStreamHasSpace --
3351
* Gets the amount of space available for a given stream socket's produce
3354
* Note that this assumes the socket lock is held.
3357
* The amount of space available or a VMCI error code on failure.
3362
*----------------------------------------------------------------------------
3366
VSockVmciStreamHasSpace(VSockVmciSock *vsk) // IN
3370
return VMCIQueue_FreeSpace(vsk->produceQ,
3371
vsk->consumeQ, vsk->produceSize);
3714
3931
if (sk->compat_sk_state == SS_CONNECTED) {
3715
3932
if (!(sk->compat_sk_shutdown & SEND_SHUTDOWN)) {
3716
3933
int64 produceQFreeSpace =
3717
VMCIQueue_FreeSpace(vsk->produceQ,
3718
vsk->consumeQ, vsk->produceSize);
3934
VSockVmciStreamHasSpace(vsk);
3719
3935
if (produceQFreeSpace > 0) {
3720
3936
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
3721
3937
} else if (produceQFreeSpace == 0) {
4523
#if defined(VSOCK_GATHER_STATISTICS)
4524
VSockVmciUpdateQueueBucketCount(vsk->produceQ,
4319
4531
* Note that enqueue will only write as many bytes as are free in the
4320
4532
* produce queue, so we don't need to ensure len is smaller than the queue
4321
4533
* size. It is the caller's responsibility to check how many bytes we were
4322
4534
* able to send.
4324
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4536
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4325
4537
VMCIQueue_GetPointers(vsk->produceQ, vsk->consumeQ,
4326
4538
&produceTail, &consumeHead);
4642
4852
compat_init_prepare_to_wait(sk->compat_sk_sleep, &wait, TASK_INTERRUPTIBLE);
4644
while ((ready = VMCIQueue_BufReady(vsk->consumeQ,
4646
vsk->consumeSize)) < target &&
4854
while ((ready = VSockVmciStreamHasData(vsk)) < target &&
4647
4855
sk->compat_sk_err == 0 &&
4648
4856
!(sk->compat_sk_shutdown & RCV_SHUTDOWN) &&
4649
4857
!(vsk->peerShutdown & SEND_SHUTDOWN)) {
4651
4859
if (ready < 0) {
4653
4861
* Invalid queue pair content. XXX This should be changed to
4654
4862
* a connection reset in a later change.
4704
4912
} else if ((vsk->peerShutdown & SEND_SHUTDOWN) &&
4705
VMCIQueue_BufReady(vsk->consumeQ,
4706
vsk->produceQ, vsk->consumeSize) < target) {
4913
VSockVmciStreamHasData(vsk) < target) {
4917
#if defined(VSOCK_GATHER_STATISTICS)
4918
VSockVmciUpdateQueueBucketCount(vsk->consumeQ,
4712
4926
* Now consume up to len bytes from the queue. Note that since we have the
4713
4927
* socket locked we should copy at least ready bytes.
4715
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4929
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4716
4930
VMCIQueue_GetPointers(vsk->consumeQ, vsk->produceQ,
4717
4931
&produceTail, &consumeHead);
4720
copied = VMCIQueue_DequeueV(vsk->produceQ, vsk->consumeQ,
4934
if (flags & MSG_PEEK) {
4935
copied = VMCIQueue_PeekV(vsk->produceQ, vsk->consumeQ,
4721
4936
vsk->consumeSize, msg->msg_iov, len);
4938
copied = VMCIQueue_DequeueV(vsk->produceQ, vsk->consumeQ,
4939
vsk->consumeSize, msg->msg_iov, len);
4722
4942
if (copied < 0) {
4727
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4729
* Detect a wrap-around to maintain queue generation. Note that this is
4730
* safe since we hold the socket lock across the two queue pair
4733
if (copied >= vsk->consumeSize - consumeHead) {
4734
vsk->consumeQGeneration++;
4738
4947
ASSERT(copied >= target);
4741
* If the other side has shutdown for sending and there is nothing more to
4742
* read, then set our socket's RCV_SHUTDOWN flag and modify the socket
4950
* We only do these additional bookkeeping/notification steps if we actually
4951
* copied something out of the queue pair instead of just peeking ahead.
4745
if (vsk->peerShutdown & SEND_SHUTDOWN) {
4746
if (VMCIQueue_BufReady(vsk->consumeQ,
4747
vsk->produceQ, vsk->consumeSize) <= 0) {
4748
sk->compat_sk_shutdown |= RCV_SHUTDOWN;
4749
sk->compat_sk_state = SS_UNCONNECTED;
4750
compat_sock_set_done(sk);
4751
sk->compat_sk_state_change(sk);
4755
err = VSockVmciSendReadNotification(sk);
4953
if (!(flags & MSG_PEEK)) {
4954
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
4956
* Detect a wrap-around to maintain queue generation. Note that this is
4957
* safe since we hold the socket lock across the two queue pair
4960
if (copied >= vsk->consumeSize - consumeHead) {
4961
vsk->consumeQGeneration++;
4966
* If the other side has shutdown for sending and there is nothing more to
4967
* read, then set our socket's RCV_SHUTDOWN flag and modify the socket
4970
if (vsk->peerShutdown & SEND_SHUTDOWN) {
4971
if (VSockVmciStreamHasData(vsk) <= 0) {
4972
sk->compat_sk_shutdown |= RCV_SHUTDOWN;
4973
sk->compat_sk_state = SS_UNCONNECTED;
4974
compat_sock_set_done(sk);
4975
sk->compat_sk_state_change(sk);
4979
err = VSockVmciSendReadNotification(sk);
4760
4985
ASSERT(copied <= INT_MAX);
5188
5406
MODULE_DESCRIPTION("VMware Virtual Socket Family");
5189
5407
MODULE_VERSION(VSOCK_DRIVER_VERSION_STRING);
5190
5408
MODULE_LICENSE("GPL v2");
5410
* Starting with SLE10sp2, Novell requires that IHVs sign a support agreement
5411
* with them and mark their kernel modules as externally supported via a
5412
* change to the module header. If this isn't done, the module will not load
5413
* by default (i.e., neither mkinitrd nor modprobe will accept it).
5415
MODULE_INFO(supported, "external");