236
244
struct socket *sock, struct msghdr *msg, size_t len);
237
245
static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, struct socket *sock,
238
246
struct msghdr *msg, size_t len, int flags);
247
static int VSockVmciSeqSendmsg(struct kiocb *kiocb,
248
struct socket *sock, struct msghdr *msg, size_t len);
249
static int VSockVmciSeqRecvmsg(struct kiocb *kiocb, struct socket *sock,
250
struct msghdr *msg, size_t len, int flags);
239
251
static int VSockVmciStreamSendmsg(struct kiocb *kiocb,
240
252
struct socket *sock, struct msghdr *msg, size_t len);
241
253
static int VSockVmciStreamRecvmsg(struct kiocb *kiocb, struct socket *sock,
319
331
.sendpage = sock_no_sendpage,
334
static struct proto_ops vsockVmciSeqOps = {
335
.family = VSOCK_INVALID_FAMILY,
336
.owner = THIS_MODULE,
337
.release = VSockVmciRelease,
338
.bind = VSockVmciBind,
339
.connect = VSockVmciSeqConnect,
340
.socketpair = sock_no_socketpair,
341
.accept = sock_no_accept,
342
.getname = VSockVmciGetname,
343
.poll = VSockVmciPoll,
344
.ioctl = sock_no_ioctl,
345
.listen = sock_no_listen,
346
.shutdown = VSockVmciShutdown,
347
.setsockopt = sock_no_setsockopt,
348
.getsockopt = sock_no_getsockopt,
349
.sendmsg = VSockVmciSeqSendmsg,
350
.recvmsg = VSockVmciSeqRecvmsg,
351
.mmap = sock_no_mmap,
352
.sendpage = sock_no_sendpage,
322
355
static struct proto_ops vsockVmciStreamOps = {
323
356
.family = VSOCK_INVALID_FAMILY,
324
357
.owner = THIS_MODULE,
373
406
static Bool vmciDevicePresent = FALSE;
374
407
static VMCIHandle vmciStreamHandle = { VMCI_INVALID_ID, VMCI_INVALID_ID };
375
408
static VMCIId qpResumedSubId = VMCI_INVALID_ID;
409
static VMCIId ctxUpdatedSubId = VMCI_INVALID_ID;
377
411
static int PROTOCOL_OVERRIDE = -1;
380
* Netperf benchmarks have shown significant throughput improvements when the QP
381
* size is bumped from 64k to 256k. These measurements were taken during the K/L.next
382
* timeframe. Give users better performance by default.
414
* Netperf benchmarks have shown significant throughput improvements when the
415
* QP size is bumped from 64k to 256k. These measurements were taken during the
416
* K/L.next timeframe. Give users better performance by default.
384
418
#define VSOCK_DEFAULT_QP_SIZE_MIN 128
385
419
#define VSOCK_DEFAULT_QP_SIZE 262144
996
1035
compat_sk_receive_skb(sk, skb, 0);
1038
return VMCI_SUCCESS;
1043
*----------------------------------------------------------------------------
1045
* VSockVmciSendSeqPacket --
1047
* Send a sequential packet. This uses a stack-allocated packet, i.e.,
1048
* it isn't meant for DATA packets, but it works fine for the other packet
1052
* Zero on success, negative error code on failure.
1057
*----------------------------------------------------------------------------
1061
VSockVmciSendSeqPacket(VSockVmciSock *vsk, // IN
1062
VSockSeqPacketType type, // IN
1070
VSockSeqPacket_Init(&pkt, &vsk->localAddr, &vsk->remoteAddr, type, mode);
1072
err = VMCIDatagram_Send(&pkt.hdr.dg);
1074
err = VSockVmci_ErrorToVSockError(err);
1082
*----------------------------------------------------------------------------
1084
* VSockVmciRecvSeqCB --
1086
* VMCI Datagram receive callback. This function is used specifically for
1087
* SOCK_SEQPACKET sockets.
1089
* This is invoked as part of a tasklet that's scheduled when the VMCI
1090
* interrupt fires. This is run in bottom-half context and if it ever needs
1091
* to sleep it should defer that work to a work queue.
1094
* Zero on success, negative error code on failure.
1097
* An sk_buff is created and queued with this socket.
1099
*----------------------------------------------------------------------------
1103
VSockVmciRecvSeqCB(void *data, // IN
1104
VMCIDatagram *dg) // IN
1109
VSockSeqPacket *pkt;
1112
ASSERT(dg->payloadSize <= VMCI_MAX_DG_PAYLOAD_SIZE);
1114
sk = (struct sock *)data;
1118
/* XXX, figure out why sk->sk_socket can be NULL. */
1119
if (!sk->sk_socket) {
1123
ASSERT(sk->sk_socket->type == SOCK_SEQPACKET);
1125
if (VMCI_HYPERVISOR_CONTEXT_ID != dg->src.context) {
1126
return VMCI_ERROR_NO_ACCESS;
1129
if (VMCI_RPC_PRIVILEGED != dg->src.resource &&
1130
VMCI_RPC_UNPRIVILEGED != dg->src.resource) {
1131
return VMCI_ERROR_NO_ACCESS;
1134
size = VMCI_DG_SIZE(dg);
1135
if (size < sizeof *pkt) {
1136
return VMCI_ERROR_INVALID_ARGS;
1140
pkt = (VSockSeqPacket *)dg;
1143
* After this point, if we fail to handle the packet, we need to send a
1144
* close to the peer with an error. Otherwise it might hang, waiting for a
1145
* response to a packet that we discarded.
1148
if (VSOCK_SEQ_PACKET_VERSION_1 != pkt->hdr.version) {
1149
VSOCK_SEND_SEQ_CLOSE(vsk, EINVAL);
1150
return VMCI_ERROR_INVALID_ARGS;
1153
if (SS_CONNECTED != sk->sk_socket->state) {
1154
VSOCK_SEND_SEQ_CLOSE(vsk, ENOTCONN);
1155
return VMCI_ERROR_DST_UNREACHABLE;
1158
switch (pkt->hdr.type) {
1159
case VSOCK_SEQ_PACKET_TYPE_DATA: {
1160
struct sk_buff *skb;
1162
* Attach the packet to the socket's receive queue as an sk_buff.
1165
size -= sizeof *pkt;
1166
skb = alloc_skb(size, GFP_ATOMIC);
1168
VSOCK_SEND_SEQ_CLOSE(vsk, ENOMEM);
1169
return VMCI_ERROR_NO_MEM;
1172
/* compat_sk_receive_skb() will do a sock_put(), so hold here. */
1175
memcpy(skb->data, VSOCK_SEQ_PACKET_PAYLOAD(pkt), size);
1178
* XXX, this can drop the skb. We need to find an alternative that
1179
* will return an error if that happens, so that we can send a reset
1180
* to the peer, i.e.,
1182
* if (!receive_skb(sk, skb)) {
1183
* VSOCK_SEND_SEQ_CLOSE(vsk, ENOMEM);
1184
* return VMCI_ERROR_NO_MEM;
1188
compat_sk_receive_skb(sk, skb, 0);
1191
case VSOCK_SEQ_PACKET_TYPE_CLOSE:
1194
sock_set_flag(sk, SOCK_DONE);
1195
vsk->peerShutdown = SHUTDOWN_MASK;
1196
sk->sk_state = TCP_CLOSE;
1199
* A close packet with an error code means a forceful reset, whereas
1200
* no error means a graceful close.
1203
sk->sk_socket->state = SS_UNCONNECTED;
1204
sk->sk_err = pkt->hdr.val;
1205
sk->sk_error_report(sk);
1207
if (skb_queue_empty(&sk->sk_receive_queue)) {
1208
sk->sk_socket->state = SS_DISCONNECTING;
1210
sk->sk_state_change(sk);
1216
* There's no reason for us to receive a shutdown packet in this direction,
1217
* or any other packet for that matter. Inform the peer that the packet
1221
VSOCK_SEND_SEQ_CLOSE(vsk, EINVAL);
1222
return VMCI_ERROR_INVALID_ARGS;
1225
return VMCI_SUCCESS;
1387
1613
*----------------------------------------------------------------------------
1615
* VSockVmciContextUpdatedCB --
1617
* Invoked when a VM is resumed (technically when the context ID changes,
1618
* but the event is actually sent even when it does not, so this works
1619
* well for catching resumes). We must mark all connected sequential
1620
* sockets as detached.
1626
* May modify socket state and signal socket.
1628
*----------------------------------------------------------------------------
1632
VSockVmciContextUpdatedCB(VMCIId subId, // IN
1633
VMCI_EventData *eData, // IN
1634
void *clientData) // IN
1638
spin_lock_bh(&vsockSeqTableLock);
1640
for (i = 0; i < ARRAYSIZE(vsockSeqTable); i++) {
1643
list_for_each_entry(vsk, &vsockSeqTable[i], seqTable) {
1644
struct sock *sk = sk_vsock(vsk);
1646
sock_set_flag(sk, SOCK_DONE);
1647
vsk->peerShutdown = SHUTDOWN_MASK;
1648
sk->sk_state = TCP_CLOSE;
1650
if (skb_queue_empty(&sk->sk_receive_queue)) {
1651
sk->sk_socket->state = SS_DISCONNECTING;
1653
sk->sk_state_change(sk);
1657
spin_unlock_bh(&vsockSeqTableLock);
1662
*----------------------------------------------------------------------------
1389
1664
* VSockVmciPendingWork --
1391
1666
* Releases the resources for a pending socket if it has not reached the
3496
if (!VSockAddr_Bound(&vsk->localAddr)) {
3497
struct sockaddr_vm localAddr;
3499
VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3500
if ((err = __VSockVmciBind(sk, &localAddr))) {
3505
if (!VSockAddr_SocketContextDgram(remoteAddr->svm_cid,
3506
remoteAddr->svm_port)) {
3511
memcpy(&vsk->remoteAddr, remoteAddr, sizeof vsk->remoteAddr);
3512
sock->state = SS_CONNECTED;
3800
if (!VSockAddr_Bound(&vsk->localAddr)) {
3801
struct sockaddr_vm localAddr;
3803
VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3804
if ((err = __VSockVmciBind(sk, &localAddr))) {
3809
if (!VSockAddr_SocketContextDgram(remoteAddr->svm_cid,
3810
remoteAddr->svm_port)) {
3815
memcpy(&vsk->remoteAddr, remoteAddr, sizeof vsk->remoteAddr);
3816
sock->state = SS_CONNECTED;
3825
*----------------------------------------------------------------------------
3827
* VSockVmciSeqConnect --
3829
* Connects a sequential socket.
3832
* Zero on success, negative error code on failure.
3837
*----------------------------------------------------------------------------
3841
VSockVmciSeqConnect(struct socket *sock, // IN
3842
struct sockaddr *addr, // IN
3850
struct sockaddr_vm *remoteAddr;
3857
if (SS_CONNECTED == sock->state) {
3860
} else if (SS_CONNECTING == sock->state ||
3861
SS_DISCONNECTING == sock->state) {
3866
if (VSockAddr_Cast(addr, addrLen, &remoteAddr) != 0) {
3871
if (!VSockAddr_Bound(&vsk->localAddr)) {
3872
struct sockaddr_vm localAddr;
3874
VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3875
if ((err = __VSockVmciBind(sk, &localAddr))) {
3880
if (VMCI_HYPERVISOR_CONTEXT_ID != remoteAddr->svm_cid) {
3885
if (VMCI_RPC_PRIVILEGED != remoteAddr->svm_port &&
3886
VMCI_RPC_UNPRIVILEGED != remoteAddr->svm_port) {
3891
if (!VSockAddr_SocketContextDgram(remoteAddr->svm_cid,
3892
remoteAddr->svm_port)) {
3897
VSockSeqPacket_Init(&pkt, &vsk->localAddr, remoteAddr,
3898
VSOCK_SEQ_PACKET_TYPE_CONNECT, 0);
3900
err = VMCIDatagram_Send(&pkt.hdr.dg);
3902
err = VSockVmci_ErrorToVSockError(err);
3907
* It's not necessary to get an acknowledgement. We're sending to the
3908
* hypervisor, which means the result of the call tells us whether the
3909
* endpoint accepted it or not. So as long as it returns success,
3913
memcpy(&vsk->remoteAddr, remoteAddr, sizeof vsk->remoteAddr);
3916
* The skb routines actually check if this is a sequential socket, and if
3917
* so, they require that the socket be in the TCP established state. So
3918
* we need to use the TCP states for sk_state rather than the SS states
3919
* (our STREAM sockets cheat and get away with it, we should fix that).
3922
sock->state = SS_CONNECTED;
3923
sk->sk_state = TCP_ESTABLISHED;
3924
VSockVmciInsertSeq(vsockSeqSocketsVsk(vsk), sk);
3925
sk->sk_state_change(sk);
3515
3930
release_sock(sk);
4038
4450
release_sock(sk);
4451
} else if (sock->type == SOCK_SEQPACKET) {
4455
* If there is something in the queue then we can read.
4457
if (!skb_queue_empty(&sk->sk_receive_queue) &&
4458
!(sk->sk_shutdown & RCV_SHUTDOWN)) {
4459
mask |= POLLIN | POLLRDNORM;
4463
* Sockets whose connections have been closed, reset or terminated
4464
* should also be considered readable, and we check the shutdown flag
4467
if (sk->sk_shutdown & RCV_SHUTDOWN ||
4468
vsk->peerShutdown & SEND_SHUTDOWN) {
4469
mask |= POLLIN | POLLRDNORM;
4473
* Connected sockets that can produce data can be written.
4475
if (sk->sk_state == TCP_ESTABLISHED &&
4476
!(sk->sk_shutdown & SEND_SHUTDOWN)) {
4477
mask |= POLLOUT | POLLWRNORM;
4598
* It doesn't make any sense to try and shutdown a sequential socket to
4599
* the hypervisor in the recv direction, only for send or for both.
4602
if (sk->sk_type == SOCK_SEQPACKET && mode == RCV_SHUTDOWN) {
4155
4607
/* Receive and send shutdowns are treated alike. */
4156
4608
mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
4159
4611
sk->sk_shutdown |= mode;
4612
if (sk->sk_type == SOCK_SEQPACKET) {
4613
sk->sk_state = TCP_CLOSE;
4160
4615
sk->sk_state_change(sk);
4161
4616
release_sock(sk);
4164
if (sk->sk_type == SOCK_STREAM && mode) {
4165
sock_reset_flag(sk, SOCK_DONE);
4166
VSOCK_SEND_SHUTDOWN(sk, mode);
4618
if (sk->sk_type == SOCK_STREAM) {
4619
sock_reset_flag(sk, SOCK_DONE);
4620
VSOCK_SEND_SHUTDOWN(sk, mode);
4621
} else if (sk->sk_type == SOCK_SEQPACKET) {
4622
sock_reset_flag(sk, SOCK_DONE);
4623
err = VSOCK_SEND_SEQ_SHUTDOWN(vsock_sk(sk), mode);
4763
*----------------------------------------------------------------------------
4765
* VSockVmciSeqSendmsg --
4770
* Number of bytes sent on success, negative error code on failure.
4775
*----------------------------------------------------------------------------
4779
VSockVmciSeqSendmsg(struct kiocb *kiocb, // UNUSED
4780
struct socket *sock, // IN: socket to send on
4781
struct msghdr *msg, // IN: message to send
4782
size_t len) // IN: length of message
4787
VSockSeqPacket *pkt;
4792
if (msg->msg_flags & MSG_OOB) {
4796
if (len > VMCI_MAX_DG_PAYLOAD_SIZE) {
4802
/* Callers should not provide a destination with sequential sockets. */
4803
if (msg->msg_namelen) {
4804
err = sock->state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
4808
/* Send data only if we're not shutdown in that direction. */
4809
if (sk->sk_shutdown & SEND_SHUTDOWN) {
4814
if (sock->state != SS_CONNECTED) {
4820
* We already managed to connect, which means we must already have the
4821
* right privs to send to our peer. So no need for the usual datagram
4822
* checks here, they were done by connect().
4826
* Allocate a buffer for the user's message and our packet header.
4828
pkt = kmalloc(len + sizeof *pkt, GFP_KERNEL);
4834
VSockSeqPacket_Init(pkt, &vsk->localAddr, &vsk->remoteAddr,
4835
VSOCK_SEQ_PACKET_TYPE_DATA, 0);
4836
pkt->hdr.dg.payloadSize += len;
4838
err = memcpy_fromiovec(VSOCK_SEQ_PACKET_PAYLOAD(pkt), msg->msg_iov, len);
4844
err = VMCIDatagram_Send(&pkt->hdr.dg);
4847
err = VSockVmci_ErrorToVSockError(err);
4308
4860
*----------------------------------------------------------------------------
4765
5319
*----------------------------------------------------------------------------
5321
* VSockVmciSeqRecvmsg --
5323
* Receives a datagram and places it in the caller's msg.
5326
* The size of the payload on success, negative value on failure.
5331
*----------------------------------------------------------------------------
5335
VSockVmciSeqRecvmsg(struct kiocb *kiocb, // UNUSED
5336
struct socket *sock, // IN: socket to receive from
5337
struct msghdr *msg, // IN/OUT: message to receive into
5338
size_t len, // IN: length of receive buffer
5339
int flags) // IN: receive flags
5345
struct sk_buff *skb;
5347
if (flags & MSG_OOB || flags & MSG_ERRQUEUE) {
5352
noblock = flags & MSG_DONTWAIT;
5354
/* Retrieve the head sk_buff from the socket's receive queue. */
5356
skb = skb_recv_datagram(sk, flags, noblock, &err);
5366
/* err is 0, meaning we read zero bytes. */
5370
payloadLen = skb->len;
5371
if (payloadLen > len) {
5373
msg->msg_flags |= MSG_TRUNC;
5375
* XXX, we're supposed to be a reliable protocol, so while it's fine to
5376
* return a partial packet here, we shouldn't drop the remainder. We
5377
* should keep it around so that a subsequent recv() can read it and
5378
* then get the end of record marker (see below).
5381
/* We managed to read the whole payload, so mark the end of record. */
5382
msg->msg_flags |= MSG_EOR;
5385
/* Place the datagram payload in the user's iovec. */
5386
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, payloadLen);
5391
msg->msg_namelen = 0;
5392
if (msg->msg_name) {
5394
struct sockaddr_vm *vmciAddr;
5396
/* Provide the address of the sender. */
5398
vmciAddr = (struct sockaddr_vm *)msg->msg_name;
5399
VSockAddr_Init(vmciAddr,
5400
vsk->remoteAddr.svm_cid, vsk->remoteAddr.svm_port);
5401
msg->msg_namelen = sizeof *vmciAddr;
5406
skb_free_datagram(sk, skb);
5412
*----------------------------------------------------------------------------
4767
5414
* VSockVmciStreamRecvmsg --
4769
5416
* Receives a datagram and places it in the caller's msg.