2
* This file is part of the Nice GLib ICE library.
4
* (C) 2010 Collabora Ltd.
5
* Contact: Youness Alaoui
8
* The contents of this file are subject to the Mozilla Public License Version
9
* 1.1 (the "License"); you may not use this file except in compliance with
10
* the License. You may obtain a copy of the License at
11
* http://www.mozilla.org/MPL/
13
* Software distributed under the License is distributed on an "AS IS" basis,
14
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
15
* for the specific language governing rights and limitations under the
18
* The Original Code is the Nice GLib ICE library.
20
* The Initial Developers of the Original Code are Collabora Ltd and Nokia
21
* Corporation. All Rights Reserved.
24
* Youness Alaoui, Collabora Ltd.
26
* Alternatively, the contents of this file may be used under the terms of the
27
* the GNU Lesser General Public License Version 2.1 (the "LGPL"), in which
28
* case the provisions of LGPL are applicable instead of those above. If you
29
* wish to allow use of your version of this file only under the terms of the
30
* LGPL and not to allow others to use your version of this file under the
31
* MPL, indicate your decision by deleting the provisions above and replace
32
* them with the notice and other provisions required by the LGPL. If you do
33
* not delete the provisions above, a recipient may use your version of this
34
* file under either the MPL or the LGPL.
37
/* Reproducing license from libjingle for copied code */
41
* Copyright 2004--2005, Google Inc.
43
* Redistribution and use in source and binary forms, with or without
44
* modification, are permitted provided that the following conditions are met:
46
* 1. Redistributions of source code must retain the above copyright notice,
47
* this list of conditions and the following disclaimer.
48
* 2. Redistributions in binary form must reproduce the above copyright notice,
49
* this list of conditions and the following disclaimer in the documentation
50
* and/or other materials provided with the distribution.
51
* 3. The name of the author may not be used to endorse or promote products
52
* derived from this software without specific prior written permission.
54
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
55
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
56
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
57
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
58
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
60
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
61
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
62
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
63
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
67
#include <arpa/inet.h>
73
#include "pseudotcp.h"
75
G_DEFINE_TYPE (PseudoTcpSocket, pseudo_tcp_socket, G_TYPE_OBJECT);
78
//////////////////////////////////////////////////////////////////////
80
//////////////////////////////////////////////////////////////////////
83
const guint16 PACKET_MAXIMUMS[] = {
84
65535, // Theoretical maximum, Hyperchannel
86
17914, // 16Mb IBM Token Ring
88
//4464, // IEEE 802.5 (4Mb max)
90
//2048, // Wideband Network
91
2002, // IEEE 802.5 (4Mb recommended)
92
//1536, // Experimental Ethernet Networks
93
//1500, // Ethernet, Point-to-Point (default)
95
1006, // SLIP, ARPANET
96
//576, // X.25 Networks
97
//544, // DEC IP Portal
99
508, // IEEE 802/Source-Rt Bridge, ARCNET
100
296, // Point-to-Point (low delay)
101
//68, // Official minimum
102
0, // End of list marker
105
#define MAX_PACKET 65535
106
// Note: we removed lowest level because packet overhead was larger!
107
#define MIN_PACKET 296
109
// (+ up to 40 bytes of options?)
110
#define IP_HEADER_SIZE 20
111
#define ICMP_HEADER_SIZE 8
112
#define UDP_HEADER_SIZE 8
113
// TODO: Make JINGLE_HEADER_SIZE transparent to this code?
114
// when relay framing is in use
115
#define JINGLE_HEADER_SIZE 64
117
//////////////////////////////////////////////////////////////////////
118
// Global Constants and Functions
119
//////////////////////////////////////////////////////////////////////
122
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
123
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
124
// 0 | Conversation Number |
125
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
126
// 4 | Sequence Number |
127
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
128
// 8 | Acknowledgment Number |
129
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
130
// | | |U|A|P|R|S|F| |
131
// 12 | Control | |R|C|S|S|Y|I| Window |
132
// | | |G|K|H|T|N|N| |
133
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
134
// 16 | Timestamp sending |
135
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
136
// 20 | Timestamp receiving |
137
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
139
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
141
//////////////////////////////////////////////////////////////////////
143
#define MAX_SEQ 0xFFFFFFFF
144
#define HEADER_SIZE 24
146
#define PACKET_OVERHEAD (HEADER_SIZE + UDP_HEADER_SIZE + \
147
IP_HEADER_SIZE + JINGLE_HEADER_SIZE)
149
// MIN_RTO = 250 ms (RFC1122, Sec 4.2.3.1 "fractions of a second")
151
#define DEF_RTO 3000 /* 3 seconds (RFC1122, Sec 4.2.3.1) */
152
#define MAX_RTO 60000 /* 60 seconds */
153
#define ACK_DELAY 100 /* 100 milliseconds */
156
#define FLAG_FIN 0x01
157
#define FLAG_SYN 0x02
158
#define FLAG_ACK 0x10
161
#define FLAG_CTL 0x02
162
#define FLAG_RST 0x04
164
#define CTL_CONNECT 0
165
//#define CTL_REDIRECT 1
166
#define CTL_EXTRA 255
169
#define CTRL_BOUND 0x80000000
171
// If there are no pending clocks, wake up every 4 seconds
172
#define DEFAULT_TIMEOUT 4000
173
// If the connection is closed, once per minute
174
#define CLOSED_TIMEOUT (60 * 1000)
176
//////////////////////////////////////////////////////////////////////
178
//////////////////////////////////////////////////////////////////////
181
/* Return the smaller of two unsigned 32-bit values. */
static guint32
min (guint32 first, guint32 second)
{
  return (first < second) ? first : second;
}
186
/* Return the larger of two unsigned 32-bit values. */
static guint32
max (guint32 first, guint32 second)
{
  return (first > second) ? first : second;
}
192
/* Clamp @middle into the range [@lower, @upper].
 *
 * The value is first raised to at least @lower and the result is then capped
 * at @upper, so if @lower is greater than @upper the result is @upper. */
static guint32
bound (guint32 lower, guint32 middle, guint32 upper)
{
  return min (max (lower, middle), upper);
}
198
get_current_time(void)
201
g_get_current_time (&tv);
202
return tv.tv_sec * 1000 + tv.tv_usec / 1000;
206
time_is_between(guint32 later, guint32 middle, guint32 earlier)
208
if (earlier <= later) {
209
return ((earlier <= middle) && (middle <= later));
211
return !((later < middle) && (middle < earlier));
216
/* Return the signed distance from @earlier to @later on a wrapping 32-bit
 * timeline.
 *
 * If @later falls within half of the 32-bit range after @earlier, the result
 * is the non-negative forward distance.  Otherwise @later is treated as being
 * before @earlier and the result is the negated backward distance.  The
 * "+ (LAST - x) + 1" terms compensate for a wrap past 0xFFFFFFFF between the
 * two timestamps. */
static long
time_diff (guint32 later, guint32 earlier)
{
  guint32 LAST = 0xFFFFFFFF;
  guint32 HALF = 0x80000000;

  if (time_is_between (earlier + HALF, later, earlier)) {
    /* @later is ahead of (or equal to) @earlier. */
    if (earlier <= later) {
      return (long) (later - earlier);
    } else {
      return (long) (later + (LAST - earlier) + 1);
    }
  } else {
    /* @later is behind @earlier. */
    if (later <= earlier) {
      return -(long) (earlier - later);
    } else {
      return -(long) (earlier + (LAST - later) + 1);
    }
  }
}
235
//////////////////////////////////////////////////////////////////////
237
//////////////////////////////////////////////////////////////////////
252
// Note: can't go as high as 1024 * 64, because of uint16 precision
253
kRcvBufSize = 1024 * 60,
254
// Note: send buffer should be larger to make sure we can always fill the
256
kSndBufSize = 1024 * 90
260
guint32 conv, seq, ack;
265
guint32 tsval, tsecr;
279
struct _PseudoTcpSocketPrivate {
280
PseudoTcpCallbacks callbacks;
286
PseudoTcpState state;
288
gboolean bReadEnable, bWriteEnable, bOutgoing;
289
guint32 last_traffic;
293
gchar rbuf[kRcvBufSize];
294
guint32 rcv_nxt, rcv_wnd, rlen, lastrecv;
298
gchar sbuf[kSndBufSize];
299
guint32 snd_nxt, snd_wnd, slen, lastsend, snd_una;
300
// Maximum segment size, estimated protocol level, largest segment sent
301
guint32 mss, msslevel, largest, mtu_advise;
305
// Timestamp tracking
306
guint32 ts_recent, ts_lastack;
308
// Round-trip calculation
309
guint32 rx_rttvar, rx_srtt, rx_rto;
311
// Congestion avoidance, Fast retransmit/recovery, Delayed ACKs
312
guint32 ssthresh, cwnd;
323
PROP_CONVERSATION = 1,
330
static void pseudo_tcp_socket_get_property (GObject *object, guint property_id,
331
GValue *value, GParamSpec *pspec);
332
static void pseudo_tcp_socket_set_property (GObject *object, guint property_id,
333
const GValue *value, GParamSpec *pspec);
334
static void pseudo_tcp_socket_finalize (GObject *object);
337
static guint32 queue(PseudoTcpSocket *self, const gchar * data,
338
guint32 len, gboolean bCtrl);
339
static PseudoTcpWriteResult packet(PseudoTcpSocket *self, guint32 seq,
340
guint8 flags, const gchar * data, guint32 len);
341
static gboolean parse(PseudoTcpSocket *self,
342
const guint8 * buffer, guint32 size);
343
static gboolean process(PseudoTcpSocket *self, Segment *seg);
344
static gboolean transmit(PseudoTcpSocket *self, const GList *seg, guint32 now);
345
static void attempt_send(PseudoTcpSocket *self, SendFlags sflags);
346
static void closedown(PseudoTcpSocket *self, guint32 err);
347
static void adjustMTU(PseudoTcpSocket *self);
350
// The following logging is for detailed (packet-level) pseudotcp analysis only.
351
static PseudoTcpDebugLevel debug_level = PSEUDO_TCP_DEBUG_NONE;
353
/* Log a packet-level debug message through g_debug(), prefixed with the
 * socket pointer, when the file-wide debug_level is at least @level.
 *
 * A variable named `self` must be in scope at the call site.  The body is
 * wrapped in do { } while (0) so the macro expands to exactly one statement:
 * a bare `if` here would capture an `else` that follows the macro call. */
#define DEBUG(level, fmt, ...) \
  do { \
    if (debug_level >= (level)) \
      g_debug ("PseudoTcpSocket %p: " fmt, self, ## __VA_ARGS__); \
  } while (0)
358
pseudo_tcp_set_debug_level (PseudoTcpDebugLevel level)
364
pseudo_tcp_socket_class_init (PseudoTcpSocketClass *cls)
366
GObjectClass *object_class = G_OBJECT_CLASS (cls);
368
object_class->get_property = pseudo_tcp_socket_get_property;
369
object_class->set_property = pseudo_tcp_socket_set_property;
370
object_class->finalize = pseudo_tcp_socket_finalize;
372
g_object_class_install_property (object_class, PROP_CONVERSATION,
373
g_param_spec_uint ("conversation", "TCP Conversation ID",
374
"The TCP Conversation ID",
376
G_PARAM_CONSTRUCT_ONLY | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
378
g_object_class_install_property (object_class, PROP_CALLBACKS,
379
g_param_spec_pointer ("callbacks", "PseudoTcp socket callbacks",
380
"Structure with the callbacks to call when PseudoTcp events happen",
381
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
383
g_object_class_install_property (object_class, PROP_STATE,
384
g_param_spec_uint ("state", "PseudoTcp State",
385
"The current state (enum PseudoTcpState) of the PseudoTcp socket",
386
TCP_LISTEN, TCP_CLOSED, TCP_LISTEN,
387
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS));
393
pseudo_tcp_socket_get_property (GObject *object,
398
PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
400
switch (property_id) {
401
case PROP_CONVERSATION:
402
g_value_set_uint (value, self->priv->conv);
405
g_value_set_pointer (value, (gpointer) &self->priv->callbacks);
408
g_value_set_uint (value, self->priv->state);
411
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
417
pseudo_tcp_socket_set_property (GObject *object,
422
PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
424
switch (property_id) {
425
case PROP_CONVERSATION:
426
self->priv->conv = g_value_get_uint (value);
430
PseudoTcpCallbacks *c = g_value_get_pointer (value);
431
self->priv->callbacks = *c;
435
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
441
pseudo_tcp_socket_finalize (GObject *object)
443
PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
444
PseudoTcpSocketPrivate *priv = self->priv;
450
for (i = priv->slist; i; i = i->next) {
451
SSegment *sseg = i->data;
452
g_slice_free (SSegment, sseg);
454
for (i = priv->rlist; i; i = i->next) {
455
RSegment *rseg = i->data;
456
g_slice_free (RSegment, rseg);
458
g_list_free (priv->slist);
460
g_list_free (priv->rlist);
466
if (G_OBJECT_CLASS (pseudo_tcp_socket_parent_class)->finalize)
467
G_OBJECT_CLASS (pseudo_tcp_socket_parent_class)->finalize (object);
472
pseudo_tcp_socket_init (PseudoTcpSocket *obj)
474
/* Use g_new0, and do not use g_object_set_private because the size of
475
* our private data is too big (150KB+) and g_slice_alloc cannot allocate
476
* it. So we handle the private ourselves */
477
PseudoTcpSocketPrivate *priv = g_new0 (PseudoTcpSocketPrivate, 1);
478
guint32 now = get_current_time();
482
priv->shutdown = SD_NONE;
485
priv->state = TCP_LISTEN;
487
priv->rcv_wnd = sizeof(priv->rbuf);
488
priv->snd_nxt = priv->slen = 0;
490
priv->snd_una = priv->rcv_nxt = priv->rlen = 0;
491
priv->bReadEnable = TRUE;
492
priv->bWriteEnable = FALSE;
497
priv->mss = MIN_PACKET - PACKET_OVERHEAD;
498
priv->mtu_advise = MAX_PACKET;
502
priv->cwnd = 2 * priv->mss;
503
priv->ssthresh = sizeof(priv->rbuf);
504
priv->lastrecv = priv->lastsend = priv->last_traffic = now;
505
priv->bOutgoing = FALSE;
510
priv->ts_recent = priv->ts_lastack = 0;
512
priv->rx_rto = DEF_RTO;
513
priv->rx_srtt = priv->rx_rttvar = 0;
516
PseudoTcpSocket *pseudo_tcp_socket_new (guint32 conversation,
517
PseudoTcpCallbacks *callbacks)
520
return g_object_new (PSEUDO_TCP_SOCKET_TYPE,
521
"conversation", conversation,
522
"callbacks", callbacks,
527
pseudo_tcp_socket_connect(PseudoTcpSocket *self)
529
PseudoTcpSocketPrivate *priv = self->priv;
532
if (priv->state != TCP_LISTEN) {
533
priv->error = EINVAL;
537
priv->state = TCP_SYN_SENT;
538
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "State: TCP_SYN_SENT");
540
buffer[0] = CTL_CONNECT;
541
queue(self, buffer, 1, TRUE);
542
attempt_send(self, sfNone);
548
pseudo_tcp_socket_notify_mtu(PseudoTcpSocket *self, guint16 mtu)
550
PseudoTcpSocketPrivate *priv = self->priv;
551
priv->mtu_advise = mtu;
552
if (priv->state == TCP_ESTABLISHED) {
558
pseudo_tcp_socket_notify_clock(PseudoTcpSocket *self)
560
PseudoTcpSocketPrivate *priv = self->priv;
561
guint32 now = get_current_time ();
563
if (priv->state == TCP_CLOSED)
566
// Check if it's time to retransmit a segment
567
if (priv->rto_base &&
568
(time_diff(priv->rto_base + priv->rx_rto, now) <= 0)) {
569
if (g_list_length (priv->slist) == 0) {
570
g_assert_not_reached ();
572
// Note: (priv->slist.front().xmit == 0)) {
573
// retransmit segments
577
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "timeout retransmit (rto: %d) "
578
"(rto_base: %d) (now: %d) (dup_acks: %d)",
579
priv->rx_rto, priv->rto_base, now, (guint) priv->dup_acks);
581
if (!transmit(self, priv->slist, now)) {
582
closedown(self, ECONNABORTED);
586
nInFlight = priv->snd_nxt - priv->snd_una;
587
priv->ssthresh = max(nInFlight / 2, 2 * priv->mss);
588
//LOG(LS_INFO) << "priv->ssthresh: " << priv->ssthresh << " nInFlight: " << nInFlight << " priv->mss: " << priv->mss;
589
priv->cwnd = priv->mss;
591
// Back off retransmit timer. Note: the limit is lower when connecting.
592
rto_limit = (priv->state < TCP_ESTABLISHED) ? DEF_RTO : MAX_RTO;
593
priv->rx_rto = min(rto_limit, priv->rx_rto * 2);
594
priv->rto_base = now;
598
// Check if it's time to probe closed windows
599
if ((priv->snd_wnd == 0)
600
&& (time_diff(priv->lastsend + priv->rx_rto, now) <= 0)) {
601
if (time_diff(now, priv->lastrecv) >= 15000) {
602
closedown(self, ECONNABORTED);
607
packet(self, priv->snd_nxt - 1, 0, 0, 0);
608
priv->lastsend = now;
610
// back off retransmit timer
611
priv->rx_rto = min(MAX_RTO, priv->rx_rto * 2);
614
// Check if it's time to send delayed acks
615
if (priv->t_ack && (time_diff(priv->t_ack + ACK_DELAY, now) <= 0)) {
616
packet(self, priv->snd_nxt, 0, 0, 0);
622
pseudo_tcp_socket_notify_packet(PseudoTcpSocket *self,
623
const gchar * buffer, guint32 len)
625
if (len > MAX_PACKET) {
626
//LOG_F(WARNING) << "packet too large";
629
return parse(self, (guint8 *) buffer, len);
633
pseudo_tcp_socket_get_next_clock(PseudoTcpSocket *self, long *timeout)
635
PseudoTcpSocketPrivate *priv = self->priv;
636
guint32 now = get_current_time ();
638
if (priv->shutdown == SD_FORCEFUL)
641
if ((priv->shutdown == SD_GRACEFUL)
642
&& ((priv->state != TCP_ESTABLISHED)
643
|| ((priv->slen == 0) && (priv->t_ack == 0)))) {
647
if (priv->state == TCP_CLOSED) {
648
*timeout = CLOSED_TIMEOUT;
652
*timeout = DEFAULT_TIMEOUT;
655
*timeout = min(*timeout, time_diff(priv->t_ack + ACK_DELAY, now));
657
if (priv->rto_base) {
658
*timeout = min(*timeout, time_diff(priv->rto_base + priv->rx_rto, now));
660
if (priv->snd_wnd == 0) {
661
*timeout = min(*timeout, time_diff(priv->lastsend + priv->rx_rto, now));
669
pseudo_tcp_socket_recv(PseudoTcpSocket *self, char * buffer, size_t len)
671
PseudoTcpSocketPrivate *priv = self->priv;
674
if (priv->state != TCP_ESTABLISHED) {
675
priv->error = ENOTCONN;
679
if (priv->rlen == 0) {
680
priv->bReadEnable = TRUE;
681
priv->error = EWOULDBLOCK;
685
read = min((guint32) len, priv->rlen);
686
memcpy(buffer, priv->rbuf, read);
689
/* !?! until we create a circular buffer, we need to move all of the rest
691
memmove(priv->rbuf, priv->rbuf + read, sizeof(priv->rbuf) - read);
693
if ((sizeof(priv->rbuf) - priv->rlen - priv->rcv_wnd)
694
>= min(sizeof(priv->rbuf) / 2, priv->mss)) {
695
// !?! Not sure about this "was closed" business
696
gboolean bWasClosed = (priv->rcv_wnd == 0);
698
priv->rcv_wnd = sizeof(priv->rbuf) - priv->rlen;
701
attempt_send(self, sfImmediateAck);
709
pseudo_tcp_socket_send(PseudoTcpSocket *self, const char * buffer, guint32 len)
711
PseudoTcpSocketPrivate *priv = self->priv;
714
if (priv->state != TCP_ESTABLISHED) {
715
priv->error = ENOTCONN;
719
if (priv->slen == sizeof(priv->sbuf)) {
720
priv->bWriteEnable = TRUE;
721
priv->error = EWOULDBLOCK;
725
written = queue(self, buffer, len, FALSE);
726
attempt_send(self, sfNone);
728
if (written > 0 && (guint32)written < len) {
729
priv->bWriteEnable = TRUE;
736
pseudo_tcp_socket_close(PseudoTcpSocket *self, gboolean force)
738
PseudoTcpSocketPrivate *priv = self->priv;
739
//nice_agent ("Closing socket %p : %d", sock, force?"true":"false");
740
priv->shutdown = force ? SD_FORCEFUL : SD_GRACEFUL;
744
pseudo_tcp_socket_get_error(PseudoTcpSocket *self)
746
PseudoTcpSocketPrivate *priv = self->priv;
751
// Internal Implementation
755
queue(PseudoTcpSocket *self, const gchar * data, guint32 len, gboolean bCtrl)
757
PseudoTcpSocketPrivate *priv = self->priv;
759
if (len > sizeof(priv->sbuf) - priv->slen) {
761
len = sizeof(priv->sbuf) - priv->slen;
764
// We can concatenate data if the last segment is the same type
765
// (control v. regular data), and has not been transmitted yet
766
if (g_list_length (priv->slist) > 0 &&
767
(((SSegment *)g_list_last (priv->slist)->data)->bCtrl == bCtrl) &&
768
(((SSegment *)g_list_last (priv->slist)->data)->xmit == 0)) {
769
((SSegment *)g_list_last (priv->slist)->data)->len += len;
771
SSegment *sseg = g_slice_new0 (SSegment);
772
sseg->seq = priv->snd_una + priv->slen;
775
priv->slist = g_list_append (priv->slist, sseg);
778
memcpy(priv->sbuf + priv->slen, data, len);
780
//LOG(LS_INFO) << "PseudoTcp::queue - priv->slen = " << priv->slen;
784
static PseudoTcpWriteResult
785
packet(PseudoTcpSocket *self, guint32 seq, guint8 flags,
786
const gchar * data, guint32 len)
788
PseudoTcpSocketPrivate *priv = self->priv;
789
guint32 now = get_current_time();
790
guint8 buffer[MAX_PACKET];
791
PseudoTcpWriteResult wres = WR_SUCCESS;
793
g_assert(HEADER_SIZE + len <= MAX_PACKET);
795
*((uint32_t *) buffer) = htonl(priv->conv);
796
*((uint32_t *) (buffer + 4)) = htonl(seq);
797
*((uint32_t *) (buffer + 8)) = htonl(priv->rcv_nxt);
800
*((uint16_t *) (buffer + 14)) = htons((uint16_t)priv->rcv_wnd);
802
// Timestamp computations
803
*((uint32_t *) (buffer + 16)) = htonl(now);
804
*((uint32_t *) (buffer + 20)) = htonl(priv->ts_recent);
805
priv->ts_lastack = priv->rcv_nxt;
807
memcpy(buffer + HEADER_SIZE, data, len);
809
DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "<-- <CONV=%d><FLG=%d><SEQ=%d:%d><ACK=%d>"
810
"<WND=%d><TS=%d><TSR=%d><LEN=%d>",
811
priv->conv, (unsigned)flags, seq, seq + len, priv->rcv_nxt, priv->rcv_wnd,
812
now % 10000, priv->ts_recent % 10000, len);
814
wres = priv->callbacks.WritePacket(self, (gchar *) buffer, len + HEADER_SIZE,
815
priv->callbacks.user_data);
816
/* Note: When data is NULL, this is an ACK packet. We don't read the
817
return value for those, and thus we won't retry. So go ahead and treat
818
the packet as a success (basically simulate as if it were dropped),
819
which will prevent our timers from being messed up. */
820
if ((wres != WR_SUCCESS) && (NULL != data))
825
priv->lastsend = now;
827
priv->last_traffic = now;
828
priv->bOutgoing = TRUE;
834
parse(PseudoTcpSocket *self, const guint8 * buffer, guint32 size)
841
seg.conv = ntohl(*(uint32_t *)buffer);
842
seg.seq = ntohl(*(uint32_t *)(buffer + 4));
843
seg.ack = ntohl(*(uint32_t *)(buffer + 8));
844
seg.flags = buffer[13];
845
seg.wnd = ntohs(*(uint16_t *)(buffer + 14));
847
seg.tsval = ntohl(*(uint32_t *)(buffer + 16));
848
seg.tsecr = ntohl(*(uint32_t *)(buffer + 20));
850
seg.data = ((gchar *)buffer) + HEADER_SIZE;
851
seg.len = size - HEADER_SIZE;
853
DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "--> <CONV=%d><FLG=%d><SEQ=%d:%d><ACK=%d>"
854
"<WND=%d><TS=%d><TSR=%d><LEN=%d>",
855
seg.conv, (unsigned)seg.flags, seg.seq, seg.seq + seg.len, seg.ack,
856
seg.wnd, seg.tsval % 10000, seg.tsecr % 10000, seg.len);
858
return process(self, &seg);
863
process(PseudoTcpSocket *self, Segment *seg)
865
PseudoTcpSocketPrivate *priv = self->priv;
867
SendFlags sflags = sfNone;
868
gboolean bIgnoreData;
870
gboolean bConnect = FALSE;
872
/* If this is the wrong conversation, send a reset!?!
873
(with the correct conversation?) */
874
if (seg->conv != priv->conv) {
875
//if ((seg->flags & FLAG_RST) == 0) {
876
// packet(sock, tcb, seg->ack, 0, FLAG_RST, 0, 0);
878
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "wrong conversation");
882
now = get_current_time();
883
priv->last_traffic = priv->lastrecv = now;
884
priv->bOutgoing = FALSE;
886
if (priv->state == TCP_CLOSED) {
888
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "closed");
892
// Check if this is a reset segment
893
if (seg->flags & FLAG_RST) {
894
closedown(self, ECONNRESET);
898
// Check for control data
900
if (seg->flags & FLAG_CTL) {
902
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Missing control code");
904
} else if (seg->data[0] == CTL_CONNECT) {
906
if (priv->state == TCP_LISTEN) {
908
priv->state = TCP_SYN_RECEIVED;
909
buffer[0] = CTL_CONNECT;
910
queue(self, buffer, 1, TRUE);
911
} else if (priv->state == TCP_SYN_SENT) {
912
priv->state = TCP_ESTABLISHED;
913
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "State: TCP_ESTABLISHED");
915
if (priv->callbacks.PseudoTcpOpened)
916
priv->callbacks.PseudoTcpOpened(self, priv->callbacks.user_data);
920
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Unknown control code: %d", seg->data[0]);
926
if ((seg->seq <= priv->ts_lastack) &&
927
(priv->ts_lastack < seg->seq + seg->len)) {
928
priv->ts_recent = seg->tsval;
931
// Check if this is a valuable ack
932
if ((seg->ack > priv->snd_una) && (seg->ack <= priv->snd_nxt)) {
935
guint32 kIdealRefillSize;
937
// Calculate round-trip time
939
long rtt = time_diff(now, seg->tsecr);
941
if (priv->rx_srtt == 0) {
943
priv->rx_rttvar = rtt / 2;
945
priv->rx_rttvar = (3 * priv->rx_rttvar +
946
abs((long)(rtt - priv->rx_srtt))) / 4;
947
priv->rx_srtt = (7 * priv->rx_srtt + rtt) / 8;
949
priv->rx_rto = bound(MIN_RTO,
950
priv->rx_srtt + max(1LU, 4 * priv->rx_rttvar), MAX_RTO);
952
DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "rtt: %ld srtt: %d rto: %d",
953
rtt, priv->rx_srtt, priv->rx_rto);
955
g_assert_not_reached ();
959
priv->snd_wnd = seg->wnd;
961
nAcked = seg->ack - priv->snd_una;
962
priv->snd_una = seg->ack;
964
priv->rto_base = (priv->snd_una == priv->snd_nxt) ? 0 : now;
966
priv->slen -= nAcked;
967
memmove(priv->sbuf, priv->sbuf + nAcked, priv->slen);
968
//LOG(LS_INFO) << "PseudoTcp::process - priv->slen = " << priv->slen;
970
for (nFree = nAcked; nFree > 0; ) {
971
SSegment *data = (SSegment *) (g_list_first (priv->slist)->data);
972
g_assert(g_list_length (priv->slist) > 0);
973
if (nFree < data->len) {
977
if (data->len > priv->largest) {
978
priv->largest = data->len;
981
g_slice_free (SSegment, priv->slist->data);
982
priv->slist = g_list_delete_link (priv->slist, priv->slist);
986
if (priv->dup_acks >= 3) {
987
if (priv->snd_una >= priv->recover) { // NewReno
988
guint32 nInFlight = priv->snd_nxt - priv->snd_una;
990
priv->cwnd = min(priv->ssthresh, nInFlight + priv->mss);
991
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "exit recovery");
994
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "recovery retransmit");
995
if (!transmit(self, priv->slist, now)) {
996
closedown(self, ECONNABORTED);
999
priv->cwnd += priv->mss - min(nAcked, priv->cwnd);
1003
// Slow start, congestion avoidance
1004
if (priv->cwnd < priv->ssthresh) {
1005
priv->cwnd += priv->mss;
1007
priv->cwnd += max(1LU, priv->mss * priv->mss / priv->cwnd);
1012
if ((priv->state == TCP_SYN_RECEIVED) && !bConnect) {
1013
priv->state = TCP_ESTABLISHED;
1014
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "State: TCP_ESTABLISHED");
1016
if (priv->callbacks.PseudoTcpOpened)
1017
priv->callbacks.PseudoTcpOpened(self, priv->callbacks.user_data);
1020
// If we make room in the send queue, notify the user
1021
// The goal is to make sure we always have at least enough data to fill the
1022
// window. We'd like to notify the app when we are halfway to that point.
1023
kIdealRefillSize = (sizeof(priv->sbuf) + sizeof(priv->rbuf)) / 2;
1024
if (priv->bWriteEnable && (priv->slen < kIdealRefillSize)) {
1025
priv->bWriteEnable = FALSE;
1026
if (priv->callbacks.PseudoTcpWritable)
1027
priv->callbacks.PseudoTcpWritable(self, priv->callbacks.user_data);
1029
} else if (seg->ack == priv->snd_una) {
1030
/* !?! Note, tcp says don't do this... but otherwise how does a
1031
closed window become open? */
1032
priv->snd_wnd = seg->wnd;
1034
// Check duplicate acks
1036
// it's a dup ack, but with a data payload, so don't modify priv->dup_acks
1037
} else if (priv->snd_una != priv->snd_nxt) {
1040
priv->dup_acks += 1;
1041
if (priv->dup_acks == 3) { // (Fast Retransmit)
1042
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "enter recovery");
1043
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "recovery retransmit");
1044
if (!transmit(self, priv->slist, now)) {
1045
closedown(self, ECONNABORTED);
1048
priv->recover = priv->snd_nxt;
1049
nInFlight = priv->snd_nxt - priv->snd_una;
1050
priv->ssthresh = max(nInFlight / 2, 2 * priv->mss);
1051
//LOG(LS_INFO) << "priv->ssthresh: " << priv->ssthresh << " nInFlight: " << nInFlight << " priv->mss: " << priv->mss;
1052
priv->cwnd = priv->ssthresh + 3 * priv->mss;
1053
} else if (priv->dup_acks > 3) {
1054
priv->cwnd += priv->mss;
1061
/* Conditions where acks must be sent:
1062
* 1) Segment is too old (they missed an ACK) (immediately)
1063
* 2) Segment is too new (we missed a segment) (immediately)
1064
* 3) Segment has data (so we need to ACK!) (delayed)
1065
* ... so the only time we don't need to ACK, is an empty segment
1066
* that points to rcv_nxt!
1069
if (seg->seq != priv->rcv_nxt) {
1070
sflags = sfImmediateAck; // (Fast Recovery)
1071
} else if (seg->len != 0) {
1072
sflags = sfDelayedAck;
1074
if (sflags == sfImmediateAck) {
1075
if (seg->seq > priv->rcv_nxt) {
1076
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too new");
1077
} else if (seg->seq + seg->len <= priv->rcv_nxt) {
1078
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too old");
1082
// Adjust the incoming segment to fit our receive buffer
1083
if (seg->seq < priv->rcv_nxt) {
1084
guint32 nAdjust = priv->rcv_nxt - seg->seq;
1085
if (nAdjust < seg->len) {
1086
seg->seq += nAdjust;
1087
seg->data += nAdjust;
1088
seg->len -= nAdjust;
1093
if ((seg->seq + seg->len - priv->rcv_nxt) >
1094
(sizeof(priv->rbuf) - priv->rlen)) {
1095
guint32 nAdjust = seg->seq + seg->len - priv->rcv_nxt -
1096
(sizeof(priv->rbuf) - priv->rlen);
1097
if (nAdjust < seg->len) {
1098
seg->len -= nAdjust;
1104
bIgnoreData = (seg->flags & FLAG_CTL) || (priv->shutdown != SD_NONE);
1109
if (seg->seq == priv->rcv_nxt) {
1110
priv->rcv_nxt += seg->len;
1113
guint32 nOffset = seg->seq - priv->rcv_nxt;
1114
memcpy(priv->rbuf + priv->rlen + nOffset, seg->data, seg->len);
1115
if (seg->seq == priv->rcv_nxt) {
1118
priv->rlen += seg->len;
1119
priv->rcv_nxt += seg->len;
1120
priv->rcv_wnd -= seg->len;
1124
while (iter && (((RSegment *)iter->data)->seq <= priv->rcv_nxt)) {
1125
RSegment *data = (RSegment *)(iter->data);
1126
if (data->seq + data->len > priv->rcv_nxt) {
1127
guint32 nAdjust = (data->seq + data->len) - priv->rcv_nxt;
1128
sflags = sfImmediateAck; // (Fast Recovery)
1129
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Recovered %d bytes (%d -> %d)",
1130
nAdjust, priv->rcv_nxt, priv->rcv_nxt + nAdjust);
1131
priv->rlen += nAdjust;
1132
priv->rcv_nxt += nAdjust;
1133
priv->rcv_wnd -= nAdjust;
1135
g_slice_free (RSegment, priv->rlist->data);
1136
priv->rlist = g_list_delete_link (priv->rlist, priv->rlist);
1140
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Saving %d bytes (%d -> %d)",
1141
seg->len, seg->seq, seg->seq + seg->len);
1143
RSegment *rseg = g_slice_new0 (RSegment);
1144
rseg->seq = seg->seq;
1145
rseg->len = seg->len;
1147
while (iter && (((RSegment*)iter->data)->seq < rseg->seq)) {
1148
iter = g_list_next (iter);
1150
priv->rlist = g_list_insert_before(priv->rlist, iter, rseg);
1155
attempt_send(self, sflags);
1157
// If we have new data, notify the user
1158
if (bNewData && priv->bReadEnable) {
1159
priv->bReadEnable = FALSE;
1160
if (priv->callbacks.PseudoTcpReadable)
1161
priv->callbacks.PseudoTcpReadable(self, priv->callbacks.user_data);
1168
transmit(PseudoTcpSocket *self, const GList *seg, guint32 now)
1170
PseudoTcpSocketPrivate *priv = self->priv;
1171
SSegment *segment = (SSegment*)(seg->data);
1172
guint32 nTransmit = min(segment->len, priv->mss);
1174
if (segment->xmit >= ((priv->state == TCP_ESTABLISHED) ? 15 : 30)) {
1175
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too many retransmits");
1180
guint32 seq = segment->seq;
1181
guint8 flags = (segment->bCtrl ? FLAG_CTL : 0);
1182
const gchar * buffer = priv->sbuf + (segment->seq - priv->snd_una);
1183
PseudoTcpWriteResult wres = packet(self, seq, flags, buffer, nTransmit);
1185
if (wres == WR_SUCCESS)
1188
if (wres == WR_FAIL) {
1189
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "packet failed");
1193
g_assert(wres == WR_TOO_LARGE);
1196
if (PACKET_MAXIMUMS[priv->msslevel + 1] == 0) {
1197
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "MTU too small");
1200
/* !?! We need to break up all outstanding and pending packets
1201
and then retransmit!?! */
1203
priv->mss = PACKET_MAXIMUMS[++priv->msslevel] - PACKET_OVERHEAD;
1204
// I added this... haven't researched actual formula
1205
priv->cwnd = 2 * priv->mss;
1207
if (priv->mss < nTransmit) {
1208
nTransmit = priv->mss;
1212
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Adjusting mss to %d bytes ", priv->mss);
1215
if (nTransmit < segment->len) {
1216
SSegment *subseg = g_slice_new0 (SSegment);
1217
subseg->seq = segment->seq + nTransmit;
1218
subseg->len = segment->len - nTransmit;
1219
subseg->bCtrl = segment->bCtrl;
1220
subseg->xmit = segment->xmit;
1222
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "mss reduced to %d", priv->mss);
1224
segment->len = nTransmit;
1225
priv->slist = g_list_insert_before(priv->slist, seg->next, subseg);
1228
if (segment->xmit == 0) {
1229
priv->snd_nxt += segment->len;
1233
if (priv->rto_base == 0) {
1234
priv->rto_base = now;
1241
attempt_send(PseudoTcpSocket *self, SendFlags sflags)
1243
PseudoTcpSocketPrivate *priv = self->priv;
1244
guint32 now = get_current_time();
1245
gboolean bFirst = TRUE;
1247
if (time_diff(now, priv->lastsend) > (long) priv->rx_rto) {
1248
priv->cwnd = priv->mss;
1261
if ((priv->dup_acks == 1) || (priv->dup_acks == 2)) { // Limited Transmit
1262
cwnd += priv->dup_acks * priv->mss;
1264
nWindow = min(priv->snd_wnd, cwnd);
1265
nInFlight = priv->snd_nxt - priv->snd_una;
1266
nUseable = (nInFlight < nWindow) ? (nWindow - nInFlight) : 0;
1267
nAvailable = min(priv->slen - nInFlight, priv->mss);
1269
if (nAvailable > nUseable) {
1270
if (nUseable * 4 < nWindow) {
1271
// RFC 813 - avoid SWS
1274
nAvailable = nUseable;
1280
DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "[cwnd: %d nWindow: %d nInFlight: %d "
1281
"nAvailable: %d nQueued: %d nEmpty: %d ssthresh: %d]",
1282
priv->cwnd, nWindow, nInFlight, nAvailable, priv->slen - nInFlight,
1283
sizeof(priv->sbuf) - priv->slen, priv->ssthresh);
1286
if (nAvailable == 0) {
1287
if (sflags == sfNone)
1290
// If this is an immediate ack, or the second delayed ack
1291
if ((sflags == sfImmediateAck) || priv->t_ack) {
1292
packet(self, priv->snd_nxt, 0, 0, 0);
1294
priv->t_ack = get_current_time();
1300
if ((priv->snd_nxt > priv->snd_una) && (nAvailable < priv->mss)) {
1304
// Find the next segment to transmit
1306
while (((SSegment*)iter->data)->xmit > 0) {
1307
iter = g_list_next (iter);
1311
// If the segment is too large, break it into two
1312
if (((SSegment*)iter->data)->len > nAvailable) {
1313
SSegment *subseg = g_slice_new0 (SSegment);
1314
subseg->seq = ((SSegment*)iter->data)->seq + nAvailable;
1315
subseg->len = ((SSegment*)iter->data)->len - nAvailable;
1316
subseg->bCtrl = ((SSegment*)iter->data)->bCtrl;
1318
((SSegment*)iter->data)->len = nAvailable;
1319
priv->slist = g_list_insert_before(priv->slist, iter->next, subseg);
1322
if (!transmit(self, iter, now)) {
1323
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "transmit failed");
1324
// TODO: consider closing socket
1333
closedown(PseudoTcpSocket *self, guint32 err)
1335
PseudoTcpSocketPrivate *priv = self->priv;
1338
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "State: TCP_CLOSED");
1339
priv->state = TCP_CLOSED;
1340
if (priv->callbacks.PseudoTcpClosed)
1341
priv->callbacks.PseudoTcpClosed(self, err, priv->callbacks.user_data);
1345
adjustMTU(PseudoTcpSocket *self)
1347
PseudoTcpSocketPrivate *priv = self->priv;
1349
// Determine our current mss level, so that we can adjust appropriately later
1350
for (priv->msslevel = 0;
1351
PACKET_MAXIMUMS[priv->msslevel + 1] > 0;
1353
if (((guint16)PACKET_MAXIMUMS[priv->msslevel]) <= priv->mtu_advise) {
1357
priv->mss = priv->mtu_advise - PACKET_OVERHEAD;
1358
// !?! Should we reset priv->largest here?
1359
DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Adjusting mss to %d bytes", priv->mss);
1360
// Enforce minimums on ssthresh and cwnd
1361
priv->ssthresh = max(priv->ssthresh, 2 * priv->mss);
1362
priv->cwnd = max(priv->cwnd, priv->mss);