提交 f8071cde 编写于 作者: E Eric Dumazet 提交者: David S. Miller

tcp: enhance tcp_collapse_retrans() with skb_shift()

In commit 2331ccc5 ("tcp: enhance tcp collapsing"),
we made a first step allowing copying right skb to left skb head.

Since all skbs in socket write queue are headless (but possibly the very
first one), this strategy often does not work.

This patch extends tcp_collapse_retrans() to perform frag shifting,
thanks to skb_shift() helper.

This helper needs to not BUG on non headless skbs, as callers are ok
with that.

Tested:

Following packetdrill test now passes :

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

   +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 8>
   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.100 < . 1:1(0) ack 1 win 257
   +0 accept(3, ..., ...) = 4

   +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
   +0 write(4, ..., 200) = 200
   +0 > P. 1:201(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 201:401(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 401:601(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 601:801(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 801:1001(200) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1001:1101(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1101:1201(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1201:1301(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1301:1401(100) ack 1

+.099 < . 1:1(0) ack 201 win 257
+.001 < . 1:1(0) ack 201 win 257 <nop,nop,sack 1001:1401>
   +0 > P. 201:1001(800) ack 1
Signed-off-by: NEric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: NYuchung Cheng <ycheng@google.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 7d381a02
......@@ -2656,7 +2656,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
struct skb_frag_struct *fragfrom, *fragto;
BUG_ON(shiftlen > skb->len);
BUG_ON(skb_headlen(skb)); /* Would corrupt stream */
if (skb_headlen(skb))
return 0;
todo = shiftlen;
from = 0;
......
......@@ -2514,7 +2514,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
}
/* Collapses two adjacent SKB's during retransmission. */
static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
......@@ -2525,14 +2525,17 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
if (next_skb_size) {
if (next_skb_size <= skb_availroom(skb))
skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
next_skb_size);
else if (!skb_shift(skb, next_skb, next_skb_size))
return false;
}
tcp_highest_sack_combine(sk, next_skb, skb);
tcp_unlink_write_queue(next_skb, sk);
if (next_skb_size)
skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
next_skb_size);
if (next_skb->ip_summed == CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_PARTIAL;
......@@ -2561,6 +2564,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
tcp_skb_collapse_tstamp(skb, next_skb);
sk_wmem_free_skb(sk, next_skb);
return true;
}
/* Check if coalescing SKBs is legal. */
......@@ -2610,16 +2614,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
if (space < 0)
break;
/* Punt if not enough space exists in the first SKB for
* the data in the second
*/
if (skb->len > skb_availroom(to))
break;
if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
break;
tcp_collapse_retrans(sk, to);
if (!tcp_collapse_retrans(sk, to))
break;
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册