提交 8c72c65b 编写于 作者: E Eric Dumazet 提交者: David S. Miller

tcp: update skb->skb_mstamp more carefully

liujian reported a problem in TCP_USER_TIMEOUT processing with a patch
in tcp_probe_timer() :
      https://www.spinics.net/lists/netdev/msg454496.html

After investigations, the root cause of the problem is that we update
skb->skb_mstamp of skbs in write queue, even if the attempt to send a
clone or copy of it failed. One reason being a routing problem.

This patch prevents this, solving liujian issue.

It also removes a potential RTT miscalculation, since
__tcp_retransmit_skb() is not OR-ing TCP_SKB_CB(skb)->sacked with
TCPCB_EVER_RETRANS if a failure happens, but skb->skb_mstamp has
been changed.

A future ACK would then lead to a very small RTT sample and min_rtt
would then be lowered to this too small value.

Tested:

# cat user_timeout.pkt
--local_ip=192.168.102.64

    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

   +0 `ifconfig tun0 192.168.102.64/16; ip ro add 192.0.2.1 dev tun0`

   +0 < S 0:0(0) win 0 <mss 1460>
   +0 > S. 0:0(0) ack 1 <mss 1460>

  +.1 < . 1:1(0) ack 1 win 65530
   +0 accept(3, ..., ...) = 4

   +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
   +0 write(4, ..., 24) = 24
   +0 > P. 1:25(24) ack 1 win 29200
   +.1 < . 1:1(0) ack 25 win 65530

//change the ipaddress
   +1 `ifconfig tun0 192.168.0.10/16`

   +1 write(4, ..., 24) = 24
   +1 write(4, ..., 24) = 24
   +1 write(4, ..., 24) = 24
   +1 write(4, ..., 24) = 24

   +0 `ifconfig tun0 192.168.102.64/16`
   +0 < . 1:2(1) ack 25 win 65530
   +0 `ifconfig tun0 192.168.0.10/16`

   +3 write(4, ..., 24) = -1

# ./packetdrill user_timeout.pkt
Signed-off-by: NEric Dumazet <edumazet@googl.com>
Reported-by: Nliujian <liujian56@huawei.com>
Acked-by: NNeal Cardwell <ncardwell@google.com>
Acked-by: NYuchung Cheng <ycheng@google.com>
Acked-by: NSoheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 cbea8f02
...@@ -991,6 +991,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, ...@@ -991,6 +991,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
struct tcp_skb_cb *tcb; struct tcp_skb_cb *tcb;
struct tcp_out_options opts; struct tcp_out_options opts;
unsigned int tcp_options_size, tcp_header_size; unsigned int tcp_options_size, tcp_header_size;
struct sk_buff *oskb = NULL;
struct tcp_md5sig_key *md5; struct tcp_md5sig_key *md5;
struct tcphdr *th; struct tcphdr *th;
int err; int err;
...@@ -998,12 +999,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, ...@@ -998,12 +999,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
BUG_ON(!skb || !tcp_skb_pcount(skb)); BUG_ON(!skb || !tcp_skb_pcount(skb));
tp = tcp_sk(sk); tp = tcp_sk(sk);
skb->skb_mstamp = tp->tcp_mstamp;
if (clone_it) { if (clone_it) {
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una; - tp->snd_una;
tcp_rate_skb_sent(sk, skb); tcp_rate_skb_sent(sk, skb);
oskb = skb;
if (unlikely(skb_cloned(skb))) if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask); skb = pskb_copy(skb, gfp_mask);
else else
...@@ -1011,6 +1012,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, ...@@ -1011,6 +1012,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (unlikely(!skb)) if (unlikely(!skb))
return -ENOBUFS; return -ENOBUFS;
} }
skb->skb_mstamp = tp->tcp_mstamp;
inet = inet_sk(sk); inet = inet_sk(sk);
tcb = TCP_SKB_CB(skb); tcb = TCP_SKB_CB(skb);
...@@ -1122,12 +1124,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, ...@@ -1122,12 +1124,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
if (likely(err <= 0)) if (unlikely(err > 0)) {
return err; tcp_enter_cwr(sk);
err = net_xmit_eval(err);
tcp_enter_cwr(sk); }
if (!err && oskb)
oskb->skb_mstamp = tp->tcp_mstamp;
return net_xmit_eval(err); return err;
} }
/* This routine just queues the buffer for sending. /* This routine just queues the buffer for sending.
...@@ -2869,10 +2873,11 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) ...@@ -2869,10 +2873,11 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
skb_headroom(skb) >= 0xFFFF)) { skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb; struct sk_buff *nskb;
skb->skb_mstamp = tp->tcp_mstamp;
nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-ENOBUFS; -ENOBUFS;
if (!err)
skb->skb_mstamp = tp->tcp_mstamp;
} else { } else {
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册