From d510f70642795107fcff4c35d662e6563cef1025 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 22 Jul 2019 17:30:50 +0800
Subject: [PATCH] tcp: be more careful in tcp_fragment()

mainline inclusion
from mainline-v5.2
commit b617158dc096709d8600c53b6052144d12b89fab
category: bugfix
bugzilla: 18679
CVE: NA

-------------------------------------------------

Some applications set tiny SO_SNDBUF values and expect
TCP to just work. Recent patches to address CVE-2019-11478
broke them in case of losses, since retransmits might
be prevented.

We should allow these flows to make progress.

This patch allows the first and last skb in retransmit queue
to be split even if memory limits are hit.

It also adds some room due to the fact that tcp_sendmsg()
and tcp_sendpage() might overshoot sk_wmem_queued by about one full
TSO skb (64KB size). Note this allowance was already present
in stable backports for kernels < 4.15

Note for < 4.15 backports :
 tcp_rtx_queue_tail() will probably look like :

static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk)
{
	struct sk_buff *skb = tcp_send_head(sk);

	return skb ? tcp_write_queue_prev(sk, skb) : tcp_write_queue_tail(sk);
}

Fixes: f070ef2ac667 ("tcp: tcp_fragment() should apply sane memory limits")
Signed-off-by: Eric Dumazet
Reported-by: Andrew Prout
Tested-by: Andrew Prout
Tested-by: Jonathan Lemon
Tested-by: Michal Kubecek
Acked-by: Neal Cardwell
Acked-by: Yuchung Cheng
Acked-by: Christoph Paasch
Cc: Jonathan Looney
Signed-off-by: David S. Miller
Signed-off-by: Yang Yingliang
Reviewed-by: Wenan Mao
Signed-off-by: Yang Yingliang
---
 include/net/tcp.h     |  5 +++++
 net/ipv4/tcp_output.c | 13 +++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 96d6f233c616..3f721b952d7f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1646,6 +1646,11 @@ static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
 	return skb_rb_first(&sk->tcp_rtx_queue);
 }
 
+static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk)
+{
+	return skb_rb_last(&sk->tcp_rtx_queue);
+}
+
 static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
 {
 	return skb_peek(&sk->sk_write_queue);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 221d9b72423b..88c7e821fd11 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1289,6 +1289,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
 	int nsize, old_factor;
+	long limit;
 	int nlen;
 	u8 flags;
 
@@ -1299,8 +1300,16 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	if (nsize < 0)
 		nsize = 0;
 
-	if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf &&
-		     tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) {
+	/* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
+	 * We need some allowance to not penalize applications setting small
+	 * SO_SNDBUF values.
+	 * Also allow first and last skb in retransmit queue to be split.
+	 */
+	limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+	if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
+		     tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
+		     skb != tcp_rtx_queue_head(sk) &&
+		     skb != tcp_rtx_queue_tail(sk))) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
 		return -ENOMEM;
 	}
-- 
GitLab
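
Editor's note (not part of the patch): a minimal user-space sketch of why the relaxed
limit matters for tiny SO_SNDBUF sockets. GSO_MAX_SIZE matches the kernel default of
64KB; the SKB_TRUESIZE() overhead below is an assumed approximation, since the real
macro adds struct sk_buff and skb_shared_info bookkeeping.

/*
 * Illustrative sketch only, not kernel code: compares the old and new
 * tcp_fragment() memory checks for a socket with a tiny SO_SNDBUF whose
 * write queue was overshot by roughly one full TSO skb.
 */
#include <stdio.h>

#define GSO_MAX_SIZE	65536L
#define SKB_OVERHEAD	512L			/* assumed overhead, for illustration */
#define SKB_TRUESIZE(x)	((x) + SKB_OVERHEAD)

int main(void)
{
	long sk_sndbuf = 4096;				/* tiny SO_SNDBUF set by the application */
	long sk_wmem_queued = 2 * GSO_MAX_SIZE;		/* overshoot from one full TSO skb */

	long old_limit = sk_sndbuf;
	long new_limit = sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);

	printf("old check: %s\n", (sk_wmem_queued >> 1) > old_limit ?
	       "split refused (-ENOMEM)" : "split allowed");
	printf("new check: %s\n", (sk_wmem_queued >> 1) > new_limit ?
	       "split refused (-ENOMEM)" : "split allowed");
	return 0;
}

With these assumed values the old check refuses the split (so retransmits stall), while
the new limit, plus the first/last-skb exemptions in the patch, lets the flow make progress.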