From b081f85c2977b1cbb6e635d53d9512f1ef985972 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <edumazet@google.com> Date: Wed, 2 May 2012 09:58:29 +0000 Subject: [PATCH] net: implement tcp coalescing in tcp_queue_rcv() Extend tcp coalescing implementing it from tcp_queue_rcv(), the main receiver function when application is not blocked in recvmsg(). Function tcp_queue_rcv() is moved a bit to allow its call from tcp_data_queue() This gives good results especially if GRO could not kick, and if skb head is a fragment. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Alexander Duyck <alexander.h.duyck@intel.com> Cc: Neal Cardwell <ncardwell@google.com> Cc: Tom Herbert <therbert@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/tcp.h | 3 ++- net/ipv4/tcp.c | 10 +++++----- net/ipv4/tcp_input.c | 40 +++++++++++++++++++++------------------- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 5283aa4bfa23..c826ed7b007b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -439,7 +439,8 @@ extern int tcp_disconnect(struct sock *sk, int flags); void tcp_connect_init(struct sock *sk); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); -void tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen); +int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, + int hdrlen, bool *fragstolen); /* From syncookies.c */ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6802c89bc44d..c2cff8b62772 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -981,8 +981,8 @@ static inline int select_size(const struct sock *sk, bool sg) static int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) { struct sk_buff *skb; - struct tcp_skb_cb *cb; struct tcphdr *th; + bool fragstolen; skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); if (!skb) @@ -995,14 +995,14 @@ static int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size)) goto err_free; - cb = TCP_SKB_CB(skb); - TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size; TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1; - tcp_queue_rcv(sk, skb, sizeof(*th)); - + if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) { + WARN_ON_ONCE(fragstolen); /* should not happen */ + __kfree_skb(skb); + } return size; err_free: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a8829370f712..2f696ef13dcd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4739,6 +4739,22 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) skb_set_owner_r(skb, sk); } +int tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen, + bool *fragstolen) +{ + int eaten; + struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue); + + __skb_pull(skb, hdrlen); + eaten = (tail && + tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; + tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + if (!eaten) { + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + } + return eaten; +} static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { @@ -4785,20 +4801,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) } if (eaten <= 0) { - struct sk_buff *tail; queue_and_out: if (eaten < 0 && tcp_try_rmem_schedule(sk, skb->truesize)) goto drop; - tail = skb_peek_tail(&sk->sk_receive_queue); - eaten = (tail && - tcp_try_coalesce(sk, tail, skb, - &fragstolen)) ? 1 : 0; - if (eaten <= 0) { - skb_set_owner_r(skb, sk); - __skb_queue_tail(&sk->sk_receive_queue, skb); - } + eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); } tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if (skb->len) @@ -5493,14 +5501,6 @@ static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, return 0; } -void tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen) -{ - __skb_pull(skb, hdrlen); - __skb_queue_tail(&sk->sk_receive_queue, skb); - skb_set_owner_r(skb, sk); - tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; -} - /* * TCP receive function for the ESTABLISHED state. * @@ -5609,6 +5609,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, } else { int eaten = 0; int copied_early = 0; + bool fragstolen = false; if (tp->copied_seq == tp->rcv_nxt && len - tcp_header_len <= tp->ucopy.len) { @@ -5666,7 +5667,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ - tcp_queue_rcv(sk, skb, tcp_header_len); + eaten = tcp_queue_rcv(sk, skb, tcp_header_len, + &fragstolen); } tcp_event_data_recv(sk, skb); @@ -5688,7 +5690,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, else #endif if (eaten) - __kfree_skb(skb); + kfree_skb_partial(skb, fragstolen); else sk->sk_data_ready(sk, 0); return 0; -- GitLab