From b081f85c2977b1cbb6e635d53d9512f1ef985972 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 May 2012 09:58:29 +0000
Subject: [PATCH] net: implement tcp coalescing in tcp_queue_rcv()

Extend TCP coalescing by implementing it in tcp_queue_rcv(), the main
receive function when the application is not blocked in recvmsg().

tcp_queue_rcv() is moved earlier in tcp_input.c so that it can be called
from tcp_data_queue().

This gives good results, especially if GRO could not kick in and if the
skb head is a fragment.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    |  3 ++-
 net/ipv4/tcp.c       | 10 +++++-----
 net/ipv4/tcp_input.c | 40 +++++++++++++++++++++-------------------
 3 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5283aa4bfa23..c826ed7b007b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -439,7 +439,8 @@ extern int tcp_disconnect(struct sock *sk, int flags);
 
 void tcp_connect_init(struct sock *sk);
 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
-void tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen);
+int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
+			       int hdrlen, bool *fragstolen);
 
 /* From syncookies.c */
 extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6802c89bc44d..c2cff8b62772 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -981,8 +981,8 @@ static inline int select_size(const struct sock *sk, bool sg)
 static int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct sk_buff *skb;
-	struct tcp_skb_cb *cb;
 	struct tcphdr *th;
+	bool fragstolen;
 
 	skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
 	if (!skb)
@@ -995,14 +995,14 @@ static int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 	if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
 		goto err_free;
 
-	cb = TCP_SKB_CB(skb);
-
 	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
 	TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
 
-	tcp_queue_rcv(sk, skb, sizeof(*th));
-
+	if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) {
+		WARN_ON_ONCE(fragstolen); /* should not happen */
+		__kfree_skb(skb);
+	}
 	return size;
 
 err_free:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a8829370f712..2f696ef13dcd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4739,6 +4739,22 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		skb_set_owner_r(skb, sk);
 }
 
+int tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
+		  bool *fragstolen)
+{
+	int eaten;
+	struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
+
+	__skb_pull(skb, hdrlen);
+	eaten = (tail &&
+		 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
+	tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	if (!eaten) {
+		__skb_queue_tail(&sk->sk_receive_queue, skb);
+		skb_set_owner_r(skb, sk);
+	}
+	return eaten;
+}
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
@@ -4785,20 +4801,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		}
 
 		if (eaten <= 0) {
-			struct sk_buff *tail;
 queue_and_out:
 			if (eaten < 0 &&
 			    tcp_try_rmem_schedule(sk, skb->truesize))
 				goto drop;
 
-			tail = skb_peek_tail(&sk->sk_receive_queue);
-			eaten = (tail &&
-				 tcp_try_coalesce(sk, tail, skb,
-						  &fragstolen)) ? 1 : 0;
-			if (eaten <= 0) {
-				skb_set_owner_r(skb, sk);
-				__skb_queue_tail(&sk->sk_receive_queue, skb);
-			}
+			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 		}
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (skb->len)
@@ -5493,14 +5501,6 @@ static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 	return 0;
 }
 
-void tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen)
-{
-	__skb_pull(skb, hdrlen);
-	__skb_queue_tail(&sk->sk_receive_queue, skb);
-	skb_set_owner_r(skb, sk);
-	tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-}
-
 /*
  *	TCP receive function for the ESTABLISHED state.
  *
@@ -5609,6 +5609,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		} else {
 			int eaten = 0;
 			int copied_early = 0;
+			bool fragstolen = false;
 
 			if (tp->copied_seq == tp->rcv_nxt &&
 			    len - tcp_header_len <= tp->ucopy.len) {
@@ -5666,7 +5667,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
 
 				/* Bulk data transfer: receiver */
-				tcp_queue_rcv(sk, skb, tcp_header_len);
+				eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
+						      &fragstolen);
 			}
 
 			tcp_event_data_recv(sk, skb);
@@ -5688,7 +5690,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			else
 #endif
 			if (eaten)
-				__kfree_skb(skb);
+				kfree_skb_partial(skb, fragstolen);
 			else
 				sk->sk_data_ready(sk, 0);
 			return 0;
-- 
GitLab