tcp: remove tcp_tw_recycle

The tcp_tw_recycle was already broken for connections behind NAT, since the per-destination timestamp is not monotonically increasing for multiple machines behind a single destination address. After the randomization of TCP timestamp offsets in commit 8a5bd45f6616 (tcp: randomize tcp timestamp offsets for each connection), the tcp_tw_recycle is broken for all types of connections for the same reason: the timestamps received from a single machine is not monotonically increasing, anymore. Remove tcp_tw_recycle, since it is not functional. Also, remove the PAWSPassive SNMP counter since it is only used for tcp_tw_recycle, and simplify tcp_v4_route_req and tcp_v6_route_req since the strict argument is only set when tcp_tw_recycle is enabled. Signed-off-by: N Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: N Eric Dumazet <edumazet@google.com> Signed-off-by: N Neal Cardwell <ncardwell@google.com> Signed-off-by: N Yuchung Cheng <ycheng@google.com> Cc: Lutz Vieweg <lvml@5t9.de> Cc: Florian Westphal <fw@strlen.de> Signed-off-by: N David S. Miller <davem@davemloft.net>

tcp: remove tcp_tw_recycle
The tcp_tw_recycle was already broken for connections behind NAT, since the per-destination timestamp is not monotonically increasing for multiple machines behind a single destination address. After the randomization of TCP timestamp offsets in commit 8a5bd45f6616 (tcp: randomize tcp timestamp offsets for each connection), the tcp_tw_recycle is broken for all types of connections for the same reason: the timestamps received from a single machine is not monotonically increasing, anymore. Remove tcp_tw_recycle, since it is not functional. Also, remove the PAWSPassive SNMP counter since it is only used for tcp_tw_recycle, and simplify tcp_v4_route_req and tcp_v6_route_req since the strict argument is only set when tcp_tw_recycle is enabled. Signed-off-by: N Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: N Eric Dumazet <edumazet@google.com> Signed-off-by: N Neal Cardwell <ncardwell@google.com> Signed-off-by: N Yuchung Cheng <ycheng@google.com> Cc: Lutz Vieweg <lvml@5t9.de> Cc: Florian Westphal <fw@strlen.de> Signed-off-by: N David S. Miller <davem@davemloft.net>
4396e461 · Soheil Hassas Yeganeh · David S. Miller · d82bae12 · 4396e461 · 4396e461
9 changed file
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -640,11 +640,6 @@ tcp_tso_win_divisor - INTEGER
 	building larger TSO frames.
 	Default: 3
-tcp_tw_recycle - BOOLEAN
-	Enable fast recycling TIME-WAIT sockets. Default value is 0.
-	It should not be changed without advice/request of technical
-	experts.
 tcp_tw_reuse - BOOLEAN
 	Allow to reuse TIME-WAIT sockets for new connections when it is
 	safe from protocol viewpoint. Default value is 0.

--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -33,7 +33,6 @@ struct inet_timewait_death_row {
 	atomic_t		tw_count;
 	struct inet_hashinfo 	*hashinfo ____cacheline_aligned_in_smp;
-	int			sysctl_tw_recycle;
 	int			sysctl_max_tw_buckets;
 };

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1810,8 +1810,7 @@ struct tcp_request_sock_ops {
 				 __u16 *mss);
 #endif
 	struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
-				       const struct request_sock *req,
+				       const struct request_sock *req);
-				       bool *strict);
 	__u32 (*init_seq_tsoff)(const struct sk_buff *skb, u32 *tsoff);
 	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
 			   struct flowi *fl, struct request_sock *req,

--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -177,7 +177,6 @@ enum
 	LINUX_MIB_TIMEWAITED,			/* TimeWaited */
 	LINUX_MIB_TIMEWAITRECYCLED,		/* TimeWaitRecycled */
 	LINUX_MIB_TIMEWAITKILLED,		/* TimeWaitKilled */
-	LINUX_MIB_PAWSPASSIVEREJECTED,		/* PAWSPassiveRejected */
 	LINUX_MIB_PAWSACTIVEREJECTED,		/* PAWSActiveRejected */
 	LINUX_MIB_PAWSESTABREJECTED,		/* PAWSEstabRejected */
 	LINUX_MIB_DELAYEDACKS,			/* DelayedACKs */

--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -199,7 +199,6 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED),
 	SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED),
 	SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
-	SNMP_MIB_ITEM("PAWSPassive", LINUX_MIB_PAWSPASSIVEREJECTED),
 	SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
 	SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
 	SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),

--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -980,13 +980,6 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
-	{
-		.procname	= "tcp_tw_recycle",
-		.data		= &init_net.ipv4.tcp_death_row.sysctl_tw_recycle,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
 	{
 		.procname	= "tcp_max_syn_backlog",
 		.data		= &init_net.ipv4.sysctl_max_syn_backlog,

--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6327,31 +6327,11 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
 	if (!want_cookie && !isn) {
-		/* VJ's idea. We save last timestamp seen
-		 * from the destination in peer table, when entering
-		 * state TIME-WAIT, and check against it before
-		 * accepting new connection request.
-		 *
-		 * If "isn" is not zero, this request hit alive
-		 * timewait bucket, so that all the necessary checks
-		 * are made in the function processing timewait state.
-		 */
-		if (net->ipv4.tcp_death_row.sysctl_tw_recycle) {
-			bool strict;
-			dst = af_ops->route_req(sk, &fl, req, &strict);
-			if (dst && strict &&
-			    !tcp_peer_is_proven(req, dst)) {
-				NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
-				goto drop_and_release;
-			}
-		}
 		/* Kill the following clause, if you dislike this way. */
-		else if (!net->ipv4.sysctl_tcp_syncookies &&
+		if (!net->ipv4.sysctl_tcp_syncookies &&
-			 (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+		    (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
-			  (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+		     (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
-			 !tcp_peer_is_proven(req, dst)) {
+		    !tcp_peer_is_proven(req, dst)) {
 			/* Without syncookies last quarter of
 			 * backlog is filled with destinations,
 			 * proven to be alive.
@@ -6367,7 +6347,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
 	}
 	if (!dst) {
-		dst = af_ops->route_req(sk, &fl, req, NULL);
+		dst = af_ops->route_req(sk, &fl, req);
 		if (!dst)
 			goto drop_and_free;
 	}

--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1213,19 +1213,9 @@ static void tcp_v4_init_req(struct request_sock *req,
 static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
 					  struct flowi *fl,
-					  const struct request_sock *req,
+					  const struct request_sock *req)
-					  bool *strict)
 {
-	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
+	return inet_csk_route_req(sk, &fl->u.ip4, req);
-	if (strict) {
-		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
-			*strict = true;
-		else
-			*strict = false;
-	}
-	return dst;
 }
 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
@@ -2462,7 +2452,6 @@ static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.sysctl_tcp_tw_reuse = 0;
 	cnt = tcp_hashinfo.ehash_mask + 1;
-	net->ipv4.tcp_death_row.sysctl_tw_recycle = 0;
 	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
 	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;

--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -722,11 +722,8 @@ static void tcp_v6_init_req(struct request_sock *req,
 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
 					  struct flowi *fl,
-					  const struct request_sock *req,
+					  const struct request_sock *req)
-					  bool *strict)
 {
-	if (strict)
-		*strict = true;
 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
 }