diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 98e0fd241a255063a54e5eda7620278d16f4f26d..c9ccb550206fb0accb948bd42ab655b4f3fefcb1 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -439,6 +439,7 @@ enum
 	NET_TCP_AVAIL_CONG_CONTROL=122,
 	NET_TCP_ALLOWED_CONG_CONTROL=123,
 	NET_TCP_MAX_SSTHRESH=124,
+	NET_TCP_FRTO_RESPONSE=125,
 };
 
 enum {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6d09f5085f6a9ef4c6a46648b2e79a4cc912cfa2..f0c9e3400a090491598e438367e13d688829e9e0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -220,6 +220,7 @@ extern int sysctl_tcp_app_win;
 extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_tw_reuse;
 extern int sysctl_tcp_frto;
+extern int sysctl_tcp_frto_response;
 extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_dma_copybreak;
 extern int sysctl_tcp_nometrics_save;
@@ -738,7 +739,7 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
 	tp->left_out = tp->sacked_out + tp->lost_out;
 }
 
-extern void tcp_enter_cwr(struct sock *sk);
+extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
 extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
 
 /* Slow start with delack produces 3 packets of burst, so that
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d68effe98e8db7d6d994651c5bd1ce028115f6de..6817d6485df5c195430ab808c2c345e8d6e15ba4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -646,6 +646,14 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+	{
+		.ctl_name	= NET_TCP_FRTO_RESPONSE,
+		.procname	= "tcp_frto_response",
+		.data		= &sysctl_tcp_frto_response,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 	{
 		.ctl_name	= NET_TCP_LOW_LATENCY,
 		.procname	= "tcp_low_latency",
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f6ba07f0d81688ed81ed962bd1425f5548d962fe..322e43c56461a950c14ee369d6ccf2842e015157 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -86,6 +86,7 @@ int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_frto_response __read_mostly;
 int sysctl_tcp_nometrics_save __read_mostly;
 
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
@@ -762,15 +763,17 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 }
 
 /* Set slow start threshold and cwnd not falling to slow start */
-void tcp_enter_cwr(struct sock *sk)
+void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tp->prior_ssthresh = 0;
 	tp->bytes_acked = 0;
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
-		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+		if (set_ssthresh)
+			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tp->snd_cwnd = min(tp->snd_cwnd,
 				   tcp_packets_in_flight(tp) + 1U);
 		tp->snd_cwnd_cnt = 0;
@@ -2003,7 +2006,7 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
 		tp->retrans_stamp = 0;
 
 	if (flag&FLAG_ECE)
-		tcp_enter_cwr(sk);
+		tcp_enter_cwr(sk, 1);
 
 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
 		int state = TCP_CA_Open;
@@ -2579,6 +2582,21 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 	tcp_moderate_cwnd(tp);
 }
 
+/* A conservative spurious RTO response algorithm: reduce cwnd using
+ * rate halving and continue in congestion avoidance.
+ */
+static void tcp_ratehalving_spur_to_response(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	tcp_enter_cwr(sk, 0);
+	tp->high_seq = tp->frto_highmark;	/* Smoother w/o this? - ij */
+}
+
+static void tcp_undo_spur_to_response(struct sock *sk)
+{
+	tcp_undo_cwr(sk, 1);
+}
+
 /* F-RTO spurious RTO detection algorithm (RFC4138)
  *
  * F-RTO affects during two new ACKs following RTO (well, almost, see inline
@@ -2661,7 +2679,17 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		tp->frto_counter = 2;
 		return 1;
 	} else /* frto_counter == 2 */ {
-		tcp_conservative_spur_to_response(tp);
+		switch (sysctl_tcp_frto_response) {
+		case 2:
+			tcp_undo_spur_to_response(sk);
+			break;
+		case 1:
+			tcp_conservative_spur_to_response(tp);
+			break;
+		default:
+			tcp_ratehalving_spur_to_response(sk);
+			break;
+		};
 		tp->frto_counter = 0;
 	}
 	return 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3c24881f2a6598941fbc5663d8bc6c2263e0ee21..d19b2f3b70fd0a53af71a36688e17d7d4b320222 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -545,7 +545,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (likely(err <= 0))
 		return err;
 
-	tcp_enter_cwr(sk);
+	tcp_enter_cwr(sk, 1);
 
 	return net_xmit_eval(err);