diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8c54863dfc38c9006efd74a78029549fc2a0c9a7..5dce9705fe8415e31561ee2c1d52ef17aaeb7495 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -194,6 +194,12 @@ struct tcp_sock { u32 window_clamp; /* Maximal window to advertise */ u32 rcv_ssthresh; /* Current window clamp */ + /* Information of the most recently (s)acked skb */ + struct tcp_rack { + struct skb_mstamp mstamp; /* (Re)sent time of the skb */ + u8 advanced; /* mstamp advanced since last lost marking */ + u8 reord; /* reordering detected */ + } rack; u16 advmss; /* Advertised MSS */ u8 unused; u8 nonagle : 4,/* Disable Nagle algorithm? */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 4a43152229eadc8162805b35b0186b1054504d5c..3c3a9fe057d38be63d69bac740ce569e290cf005 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1750,6 +1750,11 @@ int tcpv4_offload_init(void); void tcp_v4_init(void); void tcp_init(void); +/* tcp_recovery.c */ + +extern void tcp_rack_advance(struct tcp_sock *tp, + const struct skb_mstamp *xmit_time, u8 sacked); + /* * Save and compile IPv4 options, return a pointer to it */ diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 89aacb630a53362bb9de51c7381ae9b6a799bdc3..c29809f765dc5d4d95edd5d6ac3cc321fcb97c88 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,6 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ + tcp_recovery.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1e97e73e5ecf1a3068355403a3c1a5ea52de98e0..ce8370525832f51021d16c1799cb7919c495d624 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1173,6 +1173,8 @@ static u8 tcp_sacktag_one(struct sock *sk, return sacked; if (!(sacked & TCPCB_SACKED_ACKED)) { + tcp_rack_advance(tp, xmit_time, sacked); + if (sacked & TCPCB_SACKED_RETRANS) { /* If the segment is not tagged as lost, * we do not clear RETRANS, believing @@ -2256,6 +2258,16 @@ static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when) before(tp->rx_opt.rcv_tsecr, when); } +/* skb is spurious retransmitted if the returned timestamp echo + * reply is prior to the skb transmission time + */ +static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, + const struct sk_buff *skb) +{ + return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) && + tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb)); +} + /* Nothing was retransmitted or returned timestamp is less * than timestamp of the first retransmission. */ @@ -3135,6 +3147,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= acked_pcount; + else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb)) + tcp_rack_advance(tp, &skb->skb_mstamp, sacked); if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b875c288daaa1b91c208b510ba0098946f83b89b..1fd5d413a6642b526c98edc0144b3ceed503bb9d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -548,6 +548,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, tcp_ecn_openreq_child(newtp, req); newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; + newtp->rack.mstamp.v64 = 0; + newtp->rack.advanced = 0; newtp->saved_syn = req->saved_syn; req->saved_syn = NULL; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c new file mode 100644 index 0000000000000000000000000000000000000000..8f66a6584845d55e6a54b5156ae42c47b6dfb1f1 --- /dev/null +++ b/net/ipv4/tcp_recovery.c @@ -0,0 +1,32 @@ +#include +#include + +/* Record the most recently (re)sent time among the (s)acked packets */ +void tcp_rack_advance(struct tcp_sock *tp, + const struct skb_mstamp *xmit_time, u8 sacked) +{ + if (tp->rack.mstamp.v64 && + !skb_mstamp_after(xmit_time, &tp->rack.mstamp)) + return; + + if (sacked & TCPCB_RETRANS) { + struct skb_mstamp now; + + /* If the sacked packet was retransmitted, it's ambiguous + * whether the retransmission or the original (or the prior + * retransmission) was sacked. + * + * If the original is lost, there is no ambiguity. Otherwise + * we assume the original can be delayed up to aRTT + min_rtt. + * the aRTT term is bounded by the fast recovery or timeout, + * so it's at least one RTT (i.e., retransmission is at least + * an RTT later). + */ + skb_mstamp_get(&now); + if (skb_mstamp_us_delta(&now, xmit_time) < tcp_min_rtt(tp)) + return; + } + + tp->rack.mstamp = *xmit_time; + tp->rack.advanced = 1; +}