提交 deed7be7 编写于 作者: Y Yuchung Cheng 提交者: David S. Miller

tcp: record most recent RTT in RACK loss detection

Record the most recent RTT in RACK. It is often identical to the
"ca_rtt_us" values in tcp_clean_rtx_queue. But when the packet has
been retransmitted, RACK chooses to believe the ACK is for the
(latest) retransmitted packet if the RTT is over minimum RTT.

This requires passing the arrival time of the most recent ACK to
RACK routines. The timestamp is now recorded in the "ack_time"
field of tcp_sacktag_state during the ACK processing.

This patch does not change the RACK algorithm itself. It only adds
the RTT variable to prepare the next main patch.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 e636f8b0
...@@ -207,6 +207,7 @@ struct tcp_sock { ...@@ -207,6 +207,7 @@ struct tcp_sock {
/* Information of the most recently (s)acked skb */ /* Information of the most recently (s)acked skb */
struct tcp_rack { struct tcp_rack {
struct skb_mstamp mstamp; /* (Re)sent time of the skb */ struct skb_mstamp mstamp; /* (Re)sent time of the skb */
u32 rtt_us; /* Associated RTT */
u8 advanced; /* mstamp advanced since last lost marking */ u8 advanced; /* mstamp advanced since last lost marking */
u8 reord; /* reordering detected */ u8 reord; /* reordering detected */
} rack; } rack;
......
...@@ -1863,9 +1863,10 @@ extern int sysctl_tcp_recovery; ...@@ -1863,9 +1863,10 @@ extern int sysctl_tcp_recovery;
/* Use TCP RACK to detect (some) tail and retransmit losses */ /* Use TCP RACK to detect (some) tail and retransmit losses */
#define TCP_RACK_LOST_RETRANS 0x1 #define TCP_RACK_LOST_RETRANS 0x1
extern void tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now);
extern void tcp_rack_advance(struct tcp_sock *tp, extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
const struct skb_mstamp *xmit_time, u8 sacked); const struct skb_mstamp *xmit_time,
const struct skb_mstamp *ack_time);
/* /*
* Save and compile IPv4 options, return a pointer to it * Save and compile IPv4 options, return a pointer to it
......
...@@ -1135,6 +1135,7 @@ struct tcp_sacktag_state { ...@@ -1135,6 +1135,7 @@ struct tcp_sacktag_state {
*/ */
struct skb_mstamp first_sackt; struct skb_mstamp first_sackt;
struct skb_mstamp last_sackt; struct skb_mstamp last_sackt;
struct skb_mstamp ack_time; /* Timestamp when the S/ACK was received */
struct rate_sample *rate; struct rate_sample *rate;
int flag; int flag;
}; };
...@@ -1217,7 +1218,7 @@ static u8 tcp_sacktag_one(struct sock *sk, ...@@ -1217,7 +1218,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
return sacked; return sacked;
if (!(sacked & TCPCB_SACKED_ACKED)) { if (!(sacked & TCPCB_SACKED_ACKED)) {
tcp_rack_advance(tp, xmit_time, sacked); tcp_rack_advance(tp, sacked, xmit_time, &state->ack_time);
if (sacked & TCPCB_SACKED_RETRANS) { if (sacked & TCPCB_SACKED_RETRANS) {
/* If the segment is not tagged as lost, /* If the segment is not tagged as lost,
...@@ -2813,7 +2814,8 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked) ...@@ -2813,7 +2814,8 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked)
* tcp_xmit_retransmit_queue(). * tcp_xmit_retransmit_queue().
*/ */
static void tcp_fastretrans_alert(struct sock *sk, const int acked, static void tcp_fastretrans_alert(struct sock *sk, const int acked,
bool is_dupack, int *ack_flag, int *rexmit) bool is_dupack, int *ack_flag, int *rexmit,
const struct skb_mstamp *ack_time)
{ {
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
...@@ -2868,7 +2870,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, ...@@ -2868,7 +2870,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS) { if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS) {
u32 prior_retrans = tp->retrans_out; u32 prior_retrans = tp->retrans_out;
tcp_rack_mark_lost(sk); tcp_rack_mark_lost(sk, ack_time);
if (prior_retrans > tp->retrans_out) { if (prior_retrans > tp->retrans_out) {
flag |= FLAG_LOST_RETRANS; flag |= FLAG_LOST_RETRANS;
*ack_flag |= FLAG_LOST_RETRANS; *ack_flag |= FLAG_LOST_RETRANS;
...@@ -3105,11 +3107,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, ...@@ -3105,11 +3107,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
*/ */
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
u32 prior_snd_una, int *acked, u32 prior_snd_una, int *acked,
struct tcp_sacktag_state *sack, struct tcp_sacktag_state *sack)
struct skb_mstamp *now)
{ {
const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_connection_sock *icsk = inet_csk(sk);
struct skb_mstamp first_ackt, last_ackt; struct skb_mstamp first_ackt, last_ackt;
struct skb_mstamp *now = &sack->ack_time;
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out; u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out; u32 reord = tp->packets_out;
...@@ -3169,7 +3171,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ...@@ -3169,7 +3171,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
} else if (tcp_is_sack(tp)) { } else if (tcp_is_sack(tp)) {
tp->delivered += acked_pcount; tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb)) if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, &skb->skb_mstamp, sacked); tcp_rack_advance(tp, sacked,
&skb->skb_mstamp,
&sack->ack_time);
} }
if (sacked & TCPCB_LOST) if (sacked & TCPCB_LOST)
tp->lost_out -= acked_pcount; tp->lost_out -= acked_pcount;
...@@ -3599,7 +3603,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3599,7 +3603,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 lost = tp->lost; u32 lost = tp->lost;
int acked = 0; /* Number of packets newly acked */ int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
struct skb_mstamp now;
sack_state.first_sackt.v64 = 0; sack_state.first_sackt.v64 = 0;
sack_state.rate = &rs; sack_state.rate = &rs;
...@@ -3625,7 +3628,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3625,7 +3628,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, tp->snd_nxt)) if (after(ack, tp->snd_nxt))
goto invalid_ack; goto invalid_ack;
skb_mstamp_get(&now); skb_mstamp_get(&sack_state.ack_time);
if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
...@@ -3693,11 +3696,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3693,11 +3696,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */ /* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
&sack_state, &now); &sack_state);
if (tcp_ack_is_dubious(sk, flag)) { if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
&sack_state.ack_time);
} }
if (tp->tlp_high_seq) if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag); tcp_process_tlp_ack(sk, ack, flag);
...@@ -3712,15 +3716,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3712,15 +3716,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_schedule_loss_probe(sk); tcp_schedule_loss_probe(sk);
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */ lost = tp->lost - lost; /* freshly marked lost */
tcp_rate_gen(sk, delivered, lost, &now, &rs); tcp_rate_gen(sk, delivered, lost, &sack_state.ack_time,
tcp_cong_control(sk, ack, delivered, flag, &rs); sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit); tcp_xmit_recovery(sk, rexmit);
return 1; return 1;
no_queue: no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */ /* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK) if (flag & FLAG_DSACKING_ACK)
tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
&sack_state.ack_time);
/* If this ack opens up a zero window, clear backoff. It was /* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than * being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission. * it needs to be for normal retransmission.
...@@ -3741,9 +3747,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3741,9 +3747,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* If data was DSACKed, see if we can undo a cwnd reduction. * If data was DSACKed, see if we can undo a cwnd reduction.
*/ */
if (TCP_SKB_CB(skb)->sacked) { if (TCP_SKB_CB(skb)->sacked) {
skb_mstamp_get(&sack_state.ack_time);
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state); &sack_state);
tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
&sack_state.ack_time);
tcp_xmit_recovery(sk, rexmit); tcp_xmit_recovery(sk, rexmit);
} }
......
...@@ -32,7 +32,7 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) ...@@ -32,7 +32,7 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
* The current version is only used after recovery starts but can be * The current version is only used after recovery starts but can be
* easily extended to detect the first loss. * easily extended to detect the first loss.
*/ */
static void tcp_rack_detect_loss(struct sock *sk) static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb; struct sk_buff *skb;
...@@ -62,13 +62,14 @@ static void tcp_rack_detect_loss(struct sock *sk) ...@@ -62,13 +62,14 @@ static void tcp_rack_detect_loss(struct sock *sk)
continue; continue;
if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) { if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) {
/* Step 3 in draft-cheng-tcpm-rack-00.txt:
if (skb_mstamp_us_delta(&tp->rack.mstamp, * A packet is lost if its elapsed time is beyond
&skb->skb_mstamp) <= reo_wnd) * the recent RTT plus the reordering window.
continue; */
if (skb_mstamp_us_delta(now, &skb->skb_mstamp) >
/* skb is lost if packet sent later is sacked */ tp->rack.rtt_us + reo_wnd) {
tcp_rack_mark_skb_lost(sk, skb); tcp_rack_mark_skb_lost(sk, skb);
}
} else if (!(scb->sacked & TCPCB_RETRANS)) { } else if (!(scb->sacked & TCPCB_RETRANS)) {
/* Original data are sent sequentially so stop early /* Original data are sent sequentially so stop early
* b/c the rest are all sent after rack_sent * b/c the rest are all sent after rack_sent
...@@ -78,7 +79,7 @@ static void tcp_rack_detect_loss(struct sock *sk) ...@@ -78,7 +79,7 @@ static void tcp_rack_detect_loss(struct sock *sk)
} }
} }
void tcp_rack_mark_lost(struct sock *sk) void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
...@@ -86,20 +87,25 @@ void tcp_rack_mark_lost(struct sock *sk) ...@@ -86,20 +87,25 @@ void tcp_rack_mark_lost(struct sock *sk)
return; return;
/* Reset the advanced flag to avoid unnecessary queue scanning */ /* Reset the advanced flag to avoid unnecessary queue scanning */
tp->rack.advanced = 0; tp->rack.advanced = 0;
tcp_rack_detect_loss(sk); tcp_rack_detect_loss(sk, now);
} }
/* Record the most recently (re)sent time among the (s)acked packets */ /* Record the most recently (re)sent time among the (s)acked packets
void tcp_rack_advance(struct tcp_sock *tp, * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
const struct skb_mstamp *xmit_time, u8 sacked) * draft-cheng-tcpm-rack-00.txt
*/
void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
const struct skb_mstamp *xmit_time,
const struct skb_mstamp *ack_time)
{ {
u32 rtt_us;
if (tp->rack.mstamp.v64 && if (tp->rack.mstamp.v64 &&
!skb_mstamp_after(xmit_time, &tp->rack.mstamp)) !skb_mstamp_after(xmit_time, &tp->rack.mstamp))
return; return;
rtt_us = skb_mstamp_us_delta(ack_time, xmit_time);
if (sacked & TCPCB_RETRANS) { if (sacked & TCPCB_RETRANS) {
struct skb_mstamp now;
/* If the sacked packet was retransmitted, it's ambiguous /* If the sacked packet was retransmitted, it's ambiguous
* whether the retransmission or the original (or the prior * whether the retransmission or the original (or the prior
* retransmission) was sacked. * retransmission) was sacked.
...@@ -110,11 +116,10 @@ void tcp_rack_advance(struct tcp_sock *tp, ...@@ -110,11 +116,10 @@ void tcp_rack_advance(struct tcp_sock *tp,
* so it's at least one RTT (i.e., retransmission is at least * so it's at least one RTT (i.e., retransmission is at least
* an RTT later). * an RTT later).
*/ */
skb_mstamp_get(&now); if (rtt_us < tcp_min_rtt(tp))
if (skb_mstamp_us_delta(&now, xmit_time) < tcp_min_rtt(tp))
return; return;
} }
tp->rack.rtt_us = rtt_us;
tp->rack.mstamp = *xmit_time; tp->rack.mstamp = *xmit_time;
tp->rack.advanced = 1; tp->rack.advanced = 1;
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册