/*
 * TCP Westwood+: end-to-end bandwidth estimation for TCP
 *
 *      Angelo Dell'Aera: author of the first version of TCP Westwood+ in Linux 2.4
 *
 * Support at http://c3lab.poliba.it/index.php/Westwood
 * Main references in literature:
 *
 * - S. Mascolo, C. Casetti, M. Gerla et al.
 *   "TCP Westwood: bandwidth estimation for TCP" Proc. ACM Mobicom 2001
 *
 * - A. Grieco, S. Mascolo
 *   "Performance evaluation of New Reno, Vegas, Westwood+ TCP" ACM Computer
 *     Comm. Review, 2004
 *
 * - A. Dell'Aera, L. Grieco, S. Mascolo.
 *   "Linux 2.4 Implementation of Westwood+ TCP with Rate-Halving :
 *    A Performance Evaluation Over the Internet" (ICC 2004), Paris, June 2004
 *
 * Westwood+ employs end-to-end bandwidth measurement to set cwnd and
 * ssthresh after packet loss. The probing phase is the same as in the
 * original Reno.
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/inet_diag.h>
#include <net/tcp.h>

/* TCP Westwood structure */
struct westwood {
	u32    bw_ns_est;        /* first bandwidth estimation..not too smoothed 8) */
	u32    bw_est;           /* bandwidth estimate */
	u32    rtt_win_sx;       /* here starts a new evaluation... */
	u32    bk;               /* bytes acked during the current sampling window */
	u32    snd_una;          /* used for evaluating the number of acked bytes */
	u32    cumul_ack;        /* bytes acked by the latest ACK */
	u32    accounted;        /* bytes already accounted for via duplicate ACKs */
	u32    rtt;              /* last RTT sample, taken from the smoothed RTT */
	u32    rtt_min;          /* minimum observed RTT */
	u8     first_ack;        /* flag which indicates that this is the first ack */
	u8     reset_rtt_min;    /* Reset RTT min to next RTT sample */
};


/* TCP Westwood functions and constants */
#define TCP_WESTWOOD_RTT_MIN   (HZ/20)	/* 50ms */
#define TCP_WESTWOOD_INIT_RTT  (20*HZ)	/* maybe too conservative?! */

/*
 * @tcp_westwood_init
 * This function initializes the fields used by TCP Westwood+. It is
 * called after the initial SYN, so the sequence numbers are correct,
 * but for new passive connections we have no information about RTTmin
 * at this time, so we simply set it to TCP_WESTWOOD_INIT_RTT. This
 * value was deliberately chosen to be overly conservative, so that it
 * is certain to be updated with a consistent sample as soon as
 * possible, which will reasonably happen within the first RTT of the
 * connection's lifetime.
 */
static void tcp_westwood_init(struct sock *sk)
{
	struct westwood *w = inet_csk_ca(sk);

	w->bk = 0;
	w->bw_ns_est = 0;
	w->bw_est = 0;
	w->accounted = 0;
	w->cumul_ack = 0;
	w->reset_rtt_min = 1;
	w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT;
	w->rtt_win_sx = tcp_time_stamp;
	w->snd_una = tcp_sk(sk)->snd_una;
	w->first_ack = 1;
}

/*
 * @westwood_do_filter
 * Low-pass filter. Implemented using constant coefficients.
 */
static inline u32 westwood_do_filter(u32 a, u32 b)
{
	return (((7 * a) + b) >> 3);
}
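
/*
 * Note: (7 * a + b) >> 3 keeps 7/8 of the old value and adds 1/8 of the
 * new sample, i.e. an exponentially weighted moving average with gain
 * 1/8.  For example, a previous value a = 800 and a new sample b = 1600
 * give (7 * 800 + 1600) >> 3 = 900.
 */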

static void westwood_filter(struct westwood *w, u32 delta)
{
	/* If the filter is empty fill it with the first sample of bandwidth  */
	if (w->bw_ns_est == 0 && w->bw_est == 0) {
		w->bw_ns_est = w->bk / delta;
		w->bw_est = w->bw_ns_est;
	} else {
		w->bw_ns_est = westwood_do_filter(w->bw_ns_est, w->bk / delta);
		w->bw_est = westwood_do_filter(w->bw_est, w->bw_ns_est);
	}
}
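
/*
 * Note: w->bk is a byte count and delta is measured in jiffies, so both
 * bw_ns_est and bw_est are expressed in bytes per jiffy.
 */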

/*
 * @westwood_pkts_acked
 * Called after processing a group of packets, but all Westwood needs
 * is the last sample of srtt.
 */
static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
{
	struct westwood *w = inet_csk_ca(sk);
	if (cnt > 0)
		w->rtt = tcp_sk(sk)->srtt >> 3;
}

/*
 * @westwood_update_window
 * It updates the RTT evaluation window if it is the right moment to
 * do so. If so, it calls the filter to evaluate the bandwidth.
 */
static void westwood_update_window(struct sock *sk)
{
	struct westwood *w = inet_csk_ca(sk);
	s32 delta = tcp_time_stamp - w->rtt_win_sx;

	/* Initialize w->snd_una with the first acked sequence number in order
	 * to fix mismatch between tp->snd_una and w->snd_una for the first
	 * bandwidth sample
	 */
	if (w->first_ack) {
		w->snd_una = tcp_sk(sk)->snd_una;
		w->first_ack = 0;
	}

	/*
	 * See if an RTT window has passed.
	 * Be careful: if the RTT is less than 50 ms we don't filter but
	 * keep 'building the sample', since bandwidth estimates over very
	 * short intervals are best avoided.
	 * Obviously on a LAN we will reasonably always have
	 * right_bound = left_bound + TCP_WESTWOOD_RTT_MIN.
	 */
	if (w->rtt && delta > max_t(u32, w->rtt, TCP_WESTWOOD_RTT_MIN)) {
		westwood_filter(w, delta);

		w->bk = 0;
		w->rtt_win_sx = tcp_time_stamp;
	}
}
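
/*
 * Example (assuming HZ = 1000, so one jiffy is 1 ms): with w->rtt = 80
 * jiffies the window is max(80, 50) = 80 jiffies, so at most one
 * bandwidth sample (w->bk / delta) is produced every 80 ms; with
 * w->rtt = 20 jiffies the 50 ms floor applies instead.
 */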

static inline void update_rtt_min(struct westwood *w)
{
	if (w->reset_rtt_min) {
		w->rtt_min = w->rtt;
		w->reset_rtt_min = 0;
	} else
		w->rtt_min = min(w->rtt, w->rtt_min);
}


/*
 * @westwood_fast_bw
 * It is called when we are on the fast path, in particular when header
 * prediction is successful. In that case the update is straightforward
 * and doesn't need any particular care.
 */
static inline void westwood_fast_bw(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct westwood *w = inet_csk_ca(sk);

	westwood_update_window(sk);

	w->bk += tp->snd_una - w->snd_una;
	w->snd_una = tp->snd_una;
	update_rtt_min(w);
}

/*
 * @westwood_acked_count
 * This function evaluates cumul_ack, which is used to update bk, in
 * the case of delayed or partial acks.
 */
static inline u32 westwood_acked_count(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct westwood *w = inet_csk_ca(sk);

	w->cumul_ack = tp->snd_una - w->snd_una;

	/* If cumul_ack is 0 this is a dupack since it's not moving
	 * tp->snd_una.
	 */
	if (!w->cumul_ack) {
		w->accounted += tp->mss_cache;
		w->cumul_ack = tp->mss_cache;
	}

	if (w->cumul_ack > tp->mss_cache) {
		/* Partial or delayed ack */
		if (w->accounted >= w->cumul_ack) {
			w->accounted -= w->cumul_ack;
			w->cumul_ack = tp->mss_cache;
		} else {
			w->cumul_ack -= w->accounted;
			w->accounted = 0;
		}
	}

	w->snd_una = tp->snd_una;

	return w->cumul_ack;
}
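
/*
 * Worked example (assuming a hypothetical mss_cache of 1000 bytes):
 * three duplicate ACKs each return one MSS, so bk grows by 3000 bytes
 * and accounted reaches 3000.  A later cumulative ACK advancing
 * snd_una by 5000 bytes then returns only 5000 - 3000 = 2000 bytes and
 * resets accounted to 0, so bk ends up counting the 5000 acked bytes
 * exactly once.
 */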


/*
 * TCP Westwood
 * Here the limit is evaluated as the bandwidth estimate multiplied by
 * RTTmin (to obtain it in packets we divide by mss_cache). The result
 * is clamped to a minimum of 2, so this never returns 0.
 */
static u32 tcp_westwood_bw_rttmin(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct westwood *w = inet_csk_ca(sk);
	return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2);
}
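
/*
 * Worked example (hypothetical numbers): with bw_est = 3000 bytes per
 * jiffy, rtt_min = 5 jiffies and mss_cache = 1500 bytes, the limit is
 * (3000 * 5) / 1500 = 10 packets.
 */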

static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct westwood *w = inet_csk_ca(sk);

	switch(event) {
	case CA_EVENT_FAST_ACK:
		westwood_fast_bw(sk);
		break;

	case CA_EVENT_COMPLETE_CWR:
		tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
		break;

	case CA_EVENT_FRTO:
		tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
		/* Update RTT_min when next ack arrives */
		w->reset_rtt_min = 1;
		break;

	case CA_EVENT_SLOW_ACK:
		westwood_update_window(sk);
		w->bk += westwood_acked_count(sk);
		update_rtt_min(w);
		break;

	default:
		/* don't care */
		break;
	}
}


/* Extract info for TCP sockets, provided via netlink. */
static void tcp_westwood_info(struct sock *sk, u32 ext,
			      struct sk_buff *skb)
{
	const struct westwood *ca = inet_csk_ca(sk);
	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
		struct rtattr *rta;
		struct tcpvegas_info *info;

		rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info));
		info = RTA_DATA(rta);
		info->tcpv_enabled = 1;
		info->tcpv_rttcnt = 0;
		info->tcpv_rtt = jiffies_to_usecs(ca->rtt);
		info->tcpv_minrtt = jiffies_to_usecs(ca->rtt_min);
	rtattr_failure:	;
	}
}


static struct tcp_congestion_ops tcp_westwood = {
	.init		= tcp_westwood_init,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.min_cwnd	= tcp_westwood_bw_rttmin,
	.cwnd_event	= tcp_westwood_event,
	.get_info	= tcp_westwood_info,
	.pkts_acked	= tcp_westwood_pkts_acked,

	.owner		= THIS_MODULE,
	.name		= "westwood"
};

static int __init tcp_westwood_register(void)
{
	BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
	return tcp_register_congestion_control(&tcp_westwood);
}

static void __exit tcp_westwood_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_westwood);
}

module_init(tcp_westwood_register);
module_exit(tcp_westwood_unregister);

MODULE_AUTHOR("Stephen Hemminger, Angelo Dell'Aera");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP Westwood+");