tcp_illinois.c 8.1 KB
Newer Older
1 2 3 4 5 6 7 8
/*
 * TCP Illinois congestion control.
 * Home page:
 *	http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
 *
 * The algorithm is described in:
 * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
 *  for High-Speed Networks"
9
 * http://www.ifp.illinois.edu/~srikant/Papers/liubassri06perf.pdf
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 *
 * Implemented from description in paper and ns-2 simulation.
 * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
 */

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/inet_diag.h>
#include <asm/div64.h>
#include <net/tcp.h>

#define ALPHA_SHIFT	7
#define ALPHA_SCALE	(1u<<ALPHA_SHIFT)
#define ALPHA_MIN	((3*ALPHA_SCALE)/10)	/* ~0.3 */
#define ALPHA_MAX	(10*ALPHA_SCALE)	/* 10.0 */
#define ALPHA_BASE	ALPHA_SCALE		/* 1.0 */
S
Stephen Hemminger 已提交
26
#define RTT_MAX		(U32_MAX / ALPHA_MAX)	/* 3.3 secs */
27 28 29

#define BETA_SHIFT	6
#define BETA_SCALE	(1u<<BETA_SHIFT)
S
Stephen Hemminger 已提交
30 31 32
#define BETA_MIN	(BETA_SCALE/8)		/* 0.125 */
#define BETA_MAX	(BETA_SCALE/2)		/* 0.5 */
#define BETA_BASE	BETA_MAX
33 34

static int win_thresh __read_mostly = 15;
S
Stephen Hemminger 已提交
35
module_param(win_thresh, int, 0);
36 37
MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");

S
Stephen Hemminger 已提交
38 39 40
static int theta __read_mostly = 5;
module_param(theta, int, 0);
MODULE_PARM_DESC(theta, "# of fast RTT's before full growth");
41 42

/* TCP Illinois Parameters */
S
Stephen Hemminger 已提交
43 44 45 46 47 48 49 50 51 52 53
struct illinois {
	u64	sum_rtt;	/* sum of rtt's measured within last rtt */
	u16	cnt_rtt;	/* # of rtts measured within last rtt */
	u32	base_rtt;	/* min of all rtt in usec */
	u32	max_rtt;	/* max of all rtt in usec */
	u32	end_seq;	/* right edge of current RTT */
	u32	alpha;		/* Additive increase */
	u32	beta;		/* Muliplicative decrease */
	u16	acked;		/* # packets acked by current ACK */
	u8	rtt_above;	/* average rtt has gone above threshold */
	u8	rtt_low;	/* # of rtts measurements below threshold */
54 55
};

S
Stephen Hemminger 已提交
56 57 58 59 60 61 62 63 64 65 66 67
static void rtt_reset(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct illinois *ca = inet_csk_ca(sk);

	ca->end_seq = tp->snd_nxt;
	ca->cnt_rtt = 0;
	ca->sum_rtt = 0;

	/* TODO: age max_rtt? */
}

68 69
static void tcp_illinois_init(struct sock *sk)
{
S
Stephen Hemminger 已提交
70 71 72 73 74 75 76 77 78 79
	struct illinois *ca = inet_csk_ca(sk);

	ca->alpha = ALPHA_MAX;
	ca->beta = BETA_BASE;
	ca->base_rtt = 0x7fffffff;
	ca->max_rtt = 0;

	ca->acked = 0;
	ca->rtt_low = 0;
	ca->rtt_above = 0;
80

S
Stephen Hemminger 已提交
81
	rtt_reset(sk);
82 83
}

S
Stephen Hemminger 已提交
84
/* Measure RTT for each ack. */
85
static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, s32 rtt)
86
{
S
Stephen Hemminger 已提交
87
	struct illinois *ca = inet_csk_ca(sk);
88 89 90

	ca->acked = pkts_acked;

91 92
	/* dup ack, no rtt sample */
	if (rtt < 0)
93 94
		return;

S
Stephen Hemminger 已提交
95 96 97
	/* ignore bogus values, this prevents wraparound in alpha math */
	if (rtt > RTT_MAX)
		rtt = RTT_MAX;
98

S
Stephen Hemminger 已提交
99 100 101 102 103 104
	/* keep track of minimum RTT seen so far */
	if (ca->base_rtt > rtt)
		ca->base_rtt = rtt;

	/* and max */
	if (ca->max_rtt < rtt)
105 106
		ca->max_rtt = rtt;

S
Stephen Hemminger 已提交
107 108
	++ca->cnt_rtt;
	ca->sum_rtt += rtt;
109 110
}

S
Stephen Hemminger 已提交
111 112
/* Maximum queuing delay */
static inline u32 max_delay(const struct illinois *ca)
113
{
S
Stephen Hemminger 已提交
114 115
	return ca->max_rtt - ca->base_rtt;
}
116

S
Stephen Hemminger 已提交
117 118 119 120
/* Average queuing delay */
static inline u32 avg_delay(const struct illinois *ca)
{
	u64 t = ca->sum_rtt;
121

S
Stephen Hemminger 已提交
122 123
	do_div(t, ca->cnt_rtt);
	return t - ca->base_rtt;
124 125 126 127 128 129 130 131 132 133
}

/*
 * Compute value of alpha used for additive increase.
 * If small window then use 1.0, equivalent to Reno.
 *
 * For larger windows, adjust based on average delay.
 * A. If average delay is at minimum (we are uncongested),
 *    then use large alpha (10.0) to increase faster.
 * B. If average delay is at maximum (getting congested)
S
Stephen Hemminger 已提交
134
 *    then use small alpha (0.3)
135 136 137
 *
 * The result is a convex window growth curve.
 */
S
Stephen Hemminger 已提交
138
static u32 alpha(struct illinois *ca, u32 da, u32 dm)
139
{
S
Stephen Hemminger 已提交
140
	u32 d1 = dm / 100;	/* Low threshold */
141 142

	if (da <= d1) {
S
Stephen Hemminger 已提交
143 144
		/* If never got out of low delay zone, then use max */
		if (!ca->rtt_above)
145
			return ALPHA_MAX;
S
Stephen Hemminger 已提交
146 147 148 149 150 151 152 153 154 155

		/* Wait for 5 good RTT's before allowing alpha to go alpha max.
		 * This prevents one good RTT from causing sudden window increase.
		 */
		if (++ca->rtt_low < theta)
			return ca->alpha;

		ca->rtt_low = 0;
		ca->rtt_above = 0;
		return ALPHA_MAX;
156 157
	}

S
Stephen Hemminger 已提交
158
	ca->rtt_above = 1;
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177

	/*
	 * Based on:
	 *
	 *      (dm - d1) amin amax
	 * k1 = -------------------
	 *         amax - amin
	 *
	 *       (dm - d1) amin
	 * k2 = ----------------  - d1
	 *        amax - amin
	 *
	 *             k1
	 * alpha = ----------
	 *          k2 + da
	 */

	dm -= d1;
	da -= d1;
S
Stephen Hemminger 已提交
178 179
	return (dm * ALPHA_MAX) /
		(dm + (da  * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
180 181 182 183 184 185 186 187 188 189
}

/*
 * Beta used for multiplicative decrease.
 * For small window sizes returns same value as Reno (0.5)
 *
 * If delay is small (10% of max) then beta = 1/8
 * If delay is up to 80% of max then beta = 1/2
 * In between is a linear function
 */
S
Stephen Hemminger 已提交
190
static u32 beta(u32 da, u32 dm)
191 192 193 194 195 196
{
	u32 d2, d3;

	d2 = dm / 10;
	if (da <= d2)
		return BETA_MIN;
S
Stephen Hemminger 已提交
197

198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
	d3 = (8 * dm) / 10;
	if (da >= d3 || d3 <= d2)
		return BETA_MAX;

	/*
	 * Based on:
	 *
	 *       bmin d3 - bmax d2
	 * k3 = -------------------
	 *         d3 - d2
	 *
	 *       bmax - bmin
	 * k4 = -------------
	 *         d3 - d2
	 *
	 * b = k3 + k4 da
	 */
	return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da)
		/ (d3 - d2);
}

S
Stephen Hemminger 已提交
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
/* Update alpha and beta values once per RTT */
static void update_params(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct illinois *ca = inet_csk_ca(sk);

	if (tp->snd_cwnd < win_thresh) {
		ca->alpha = ALPHA_BASE;
		ca->beta = BETA_BASE;
	} else if (ca->cnt_rtt > 0) {
		u32 dm = max_delay(ca);
		u32 da = avg_delay(ca);

		ca->alpha = alpha(ca, da, dm);
		ca->beta = beta(da, dm);
	}

	rtt_reset(sk);
}

/*
 * In case of loss, reset to default values
 */
static void tcp_illinois_state(struct sock *sk, u8 new_state)
{
	struct illinois *ca = inet_csk_ca(sk);

	if (new_state == TCP_CA_Loss) {
		ca->alpha = ALPHA_BASE;
		ca->beta = BETA_BASE;
		ca->rtt_low = 0;
		ca->rtt_above = 0;
		rtt_reset(sk);
	}
}

/*
 * Increase window in response to successful acknowledgment.
 */
258
static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
S
Stephen Hemminger 已提交
259 260 261 262 263 264 265 266
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct illinois *ca = inet_csk_ca(sk);

	if (after(ack, ca->end_seq))
		update_params(sk);

	/* RFC2861 only increase cwnd if fully utilized */
267
	if (!tcp_is_cwnd_limited(sk))
S
Stephen Hemminger 已提交
268 269 270 271
		return;

	/* In slow start */
	if (tp->snd_cwnd <= tp->snd_ssthresh)
272
		tcp_slow_start(tp, acked);
S
Stephen Hemminger 已提交
273 274 275 276 277 278 279 280 281 282 283 284 285 286

	else {
		u32 delta;

		/* snd_cwnd_cnt is # of packets since last cwnd increment */
		tp->snd_cwnd_cnt += ca->acked;
		ca->acked = 1;

		/* This is close approximation of:
		 * tp->snd_cwnd += alpha/tp->snd_cwnd
		*/
		delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
		if (delta >= tp->snd_cwnd) {
			tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
S
stephen hemminger 已提交
287
					   (u32)tp->snd_cwnd_clamp);
S
Stephen Hemminger 已提交
288 289 290 291 292
			tp->snd_cwnd_cnt = 0;
		}
	}
}

293 294 295
static u32 tcp_illinois_ssthresh(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
S
Stephen Hemminger 已提交
296
	struct illinois *ca = inet_csk_ca(sk);
297 298

	/* Multiplicative decrease */
299
	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
300 301
}

S
Stephen Hemminger 已提交
302 303 304
/* Extract info for Tcp socket info provided via netlink. */
static void tcp_illinois_info(struct sock *sk, u32 ext,
			      struct sk_buff *skb)
305
{
S
Stephen Hemminger 已提交
306
	const struct illinois *ca = inet_csk_ca(sk);
307 308 309 310

	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
		struct tcpvegas_info info = {
			.tcpv_enabled = 1,
S
Stephen Hemminger 已提交
311 312
			.tcpv_rttcnt = ca->cnt_rtt,
			.tcpv_minrtt = ca->base_rtt,
313
		};
S
Stephen Hemminger 已提交
314

315 316
		if (info.tcpv_rttcnt > 0) {
			u64 t = ca->sum_rtt;
317

318 319 320
			do_div(t, info.tcpv_rttcnt);
			info.tcpv_rtt = t;
		}
321 322 323 324
		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
	}
}

325
static struct tcp_congestion_ops tcp_illinois __read_mostly = {
326 327 328
	.init		= tcp_illinois_init,
	.ssthresh	= tcp_illinois_ssthresh,
	.cong_avoid	= tcp_illinois_cong_avoid,
S
Stephen Hemminger 已提交
329 330 331
	.set_state	= tcp_illinois_state,
	.get_info	= tcp_illinois_info,
	.pkts_acked	= tcp_illinois_acked,
332 333 334 335 336 337 338

	.owner		= THIS_MODULE,
	.name		= "illinois",
};

static int __init tcp_illinois_register(void)
{
S
Stephen Hemminger 已提交
339
	BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
340 341 342 343 344 345 346 347 348 349 350 351 352 353
	return tcp_register_congestion_control(&tcp_illinois);
}

static void __exit tcp_illinois_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_illinois);
}

module_init(tcp_illinois_register);
module_exit(tcp_illinois_unregister);

MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP Illinois");
S
Stephen Hemminger 已提交
354
MODULE_VERSION("1.0");