tcp_yeah.c 7.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
/*
 *
 *   YeAH TCP
 *
 * For further details look at:
 *    http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
 *
 */

#include "tcp_yeah.h"

/* Default values of the Vegas variables, in fixed-point representation
 * with V_PARAM_SHIFT bits to the right of the binary point.
 */
#define V_PARAM_SHIFT 1

#define TCP_YEAH_ALPHA       80 //lin number of packets queued at the bottleneck
#define TCP_YEAH_GAMMA        1 //lin fraction of queue to be removed per rtt
#define TCP_YEAH_DELTA        3 //log minimum fraction of cwnd to be removed on loss
#define TCP_YEAH_EPSILON      1 //log maximum fraction to be removed on early decongestion
#define TCP_YEAH_PHY          8 //lin maximum delta from base
#define TCP_YEAH_RHO         16 //lin minumum number of consecutive rtt to consider competition on loss
#define TCP_YEAH_ZETA        50 //lin minimum number of state switchs to reset reno_count

#define TCP_SCALABLE_AI_CNT	 100U

/* YeAH variables */
struct yeah {
	/* Vegas */
	u32	beg_snd_nxt;	/* right edge during last RTT */
	u32	beg_snd_una;	/* left edge  during last RTT */
	u32	beg_snd_cwnd;	/* saves the size of the cwnd */
	u8	doing_vegas_now;/* if true, do vegas for this RTT */
	u16	cntRTT;		/* # of RTTs measured within last RTT */
	u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
	u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */

	/* YeAH */
	u32 lastQ;
	u32 doing_reno_now;

	u32 reno_count;
	u32 fast_count;

	u32 pkts_acked;
};

static void tcp_yeah_init(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct yeah *yeah = inet_csk_ca(sk);

	tcp_vegas_init(sk);

	yeah->doing_reno_now = 0;
	yeah->lastQ = 0;

	yeah->reno_count = 2;

	/* Ensure the MD arithmetic works.  This is somewhat pedantic,
	 * since I don't think we will see a cwnd this large. :) */
	tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);

}


static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct yeah *yeah = inet_csk_ca(sk);

	if (icsk->icsk_ca_state == TCP_CA_Open)
		yeah->pkts_acked = pkts_acked;
}

static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
S
Stephen Hemminger 已提交
77
				u32 seq_rtt, u32 in_flight, int flag)
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct yeah *yeah = inet_csk_ca(sk);

	if (!tcp_is_cwnd_limited(sk, in_flight))
		return;

	if (tp->snd_cwnd <= tp->snd_ssthresh) {
		tcp_slow_start(tp);
	} else if (!yeah->doing_reno_now) {
		/* Scalable */

		tp->snd_cwnd_cnt+=yeah->pkts_acked;
		if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
				tp->snd_cwnd++;
			tp->snd_cwnd_cnt = 0;
		}

		yeah->pkts_acked = 1;

	} else {
		/* Reno */

		if (tp->snd_cwnd_cnt < tp->snd_cwnd)
			tp->snd_cwnd_cnt++;

		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
			tp->snd_cwnd++;
			tp->snd_cwnd_cnt = 0;
		}
	}

	/* The key players are v_beg_snd_una and v_beg_snd_nxt.
	 *
	 * These are so named because they represent the approximate values
	 * of snd_una and snd_nxt at the beginning of the current RTT. More
	 * precisely, they represent the amount of data sent during the RTT.
	 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
	 * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
	 * bytes of data have been ACKed during the course of the RTT, giving
	 * an "actual" rate of:
	 *
	 *     (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
	 *
	 * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
	 * because delayed ACKs can cover more than one segment, so they
	 * don't line up yeahly with the boundaries of RTTs.
	 *
	 * Another unfortunate fact of life is that delayed ACKs delay the
	 * advance of the left edge of our send window, so that the number
	 * of bytes we send in an RTT is often less than our cwnd will allow.
	 * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
	 */

	if (after(ack, yeah->beg_snd_nxt)) {

		/* We do the Vegas calculations only if we got enough RTT
		 * samples that we can be reasonably sure that we got
		 * at least one RTT sample that wasn't from a delayed ACK.
		 * If we only had 2 samples total,
		 * then that means we're getting only 1 ACK per RTT, which
		 * means they're almost certainly delayed ACKs.
		 * If  we have 3 samples, we should be OK.
		 */

		if (yeah->cntRTT > 2) {
S
Stephen Hemminger 已提交
145 146
			u32 rtt, queue;
			u64 bw;
147 148 149 150 151 152 153 154 155 156 157 158 159 160

			/* We have enough RTT samples, so, using the Vegas
			 * algorithm, we determine if we should increase or
			 * decrease cwnd, and by how much.
			 */

			/* Pluck out the RTT we are using for the Vegas
			 * calculations. This is the min RTT seen during the
			 * last RTT. Taking the min filters out the effects
			 * of delayed ACKs, at the cost of noticing congestion
			 * a bit later.
			 */
			rtt = yeah->minRTT;

S
Stephen Hemminger 已提交
161 162 163 164 165 166 167 168 169 170 171 172 173 174
			/* Compute excess number of packets above bandwidth
			 * Avoid doing full 64 bit divide.
			 */
			bw = tp->snd_cwnd;
			bw *= rtt - yeah->baseRTT;
			do_div(bw, rtt);
			queue = bw;

			if (queue > TCP_YEAH_ALPHA ||
			    rtt - yeah->baseRTT > (yeah->baseRTT / TCP_YEAH_PHY)) {
				if (queue > TCP_YEAH_ALPHA
				    && tp->snd_cwnd > yeah->reno_count) {
					u32 reduction = min(queue / TCP_YEAH_GAMMA ,
							    tp->snd_cwnd >> TCP_YEAH_EPSILON);
175 176 177

					tp->snd_cwnd -= reduction;

S
Stephen Hemminger 已提交
178 179
					tp->snd_cwnd = max(tp->snd_cwnd,
							   yeah->reno_count);
180 181

					tp->snd_ssthresh = tp->snd_cwnd;
S
Stephen Hemminger 已提交
182
				}
183 184

				if (yeah->reno_count <= 2)
S
Stephen Hemminger 已提交
185
					yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
186 187 188
				else
					yeah->reno_count++;

S
Stephen Hemminger 已提交
189 190
				yeah->doing_reno_now = min(yeah->doing_reno_now + 1,
							   0xffffffU);
191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
			} else {
				yeah->fast_count++;

				if (yeah->fast_count > TCP_YEAH_ZETA) {
					yeah->reno_count = 2;
					yeah->fast_count = 0;
				}

				yeah->doing_reno_now = 0;
			}

			yeah->lastQ = queue;

		}

		/* Save the extent of the current window so we can use this
		 * at the end of the next RTT.
		 */
		yeah->beg_snd_una  = yeah->beg_snd_nxt;
		yeah->beg_snd_nxt  = tp->snd_nxt;
		yeah->beg_snd_cwnd = tp->snd_cwnd;

		/* Wipe the slate clean for the next RTT. */
		yeah->cntRTT = 0;
		yeah->minRTT = 0x7fffffff;
	}
}

static u32 tcp_yeah_ssthresh(struct sock *sk) {
	const struct tcp_sock *tp = tcp_sk(sk);
	struct yeah *yeah = inet_csk_ca(sk);
	u32 reduction;

	if (yeah->doing_reno_now < TCP_YEAH_RHO) {
		reduction = yeah->lastQ;

		reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) );

		reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
	} else
		reduction = max(tp->snd_cwnd>>1,2U);

	yeah->fast_count = 0;
	yeah->reno_count = max(yeah->reno_count>>1, 2U);

	return tp->snd_cwnd - reduction;
}

static struct tcp_congestion_ops tcp_yeah = {
	.init		= tcp_yeah_init,
	.ssthresh	= tcp_yeah_ssthresh,
	.cong_avoid	= tcp_yeah_cong_avoid,
	.min_cwnd	= tcp_reno_min_cwnd,
	.rtt_sample	= tcp_vegas_rtt_calc,
	.set_state	= tcp_vegas_state,
	.cwnd_event	= tcp_vegas_cwnd_event,
	.get_info	= tcp_vegas_get_info,
	.pkts_acked	= tcp_yeah_pkts_acked,

	.owner		= THIS_MODULE,
	.name		= "yeah",
};

static int __init tcp_yeah_register(void)
{
	BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
	tcp_register_congestion_control(&tcp_yeah);
	return 0;
}

static void __exit tcp_yeah_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_yeah);
}

module_init(tcp_yeah_register);
module_exit(tcp_yeah_unregister);

MODULE_AUTHOR("Angelo P. Castellani");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("YeAH TCP");