ccid3.c 36.2 KB
Newer Older
1 2 3 4
/*
 *  net/dccp/ccids/ccid3.c
 *
 *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5
 *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
 *
 *  An implementation of the DCCP protocol
 *
 *  This code has been developed by the University of Waikato WAND
 *  research group. For further information please see http://www.wand.net.nz/
 *
 *  This code also uses code from Lulea University, rereleased as GPL by its
 *  authors:
 *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
 *
 *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
 *  and to make it work as a loadable module in the DCCP stack written by
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
 *
 *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include "../ccid.h"
#include "../dccp.h"
38
#include "lib/packet_history.h"
39
#include "lib/loss_interval.h"
40
#include "lib/tfrc.h"
41 42
#include "ccid3.h"

43 44 45
/*
 * Debug output helper. With CONFIG_IP_DCCP_CCID3_DEBUG set, messages are
 * handed to DCCP_PR_DEBUG() together with the ccid3_debug flag; otherwise
 * the macro expands to nothing, so its arguments are never evaluated.
 */
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static int ccid3_debug;
#define ccid3_pr_debug(format, a...)	DCCP_PR_DEBUG(ccid3_debug, format, ##a)
#else
#define ccid3_pr_debug(format, a...)
#endif

50 51
/* File-wide handles passed to the dccp_{tx,rx,li}_hist helper routines */
static struct dccp_tx_hist *ccid3_tx_hist;
static struct dccp_rx_hist *ccid3_rx_hist;
static struct dccp_li_hist *ccid3_li_hist;
53

54 55 56
/*
 *	Transmitter Half-Connection Routines
 */
57
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
58 59 60 61 62 63 64 65 66 67 68 69 70
/*
 * Map a sender state to a printable name (used for debug output only).
 * The table is indexed directly by @state, so @state has to be one of the
 * TFRC_SSTATE_* values listed in the initialiser.
 */
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
	/* Read-only table: neither the pointers nor the literals may change */
	static const char *const ccid3_state_names[] = {
	[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
	[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
	[TFRC_SSTATE_FBACK]    = "FBACK",
	[TFRC_SSTATE_TERM]     = "TERM",
	};

	return ccid3_state_names[state];
}
#endif

71 72
static void ccid3_hc_tx_set_state(struct sock *sk,
				  enum ccid3_hc_tx_states state)
73
{
74
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
75 76 77
	enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
78 79
		       dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
		       ccid3_tx_state_name(state));
80 81 82 83
	WARN_ON(state == oldstate);
	hctx->ccid3hctx_state = state;
}

84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
/*
 * Initial sending rate X_init as per RFC 3390:
 *	w_init   =    min(4 * MSS, max(2 * MSS, 4380 bytes))
 *	X_init   =    w_init / RTT
 * The result is scaled by 2^6, like the other rate values in this file.
 * MSS is taken from the socket's cached MSS, RTT from the sender state.
 */
static inline u64 rfc3390_initial_rate(struct sock *sk)
{
	const __u32 mss = dccp_sk(sk)->dccps_mss_cache;
	__u32 w_init = max(2 * mss, 4380U);	/* lower bound of the window */

	w_init = min(4 * mss, w_init);		/* cap at four segments */

	return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt);
}

99
/*
100
 * Recalculate t_ipi and delta (should be called whenever X changes)
101
 */
102
static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
103
{
104
	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
105 106
	hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
					     hctx->ccid3hctx_x);
107

108
	/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
109 110
	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
					   TFRC_OPSYS_HALF_TIME_GRAN);
I
Ian McDonald 已提交
111

112
	ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n",
I
Ian McDonald 已提交
113
		       hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta,
114
		       hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6));
I
Ian McDonald 已提交
115

116 117 118 119
}
/*
 * Update X by
 *    If (p > 0)
120
 *       X_calc = calcX(s, R, p);
121 122 123 124 125
 *       X = max(min(X_calc, 2 * X_recv), s / t_mbi);
 *    Else
 *       If (now - tld >= R)
 *          X = max(min(2 * X, 2 * X_recv), s / R);
 *          tld = now;
126
 *
127 128 129 130 131
 * Note: X and X_recv are both stored in units of 64 * bytes/second, to support
 *       fine-grained resolution of sending rates. This requires scaling by 2^6
 *       throughout the code. Only X_calc is unscaled (in bytes/second).
 *
 */
132 133
static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)

134
{
135
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
136
	__u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
137
	const  __u64 old_x = hctx->ccid3hctx_x;
138

139 140 141 142 143 144 145 146 147 148
	/*
	 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
	 * when idling [RFC 4342, 5.1]. See also draft-ietf-dccp-rfc3448bis.
	 * For consistency with X and X_recv, min_rate is also scaled by 2^6.
	 */
	if (unlikely(hctx->ccid3hctx_idle)) {
		min_rate = rfc3390_initial_rate(sk);
		min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv);
	}

149
	if (hctx->ccid3hctx_p > 0) {
150

151
		hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
152
					min_rate);
153 154
		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
					(((__u64)hctx->ccid3hctx_s) << 6) /
155
								TFRC_T_MBI);
156

G
Gerrit Renker 已提交
157
	} else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) -
158
			(suseconds_t)hctx->ccid3hctx_rtt >= 0) {
159

160
		hctx->ccid3hctx_x =
161
			max(min(2 * hctx->ccid3hctx_x, min_rate),
162
			    scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
163
				       hctx->ccid3hctx_rtt));
164
		hctx->ccid3hctx_t_ld = *now;
165
	}
166

I
Ian McDonald 已提交
167
	if (hctx->ccid3hctx_x != old_x) {
168 169 170 171 172
		ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
			       "X_recv=%u\n", (unsigned)(old_x >> 6),
			       (unsigned)(hctx->ccid3hctx_x >> 6),
			       hctx->ccid3hctx_x_calc,
			       (unsigned)(hctx->ccid3hctx_x_recv >> 6));
I
Ian McDonald 已提交
173

174
		ccid3_update_send_interval(hctx);
I
Ian McDonald 已提交
175
	}
176 177
}

178
/*
179 180
 *	Track the mean packet size `s' (cf. RFC 4342, 5.3 and  RFC 3448, 4.1)
 *	@len: DCCP packet payload size in bytes
181 182 183
 */
static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
{
184 185 186 187 188 189
	const u16 old_s = hctx->ccid3hctx_s;

	hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10;

	if (hctx->ccid3hctx_s != old_s)
		ccid3_update_send_interval(hctx);
190 191
}

192
/*
193 194
 *	Update Window Counter using the algorithm from [RFC 4342, 8.1].
 *	The algorithm is not applicable if RTT < 4 microseconds.
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
 */
static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
						struct timeval *now)
{
	suseconds_t delta;
	u32 quarter_rtts;

	if (unlikely(hctx->ccid3hctx_rtt < 4))	/* avoid divide-by-zero */
		return;

	delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count);
	DCCP_BUG_ON(delta < 0);

	quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4);

	if (quarter_rtts > 0) {
		hctx->ccid3hctx_t_last_win_count = *now;
		hctx->ccid3hctx_last_win_count	+= min_t(u32, quarter_rtts, 5);
		hctx->ccid3hctx_last_win_count	&= 0xF;		/* mod 16 */

		ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count);
	}
}

219 220 221
static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
222
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
223
	struct timeval now;
224
	unsigned long t_nfb = USEC_PER_SEC / 5;
225 226 227 228 229

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		/* XXX: set some sensible MIB */
230
		goto restart_timer;
231 232
	}

233
	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
234
		       ccid3_tx_state_name(hctx->ccid3hctx_state));
235

236 237
	hctx->ccid3hctx_idle = 1;

238 239
	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_FBACK:
240
		/* RFC 3448, 4.4: Halve send rate directly */
241 242 243
		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2,
					(((__u64)hctx->ccid3hctx_s) << 6) /
								    TFRC_T_MBI);
244

245 246
		ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u "
			       "bytes/s\n", dccp_role(sk), sk,
247
			       ccid3_tx_state_name(hctx->ccid3hctx_state),
248
			       (unsigned)(hctx->ccid3hctx_x >> 6));
249 250
		/* The value of R is still undefined and so we can not recompute
		 * the timout value. Keep initial value as per [RFC 4342, 5]. */
251
		t_nfb = TFRC_INITIAL_TIMEOUT;
252
		ccid3_update_send_interval(hctx);
253 254
		break;
	case TFRC_SSTATE_FBACK:
255
		/*
256 257 258 259 260 261 262 263
		 *  Modify the cached value of X_recv [RFC 3448, 4.4]
		 *
		 *  If (p == 0 || X_calc > 2 * X_recv)
		 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
		 *  Else
		 *    X_recv = X_calc / 4;
		 *
		 *  Note that X_recv is scaled by 2^6 while X_calc is not
264
		 */
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
		BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);

		if (hctx->ccid3hctx_p == 0 ||
		    (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) {

			hctx->ccid3hctx_x_recv =
				max(hctx->ccid3hctx_x_recv / 2,
				    (((__u64)hctx->ccid3hctx_s) << 6) /
							      (2 * TFRC_T_MBI));

			if (hctx->ccid3hctx_p == 0)
				dccp_timestamp(sk, &now);
		} else {
			hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
			hctx->ccid3hctx_x_recv <<= 4;
280
		}
281 282
		/* Now recalculate X [RFC 3448, 4.3, step (4)] */
		ccid3_hc_tx_update_x(sk, &now);
283 284
		/*
		 * Schedule no feedback timer to expire in
285 286
		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
		 * See comments in packet_recv() regarding the value of t_RTO.
287
		 */
288
		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
289
		break;
290
	case TFRC_SSTATE_NO_SENT:
291
		DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk);
292 293
		/* fall through */
	case TFRC_SSTATE_TERM:
294 295 296
		goto out;
	}

297 298
restart_timer:
	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
299
			   jiffies + usecs_to_jiffies(t_nfb));
300 301 302 303 304
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

305 306 307 308 309 310
/*
 * returns
 *   > 0: delay (in msecs) that should pass before actually sending
 *   = 0: can send immediately
 *   < 0: error condition; do not send packet
 */
311
static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
312 313
{
	struct dccp_sock *dp = dccp_sk(sk);
314
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
315
	struct timeval now;
G
Gerrit Renker 已提交
316
	suseconds_t delay;
317

318
	BUG_ON(hctx == NULL);
319

320
	/*
321 322 323
	 * This function is called only for Data and DataAck packets. Sending
	 * zero-sized Data(Ack)s is theoretically possible, but for congestion
	 * control this case is pathological - ignore it.
324
	 */
325
	if (unlikely(skb->len == 0))
326
		return -EBADMSG;
327

328
	dccp_timestamp(sk, &now);
329 330 331

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_SENT:
332
		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
333
			       (jiffies +
334
				usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
335 336
		hctx->ccid3hctx_last_win_count	 = 0;
		hctx->ccid3hctx_t_last_win_count = now;
337 338

		/* Set t_0 for initial packet */
339
		hctx->ccid3hctx_t_nom = now;
340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360

		hctx->ccid3hctx_s = skb->len;

		/*
		 * Use initial RTT sample when available: recommended by erratum
		 * to RFC 4342. This implements the initialisation procedure of
		 * draft rfc3448bis, section 4.2. Remember, X is scaled by 2^6.
		 */
		if (dp->dccps_syn_rtt) {
			ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
			hctx->ccid3hctx_rtt  = dp->dccps_syn_rtt;
			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
			hctx->ccid3hctx_t_ld = now;
		} else {
			/* Sender does not have RTT sample: X = MSS/second */
			hctx->ccid3hctx_x = dp->dccps_mss_cache;
			hctx->ccid3hctx_x <<= 6;
		}
		ccid3_update_send_interval(hctx);

		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
361 362 363
		break;
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
364
		delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
I
Ian McDonald 已提交
365
		ccid3_pr_debug("delay=%ld\n", (long)delay);
366
		/*
367
		 *	Scheduling of packet transmissions [RFC 3448, 4.6]
368 369 370 371 372 373
		 *
		 * if (t_now > t_nom - delta)
		 *       // send the packet now
		 * else
		 *       // send the packet in (t_nom - t_now) milliseconds.
		 */
G
Gerrit Renker 已提交
374
		if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0)
375
			return delay / 1000L;
376 377

		ccid3_hc_tx_update_win_count(hctx, &now);
378
		break;
379
	case TFRC_SSTATE_TERM:
380
		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
381
		return -EINVAL;
382 383
	}

384 385
	/* prepare to send now (add options etc.) */
	dp->dccps_hc_tx_insert_options = 1;
386
	DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
387
	hctx->ccid3hctx_idle = 0;
388 389

	/* set the nominal send time for the next following packet */
390 391 392
	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);

	return 0;
393 394
}

395 396
static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
				    unsigned int len)
397
{
398
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
399
	struct timeval now;
400
	struct dccp_tx_hist_entry *packet;
401

402
	BUG_ON(hctx == NULL);
403

404
	ccid3_hc_tx_update_s(hctx, len);
405

406
	packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC);
407
	if (unlikely(packet == NULL)) {
408
		DCCP_CRIT("packet history - out of memory!");
409 410
		return;
	}
411 412 413
	dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet);

	dccp_timestamp(sk, &now);
414
	packet->dccphtx_tstamp = now;
415 416 417
	packet->dccphtx_seqno  = dccp_sk(sk)->dccps_gss;
	packet->dccphtx_rtt    = hctx->ccid3hctx_rtt;
	packet->dccphtx_sent   = 1;
418 419 420 421
}

static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
422
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
423
	struct ccid3_options_received *opt_recv;
424
	struct dccp_tx_hist_entry *packet;
425
	struct timeval now;
426
	unsigned long t_nfb;
427
	u32 pinv, r_sample;
428

429
	BUG_ON(hctx == NULL);
430 431 432 433 434 435 436 437 438 439 440

	/* we are only interested in ACKs */
	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
		return;

	opt_recv = &hctx->ccid3hctx_options_received;

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
441
		/* get packet from history to look up t_recvdata */
442
		packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
443
					      DCCP_SKB_CB(skb)->dccpd_ack_seq);
444
		if (unlikely(packet == NULL)) {
445
			DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist "
446
				  "in history!\n",  dccp_role(sk), sk,
447
			    (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
448
				dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
449 450 451
			return;
		}

452
		/* Update receive rate in units of 64 * bytes/second */
453 454
		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
		hctx->ccid3hctx_x_recv <<= 6;
455 456 457

		/* Update loss event rate */
		pinv = opt_recv->ccid3or_loss_event_rate;
458
		if (pinv == ~0U || pinv == 0)	       /* see RFC 4342, 8.5   */
459
			hctx->ccid3hctx_p = 0;
460
		else				       /* can not exceed 100% */
461
			hctx->ccid3hctx_p = 1000000 / pinv;
462

463
		dccp_timestamp(sk, &now);
464 465 466

		/*
		 * Calculate new round trip sample as per [RFC 3448, 4.3] by
467
		 *	R_sample  =  (now - t_recvdata) - t_elapsed
468
		 */
469
		r_sample = dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp);
470

471 472
		/*
		 * Update RTT estimate by
473 474 475 476 477 478 479 480
		 * If (No feedback recv)
		 *    R = R_sample;
		 * Else
		 *    R = q * R + (1 - q) * R_sample;
		 *
		 * q is a constant, RFC 3448 recomments 0.9
		 */
		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
481 482 483
			/*
			 * Larger Initial Windows [RFC 4342, sec. 5]
			 */
484
			hctx->ccid3hctx_rtt  = r_sample;
485
			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
486 487
			hctx->ccid3hctx_t_ld = now;

488
			ccid3_update_send_interval(hctx);
489

490
			ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
491
				       "R_sample=%uus, X=%u\n", dccp_role(sk),
A
Andrew Morton 已提交
492
				       sk, hctx->ccid3hctx_s,
493
				       dccp_sk(sk)->dccps_mss_cache, r_sample,
494
				       (unsigned)(hctx->ccid3hctx_x >> 6));
495

496 497 498
			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
		} else {
			hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
499
						   r_sample) / 10;
500

501 502 503 504 505 506
			/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
			if (hctx->ccid3hctx_p > 0)
				hctx->ccid3hctx_x_calc =
					tfrc_calc_x(hctx->ccid3hctx_s,
						    hctx->ccid3hctx_rtt,
						    hctx->ccid3hctx_p);
507
			ccid3_hc_tx_update_x(sk, &now);
508

509
			ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
510 511
				       "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
				       dccp_role(sk),
512
				       sk, hctx->ccid3hctx_rtt, r_sample,
513 514
				       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
				       hctx->ccid3hctx_x_calc,
515
				       (unsigned)(hctx->ccid3hctx_x_recv >> 6),
516
				       (unsigned)(hctx->ccid3hctx_x >> 6));
517 518 519 520 521 522
		}

		/* unschedule no feedback timer */
		sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);

		/* remove all packets older than the one acked from history */
523 524
		dccp_tx_hist_purge_older(ccid3_tx_hist,
					 &hctx->ccid3hctx_hist, packet);
525
		/*
526 527
		 * As we have calculated new ipi, delta, t_nom it is possible
		 * that we now can send a packet, so wake up dccp_wait_for_ccid
528 529
		 */
		sk->sk_write_space(sk);
530

531 532 533
		/*
		 * Update timeout interval for the nofeedback timer.
		 * We use a configuration option to increase the lower bound.
534 535
		 * This can help avoid triggering the nofeedback timer too
		 * often ('spinning') on LANs with small RTTs.
536
		 */
537
		hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
538
						   CONFIG_IP_DCCP_CCID3_RTO *
539
						   (USEC_PER_SEC/1000));
540 541
		/*
		 * Schedule no feedback timer to expire in
542
		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
543
		 */
544
		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
545

546
		ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
547 548
			       "expire in %lu jiffies (%luus)\n",
			       dccp_role(sk),
549 550 551
			       sk, usecs_to_jiffies(t_nfb), t_nfb);

		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
552
				   jiffies + usecs_to_jiffies(t_nfb));
553 554

		/* set idle flag */
555
		hctx->ccid3hctx_idle = 1;
556
		break;
557
	case TFRC_SSTATE_NO_SENT:	/* fall through */
G
Gerrit Renker 已提交
558
	case TFRC_SSTATE_TERM:		/* ignore feedback when closing */
559 560 561 562 563
		break;
	}
}

/*
 * Record one TFRC option of an incoming packet in the sender state.
 * @option: option type
 * @len:    length of the option value in bytes
 * @idx:    stored as-is for Loss Intervals options
 * @value:  pointer to the option value
 *
 * The stored option set is reset whenever the packet's sequence number
 * (dccps_gsr) differs from the one the set was collected for. Option types
 * not handled here are ignored.
 *
 * Returns 0, or -EINVAL if a Loss Event Rate or Receive Rate option does not
 * have a 4-byte value.
 */
static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
				     unsigned char len, u16 idx,
				     unsigned char *value)
{
	int rc = 0;
	const struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
	struct ccid3_options_received *opt_recv;

	BUG_ON(hctx == NULL);

	opt_recv = &hctx->ccid3hctx_options_received;

	/* First option of a new packet: start from a clean slate */
	if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
		opt_recv->ccid3or_seqno		     = dp->dccps_gsr;
		opt_recv->ccid3or_loss_event_rate    = ~0;
		opt_recv->ccid3or_loss_intervals_idx = 0;
		opt_recv->ccid3or_loss_intervals_len = 0;
		opt_recv->ccid3or_receive_rate	     = 0;
	}

	switch (option) {
	case TFRC_OPT_LOSS_EVENT_RATE:
		if (unlikely(len != 4)) {
			DCCP_WARN("%s(%p), invalid len %d "
				  "for TFRC_OPT_LOSS_EVENT_RATE\n",
				  dccp_role(sk), sk, len);
			rc = -EINVAL;
		} else {
			opt_recv->ccid3or_loss_event_rate =
						ntohl(*(__be32 *)value);
			ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_loss_event_rate);
		}
		break;
	case TFRC_OPT_LOSS_INTERVALS:
		/* Only position and length are remembered, not the contents */
		opt_recv->ccid3or_loss_intervals_idx = idx;
		opt_recv->ccid3or_loss_intervals_len = len;
		ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n",
			       dccp_role(sk), sk,
			       opt_recv->ccid3or_loss_intervals_idx,
			       opt_recv->ccid3or_loss_intervals_len);
		break;
	case TFRC_OPT_RECEIVE_RATE:
		if (unlikely(len != 4)) {
			DCCP_WARN("%s(%p), invalid len %d "
				  "for TFRC_OPT_RECEIVE_RATE\n",
				  dccp_role(sk), sk, len);
			rc = -EINVAL;
		} else {
			opt_recv->ccid3or_receive_rate =
						ntohl(*(__be32 *)value);
			ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_receive_rate);
		}
		break;
	}

	return rc;
}

626
static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
627
{
628
	struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
629

630
	hctx->ccid3hctx_s     = 0;
631
	hctx->ccid3hctx_rtt   = 0;
632 633
	hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
	INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
634

635 636
	hctx->ccid3hctx_no_feedback_timer.function =
				ccid3_hc_tx_no_feedback_timer;
637
	hctx->ccid3hctx_no_feedback_timer.data     = (unsigned long)sk;
638 639 640 641 642 643 644
	init_timer(&hctx->ccid3hctx_no_feedback_timer);

	return 0;
}

static void ccid3_hc_tx_exit(struct sock *sk)
{
645
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
646 647 648 649 650 651 652

	BUG_ON(hctx == NULL);

	ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);

	/* Empty packet history */
653
	dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
654 655
}

656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696
/*
 * Copy the sender's RTO and RTT values into @info.
 * Nothing is written for sockets in state DCCP_LISTEN.
 */
static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
{
	const struct ccid3_hc_tx_sock *tx = ccid3_hc_tx_sk(sk);

	/* A listening socket has no private CCID block to report on */
	if (sk->sk_state != DCCP_LISTEN) {
		BUG_ON(tx == NULL);

		info->tcpi_rtt = tx->ccid3hctx_rtt;
		info->tcpi_rto = tx->ccid3hctx_t_rto;
	}
}

/*
 * Sender-side getsockopt handler.
 *
 * Only DCCP_SOCKOPT_CCID_TX_INFO is supported: it copies the ccid3hctx_tfrc
 * member to @optval and stores its size in @optlen.
 *
 * Returns 0 on success, -EINVAL for listening sockets or when @len is
 * smaller than the structure, -ENOPROTOOPT for any other @optname and
 * -EFAULT if writing to user space fails.
 */
static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
				  u32 __user *optval, int __user *optlen)
{
	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
	const void *src;

	/* A listening socket has no private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return -EINVAL;

	if (optname != DCCP_SOCKOPT_CCID_TX_INFO)
		return -ENOPROTOOPT;

	if (len < sizeof(hctx->ccid3hctx_tfrc))
		return -EINVAL;

	len = sizeof(hctx->ccid3hctx_tfrc);
	src = &hctx->ccid3hctx_tfrc;

	/* Report the length first, then the data */
	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, src, len))
		return -EFAULT;

	return 0;
}

697
/*
698
 *	Receiver Half-Connection Routines
699
 */
700
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
701 702 703 704 705 706 707 708 709 710 711 712
/*
 * Map a receiver state to a printable name (used for debug output only).
 * The table is indexed directly by @state, so @state has to be one of the
 * TFRC_RSTATE_* values listed in the initialiser.
 */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	/* Read-only table: neither the pointers nor the literals may change */
	static const char *const ccid3_rx_state_names[] = {
	[TFRC_RSTATE_NO_DATA] = "NO_DATA",
	[TFRC_RSTATE_DATA]    = "DATA",
	[TFRC_RSTATE_TERM]    = "TERM",
	};

	return ccid3_rx_state_names[state];
}
#endif

713 714
static void ccid3_hc_rx_set_state(struct sock *sk,
				  enum ccid3_hc_rx_states state)
715
{
716
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
717 718 719
	enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
720 721
		       dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
		       ccid3_rx_state_name(state));
722 723 724 725
	WARN_ON(state == oldstate);
	hcrx->ccid3hcrx_state = state;
}

726 727 728 729 730 731 732 733 734
/*
 * Receiver-side tracking of the mean packet size `s'.
 * A zero @len leaves `s' untouched; the first non-zero sample is taken
 * as-is, later ones are folded in as (9 * s + len) / 10.
 */
static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len)
{
	/* don't update on empty packets (e.g. ACKs) */
	if (unlikely(len == 0)) {
		ccid3_pr_debug("Packet payload length is 0 - not updating\n");
		return;
	}

	if (hcrx->ccid3hcrx_s == 0)
		hcrx->ccid3hcrx_s = len;
	else
		hcrx->ccid3hcrx_s = (9 * hcrx->ccid3hcrx_s + len) / 10;
}

735 736
static void ccid3_hc_rx_send_feedback(struct sock *sk)
{
737
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
738
	struct dccp_sock *dp = dccp_sk(sk);
739
	struct dccp_rx_hist_entry *packet;
740
	struct timeval now;
G
Gerrit Renker 已提交
741
	suseconds_t delta;
742

743
	ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
744

745
	dccp_timestamp(sk, &now);
746

747 748 749 750
	switch (hcrx->ccid3hcrx_state) {
	case TFRC_RSTATE_NO_DATA:
		hcrx->ccid3hcrx_x_recv = 0;
		break;
G
Gerrit Renker 已提交
751 752 753 754
	case TFRC_RSTATE_DATA:
		delta = timeval_delta(&now,
				      &hcrx->ccid3hcrx_tstamp_last_feedback);
		DCCP_BUG_ON(delta < 0);
755 756
		hcrx->ccid3hcrx_x_recv =
			scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
757
		break;
758
	case TFRC_RSTATE_TERM:
759
		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
760 761 762
		return;
	}

763
	packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
764
	if (unlikely(packet == NULL)) {
765
		DCCP_WARN("%s(%p), no data packet in history!\n",
766
			  dccp_role(sk), sk);
767 768 769
		return;
	}

770
	hcrx->ccid3hcrx_tstamp_last_feedback = now;
I
Ian McDonald 已提交
771
	hcrx->ccid3hcrx_ccval_last_counter   = packet->dccphrx_ccval;
772 773
	hcrx->ccid3hcrx_bytes_recv	     = 0;

G
Gerrit Renker 已提交
774 775 776 777
	/* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */
	delta = timeval_delta(&now, &packet->dccphrx_tstamp);
	DCCP_BUG_ON(delta < 0);
	hcrx->ccid3hcrx_elapsed_time = delta / 10;
778

779
	if (hcrx->ccid3hcrx_p == 0)
780 781 782 783 784
		hcrx->ccid3hcrx_pinv = ~0U;	/* see RFC 4342, 8.5 */
	else if (hcrx->ccid3hcrx_p > 1000000) {
		DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
		hcrx->ccid3hcrx_pinv = 1;	/* use 100% in this case */
	} else
785
		hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
786

787
	dp->dccps_hc_rx_insert_options = 1;
788 789 790
	dccp_send_ack(sk);
}

791
static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
792
{
793
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
794
	__be32 x_recv, pinv;
795

796 797 798
	BUG_ON(hcrx == NULL);

	if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
799
		return 0;
800

I
Ian McDonald 已提交
801
	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
802 803

	if (dccp_packet_without_ack(skb))
804 805
		return 0;

806 807
	x_recv = htonl(hcrx->ccid3hcrx_x_recv);
	pinv   = htonl(hcrx->ccid3hcrx_pinv);
808 809 810 811 812 813

	if ((hcrx->ccid3hcrx_elapsed_time != 0 &&
	     dccp_insert_option_elapsed_time(sk, skb,
					     hcrx->ccid3hcrx_elapsed_time)) ||
	    dccp_insert_option_timestamp(sk, skb) ||
	    dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
814
			       &pinv, sizeof(pinv)) ||
815
	    dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
816
			       &x_recv, sizeof(x_recv)))
817 818 819
		return -1;

	return 0;
820 821 822 823 824 825 826 827
}

/* calculate first loss interval
 *
 * returns estimated loss interval in usecs */

static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
{
828
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
829
	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
G
Gerrit Renker 已提交
830 831
	u32 x_recv, p;
	suseconds_t rtt, delta;
832
	struct timeval tstamp = { 0, };
833 834 835
	int interval = 0;
	int win_count = 0;
	int step = 0;
836
	u64 fval;
837

838 839 840
	list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
				 dccphrx_node) {
		if (dccp_rx_hist_entry_data_packet(entry)) {
841 842 843 844
			tail = entry;

			switch (step) {
			case 0:
845
				tstamp	  = entry->dccphrx_tstamp;
846
				win_count = entry->dccphrx_ccval;
847 848 849
				step = 1;
				break;
			case 1:
850
				interval = win_count - entry->dccphrx_ccval;
851 852 853 854 855 856 857 858 859
				if (interval < 0)
					interval += TFRC_WIN_COUNT_LIMIT;
				if (interval > 4)
					goto found;
				break;
			}
		}
	}

860
	if (unlikely(step == 0)) {
861
		DCCP_WARN("%s(%p), packet history has no data packets!\n",
862
			  dccp_role(sk), sk);
863 864 865
		return ~0;
	}

866
	if (unlikely(interval == 0)) {
867
		DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
868
			  "Defaulting to 1\n", dccp_role(sk), sk);
869 870 871
		interval = 1;
	}
found:
I
Ian McDonald 已提交
872
	if (!tail) {
873
		DCCP_CRIT("tail is null\n");
I
Ian McDonald 已提交
874 875
		return ~0;
	}
G
Gerrit Renker 已提交
876 877 878 879 880

	delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
	DCCP_BUG_ON(delta < 0);

	rtt = delta * 4 / interval;
881 882
	ccid3_pr_debug("%s(%p), approximated RTT to %dus\n",
		       dccp_role(sk), sk, (int)rtt);
883

884 885 886
	/*
	 * Determine the length of the first loss interval via inverse lookup.
	 * Assume that X_recv can be computed by the throughput equation
887 888 889
	 *		    s
	 *	X_recv = --------
	 *		 R * fval
890 891 892
	 * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
	 */
	if (rtt == 0) {			/* would result in divide-by-zero */
893 894
		DCCP_WARN("RTT==0\n");
		return ~0;
895
	}
896

897 898
	dccp_timestamp(sk, &tstamp);
	delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
G
Gerrit Renker 已提交
899
	DCCP_BUG_ON(delta <= 0);
900

G
Gerrit Renker 已提交
901
	x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
902 903 904 905
	if (x_recv == 0) {		/* would also trigger divide-by-zero */
		DCCP_WARN("X_recv==0\n");
		if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
			DCCP_BUG("stored value of X_recv is zero");
906
			return ~0;
907
		}
I
Ian McDonald 已提交
908 909
	}

910 911
	fval = scaled_div(hcrx->ccid3hcrx_s, rtt);
	fval = scaled_div32(fval, x_recv);
912
	p = tfrc_calc_x_reverse_lookup(fval);
913

914
	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
915
		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
916 917 918 919

	if (p == 0)
		return ~0;
	else
920
		return 1000000 / p;
921 922 923 924
}

/*
 * Record a new loss event in the loss interval history.
 * @seq_loss: sequence number associated with the loss
 * @win_loss: window counter value associated with the loss
 *
 * With an empty history a fresh set of intervals is created and the length
 * of the first one is estimated by ccid3_hc_rx_calc_first_li(). Otherwise a
 * new entry is put at the head of the list and the entry at the tail is
 * released, so the list length stays constant. The new entry's interval is
 * the sequence distance between the previous head and @seq_loss.
 */
static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
{
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
	struct dccp_li_hist_entry *head;
	u64 seq_temp;

	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
		if (!dccp_li_hist_interval_new(ccid3_li_hist,
		   &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
			return;

		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
		   struct dccp_li_hist_entry, dccplih_node);
		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
	} else {
		struct dccp_li_hist_entry *entry;
		struct dccp_li_hist_entry *tail;

		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
		   struct dccp_li_hist_entry, dccplih_node);
		/* FIXME win count check removed as was wrong */
		/* should make this check with receive history */
		/* and compare there as per section 10.2 of RFC4342 */

		/* new loss event detected */
		/* calculate last interval length */
		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
		entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC);

		if (entry == NULL) {
			DCCP_BUG("out of memory - can not allocate entry");
			return;
		}

		list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist);

		/*
		 * Drop the oldest interval. Free the containing entry, not
		 * the embedded list node, so this stays correct wherever
		 * dccplih_node sits inside struct dccp_li_hist_entry.
		 */
		tail = list_entry(hcrx->ccid3hcrx_li_hist.prev,
				  struct dccp_li_hist_entry, dccplih_node);
		list_del(&tail->dccplih_node);
		kmem_cache_free(ccid3_li_hist->dccplih_slab, tail);

		/* Create the newest interval */
		entry->dccplih_seqno = seq_loss;
		entry->dccplih_interval = seq_temp;
		entry->dccplih_win_count = win_loss;
	}
}

I
Ian McDonald 已提交
970
static int ccid3_hc_rx_detect_loss(struct sock *sk,
971
				    struct dccp_rx_hist_entry *packet)
972
{
973
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
974 975
	struct dccp_rx_hist_entry *rx_hist =
				dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
I
Ian McDonald 已提交
976 977 978 979 980 981 982 983 984 985 986 987 988 989 990
	u64 seqno = packet->dccphrx_seqno;
	u64 tmp_seqno;
	int loss = 0;
	u8 ccval;


	tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;

	if (!rx_hist ||
	   follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
		hcrx->ccid3hcrx_seqno_nonloss = seqno;
		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
		goto detect_out;
	}

991

I
Ian McDonald 已提交
992 993 994 995 996 997 998 999 1000 1001 1002
	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
	   > TFRC_RECV_NUM_LATE_LOSS) {
		loss = 1;
		ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
		   hcrx->ccid3hcrx_ccval_nonloss);
		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
		dccp_inc_seqno(&tmp_seqno);
		hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
		dccp_inc_seqno(&tmp_seqno);
		while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
		   tmp_seqno, &ccval)) {
1003
			hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
I
Ian McDonald 已提交
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
			hcrx->ccid3hcrx_ccval_nonloss = ccval;
			dccp_inc_seqno(&tmp_seqno);
		}
	}

	/* FIXME - this code could be simplified with above while */
	/* but works at moment */
	if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
		hcrx->ccid3hcrx_seqno_nonloss = seqno;
		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
	}

detect_out:
	dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
		   &hcrx->ccid3hcrx_li_hist, packet,
		   hcrx->ccid3hcrx_seqno_nonloss);
	return loss;
1021 1022 1023 1024
}

static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
1025
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1026
	const struct dccp_options_received *opt_recv;
1027
	struct dccp_rx_hist_entry *packet;
1028
	struct timeval now;
1029
	u32 p_prev, r_sample, rtt_prev;
1030
	int loss, payload_size;
1031

1032
	BUG_ON(hcrx == NULL);
1033

1034
	opt_recv = &dccp_sk(sk)->dccps_options_received;
1035

1036 1037 1038 1039 1040
	switch (DCCP_SKB_CB(skb)->dccpd_type) {
	case DCCP_PKT_ACK:
		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
			return;
	case DCCP_PKT_DATAACK:
1041
		if (opt_recv->dccpor_timestamp_echo == 0)
1042
			break;
I
Ian McDonald 已提交
1043
		rtt_prev = hcrx->ccid3hcrx_rtt;
1044
		dccp_timestamp(sk, &now);
1045
		r_sample = dccp_sample_rtt(sk, &now, NULL);
1046 1047 1048 1049 1050 1051 1052

		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
			hcrx->ccid3hcrx_rtt = r_sample;
		else
			hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
					      r_sample / 10;

I
Ian McDonald 已提交
1053
		if (rtt_prev != hcrx->ccid3hcrx_rtt)
1054 1055
			ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n",
				       dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
1056
				       opt_recv->dccpor_elapsed_time);
1057 1058 1059
		break;
	case DCCP_PKT_DATA:
		break;
1060
	default: /* We're not interested in other packet types, move along */
1061 1062 1063
		return;
	}

1064
	packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp,
1065
					skb, GFP_ATOMIC);
1066
	if (unlikely(packet == NULL)) {
1067
		DCCP_WARN("%s(%p), Not enough mem to add rx packet "
1068
			  "to history, consider it lost!\n", dccp_role(sk), sk);
1069 1070 1071
		return;
	}

I
Ian McDonald 已提交
1072
	loss = ccid3_hc_rx_detect_loss(sk, packet);
1073 1074 1075 1076

	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
		return;

1077 1078 1079
	payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4;
	ccid3_hc_rx_update_s(hcrx, payload_size);

1080 1081
	switch (hcrx->ccid3hcrx_state) {
	case TFRC_RSTATE_NO_DATA:
1082 1083
		ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial "
			       "feedback\n", dccp_role(sk), sk,
1084
			       dccp_state_name(sk->sk_state), skb);
1085 1086 1087 1088
		ccid3_hc_rx_send_feedback(sk);
		ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
		return;
	case TFRC_RSTATE_DATA:
1089
		hcrx->ccid3hcrx_bytes_recv += payload_size;
I
Ian McDonald 已提交
1090
		if (loss)
1091 1092
			break;

1093
		dccp_timestamp(sk, &now);
1094 1095
		if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) -
		     (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) {
1096 1097
			hcrx->ccid3hcrx_tstamp_last_ack = now;
			ccid3_hc_rx_send_feedback(sk);
1098
		}
1099
		return;
1100
	case TFRC_RSTATE_TERM:
1101
		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
1102 1103 1104 1105
		return;
	}

	/* Dealing with packet loss */
1106
	ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
1107
		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));
1108 1109

	p_prev = hcrx->ccid3hcrx_p;
1110

1111
	/* Calculate loss event rate */
I
Ian McDonald 已提交
1112 1113 1114
	if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
		u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);

1115
		/* Scaling up by 1000000 as fixed decimal */
I
Ian McDonald 已提交
1116 1117
		if (i_mean != 0)
			hcrx->ccid3hcrx_p = 1000000 / i_mean;
1118 1119
	} else
		DCCP_BUG("empty loss history");
1120 1121 1122 1123 1124 1125 1126

	if (hcrx->ccid3hcrx_p > p_prev) {
		ccid3_hc_rx_send_feedback(sk);
		return;
	}
}

1127
static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
1128
{
1129
	struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
1130

1131
	ccid3_pr_debug("entry\n");
1132 1133 1134

	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
	INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
1135
	INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
1136
	dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack);
1137
	hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
1138
	hcrx->ccid3hcrx_s   = 0;
1139
	hcrx->ccid3hcrx_rtt = 0;
1140 1141 1142 1143 1144
	return 0;
}

static void ccid3_hc_rx_exit(struct sock *sk)
{
1145
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1146

1147
	BUG_ON(hcrx == NULL);
1148 1149 1150 1151

	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);

	/* Empty packet history */
1152
	dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
1153 1154

	/* Empty loss interval history */
1155
	dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
1156 1157
}

1158 1159
static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
{
1160
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1161

1162 1163 1164 1165
	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return;

1166
	BUG_ON(hcrx == NULL);
1167

1168 1169 1170
	info->tcpi_ca_state = hcrx->ccid3hcrx_state;
	info->tcpi_options  |= TCPI_OPT_TIMESTAMPS;
	info->tcpi_rcv_rtt  = hcrx->ccid3hcrx_rtt;
1171 1172
}

1173 1174 1175 1176 1177
/*
 * ccid3_hc_rx_getsockopt  -  receiver-side CCID3 socket option query
 * @sk:      socket being queried
 * @optname: option to read; only DCCP_SOCKOPT_CCID_RX_INFO is handled
 * @len:     size of the user buffer as supplied by the caller
 * @optval:  user buffer that receives the option value
 * @optlen:  user location that receives the number of bytes written
 *
 * Returns 0 on success, -EINVAL for listening sockets or when @len is
 * negative or too small for the requested data, -ENOPROTOOPT for unknown
 * options and -EFAULT if copying to user space fails.
 */
static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
				  u32 __user *optval, int __user *optlen)
{
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
	const void *val;

	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return -EINVAL;

	switch (optname) {
	case DCCP_SOCKOPT_CCID_RX_INFO:
		/*
		 * @len is signed while sizeof yields an unsigned value: a
		 * bare comparison would convert a negative @len to a huge
		 * unsigned number and let it slip past the size check.
		 */
		if (len < 0 || (size_t)len < sizeof(hcrx->ccid3hcrx_tfrc))
			return -EINVAL;
		len = sizeof(hcrx->ccid3hcrx_tfrc);
		val = &hcrx->ccid3hcrx_tfrc;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, val, len))
		return -EFAULT;

	return 0;
}

1200
static struct ccid_operations ccid3 = {
I
Ian McDonald 已提交
1201
	.ccid_id		   = DCCPC_CCID3,
1202 1203
	.ccid_name		   = "ccid3",
	.ccid_owner		   = THIS_MODULE,
1204
	.ccid_hc_tx_obj_size	   = sizeof(struct ccid3_hc_tx_sock),
1205 1206 1207 1208 1209 1210
	.ccid_hc_tx_init	   = ccid3_hc_tx_init,
	.ccid_hc_tx_exit	   = ccid3_hc_tx_exit,
	.ccid_hc_tx_send_packet	   = ccid3_hc_tx_send_packet,
	.ccid_hc_tx_packet_sent	   = ccid3_hc_tx_packet_sent,
	.ccid_hc_tx_packet_recv	   = ccid3_hc_tx_packet_recv,
	.ccid_hc_tx_parse_options  = ccid3_hc_tx_parse_options,
1211
	.ccid_hc_rx_obj_size	   = sizeof(struct ccid3_hc_rx_sock),
1212 1213 1214 1215
	.ccid_hc_rx_init	   = ccid3_hc_rx_init,
	.ccid_hc_rx_exit	   = ccid3_hc_rx_exit,
	.ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
	.ccid_hc_rx_packet_recv	   = ccid3_hc_rx_packet_recv,
1216 1217
	.ccid_hc_rx_get_info	   = ccid3_hc_rx_get_info,
	.ccid_hc_tx_get_info	   = ccid3_hc_tx_get_info,
1218 1219
	.ccid_hc_rx_getsockopt	   = ccid3_hc_rx_getsockopt,
	.ccid_hc_tx_getsockopt	   = ccid3_hc_tx_getsockopt,
1220
};
1221

1222
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
/* Expose the debug switch as a module parameter (mode 0444) */
module_param(ccid3_debug, int, 0444);
MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
#endif /* CONFIG_IP_DCCP_CCID3_DEBUG */
1226 1227 1228

static __init int ccid3_module_init(void)
{
1229
	int rc = -ENOBUFS;
1230

1231 1232
	ccid3_rx_hist = dccp_rx_hist_new("ccid3");
	if (ccid3_rx_hist == NULL)
1233 1234
		goto out;

1235 1236 1237
	ccid3_tx_hist = dccp_tx_hist_new("ccid3");
	if (ccid3_tx_hist == NULL)
		goto out_free_rx;
1238

1239 1240
	ccid3_li_hist = dccp_li_hist_new("ccid3");
	if (ccid3_li_hist == NULL)
1241
		goto out_free_tx;
1242 1243

	rc = ccid_register(&ccid3);
1244
	if (rc != 0)
1245 1246 1247
		goto out_free_loss_interval_history;
out:
	return rc;
1248

1249
out_free_loss_interval_history:
1250 1251
	dccp_li_hist_delete(ccid3_li_hist);
	ccid3_li_hist = NULL;
1252 1253 1254 1255 1256 1257
out_free_tx:
	dccp_tx_hist_delete(ccid3_tx_hist);
	ccid3_tx_hist = NULL;
out_free_rx:
	dccp_rx_hist_delete(ccid3_rx_hist);
	ccid3_rx_hist = NULL;
1258 1259 1260 1261 1262 1263 1264 1265
	goto out;
}
module_init(ccid3_module_init);

static __exit void ccid3_module_exit(void)
{
	ccid_unregister(&ccid3);

1266 1267 1268
	if (ccid3_tx_hist != NULL) {
		dccp_tx_hist_delete(ccid3_tx_hist);
		ccid3_tx_hist = NULL;
1269
	}
1270 1271 1272
	if (ccid3_rx_hist != NULL) {
		dccp_rx_hist_delete(ccid3_rx_hist);
		ccid3_rx_hist = NULL;
1273
	}
1274 1275 1276
	if (ccid3_li_hist != NULL) {
		dccp_li_hist_delete(ccid3_li_hist);
		ccid3_li_hist = NULL;
1277 1278 1279 1280
	}
}
module_exit(ccid3_module_exit);

1281
/* Module metadata */
MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
MODULE_LICENSE("GPL");
MODULE_ALIAS("net-dccp-ccid-3");