/*
 *  net/dccp/ccids/ccid3.c
 *
 *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
 *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
 *
 *  An implementation of the DCCP protocol
 *
 *  This code has been developed by the University of Waikato WAND
 *  research group. For further information please see http://www.wand.net.nz/
 *
 *  This code also uses code from Lulea University, rereleased as GPL by its
 *  authors:
 *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
 *
 *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
 *  and to make it work as a loadable module in the DCCP stack written by
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
 *
 *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "../ccid.h"
#include "../dccp.h"
39
#include "lib/packet_history.h"
40
#include "lib/loss_interval.h"
41
#include "lib/tfrc.h"
42 43
#include "ccid3.h"

44 45 46
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static int ccid3_debug;
#define ccid3_pr_debug(format, a...)	DCCP_PR_DEBUG(ccid3_debug, format, ##a)
47 48 49 50
#else
#define ccid3_pr_debug(format, a...)
#endif

51 52
static struct dccp_tx_hist *ccid3_tx_hist;
static struct dccp_rx_hist *ccid3_rx_hist;
53
static struct dccp_li_hist *ccid3_li_hist;
54

55
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
56 57 58 59 60 61 62 63 64 65 66 67 68
/* Translate a sender (TX) half-connection state into its printable name. */
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
	/* Lookup table indexed directly by the state value */
	static const char *const tx_state_label[] = {
		[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
		[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
		[TFRC_SSTATE_FBACK]    = "FBACK",
		[TFRC_SSTATE_TERM]     = "TERM",
	};
	const char *label = tx_state_label[state];

	return label;
}
#endif

69 70
static void ccid3_hc_tx_set_state(struct sock *sk,
				  enum ccid3_hc_tx_states state)
71
{
72
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
73 74 75
	enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
76 77
		       dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
		       ccid3_tx_state_name(state));
78 79 80 81
	WARN_ON(state == oldstate);
	hctx->ccid3hctx_state = state;
}

82 83 84 85 86
/*
 * Recalculate scheduled nominal send time t_nom, inter-packet interval
 * t_ipi, and delta value. Should be called after each change to X.
 */
static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
87
{
88 89
	timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);

90 91 92
	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
	hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
					   hctx->ccid3hctx_x >> 6);
93

94 95 96 97
	/* Update nominal send time with regard to the new t_ipi */
	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);

	/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
98 99
	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
					   TFRC_OPSYS_HALF_TIME_GRAN);
100 101 102 103
}
/*
 * Update X by
 *    If (p > 0)
104
 *       X_calc = calcX(s, R, p);
105 106 107 108 109
 *       X = max(min(X_calc, 2 * X_recv), s / t_mbi);
 *    Else
 *       If (now - tld >= R)
 *          X = max(min(2 * X, 2 * X_recv), s / R);
 *          tld = now;
110
 *
111 112 113 114
 * Note: X and X_recv are both stored in units of 64 * bytes/second, to support
 *       fine-grained resolution of sending rates. This requires scaling by 2^6
 *       throughout the code. Only X_calc is unscaled (in bytes/second).
 *
115 116
 * If X has changed, we also update the scheduled send time t_now,
 * the inter-packet interval t_ipi, and the delta value.
117
 */
118 119
static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)

120
{
121
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
122
	const  __u64 old_x = hctx->ccid3hctx_x;
123

124
	if (hctx->ccid3hctx_p > 0) {
125

126
		hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
127
					hctx->ccid3hctx_x_recv * 2);
128 129
		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
					(((__u64)hctx->ccid3hctx_s) << 6) /
130
								TFRC_T_MBI);
131

G
Gerrit Renker 已提交
132
	} else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) -
133
			(suseconds_t)hctx->ccid3hctx_rtt >= 0) {
134

135 136 137
		hctx->ccid3hctx_x =
			max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv),
			    scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
138
				       hctx->ccid3hctx_rtt));
139
		hctx->ccid3hctx_t_ld = *now;
140
	}
141

142 143
	if (hctx->ccid3hctx_x != old_x)
		ccid3_update_send_time(hctx);
144 145
}

146
/*
 *	Track the mean packet size `s' (cf. RFC 4342, 5.3 and  RFC 3448, 4.1)
 *	@len: DCCP packet payload size in bytes
 *
 *	The first non-empty packet seeds `s' directly; afterwards `s' is the
 *	weighted average (9 * s + len) / 10. Zero-length payloads are ignored.
 *
 *	Note: a possible optimisation would be to recalculate the sending rate
 *	      (and with it t_ipi, delta and t_nom) whenever `s' changes. This
 *	      is non-standard and of unclear benefit, so it is left out.
 */
static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
{
	if (unlikely(len == 0)) {
		ccid3_pr_debug("Packet payload length is 0 - not updating\n");
		return;
	}

	if (hctx->ccid3hctx_s == 0)
		hctx->ccid3hctx_s = len;
	else
		hctx->ccid3hctx_s = (9 * hctx->ccid3hctx_s + len) / 10;
}

165
/*
 *	Update Window Counter using the algorithm from [RFC 4342, 8.1].
 *	The algorithm is not applicable if RTT < 4 microseconds.
 *
 *	The counter advances by the number of quarter-RTTs elapsed since it
 *	was last changed (at most 5 per update) and wraps modulo 16.
 */
static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
						struct timeval *now)
{
	u32 quarter_rtts;
	suseconds_t delta;

	/* rtt / 4 below would be zero: bail out to avoid divide-by-zero */
	if (unlikely(hctx->ccid3hctx_rtt < 4))
		return;

	delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count);
	DCCP_BUG_ON(delta < 0);

	quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4);
	if (quarter_rtts == 0)
		return;		/* less than a quarter RTT has passed */

	hctx->ccid3hctx_t_last_win_count = *now;
	hctx->ccid3hctx_last_win_count	+= min_t(u32, quarter_rtts, 5);
	hctx->ccid3hctx_last_win_count	&= 0xF;		/* mod 16 */

	ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count);
}

192 193 194
static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
195
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
196
	unsigned long t_nfb = USEC_PER_SEC / 5;
197 198 199 200 201

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		/* XXX: set some sensible MIB */
202
		goto restart_timer;
203 204
	}

205
	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
206
		       ccid3_tx_state_name(hctx->ccid3hctx_state));
207

208 209
	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_FBACK:
210
		/* RFC 3448, 4.4: Halve send rate directly */
211 212 213
		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2,
					(((__u64)hctx->ccid3hctx_s) << 6) /
								    TFRC_T_MBI);
214

215 216
		ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u "
			       "bytes/s\n", dccp_role(sk), sk,
217
			       ccid3_tx_state_name(hctx->ccid3hctx_state),
218
			       (unsigned)(hctx->ccid3hctx_x >> 6));
219 220
		/* The value of R is still undefined and so we can not recompute
		 * the timout value. Keep initial value as per [RFC 4342, 5]. */
221
		t_nfb = TFRC_INITIAL_TIMEOUT;
222
		ccid3_update_send_time(hctx);
223 224
		break;
	case TFRC_SSTATE_FBACK:
225 226
		/*
		 * Check if IDLE since last timeout and recv rate is less than
227
		 * 4 packets (in units of 64*bytes/sec) per RTT
228 229
		 */
		if (!hctx->ccid3hctx_idle ||
230
		    (hctx->ccid3hctx_x_recv >= 4 *
231 232
		     scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
				hctx->ccid3hctx_rtt))) {
233 234
			struct timeval now;

235
			ccid3_pr_debug("%s(%p, state=%s), not idle\n",
236
				       dccp_role(sk), sk,
237
				   ccid3_tx_state_name(hctx->ccid3hctx_state));
238

239 240 241 242
			/*
			 *  Modify the cached value of X_recv [RFC 3448, 4.4]
			 *
			 *  If (p == 0 || X_calc > 2 * X_recv)
243 244 245
			 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
			 *  Else
			 *    X_recv = X_calc / 4;
246 247
			 *
			 *  Note that X_recv is scaled by 2^6 while X_calc is not
248
			 */
249
			BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
250

251
			if (hctx->ccid3hctx_p  == 0 ||
252 253
			    (hctx->ccid3hctx_x_calc >
			     (hctx->ccid3hctx_x_recv >> 5))) {
254 255

				hctx->ccid3hctx_x_recv =
256 257
					max(hctx->ccid3hctx_x_recv / 2,
					    (((__u64)hctx->ccid3hctx_s) << 6) /
258
							  (2 * TFRC_T_MBI));
259

260 261
				if (hctx->ccid3hctx_p == 0)
					dccp_timestamp(sk, &now);
262 263 264 265
			} else {
				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
				hctx->ccid3hctx_x_recv <<= 4;
			}
266
			/* Now recalculate X [RFC 3448, 4.3, step (4)] */
267
			ccid3_hc_tx_update_x(sk, &now);
268
		}
269 270
		/*
		 * Schedule no feedback timer to expire in
271 272
		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
		 * See comments in packet_recv() regarding the value of t_RTO.
273
		 */
274
		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
275
		break;
276
	case TFRC_SSTATE_NO_SENT:
277
		DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk);
278 279
		/* fall through */
	case TFRC_SSTATE_TERM:
280 281 282 283
		goto out;
	}

	hctx->ccid3hctx_idle = 1;
284 285 286

restart_timer:
	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
287
		           jiffies + usecs_to_jiffies(t_nfb));
288 289 290 291 292
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

293 294 295 296 297 298
/*
 * returns
 *   > 0: delay (in msecs) that should pass before actually sending
 *   = 0: can send immediately
 *   < 0: error condition; do not send packet
 */
299
static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
300 301
{
	struct dccp_sock *dp = dccp_sk(sk);
302
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
303
	struct timeval now;
G
Gerrit Renker 已提交
304
	suseconds_t delay;
305

306
	BUG_ON(hctx == NULL);
307

308
	/*
309 310 311
	 * This function is called only for Data and DataAck packets. Sending
	 * zero-sized Data(Ack)s is theoretically possible, but for congestion
	 * control this case is pathological - ignore it.
312
	 */
313
	if (unlikely(skb->len == 0))
314
		return -EBADMSG;
315

316
	dccp_timestamp(sk, &now);
317 318 319

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_SENT:
320
		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
321 322
			       (jiffies +
			        usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
323 324 325 326
		hctx->ccid3hctx_last_win_count	 = 0;
		hctx->ccid3hctx_t_last_win_count = now;
		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);

327
		/* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
328
		ccid3_hc_tx_update_s(hctx, skb->len);
329 330
		hctx->ccid3hctx_x = hctx->ccid3hctx_s;
		hctx->ccid3hctx_x <<= 6;
331

332 333 334 335 336 337
		/* First timeout, according to [RFC 3448, 4.2], is 1 second */
		hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
		/* Initial delta: minimum of 0.5 sec and t_gran/2 */
		hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN;

		/* Set t_0 for initial packet */
338 339 340 341
		hctx->ccid3hctx_t_nom = now;
		break;
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
342 343
		delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
		/*
344
		 *	Scheduling of packet transmissions [RFC 3448, 4.6]
345 346 347 348 349 350
		 *
		 * if (t_now > t_nom - delta)
		 *       // send the packet now
		 * else
		 *       // send the packet in (t_nom - t_now) milliseconds.
		 */
G
Gerrit Renker 已提交
351
		if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0)
352
			return delay / 1000L;
353 354

		ccid3_hc_tx_update_win_count(hctx, &now);
355
		break;
356
	case TFRC_SSTATE_TERM:
357
		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
358
		return -EINVAL;
359 360
	}

361 362
	/* prepare to send now (add options etc.) */
	dp->dccps_hc_tx_insert_options = 1;
363 364 365
	DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;

	/* set the nominal send time for the next following packet */
366 367 368
	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);

	return 0;
369 370
}

371 372
static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
				    unsigned int len)
373
{
374
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
375
	struct timeval now;
376
	struct dccp_tx_hist_entry *packet;
377

378
	BUG_ON(hctx == NULL);
379

380
	ccid3_hc_tx_update_s(hctx, len);
381

382
	packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC);
383
	if (unlikely(packet == NULL)) {
384
		DCCP_CRIT("packet history - out of memory!");
385 386
		return;
	}
387 388 389
	dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet);

	dccp_timestamp(sk, &now);
390
	packet->dccphtx_tstamp = now;
391 392 393 394
	packet->dccphtx_seqno  = dccp_sk(sk)->dccps_gss;
	packet->dccphtx_rtt    = hctx->ccid3hctx_rtt;
	packet->dccphtx_sent   = 1;
	hctx->ccid3hctx_idle   = 0;
395 396 397 398
}

static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
399 400
	const struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
401
	struct ccid3_options_received *opt_recv;
402
	struct dccp_tx_hist_entry *packet;
403
	struct timeval now;
404
	unsigned long t_nfb;
405
	u32 pinv;
406
	suseconds_t r_sample, t_elapsed;
407

408
	BUG_ON(hctx == NULL);
409 410 411 412 413 414 415 416 417 418 419

	/* we are only interested in ACKs */
	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
		return;

	opt_recv = &hctx->ccid3hctx_options_received;

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
420
		/* get packet from history to look up t_recvdata */
421
		packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
422
					      DCCP_SKB_CB(skb)->dccpd_ack_seq);
423
		if (unlikely(packet == NULL)) {
424
			DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist "
425
				  "in history!\n",  dccp_role(sk), sk,
426
			    (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
427
				dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
428 429 430
			return;
		}

431
		/* Update receive rate in units of 64 * bytes/second */
432 433
		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
		hctx->ccid3hctx_x_recv <<= 6;
434 435 436

		/* Update loss event rate */
		pinv = opt_recv->ccid3or_loss_event_rate;
437
		if (pinv == ~0U || pinv == 0)	       /* see RFC 4342, 8.5   */
438
			hctx->ccid3hctx_p = 0;
439
		else				       /* can not exceed 100% */
440
			hctx->ccid3hctx_p = 1000000 / pinv;
441

442
		dccp_timestamp(sk, &now);
443 444 445

		/*
		 * Calculate new round trip sample as per [RFC 3448, 4.3] by
446
		 *	R_sample  =  (now - t_recvdata) - t_elapsed
447 448 449 450
		 */
		r_sample  = timeval_delta(&now, &packet->dccphtx_tstamp);
		t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;

G
Gerrit Renker 已提交
451 452
		DCCP_BUG_ON(r_sample < 0);
		if (unlikely(r_sample <= t_elapsed))
453 454
			DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n",
				  (int)r_sample, (int)t_elapsed);
455 456
		else
			r_sample -= t_elapsed;
457
		CCID3_RTT_SANITY_CHECK(r_sample);
458

459
		/* Update RTT estimate by
460 461 462 463 464 465 466 467
		 * If (No feedback recv)
		 *    R = R_sample;
		 * Else
		 *    R = q * R + (1 - q) * R_sample;
		 *
		 * q is a constant, RFC 3448 recomments 0.9
		 */
		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
468 469 470 471
			/*
			 * Larger Initial Windows [RFC 4342, sec. 5]
			 * We deviate in that we use `s' instead of `MSS'.
			 */
472
			__u64 w_init = min(4 * hctx->ccid3hctx_s,
473
					   max(2 * hctx->ccid3hctx_s, 4380));
474
			hctx->ccid3hctx_rtt  = r_sample;
475
			hctx->ccid3hctx_x    = scaled_div(w_init << 6, r_sample);
476 477 478
			hctx->ccid3hctx_t_ld = now;

			ccid3_update_send_time(hctx);
479

480
			ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, "
481
				       "R_sample=%dus, X=%u\n", dccp_role(sk),
A
Andrew Morton 已提交
482 483
				       sk, hctx->ccid3hctx_s,
				       (unsigned long long)w_init,
484
				       (int)r_sample,
485
				       (unsigned)(hctx->ccid3hctx_x >> 6));
486

487 488 489
			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
		} else {
			hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
490
					           (u32)r_sample) / 10;
491

492 493 494 495 496 497
			/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
			if (hctx->ccid3hctx_p > 0)
				hctx->ccid3hctx_x_calc =
					tfrc_calc_x(hctx->ccid3hctx_s,
						    hctx->ccid3hctx_rtt,
						    hctx->ccid3hctx_p);
498
			ccid3_hc_tx_update_x(sk, &now);
499

500
			ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, "
501 502
				       "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
				       dccp_role(sk),
503
				       sk, hctx->ccid3hctx_rtt, (int)r_sample,
504 505
				       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
				       hctx->ccid3hctx_x_calc,
506
				       (unsigned)(hctx->ccid3hctx_x_recv >> 6),
507
				       (unsigned)(hctx->ccid3hctx_x >> 6));
508 509 510 511 512 513
		}

		/* unschedule no feedback timer */
		sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);

		/* remove all packets older than the one acked from history */
514 515
		dccp_tx_hist_purge_older(ccid3_tx_hist,
					 &hctx->ccid3hctx_hist, packet);
516
		/*
517 518
		 * As we have calculated new ipi, delta, t_nom it is possible
		 * that we now can send a packet, so wake up dccp_wait_for_ccid
519 520
		 */
		sk->sk_write_space(sk);
521

522 523 524
		/*
		 * Update timeout interval for the nofeedback timer.
		 * We use a configuration option to increase the lower bound.
525 526
		 * This can help avoid triggering the nofeedback timer too
		 * often ('spinning') on LANs with small RTTs.
527
		 */
528
		hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
529
						   CONFIG_IP_DCCP_CCID3_RTO *
530
						   (USEC_PER_SEC/1000));
531 532
		/*
		 * Schedule no feedback timer to expire in
533
		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
534
		 */
535
		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
536

537
		ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
538 539
			       "expire in %lu jiffies (%luus)\n",
			       dccp_role(sk),
540 541 542
			       sk, usecs_to_jiffies(t_nfb), t_nfb);

		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
543
				   jiffies + usecs_to_jiffies(t_nfb));
544 545

		/* set idle flag */
546
		hctx->ccid3hctx_idle = 1;
547
		break;
G
Gerrit Renker 已提交
548
	case TFRC_SSTATE_NO_SENT:
549 550 551
		/*
		 * XXX when implementing bidirectional rx/tx check this again
		 */
552
		DCCP_WARN("Illegal ACK received - no packet sent\n");
G
Gerrit Renker 已提交
553 554
		/* fall through */
	case TFRC_SSTATE_TERM:		/* ignore feedback when closing */
555 556 557 558 559
		break;
	}
}

/*
 * Parse one CCID3-specific option found on a received packet.
 * @option: option type
 * @len:    length of the option value in bytes
 * @idx:    stored as the loss-intervals index for TFRC_OPT_LOSS_INTERVALS
 * @value:  pointer to the option value
 *
 * The per-packet option record is reset whenever the recorded sequence number
 * differs from GSR, i.e. on the first option of a new packet. Returns 0 on
 * success (including option types not handled here) and -EINVAL when a
 * loss-event-rate or receive-rate option does not have a length of 4.
 */
static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
				     unsigned char len, u16 idx,
				     unsigned char *value)
{
	int rc = 0;
	const struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
	struct ccid3_options_received *opt_recv;

	BUG_ON(hctx == NULL);

	opt_recv = &hctx->ccid3hctx_options_received;

	if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
		opt_recv->ccid3or_seqno		     = dp->dccps_gsr;
		opt_recv->ccid3or_loss_event_rate    = ~0;
		opt_recv->ccid3or_loss_intervals_idx = 0;
		opt_recv->ccid3or_loss_intervals_len = 0;
		opt_recv->ccid3or_receive_rate	     = 0;
	}

	/*
	 * NOTE(review): the 32-bit values below are read through a cast of
	 * `value'; confirm that option data is suitably aligned on all
	 * supported architectures or switch to an unaligned-safe accessor.
	 */
	switch (option) {
	case TFRC_OPT_LOSS_EVENT_RATE:
		if (unlikely(len != 4)) {
			DCCP_WARN("%s(%p), invalid len %d "
				  "for TFRC_OPT_LOSS_EVENT_RATE\n",
				  dccp_role(sk), sk, len);
			rc = -EINVAL;
		} else {
			opt_recv->ccid3or_loss_event_rate =
						ntohl(*(__be32 *)value);
			ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_loss_event_rate);
		}
		break;
	case TFRC_OPT_LOSS_INTERVALS:
		opt_recv->ccid3or_loss_intervals_idx = idx;
		opt_recv->ccid3or_loss_intervals_len = len;
		ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n",
			       dccp_role(sk), sk,
			       opt_recv->ccid3or_loss_intervals_idx,
			       opt_recv->ccid3or_loss_intervals_len);
		break;
	case TFRC_OPT_RECEIVE_RATE:
		if (unlikely(len != 4)) {
			DCCP_WARN("%s(%p), invalid len %d "
				  "for TFRC_OPT_RECEIVE_RATE\n",
				  dccp_role(sk), sk, len);
			rc = -EINVAL;
		} else {
			opt_recv->ccid3or_receive_rate =
						ntohl(*(__be32 *)value);
			ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_receive_rate);
		}
		break;
	}

	return rc;
}

622
static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
623
{
624
	struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
625

626
	hctx->ccid3hctx_s     = 0;
627
	hctx->ccid3hctx_rtt   = 0;
628 629
	hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
	INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
630

631 632
	hctx->ccid3hctx_no_feedback_timer.function =
				ccid3_hc_tx_no_feedback_timer;
633
	hctx->ccid3hctx_no_feedback_timer.data     = (unsigned long)sk;
634 635 636 637 638 639 640
	init_timer(&hctx->ccid3hctx_no_feedback_timer);

	return 0;
}

static void ccid3_hc_tx_exit(struct sock *sk)
{
641
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
642 643 644 645 646 647 648

	BUG_ON(hctx == NULL);

	ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);

	/* Empty packet history */
649
	dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
650 651 652 653 654 655
}

/*
 * RX Half Connection methods
 */

656
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
657 658 659 660 661 662 663 664 665 666 667 668
/* Translate a receiver (RX) half-connection state into its printable name. */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	/* Lookup table indexed directly by the state value */
	static const char *const rx_state_label[] = {
		[TFRC_RSTATE_NO_DATA] = "NO_DATA",
		[TFRC_RSTATE_DATA]    = "DATA",
		[TFRC_RSTATE_TERM]    = "TERM",
	};
	const char *label = rx_state_label[state];

	return label;
}
#endif

669 670
static void ccid3_hc_rx_set_state(struct sock *sk,
				  enum ccid3_hc_rx_states state)
671
{
672
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
673 674 675
	enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
676 677
		       dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
		       ccid3_rx_state_name(state));
678 679 680 681
	WARN_ON(state == oldstate);
	hcrx->ccid3hcrx_state = state;
}

682 683 684 685 686 687 688 689 690
/*
 * Track the receiver's mean packet size `s'.
 * @len: payload length in bytes
 *
 * The first non-empty packet seeds `s' directly; afterwards `s' is the
 * weighted average (9 * s + len) / 10. Empty packets (e.g. ACKs) are ignored.
 */
static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len)
{
	if (unlikely(len == 0)) {
		ccid3_pr_debug("Packet payload length is 0 - not updating\n");
		return;
	}

	if (hcrx->ccid3hcrx_s == 0)
		hcrx->ccid3hcrx_s = len;
	else
		hcrx->ccid3hcrx_s = (9 * hcrx->ccid3hcrx_s + len) / 10;
}

691 692
static void ccid3_hc_rx_send_feedback(struct sock *sk)
{
693
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
694
	struct dccp_sock *dp = dccp_sk(sk);
695
	struct dccp_rx_hist_entry *packet;
696
	struct timeval now;
G
Gerrit Renker 已提交
697
	suseconds_t delta;
698

699
	ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
700

701
	dccp_timestamp(sk, &now);
702

703 704 705 706
	switch (hcrx->ccid3hcrx_state) {
	case TFRC_RSTATE_NO_DATA:
		hcrx->ccid3hcrx_x_recv = 0;
		break;
G
Gerrit Renker 已提交
707 708 709 710
	case TFRC_RSTATE_DATA:
		delta = timeval_delta(&now,
				      &hcrx->ccid3hcrx_tstamp_last_feedback);
		DCCP_BUG_ON(delta < 0);
711 712
		hcrx->ccid3hcrx_x_recv =
			scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
713
		break;
714
	case TFRC_RSTATE_TERM:
715
		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
716 717 718
		return;
	}

719
	packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
720
	if (unlikely(packet == NULL)) {
721
		DCCP_WARN("%s(%p), no data packet in history!\n",
722
			  dccp_role(sk), sk);
723 724 725
		return;
	}

726
	hcrx->ccid3hcrx_tstamp_last_feedback = now;
I
Ian McDonald 已提交
727
	hcrx->ccid3hcrx_ccval_last_counter   = packet->dccphrx_ccval;
728 729
	hcrx->ccid3hcrx_bytes_recv	     = 0;

G
Gerrit Renker 已提交
730 731 732 733
	/* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */
	delta = timeval_delta(&now, &packet->dccphrx_tstamp);
	DCCP_BUG_ON(delta < 0);
	hcrx->ccid3hcrx_elapsed_time = delta / 10;
734

735
	if (hcrx->ccid3hcrx_p == 0)
736 737 738 739 740
		hcrx->ccid3hcrx_pinv = ~0U;	/* see RFC 4342, 8.5 */
	else if (hcrx->ccid3hcrx_p > 1000000) {
		DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
		hcrx->ccid3hcrx_pinv = 1;	/* use 100% in this case */
	} else
741
		hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
742

743
	dp->dccps_hc_rx_insert_options = 1;
744 745 746
	dccp_send_ack(sk);
}

747
static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
748
{
749
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
750
	__be32 x_recv, pinv;
751

752 753 754
	BUG_ON(hcrx == NULL);

	if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
755
		return 0;
756

I
Ian McDonald 已提交
757
	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
758 759

	if (dccp_packet_without_ack(skb))
760 761
		return 0;

762 763
	x_recv = htonl(hcrx->ccid3hcrx_x_recv);
	pinv   = htonl(hcrx->ccid3hcrx_pinv);
764 765 766 767 768 769

	if ((hcrx->ccid3hcrx_elapsed_time != 0 &&
	     dccp_insert_option_elapsed_time(sk, skb,
					     hcrx->ccid3hcrx_elapsed_time)) ||
	    dccp_insert_option_timestamp(sk, skb) ||
	    dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
770
			       &pinv, sizeof(pinv)) ||
771
	    dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
772
			       &x_recv, sizeof(x_recv)))
773 774 775
		return -1;

	return 0;
776 777 778 779 780 781 782 783
}

/* calculate first loss interval
 *
 * returns estimated loss interval in usecs */

static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
{
784
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
785
	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
G
Gerrit Renker 已提交
786 787
	u32 x_recv, p;
	suseconds_t rtt, delta;
788
	struct timeval tstamp = { 0, };
789 790 791
	int interval = 0;
	int win_count = 0;
	int step = 0;
792
	u64 fval;
793

794 795 796
	list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
				 dccphrx_node) {
		if (dccp_rx_hist_entry_data_packet(entry)) {
797 798 799 800
			tail = entry;

			switch (step) {
			case 0:
801
				tstamp	  = entry->dccphrx_tstamp;
802
				win_count = entry->dccphrx_ccval;
803 804 805
				step = 1;
				break;
			case 1:
806
				interval = win_count - entry->dccphrx_ccval;
807 808 809 810 811 812 813 814 815
				if (interval < 0)
					interval += TFRC_WIN_COUNT_LIMIT;
				if (interval > 4)
					goto found;
				break;
			}
		}
	}

816
	if (unlikely(step == 0)) {
817
		DCCP_WARN("%s(%p), packet history has no data packets!\n",
818
			  dccp_role(sk), sk);
819 820 821
		return ~0;
	}

822
	if (unlikely(interval == 0)) {
823
		DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
824
			  "Defaulting to 1\n", dccp_role(sk), sk);
825 826 827
		interval = 1;
	}
found:
I
Ian McDonald 已提交
828
	if (!tail) {
829
		DCCP_CRIT("tail is null\n");
I
Ian McDonald 已提交
830 831
		return ~0;
	}
G
Gerrit Renker 已提交
832 833 834 835 836

	delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
	DCCP_BUG_ON(delta < 0);

	rtt = delta * 4 / interval;
837 838
	ccid3_pr_debug("%s(%p), approximated RTT to %dus\n",
		       dccp_role(sk), sk, (int)rtt);
839

840 841 842
	/*
	 * Determine the length of the first loss interval via inverse lookup.
	 * Assume that X_recv can be computed by the throughput equation
843 844 845
	 *		    s
	 *	X_recv = --------
	 *		 R * fval
846 847 848
	 * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
	 */
	if (rtt == 0) {			/* would result in divide-by-zero */
849 850
		DCCP_WARN("RTT==0\n");
		return ~0;
851
	}
852

853 854
	dccp_timestamp(sk, &tstamp);
	delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
G
Gerrit Renker 已提交
855
	DCCP_BUG_ON(delta <= 0);
856

G
Gerrit Renker 已提交
857
	x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
858 859 860 861
	if (x_recv == 0) {		/* would also trigger divide-by-zero */
		DCCP_WARN("X_recv==0\n");
		if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
			DCCP_BUG("stored value of X_recv is zero");
862
			return ~0;
863
		}
I
Ian McDonald 已提交
864 865
	}

866 867
	fval = scaled_div(hcrx->ccid3hcrx_s, rtt);
	fval = scaled_div32(fval, x_recv);
868
	p = tfrc_calc_x_reverse_lookup(fval);
869

870
	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
871
		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
872 873 874 875

	if (p == 0)
		return ~0;
	else
876
		return 1000000 / p;
877 878 879 880
}

/*
 * Record a new loss event in the loss interval history.
 * @seq_loss: sequence number stored for the new loss interval
 * @win_loss: window counter value stored for the new loss interval
 *
 * With an empty history, the history is created through
 * dccp_li_hist_interval_new() and the length of its head entry is set from
 * ccid3_hc_rx_calc_first_li(). Otherwise a new entry is put at the head, its
 * length being the sequence distance from the previous head to @seq_loss,
 * and the entry at the tail is removed and freed.
 */
static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
{
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
	struct dccp_li_hist_entry *head;
	u64 seq_temp;

	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
		if (!dccp_li_hist_interval_new(ccid3_li_hist,
		   &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
			return;

		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
		   struct dccp_li_hist_entry, dccplih_node);
		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
	} else {
		struct dccp_li_hist_entry *entry;
		struct list_head *tail;

		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
		   struct dccp_li_hist_entry, dccplih_node);
		/* FIXME win count check removed as was wrong */
		/* should make this check with receive history */
		/* and compare there as per section 10.2 of RFC4342 */

		/* new loss event detected */
		/* calculate last interval length */
		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
		entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC);

		if (entry == NULL) {
			DCCP_BUG("out of memory - can not allocate entry");
			return;
		}

		list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist);

		/*
		 * Drop the oldest interval. The slab object is the entry that
		 * embeds the list node, so convert the node pointer back to
		 * its entry before freeing instead of freeing the node pointer
		 * itself.
		 */
		tail = hcrx->ccid3hcrx_li_hist.prev;
		list_del(tail);
		kmem_cache_free(ccid3_li_hist->dccplih_slab,
				list_entry(tail, struct dccp_li_hist_entry,
					   dccplih_node));

		/* Create the newest interval */
		entry->dccplih_seqno = seq_loss;
		entry->dccplih_interval = seq_temp;
		entry->dccplih_win_count = win_loss;
	}
}

I
Ian McDonald 已提交
926 927
static int ccid3_hc_rx_detect_loss(struct sock *sk,
                                    struct dccp_rx_hist_entry *packet)
928
{
929
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
930 931
	struct dccp_rx_hist_entry *rx_hist =
				dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
I
Ian McDonald 已提交
932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
	u64 seqno = packet->dccphrx_seqno;
	u64 tmp_seqno;
	int loss = 0;
	u8 ccval;


	tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;

	if (!rx_hist ||
	   follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
		hcrx->ccid3hcrx_seqno_nonloss = seqno;
		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
		goto detect_out;
	}

947

I
Ian McDonald 已提交
948 949 950 951 952 953 954 955 956 957 958
	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
	   > TFRC_RECV_NUM_LATE_LOSS) {
		loss = 1;
		ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
		   hcrx->ccid3hcrx_ccval_nonloss);
		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
		dccp_inc_seqno(&tmp_seqno);
		hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
		dccp_inc_seqno(&tmp_seqno);
		while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
		   tmp_seqno, &ccval)) {
959
			hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
I
Ian McDonald 已提交
960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976
			hcrx->ccid3hcrx_ccval_nonloss = ccval;
			dccp_inc_seqno(&tmp_seqno);
		}
	}

	/* FIXME - this code could be simplified with above while */
	/* but works at moment */
	if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
		hcrx->ccid3hcrx_seqno_nonloss = seqno;
		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
	}

detect_out:
	dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
		   &hcrx->ccid3hcrx_li_hist, packet,
		   hcrx->ccid3hcrx_seqno_nonloss);
	return loss;
977 978 979 980
}

static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
981
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
982
	const struct dccp_options_received *opt_recv;
983
	struct dccp_rx_hist_entry *packet;
984
	struct timeval now;
G
Gerrit Renker 已提交
985 986
	u32 p_prev, rtt_prev;
	suseconds_t r_sample, t_elapsed;
987
	int loss, payload_size;
988

989
	BUG_ON(hcrx == NULL);
990

991
	opt_recv = &dccp_sk(sk)->dccps_options_received;
992

993 994 995 996 997
	switch (DCCP_SKB_CB(skb)->dccpd_type) {
	case DCCP_PKT_ACK:
		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
			return;
	case DCCP_PKT_DATAACK:
998
		if (opt_recv->dccpor_timestamp_echo == 0)
999
			break;
I
Ian McDonald 已提交
1000
		rtt_prev = hcrx->ccid3hcrx_rtt;
1001
		dccp_timestamp(sk, &now);
1002 1003 1004 1005
		timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
		r_sample = timeval_usecs(&now);
		t_elapsed = opt_recv->dccpor_elapsed_time * 10;

G
Gerrit Renker 已提交
1006
		DCCP_BUG_ON(r_sample < 0);
1007
		if (unlikely(r_sample <= t_elapsed))
G
Gerrit Renker 已提交
1008
			DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n",
A
Andrew Morton 已提交
1009
				  (long)r_sample, (long)t_elapsed);
1010 1011
		else
			r_sample -= t_elapsed;
1012
		CCID3_RTT_SANITY_CHECK(r_sample);
1013 1014 1015 1016 1017 1018 1019

		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
			hcrx->ccid3hcrx_rtt = r_sample;
		else
			hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
					      r_sample / 10;

I
Ian McDonald 已提交
1020
		if (rtt_prev != hcrx->ccid3hcrx_rtt)
1021 1022
			ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n",
				       dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
1023
				       opt_recv->dccpor_elapsed_time);
1024 1025 1026
		break;
	case DCCP_PKT_DATA:
		break;
1027
	default: /* We're not interested in other packet types, move along */
1028 1029 1030
		return;
	}

1031
	packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp,
1032
					skb, GFP_ATOMIC);
1033
	if (unlikely(packet == NULL)) {
1034
		DCCP_WARN("%s(%p), Not enough mem to add rx packet "
1035
			  "to history, consider it lost!\n", dccp_role(sk), sk);
1036 1037 1038
		return;
	}

I
Ian McDonald 已提交
1039
	loss = ccid3_hc_rx_detect_loss(sk, packet);
1040 1041 1042 1043

	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
		return;

1044 1045 1046
	payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4;
	ccid3_hc_rx_update_s(hcrx, payload_size);

1047 1048
	switch (hcrx->ccid3hcrx_state) {
	case TFRC_RSTATE_NO_DATA:
1049 1050
		ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial "
			       "feedback\n", dccp_role(sk), sk,
1051
			       dccp_state_name(sk->sk_state), skb);
1052 1053 1054 1055
		ccid3_hc_rx_send_feedback(sk);
		ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
		return;
	case TFRC_RSTATE_DATA:
1056
		hcrx->ccid3hcrx_bytes_recv += payload_size;
I
Ian McDonald 已提交
1057
		if (loss)
1058 1059
			break;

1060
		dccp_timestamp(sk, &now);
1061 1062
		if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) -
		     (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) {
1063 1064
			hcrx->ccid3hcrx_tstamp_last_ack = now;
			ccid3_hc_rx_send_feedback(sk);
1065
		}
1066
		return;
1067
	case TFRC_RSTATE_TERM:
1068
		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
1069 1070 1071 1072
		return;
	}

	/* Dealing with packet loss */
1073
	ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
1074
		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));
1075 1076 1077 1078

	p_prev = hcrx->ccid3hcrx_p;
	
	/* Calculate loss event rate */
I
Ian McDonald 已提交
1079 1080 1081
	if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
		u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);

1082
		/* Scaling up by 1000000 as fixed decimal */
I
Ian McDonald 已提交
1083 1084
		if (i_mean != 0)
			hcrx->ccid3hcrx_p = 1000000 / i_mean;
1085 1086
	} else
		DCCP_BUG("empty loss history");
1087 1088 1089 1090 1091 1092 1093

	if (hcrx->ccid3hcrx_p > p_prev) {
		ccid3_hc_rx_send_feedback(sk);
		return;
	}
}

1094
static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
1095
{
1096
	struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
1097

1098
	ccid3_pr_debug("entry\n");
1099 1100 1101

	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
	INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
1102
	INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
1103
	dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack);
1104
	hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
1105
	hcrx->ccid3hcrx_s   = 0;
1106
	hcrx->ccid3hcrx_rtt = 0;
1107 1108 1109 1110 1111
	return 0;
}

static void ccid3_hc_rx_exit(struct sock *sk)
{
1112
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1113

1114
	BUG_ON(hcrx == NULL);
1115 1116 1117 1118

	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);

	/* Empty packet history */
1119
	dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
1120 1121

	/* Empty loss interval history */
1122
	dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
1123 1124
}

1125 1126
static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
{
1127
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1128

1129 1130 1131 1132
	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return;

1133
	BUG_ON(hcrx == NULL);
1134

1135 1136 1137
	info->tcpi_ca_state = hcrx->ccid3hcrx_state;
	info->tcpi_options  |= TCPI_OPT_TIMESTAMPS;
	info->tcpi_rcv_rtt  = hcrx->ccid3hcrx_rtt;
1138 1139 1140 1141
}

static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
{
1142
	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
1143

1144 1145 1146 1147
	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return;

1148
	BUG_ON(hctx == NULL);
1149 1150 1151 1152 1153

	info->tcpi_rto = hctx->ccid3hctx_t_rto;
	info->tcpi_rtt = hctx->ccid3hctx_rtt;
}

1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207
/*
 * ccid3_hc_rx_getsockopt  -  copy receiver CCID3 information to user space
 * @sk:      socket being queried
 * @optname: option selector; only DCCP_SOCKOPT_CCID_RX_INFO is handled
 * @len:     size of the user buffer as supplied by the caller
 * @optval:  user buffer receiving the data
 * @optlen:  user location receiving the number of bytes written
 *
 * Returns 0 on success, -EINVAL for listening sockets or when @len is
 * negative or smaller than the TFRC RX info block, -ENOPROTOOPT for any
 * other @optname and -EFAULT if copying to user space fails.
 */
static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
				  u32 __user *optval, int __user *optlen)
{
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
	const void *val;

	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return -EINVAL;

	switch (optname) {
	case DCCP_SOCKOPT_CCID_RX_INFO:
		/*
		 * len is signed while sizeof yields an unsigned value: without
		 * the explicit sign test a negative len would be converted to
		 * a huge unsigned number and slip through the size check.
		 */
		if (len < 0 || (size_t)len < sizeof(hcrx->ccid3hcrx_tfrc))
			return -EINVAL;
		len = sizeof(hcrx->ccid3hcrx_tfrc);
		val = &hcrx->ccid3hcrx_tfrc;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, val, len))
		return -EFAULT;

	return 0;
}

/*
 * ccid3_hc_tx_getsockopt  -  copy sender CCID3 information to user space
 * @sk:      socket being queried
 * @optname: option selector; only DCCP_SOCKOPT_CCID_TX_INFO is handled
 * @len:     size of the user buffer as supplied by the caller
 * @optval:  user buffer receiving the data
 * @optlen:  user location receiving the number of bytes written
 *
 * Returns 0 on success, -EINVAL for listening sockets or when @len is
 * negative or smaller than the TFRC TX info block, -ENOPROTOOPT for any
 * other @optname and -EFAULT if copying to user space fails.
 */
static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
				  u32 __user *optval, int __user *optlen)
{
	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
	const void *val;

	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return -EINVAL;

	switch (optname) {
	case DCCP_SOCKOPT_CCID_TX_INFO:
		/*
		 * len is signed while sizeof yields an unsigned value: without
		 * the explicit sign test a negative len would be converted to
		 * a huge unsigned number and slip through the size check.
		 */
		if (len < 0 || (size_t)len < sizeof(hctx->ccid3hctx_tfrc))
			return -EINVAL;
		len = sizeof(hctx->ccid3hctx_tfrc);
		val = &hctx->ccid3hctx_tfrc;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, val, len))
		return -EFAULT;

	return 0;
}

1208
static struct ccid_operations ccid3 = {
I
Ian McDonald 已提交
1209
	.ccid_id		   = DCCPC_CCID3,
1210 1211
	.ccid_name		   = "ccid3",
	.ccid_owner		   = THIS_MODULE,
1212
	.ccid_hc_tx_obj_size	   = sizeof(struct ccid3_hc_tx_sock),
1213 1214 1215 1216 1217 1218
	.ccid_hc_tx_init	   = ccid3_hc_tx_init,
	.ccid_hc_tx_exit	   = ccid3_hc_tx_exit,
	.ccid_hc_tx_send_packet	   = ccid3_hc_tx_send_packet,
	.ccid_hc_tx_packet_sent	   = ccid3_hc_tx_packet_sent,
	.ccid_hc_tx_packet_recv	   = ccid3_hc_tx_packet_recv,
	.ccid_hc_tx_parse_options  = ccid3_hc_tx_parse_options,
1219
	.ccid_hc_rx_obj_size	   = sizeof(struct ccid3_hc_rx_sock),
1220 1221 1222 1223
	.ccid_hc_rx_init	   = ccid3_hc_rx_init,
	.ccid_hc_rx_exit	   = ccid3_hc_rx_exit,
	.ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
	.ccid_hc_rx_packet_recv	   = ccid3_hc_rx_packet_recv,
1224 1225
	.ccid_hc_rx_get_info	   = ccid3_hc_rx_get_info,
	.ccid_hc_tx_get_info	   = ccid3_hc_tx_get_info,
1226 1227
	.ccid_hc_rx_getsockopt	   = ccid3_hc_rx_getsockopt,
	.ccid_hc_tx_getsockopt	   = ccid3_hc_tx_getsockopt,
1228
};
1229

1230
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
/* Debug switch, exported as a module parameter with mode 0444 */
module_param(ccid3_debug, int, 0444);
MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
#endif
1234 1235 1236

static __init int ccid3_module_init(void)
{
1237
	int rc = -ENOBUFS;
1238

1239 1240
	ccid3_rx_hist = dccp_rx_hist_new("ccid3");
	if (ccid3_rx_hist == NULL)
1241 1242
		goto out;

1243 1244 1245
	ccid3_tx_hist = dccp_tx_hist_new("ccid3");
	if (ccid3_tx_hist == NULL)
		goto out_free_rx;
1246

1247 1248
	ccid3_li_hist = dccp_li_hist_new("ccid3");
	if (ccid3_li_hist == NULL)
1249
		goto out_free_tx;
1250 1251

	rc = ccid_register(&ccid3);
1252
	if (rc != 0)
1253 1254 1255
		goto out_free_loss_interval_history;
out:
	return rc;
1256

1257
out_free_loss_interval_history:
1258 1259
	dccp_li_hist_delete(ccid3_li_hist);
	ccid3_li_hist = NULL;
1260 1261 1262 1263 1264 1265
out_free_tx:
	dccp_tx_hist_delete(ccid3_tx_hist);
	ccid3_tx_hist = NULL;
out_free_rx:
	dccp_rx_hist_delete(ccid3_rx_hist);
	ccid3_rx_hist = NULL;
1266 1267 1268 1269 1270 1271 1272 1273
	goto out;
}
module_init(ccid3_module_init);

static __exit void ccid3_module_exit(void)
{
	ccid_unregister(&ccid3);

1274 1275 1276
	if (ccid3_tx_hist != NULL) {
		dccp_tx_hist_delete(ccid3_tx_hist);
		ccid3_tx_hist = NULL;
1277
	}
1278 1279 1280
	if (ccid3_rx_hist != NULL) {
		dccp_rx_hist_delete(ccid3_rx_hist);
		ccid3_rx_hist = NULL;
1281
	}
1282 1283 1284
	if (ccid3_li_hist != NULL) {
		dccp_li_hist_delete(ccid3_li_hist);
		ccid3_li_hist = NULL;
1285 1286 1287 1288
	}
}
module_exit(ccid3_module_exit);

1289
/* Module metadata */
MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
MODULE_LICENSE("GPL");
MODULE_ALIAS("net-dccp-ccid-3");