/*
 *  net/dccp/ccids/ccid3.c
 *
 *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
 *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
 *
 *  An implementation of the DCCP protocol
 *
 *  This code has been developed by the University of Waikato WAND
 *  research group. For further information please see http://www.wand.net.nz/
 *
 *  This code also uses code from Lulea University, rereleased as GPL by its
 *  authors:
 *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
 *
 *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
 *  and to make it work as a loadable module in the DCCP stack written by
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
 *
 *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include "../ccid.h"
#include "../dccp.h"
#include "lib/packet_history.h"
#include "lib/loss_interval.h"
#include "lib/tfrc.h"
#include "ccid3.h"

/*
 * Debug output: with CONFIG_IP_DCCP_CCID3_DEBUG set, ccid3_pr_debug() forwards
 * to DCCP_PR_DEBUG() with ccid3_debug as its first argument; without it the
 * macro expands to nothing (its arguments are then not evaluated).
 */
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static int ccid3_debug;
#define ccid3_pr_debug(format, a...)	DCCP_PR_DEBUG(ccid3_debug, format, ##a)
#else
#define ccid3_pr_debug(format, a...)
#endif

/* File-wide history handles: sent packets, received packets, loss intervals */
static struct dccp_tx_hist *ccid3_tx_hist;
static struct dccp_rx_hist *ccid3_rx_hist;
static struct dccp_li_hist *ccid3_li_hist;
53

54 55 56
/*
 *	Transmitter Half-Connection Routines
 */
57
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
58 59 60 61 62 63 64 65 66 67 68 69 70
/* Map a sender state to its printable name (used by debug output only) */
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
	static char *names[] = {
		[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
		[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
		[TFRC_SSTATE_FBACK]    = "FBACK",
		[TFRC_SSTATE_TERM]     = "TERM",
	};
	const char *name = names[state];

	return name;
}
#endif

71 72
static void ccid3_hc_tx_set_state(struct sock *sk,
				  enum ccid3_hc_tx_states state)
73
{
74
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
75 76 77
	enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
78 79
		       dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
		       ccid3_tx_state_name(state));
80 81 82 83
	WARN_ON(state == oldstate);
	hctx->ccid3hctx_state = state;
}

84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
/*
 * Compute the initial sending rate X_init according to RFC 3390:
 *	w_init   =    min(4 * MSS, max(2 * MSS, 4380 bytes))
 *	X_init   =    w_init / RTT
 * For consistency with other parts of the code, X_init is scaled by 2^6.
 */
static inline u64 rfc3390_initial_rate(struct sock *sk)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const __u32 w_init = min(4 * dp->dccps_mss_cache,
				 max(2 * dp->dccps_mss_cache, 4380U));

	return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt);
}

99
/*
100
 * Recalculate t_ipi and delta (should be called whenever X changes)
101
 */
102
static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
103
{
104 105 106
	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
	hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
					   hctx->ccid3hctx_x >> 6);
107

108
	/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
109 110
	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
					   TFRC_OPSYS_HALF_TIME_GRAN);
I
Ian McDonald 已提交
111

112
	ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n",
I
Ian McDonald 已提交
113
		       hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta,
114
		       hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6));
I
Ian McDonald 已提交
115

116 117 118 119
}
/*
 * Update X by
 *    If (p > 0)
120
 *       X_calc = calcX(s, R, p);
121 122 123 124 125
 *       X = max(min(X_calc, 2 * X_recv), s / t_mbi);
 *    Else
 *       If (now - tld >= R)
 *          X = max(min(2 * X, 2 * X_recv), s / R);
 *          tld = now;
126
 *
127 128 129 130 131
 * Note: X and X_recv are both stored in units of 64 * bytes/second, to support
 *       fine-grained resolution of sending rates. This requires scaling by 2^6
 *       throughout the code. Only X_calc is unscaled (in bytes/second).
 *
 */
132 133
static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)

134
{
135
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
136
	const  __u64 old_x = hctx->ccid3hctx_x;
137

138
	if (hctx->ccid3hctx_p > 0) {
139

140
		hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
141
					hctx->ccid3hctx_x_recv * 2);
142 143
		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
					(((__u64)hctx->ccid3hctx_s) << 6) /
144
								TFRC_T_MBI);
145

G
Gerrit Renker 已提交
146
	} else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) -
147
			(suseconds_t)hctx->ccid3hctx_rtt >= 0) {
148

149 150 151
		hctx->ccid3hctx_x =
			max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv),
			    scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
152
				       hctx->ccid3hctx_rtt));
153
		hctx->ccid3hctx_t_ld = *now;
154
	}
155

I
Ian McDonald 已提交
156
	if (hctx->ccid3hctx_x != old_x) {
157 158 159 160 161
		ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
			       "X_recv=%u\n", (unsigned)(old_x >> 6),
			       (unsigned)(hctx->ccid3hctx_x >> 6),
			       hctx->ccid3hctx_x_calc,
			       (unsigned)(hctx->ccid3hctx_x_recv >> 6));
I
Ian McDonald 已提交
162

163
		ccid3_update_send_interval(hctx);
I
Ian McDonald 已提交
164
	}
165 166
}

167
/*
168 169
 *	Track the mean packet size `s' (cf. RFC 4342, 5.3 and  RFC 3448, 4.1)
 *	@len: DCCP packet payload size in bytes
170 171 172
 */
static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
{
173 174 175 176 177 178
	const u16 old_s = hctx->ccid3hctx_s;

	hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10;

	if (hctx->ccid3hctx_s != old_s)
		ccid3_update_send_interval(hctx);
179 180
}

181
/*
 *	Update Window Counter using the algorithm from [RFC 4342, 8.1].
 *	The algorithm is not applicable if RTT < 4 microseconds.
 */
static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
						struct timeval *now)
{
	suseconds_t delta;
	u32 steps;

	/* rtt / 4 is used as divisor below, so it must not be zero */
	if (unlikely(hctx->ccid3hctx_rtt < 4))
		return;

	/* time since the window counter was last advanced */
	delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count);
	DCCP_BUG_ON(delta < 0);

	/* number of whole quarter-RTTs that have elapsed */
	steps = (u32)delta / (hctx->ccid3hctx_rtt / 4);
	if (steps == 0)
		return;

	hctx->ccid3hctx_t_last_win_count = *now;
	/* advance by at most 5 per update and wrap mod 16 */
	hctx->ccid3hctx_last_win_count =
		(hctx->ccid3hctx_last_win_count + min_t(u32, steps, 5)) & 0xF;

	ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count);
}

208 209 210
static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
211
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
212
	unsigned long t_nfb = USEC_PER_SEC / 5;
213 214 215 216 217

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		/* XXX: set some sensible MIB */
218
		goto restart_timer;
219 220
	}

221
	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
222
		       ccid3_tx_state_name(hctx->ccid3hctx_state));
223

224 225
	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_FBACK:
226
		/* RFC 3448, 4.4: Halve send rate directly */
227 228 229
		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2,
					(((__u64)hctx->ccid3hctx_s) << 6) /
								    TFRC_T_MBI);
230

231 232
		ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u "
			       "bytes/s\n", dccp_role(sk), sk,
233
			       ccid3_tx_state_name(hctx->ccid3hctx_state),
234
			       (unsigned)(hctx->ccid3hctx_x >> 6));
235 236
		/* The value of R is still undefined and so we can not recompute
		 * the timout value. Keep initial value as per [RFC 4342, 5]. */
237
		t_nfb = TFRC_INITIAL_TIMEOUT;
238
		ccid3_update_send_interval(hctx);
239 240
		break;
	case TFRC_SSTATE_FBACK:
241 242
		/*
		 * Check if IDLE since last timeout and recv rate is less than
243
		 * 4 packets (in units of 64*bytes/sec) per RTT
244 245
		 */
		if (!hctx->ccid3hctx_idle ||
246
		    (hctx->ccid3hctx_x_recv >= 4 *
247 248
		     scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
				hctx->ccid3hctx_rtt))) {
249 250
			struct timeval now;

251
			ccid3_pr_debug("%s(%p, state=%s), not idle\n",
252
				       dccp_role(sk), sk,
253
				   ccid3_tx_state_name(hctx->ccid3hctx_state));
254

255 256 257 258
			/*
			 *  Modify the cached value of X_recv [RFC 3448, 4.4]
			 *
			 *  If (p == 0 || X_calc > 2 * X_recv)
259 260 261
			 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
			 *  Else
			 *    X_recv = X_calc / 4;
262 263
			 *
			 *  Note that X_recv is scaled by 2^6 while X_calc is not
264
			 */
265
			BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
266

267
			if (hctx->ccid3hctx_p  == 0 ||
268 269
			    (hctx->ccid3hctx_x_calc >
			     (hctx->ccid3hctx_x_recv >> 5))) {
270 271

				hctx->ccid3hctx_x_recv =
272 273
					max(hctx->ccid3hctx_x_recv / 2,
					    (((__u64)hctx->ccid3hctx_s) << 6) /
274
							  (2 * TFRC_T_MBI));
275

276 277
				if (hctx->ccid3hctx_p == 0)
					dccp_timestamp(sk, &now);
278 279 280 281
			} else {
				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
				hctx->ccid3hctx_x_recv <<= 4;
			}
282
			/* Now recalculate X [RFC 3448, 4.3, step (4)] */
283
			ccid3_hc_tx_update_x(sk, &now);
284
		}
285 286
		/*
		 * Schedule no feedback timer to expire in
287 288
		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
		 * See comments in packet_recv() regarding the value of t_RTO.
289
		 */
290
		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
291
		break;
292
	case TFRC_SSTATE_NO_SENT:
293
		DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk);
294 295
		/* fall through */
	case TFRC_SSTATE_TERM:
296 297 298 299
		goto out;
	}

	hctx->ccid3hctx_idle = 1;
300 301 302

restart_timer:
	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
303
			   jiffies + usecs_to_jiffies(t_nfb));
304 305 306 307 308
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

309 310 311 312 313 314
/*
 * returns
 *   > 0: delay (in msecs) that should pass before actually sending
 *   = 0: can send immediately
 *   < 0: error condition; do not send packet
 */
315
static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
316 317
{
	struct dccp_sock *dp = dccp_sk(sk);
318
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
319
	struct timeval now;
G
Gerrit Renker 已提交
320
	suseconds_t delay;
321

322
	BUG_ON(hctx == NULL);
323

324
	/*
325 326 327
	 * This function is called only for Data and DataAck packets. Sending
	 * zero-sized Data(Ack)s is theoretically possible, but for congestion
	 * control this case is pathological - ignore it.
328
	 */
329
	if (unlikely(skb->len == 0))
330
		return -EBADMSG;
331

332
	dccp_timestamp(sk, &now);
333 334 335

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_SENT:
336
		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
337
			       (jiffies +
338
				usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
339 340 341 342
		hctx->ccid3hctx_last_win_count	 = 0;
		hctx->ccid3hctx_t_last_win_count = now;
		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);

343
		/* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
344
		hctx->ccid3hctx_x = hctx->ccid3hctx_s = skb->len;
345
		hctx->ccid3hctx_x <<= 6;
346

347 348 349 350 351 352
		/* First timeout, according to [RFC 3448, 4.2], is 1 second */
		hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
		/* Initial delta: minimum of 0.5 sec and t_gran/2 */
		hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN;

		/* Set t_0 for initial packet */
353 354 355 356
		hctx->ccid3hctx_t_nom = now;
		break;
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
357
		delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
I
Ian McDonald 已提交
358
		ccid3_pr_debug("delay=%ld\n", (long)delay);
359
		/*
360
		 *	Scheduling of packet transmissions [RFC 3448, 4.6]
361 362 363 364 365 366
		 *
		 * if (t_now > t_nom - delta)
		 *       // send the packet now
		 * else
		 *       // send the packet in (t_nom - t_now) milliseconds.
		 */
G
Gerrit Renker 已提交
367
		if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0)
368
			return delay / 1000L;
369 370

		ccid3_hc_tx_update_win_count(hctx, &now);
371
		break;
372
	case TFRC_SSTATE_TERM:
373
		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
374
		return -EINVAL;
375 376
	}

377 378
	/* prepare to send now (add options etc.) */
	dp->dccps_hc_tx_insert_options = 1;
379 380 381
	DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;

	/* set the nominal send time for the next following packet */
382 383 384
	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);

	return 0;
385 386
}

387 388
static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
				    unsigned int len)
389
{
390
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
391
	struct timeval now;
392
	struct dccp_tx_hist_entry *packet;
393

394
	BUG_ON(hctx == NULL);
395

396
	ccid3_hc_tx_update_s(hctx, len);
397

398
	packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC);
399
	if (unlikely(packet == NULL)) {
400
		DCCP_CRIT("packet history - out of memory!");
401 402
		return;
	}
403 404 405
	dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet);

	dccp_timestamp(sk, &now);
406
	packet->dccphtx_tstamp = now;
407 408 409 410
	packet->dccphtx_seqno  = dccp_sk(sk)->dccps_gss;
	packet->dccphtx_rtt    = hctx->ccid3hctx_rtt;
	packet->dccphtx_sent   = 1;
	hctx->ccid3hctx_idle   = 0;
411 412 413 414
}

static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
415 416
	const struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
417
	struct ccid3_options_received *opt_recv;
418
	struct dccp_tx_hist_entry *packet;
419
	struct timeval now;
420
	unsigned long t_nfb;
421
	u32 pinv;
422
	suseconds_t r_sample, t_elapsed;
423

424
	BUG_ON(hctx == NULL);
425 426 427 428 429 430 431 432 433 434 435

	/* we are only interested in ACKs */
	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
		return;

	opt_recv = &hctx->ccid3hctx_options_received;

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
436
		/* get packet from history to look up t_recvdata */
437
		packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
438
					      DCCP_SKB_CB(skb)->dccpd_ack_seq);
439
		if (unlikely(packet == NULL)) {
440
			DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist "
441
				  "in history!\n",  dccp_role(sk), sk,
442
			    (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
443
				dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
444 445 446
			return;
		}

447
		/* Update receive rate in units of 64 * bytes/second */
448 449
		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
		hctx->ccid3hctx_x_recv <<= 6;
450 451 452

		/* Update loss event rate */
		pinv = opt_recv->ccid3or_loss_event_rate;
453
		if (pinv == ~0U || pinv == 0)	       /* see RFC 4342, 8.5   */
454
			hctx->ccid3hctx_p = 0;
455
		else				       /* can not exceed 100% */
456
			hctx->ccid3hctx_p = 1000000 / pinv;
457

458
		dccp_timestamp(sk, &now);
459 460 461

		/*
		 * Calculate new round trip sample as per [RFC 3448, 4.3] by
462
		 *	R_sample  =  (now - t_recvdata) - t_elapsed
463 464 465 466
		 */
		r_sample  = timeval_delta(&now, &packet->dccphtx_tstamp);
		t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;

G
Gerrit Renker 已提交
467 468
		DCCP_BUG_ON(r_sample < 0);
		if (unlikely(r_sample <= t_elapsed))
469 470
			DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n",
				  (int)r_sample, (int)t_elapsed);
471 472
		else
			r_sample -= t_elapsed;
473
		CCID3_RTT_SANITY_CHECK(r_sample);
474

475
		/* Update RTT estimate by
476 477 478 479 480 481 482 483
		 * If (No feedback recv)
		 *    R = R_sample;
		 * Else
		 *    R = q * R + (1 - q) * R_sample;
		 *
		 * q is a constant, RFC 3448 recomments 0.9
		 */
		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
484 485 486
			/*
			 * Larger Initial Windows [RFC 4342, sec. 5]
			 */
487
			hctx->ccid3hctx_rtt  = r_sample;
488
			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
489 490
			hctx->ccid3hctx_t_ld = now;

491
			ccid3_update_send_interval(hctx);
492

493
			ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
494
				       "R_sample=%dus, X=%u\n", dccp_role(sk),
A
Andrew Morton 已提交
495
				       sk, hctx->ccid3hctx_s,
496
				       dp->dccps_mss_cache, (int)r_sample,
497
				       (unsigned)(hctx->ccid3hctx_x >> 6));
498

499 500 501
			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
		} else {
			hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
502
						   (u32)r_sample) / 10;
503

504 505 506 507 508 509
			/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
			if (hctx->ccid3hctx_p > 0)
				hctx->ccid3hctx_x_calc =
					tfrc_calc_x(hctx->ccid3hctx_s,
						    hctx->ccid3hctx_rtt,
						    hctx->ccid3hctx_p);
510
			ccid3_hc_tx_update_x(sk, &now);
511

512
			ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, "
513 514
				       "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
				       dccp_role(sk),
515
				       sk, hctx->ccid3hctx_rtt, (int)r_sample,
516 517
				       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
				       hctx->ccid3hctx_x_calc,
518
				       (unsigned)(hctx->ccid3hctx_x_recv >> 6),
519
				       (unsigned)(hctx->ccid3hctx_x >> 6));
520 521 522 523 524 525
		}

		/* unschedule no feedback timer */
		sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);

		/* remove all packets older than the one acked from history */
526 527
		dccp_tx_hist_purge_older(ccid3_tx_hist,
					 &hctx->ccid3hctx_hist, packet);
528
		/*
529 530
		 * As we have calculated new ipi, delta, t_nom it is possible
		 * that we now can send a packet, so wake up dccp_wait_for_ccid
531 532
		 */
		sk->sk_write_space(sk);
533

534 535 536
		/*
		 * Update timeout interval for the nofeedback timer.
		 * We use a configuration option to increase the lower bound.
537 538
		 * This can help avoid triggering the nofeedback timer too
		 * often ('spinning') on LANs with small RTTs.
539
		 */
540
		hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
541
						   CONFIG_IP_DCCP_CCID3_RTO *
542
						   (USEC_PER_SEC/1000));
543 544
		/*
		 * Schedule no feedback timer to expire in
545
		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
546
		 */
547
		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
548

549
		ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
550 551
			       "expire in %lu jiffies (%luus)\n",
			       dccp_role(sk),
552 553 554
			       sk, usecs_to_jiffies(t_nfb), t_nfb);

		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
555
				   jiffies + usecs_to_jiffies(t_nfb));
556 557

		/* set idle flag */
558
		hctx->ccid3hctx_idle = 1;
559
		break;
560
	case TFRC_SSTATE_NO_SENT:	/* fall through */
G
Gerrit Renker 已提交
561
	case TFRC_SSTATE_TERM:		/* ignore feedback when closing */
562 563 564 565 566
		break;
	}
}

/*
 * Parse one CCID3 option received by the sender.
 * @option: option type
 * @len:    length of the option value in bytes
 * @idx:    stored as ccid3or_loss_intervals_idx for TFRC_OPT_LOSS_INTERVALS
 * @value:  option value; read as a 32-bit big-endian number for the
 *	    loss-event-rate and receive-rate options
 *
 * The cached option values are reset whenever the cached sequence number
 * differs from GSR. Returns 0, or -EINVAL if a 4-byte option has a different
 * length. Option types not listed in the switch are ignored.
 */
static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
				     unsigned char len, u16 idx,
				     unsigned char *value)
{
	int rc = 0;
	const struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
	struct ccid3_options_received *opt_recv;

	BUG_ON(hctx == NULL);

	opt_recv = &hctx->ccid3hctx_options_received;

	if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
		opt_recv->ccid3or_seqno		     = dp->dccps_gsr;
		opt_recv->ccid3or_loss_event_rate    = ~0;
		opt_recv->ccid3or_loss_intervals_idx = 0;
		opt_recv->ccid3or_loss_intervals_len = 0;
		opt_recv->ccid3or_receive_rate	     = 0;
	}

	switch (option) {
	case TFRC_OPT_LOSS_EVENT_RATE:
		if (unlikely(len != 4)) {
			DCCP_WARN("%s(%p), invalid len %d "
				  "for TFRC_OPT_LOSS_EVENT_RATE\n",
				  dccp_role(sk), sk, len);
			rc = -EINVAL;
		} else {
			opt_recv->ccid3or_loss_event_rate =
						ntohl(*(__be32 *)value);
			ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_loss_event_rate);
		}
		break;
	case TFRC_OPT_LOSS_INTERVALS:
		opt_recv->ccid3or_loss_intervals_idx = idx;
		opt_recv->ccid3or_loss_intervals_len = len;
		ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n",
			       dccp_role(sk), sk,
			       opt_recv->ccid3or_loss_intervals_idx,
			       opt_recv->ccid3or_loss_intervals_len);
		break;
	case TFRC_OPT_RECEIVE_RATE:
		if (unlikely(len != 4)) {
			DCCP_WARN("%s(%p), invalid len %d "
				  "for TFRC_OPT_RECEIVE_RATE\n",
				  dccp_role(sk), sk, len);
			rc = -EINVAL;
		} else {
			opt_recv->ccid3or_receive_rate =
						ntohl(*(__be32 *)value);
			ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_receive_rate);
		}
		break;
	}

	return rc;
}

629
static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
630
{
631
	struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
632

633
	hctx->ccid3hctx_s     = 0;
634
	hctx->ccid3hctx_rtt   = 0;
635 636
	hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
	INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
637

638 639
	hctx->ccid3hctx_no_feedback_timer.function =
				ccid3_hc_tx_no_feedback_timer;
640
	hctx->ccid3hctx_no_feedback_timer.data     = (unsigned long)sk;
641 642 643 644 645 646 647
	init_timer(&hctx->ccid3hctx_no_feedback_timer);

	return 0;
}

static void ccid3_hc_tx_exit(struct sock *sk)
{
648
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
649 650 651 652 653 654 655

	BUG_ON(hctx == NULL);

	ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);

	/* Empty packet history */
656
	dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
657 658
}

659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
/*
 * Export the sender's t_RTO and RTT through the tcpi_rto / tcpi_rtt fields of
 * @info. Nothing is written for listening sockets.
 */
static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
{
	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);

	/* Only non-listening socks have a private CCID block to report on */
	if (sk->sk_state != DCCP_LISTEN) {
		BUG_ON(hctx == NULL);

		info->tcpi_rto = hctx->ccid3hctx_t_rto;
		info->tcpi_rtt = hctx->ccid3hctx_rtt;
	}
}

/*
 * getsockopt() handler of the sender half-connection.
 * @optname: only DCCP_SOCKOPT_CCID_TX_INFO is supported
 * @len:     size of the user buffer at @optval
 * @optval:  user buffer receiving a copy of ccid3hctx_tfrc
 * @optlen:  user location receiving the number of bytes copied
 *
 * Returns 0 on success, -EINVAL for listening sockets or a buffer that is too
 * small, -ENOPROTOOPT for unknown options and -EFAULT if copying to user
 * space fails.
 */
static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
				  u32 __user *optval, int __user *optlen)
{
	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
	const void *val;

	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return -EINVAL;

	switch (optname) {
	case DCCP_SOCKOPT_CCID_TX_INFO:
		/*
		 * Test the sign separately: comparing the int directly with
		 * sizeof() converts a negative len to a huge unsigned value,
		 * which would slip through the size check.
		 */
		if (len < 0 || (size_t)len < sizeof(hctx->ccid3hctx_tfrc))
			return -EINVAL;
		len = sizeof(hctx->ccid3hctx_tfrc);
		val = &hctx->ccid3hctx_tfrc;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, val, len))
		return -EFAULT;

	return 0;
}

700
/*
 *	Receiver Half-Connection Routines
 */
703
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
704 705 706 707 708 709 710 711 712 713 714 715
/* Map a receiver state to its printable name (used by debug output only) */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	static char *names[] = {
		[TFRC_RSTATE_NO_DATA] = "NO_DATA",
		[TFRC_RSTATE_DATA]    = "DATA",
		[TFRC_RSTATE_TERM]    = "TERM",
	};
	const char *name = names[state];

	return name;
}
#endif

716 717
static void ccid3_hc_rx_set_state(struct sock *sk,
				  enum ccid3_hc_rx_states state)
718
{
719
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
720 721 722
	enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
723 724
		       dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
		       ccid3_rx_state_name(state));
725 726 727 728
	WARN_ON(state == oldstate);
	hcrx->ccid3hcrx_state = state;
}

729 730 731 732 733 734 735 736 737
/*
 * Receiver-side tracking of the mean packet size `s': the first non-zero
 * sample is taken as-is, later ones enter the average (9 * s + len) / 10.
 */
static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len)
{
	/* empty packets (e.g. ACKs) must not influence the average */
	if (unlikely(len == 0)) {
		ccid3_pr_debug("Packet payload length is 0 - not updating\n");
		return;
	}

	if (hcrx->ccid3hcrx_s == 0)
		hcrx->ccid3hcrx_s = len;
	else
		hcrx->ccid3hcrx_s = (9 * hcrx->ccid3hcrx_s + len) / 10;
}

738 739
static void ccid3_hc_rx_send_feedback(struct sock *sk)
{
740
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
741
	struct dccp_sock *dp = dccp_sk(sk);
742
	struct dccp_rx_hist_entry *packet;
743
	struct timeval now;
G
Gerrit Renker 已提交
744
	suseconds_t delta;
745

746
	ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
747

748
	dccp_timestamp(sk, &now);
749

750 751 752 753
	switch (hcrx->ccid3hcrx_state) {
	case TFRC_RSTATE_NO_DATA:
		hcrx->ccid3hcrx_x_recv = 0;
		break;
G
Gerrit Renker 已提交
754 755 756 757
	case TFRC_RSTATE_DATA:
		delta = timeval_delta(&now,
				      &hcrx->ccid3hcrx_tstamp_last_feedback);
		DCCP_BUG_ON(delta < 0);
758 759
		hcrx->ccid3hcrx_x_recv =
			scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
760
		break;
761
	case TFRC_RSTATE_TERM:
762
		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
763 764 765
		return;
	}

766
	packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
767
	if (unlikely(packet == NULL)) {
768
		DCCP_WARN("%s(%p), no data packet in history!\n",
769
			  dccp_role(sk), sk);
770 771 772
		return;
	}

773
	hcrx->ccid3hcrx_tstamp_last_feedback = now;
I
Ian McDonald 已提交
774
	hcrx->ccid3hcrx_ccval_last_counter   = packet->dccphrx_ccval;
775 776
	hcrx->ccid3hcrx_bytes_recv	     = 0;

G
Gerrit Renker 已提交
777 778 779 780
	/* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */
	delta = timeval_delta(&now, &packet->dccphrx_tstamp);
	DCCP_BUG_ON(delta < 0);
	hcrx->ccid3hcrx_elapsed_time = delta / 10;
781

782
	if (hcrx->ccid3hcrx_p == 0)
783 784 785 786 787
		hcrx->ccid3hcrx_pinv = ~0U;	/* see RFC 4342, 8.5 */
	else if (hcrx->ccid3hcrx_p > 1000000) {
		DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
		hcrx->ccid3hcrx_pinv = 1;	/* use 100% in this case */
	} else
788
		hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
789

790
	dp->dccps_hc_rx_insert_options = 1;
791 792 793
	dccp_send_ack(sk);
}

794
static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
795
{
796
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
797
	__be32 x_recv, pinv;
798

799 800 801
	BUG_ON(hcrx == NULL);

	if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
802
		return 0;
803

I
Ian McDonald 已提交
804
	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
805 806

	if (dccp_packet_without_ack(skb))
807 808
		return 0;

809 810
	x_recv = htonl(hcrx->ccid3hcrx_x_recv);
	pinv   = htonl(hcrx->ccid3hcrx_pinv);
811 812 813 814 815 816

	if ((hcrx->ccid3hcrx_elapsed_time != 0 &&
	     dccp_insert_option_elapsed_time(sk, skb,
					     hcrx->ccid3hcrx_elapsed_time)) ||
	    dccp_insert_option_timestamp(sk, skb) ||
	    dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
817
			       &pinv, sizeof(pinv)) ||
818
	    dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
819
			       &x_recv, sizeof(x_recv)))
820 821 822
		return -1;

	return 0;
823 824 825 826 827 828 829 830
}

/* calculate first loss interval
 *
 * returns estimated loss interval in usecs */

static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
{
831
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
832
	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
G
Gerrit Renker 已提交
833 834
	u32 x_recv, p;
	suseconds_t rtt, delta;
835
	struct timeval tstamp = { 0, };
836 837 838
	int interval = 0;
	int win_count = 0;
	int step = 0;
839
	u64 fval;
840

841 842 843
	list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
				 dccphrx_node) {
		if (dccp_rx_hist_entry_data_packet(entry)) {
844 845 846 847
			tail = entry;

			switch (step) {
			case 0:
848
				tstamp	  = entry->dccphrx_tstamp;
849
				win_count = entry->dccphrx_ccval;
850 851 852
				step = 1;
				break;
			case 1:
853
				interval = win_count - entry->dccphrx_ccval;
854 855 856 857 858 859 860 861 862
				if (interval < 0)
					interval += TFRC_WIN_COUNT_LIMIT;
				if (interval > 4)
					goto found;
				break;
			}
		}
	}

863
	if (unlikely(step == 0)) {
864
		DCCP_WARN("%s(%p), packet history has no data packets!\n",
865
			  dccp_role(sk), sk);
866 867 868
		return ~0;
	}

869
	if (unlikely(interval == 0)) {
870
		DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
871
			  "Defaulting to 1\n", dccp_role(sk), sk);
872 873 874
		interval = 1;
	}
found:
I
Ian McDonald 已提交
875
	if (!tail) {
876
		DCCP_CRIT("tail is null\n");
I
Ian McDonald 已提交
877 878
		return ~0;
	}
G
Gerrit Renker 已提交
879 880 881 882 883

	delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
	DCCP_BUG_ON(delta < 0);

	rtt = delta * 4 / interval;
884 885
	ccid3_pr_debug("%s(%p), approximated RTT to %dus\n",
		       dccp_role(sk), sk, (int)rtt);
886

887 888 889
	/*
	 * Determine the length of the first loss interval via inverse lookup.
	 * Assume that X_recv can be computed by the throughput equation
890 891 892
	 *		    s
	 *	X_recv = --------
	 *		 R * fval
893 894 895
	 * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
	 */
	if (rtt == 0) {			/* would result in divide-by-zero */
896 897
		DCCP_WARN("RTT==0\n");
		return ~0;
898
	}
899

900 901
	dccp_timestamp(sk, &tstamp);
	delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
G
Gerrit Renker 已提交
902
	DCCP_BUG_ON(delta <= 0);
903

G
Gerrit Renker 已提交
904
	x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
905 906 907 908
	if (x_recv == 0) {		/* would also trigger divide-by-zero */
		DCCP_WARN("X_recv==0\n");
		if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
			DCCP_BUG("stored value of X_recv is zero");
909
			return ~0;
910
		}
I
Ian McDonald 已提交
911 912
	}

913 914
	fval = scaled_div(hcrx->ccid3hcrx_s, rtt);
	fval = scaled_div32(fval, x_recv);
915
	p = tfrc_calc_x_reverse_lookup(fval);
916

917
	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
918
		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
919 920 921 922

	if (p == 0)
		return ~0;
	else
923
		return 1000000 / p;
924 925 926 927
}

/*
 * ccid3_hc_rx_update_li  -  record a loss event in the loss interval history
 * @sk:       socket owning the CCID3 receiver state
 * @seq_loss: sequence number associated with the loss event
 * @win_loss: window counter value associated with the loss event
 *
 * If the history is still empty it is populated through
 * dccp_li_hist_interval_new() and the length of the head interval is taken
 * from ccid3_hc_rx_calc_first_li() (which may return ~0 on failure).
 * Otherwise a new entry is pushed at the head of the list and the entry at
 * the tail is released, so the number of entries does not change.
 */
static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
{
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
	struct dccp_li_hist_entry *head;
	u64 seq_temp;

	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
		if (!dccp_li_hist_interval_new(ccid3_li_hist,
		   &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
			return;

		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
		   struct dccp_li_hist_entry, dccplih_node);
		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
	} else {
		struct dccp_li_hist_entry *entry, *oldest;

		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
		   struct dccp_li_hist_entry, dccplih_node);
		/* FIXME win count check removed as was wrong */
		/* should make this check with receive history */
		/* and compare there as per section 10.2 of RFC4342 */

		/* new loss event detected */
		/* calculate last interval length */
		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
		entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC);

		if (entry == NULL) {
			DCCP_BUG("out of memory - can not allocate entry");
			return;
		}

		list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist);

		/*
		 * Drop the oldest interval. The slab object is the containing
		 * entry, not the embedded list node: the two addresses only
		 * coincide when dccplih_node is the first member, so resolve
		 * the entry explicitly before freeing it.
		 */
		oldest = list_entry(hcrx->ccid3hcrx_li_hist.prev,
				    struct dccp_li_hist_entry, dccplih_node);
		list_del(&oldest->dccplih_node);
		kmem_cache_free(ccid3_li_hist->dccplih_slab, oldest);

		/* Create the newest interval */
		entry->dccplih_seqno = seq_loss;
		entry->dccplih_interval = seq_temp;
		entry->dccplih_win_count = win_loss;
	}
}

I
Ian McDonald 已提交
973
static int ccid3_hc_rx_detect_loss(struct sock *sk,
974
				    struct dccp_rx_hist_entry *packet)
975
{
976
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
977 978
	struct dccp_rx_hist_entry *rx_hist =
				dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
I
Ian McDonald 已提交
979 980 981 982 983 984 985 986 987 988 989 990 991 992 993
	u64 seqno = packet->dccphrx_seqno;
	u64 tmp_seqno;
	int loss = 0;
	u8 ccval;


	tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;

	if (!rx_hist ||
	   follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
		hcrx->ccid3hcrx_seqno_nonloss = seqno;
		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
		goto detect_out;
	}

994

I
Ian McDonald 已提交
995 996 997 998 999 1000 1001 1002 1003 1004 1005
	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
	   > TFRC_RECV_NUM_LATE_LOSS) {
		loss = 1;
		ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
		   hcrx->ccid3hcrx_ccval_nonloss);
		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
		dccp_inc_seqno(&tmp_seqno);
		hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
		dccp_inc_seqno(&tmp_seqno);
		while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
		   tmp_seqno, &ccval)) {
1006
			hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
I
Ian McDonald 已提交
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023
			hcrx->ccid3hcrx_ccval_nonloss = ccval;
			dccp_inc_seqno(&tmp_seqno);
		}
	}

	/* FIXME - this code could be simplified with above while */
	/* but works at moment */
	if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
		hcrx->ccid3hcrx_seqno_nonloss = seqno;
		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
	}

detect_out:
	dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
		   &hcrx->ccid3hcrx_li_hist, packet,
		   hcrx->ccid3hcrx_seqno_nonloss);
	return loss;
1024 1025 1026 1027
}

static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
1028
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1029
	const struct dccp_options_received *opt_recv;
1030
	struct dccp_rx_hist_entry *packet;
1031
	struct timeval now;
G
Gerrit Renker 已提交
1032 1033
	u32 p_prev, rtt_prev;
	suseconds_t r_sample, t_elapsed;
1034
	int loss, payload_size;
1035

1036
	BUG_ON(hcrx == NULL);
1037

1038
	opt_recv = &dccp_sk(sk)->dccps_options_received;
1039

1040 1041 1042 1043 1044
	switch (DCCP_SKB_CB(skb)->dccpd_type) {
	case DCCP_PKT_ACK:
		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
			return;
	case DCCP_PKT_DATAACK:
1045
		if (opt_recv->dccpor_timestamp_echo == 0)
1046
			break;
I
Ian McDonald 已提交
1047
		rtt_prev = hcrx->ccid3hcrx_rtt;
1048
		dccp_timestamp(sk, &now);
1049 1050 1051 1052
		timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
		r_sample = timeval_usecs(&now);
		t_elapsed = opt_recv->dccpor_elapsed_time * 10;

G
Gerrit Renker 已提交
1053
		DCCP_BUG_ON(r_sample < 0);
1054
		if (unlikely(r_sample <= t_elapsed))
G
Gerrit Renker 已提交
1055
			DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n",
A
Andrew Morton 已提交
1056
				  (long)r_sample, (long)t_elapsed);
1057 1058
		else
			r_sample -= t_elapsed;
1059
		CCID3_RTT_SANITY_CHECK(r_sample);
1060 1061 1062 1063 1064 1065 1066

		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
			hcrx->ccid3hcrx_rtt = r_sample;
		else
			hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
					      r_sample / 10;

I
Ian McDonald 已提交
1067
		if (rtt_prev != hcrx->ccid3hcrx_rtt)
1068 1069
			ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n",
				       dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
1070
				       opt_recv->dccpor_elapsed_time);
1071 1072 1073
		break;
	case DCCP_PKT_DATA:
		break;
1074
	default: /* We're not interested in other packet types, move along */
1075 1076 1077
		return;
	}

1078
	packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp,
1079
					skb, GFP_ATOMIC);
1080
	if (unlikely(packet == NULL)) {
1081
		DCCP_WARN("%s(%p), Not enough mem to add rx packet "
1082
			  "to history, consider it lost!\n", dccp_role(sk), sk);
1083 1084 1085
		return;
	}

I
Ian McDonald 已提交
1086
	loss = ccid3_hc_rx_detect_loss(sk, packet);
1087 1088 1089 1090

	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
		return;

1091 1092 1093
	payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4;
	ccid3_hc_rx_update_s(hcrx, payload_size);

1094 1095
	switch (hcrx->ccid3hcrx_state) {
	case TFRC_RSTATE_NO_DATA:
1096 1097
		ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial "
			       "feedback\n", dccp_role(sk), sk,
1098
			       dccp_state_name(sk->sk_state), skb);
1099 1100 1101 1102
		ccid3_hc_rx_send_feedback(sk);
		ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
		return;
	case TFRC_RSTATE_DATA:
1103
		hcrx->ccid3hcrx_bytes_recv += payload_size;
I
Ian McDonald 已提交
1104
		if (loss)
1105 1106
			break;

1107
		dccp_timestamp(sk, &now);
1108 1109
		if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) -
		     (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) {
1110 1111
			hcrx->ccid3hcrx_tstamp_last_ack = now;
			ccid3_hc_rx_send_feedback(sk);
1112
		}
1113
		return;
1114
	case TFRC_RSTATE_TERM:
1115
		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
1116 1117 1118 1119
		return;
	}

	/* Dealing with packet loss */
1120
	ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
1121
		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));
1122 1123

	p_prev = hcrx->ccid3hcrx_p;
1124

1125
	/* Calculate loss event rate */
I
Ian McDonald 已提交
1126 1127 1128
	if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
		u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);

1129
		/* Scaling up by 1000000 as fixed decimal */
I
Ian McDonald 已提交
1130 1131
		if (i_mean != 0)
			hcrx->ccid3hcrx_p = 1000000 / i_mean;
1132 1133
	} else
		DCCP_BUG("empty loss history");
1134 1135 1136 1137 1138 1139 1140

	if (hcrx->ccid3hcrx_p > p_prev) {
		ccid3_hc_rx_send_feedback(sk);
		return;
	}
}

1141
static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
1142
{
1143
	struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
1144

1145
	ccid3_pr_debug("entry\n");
1146 1147 1148

	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
	INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
1149
	INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
1150
	dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack);
1151
	hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
1152
	hcrx->ccid3hcrx_s   = 0;
1153
	hcrx->ccid3hcrx_rtt = 0;
1154 1155 1156 1157 1158
	return 0;
}

static void ccid3_hc_rx_exit(struct sock *sk)
{
1159
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1160

1161
	BUG_ON(hcrx == NULL);
1162 1163 1164 1165

	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);

	/* Empty packet history */
1166
	dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
1167 1168

	/* Empty loss interval history */
1169
	dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
1170 1171
}

1172 1173
static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
{
1174
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1175

1176 1177 1178 1179
	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return;

1180
	BUG_ON(hcrx == NULL);
1181

1182 1183 1184
	info->tcpi_ca_state = hcrx->ccid3hcrx_state;
	info->tcpi_options  |= TCPI_OPT_TIMESTAMPS;
	info->tcpi_rcv_rtt  = hcrx->ccid3hcrx_rtt;
1185 1186
}

1187 1188 1189 1190 1191
/*
 * ccid3_hc_rx_getsockopt  -  receiver-side CCID3 socket options
 * @sk:      socket being queried
 * @optname: option to read; only DCCP_SOCKOPT_CCID_RX_INFO is supported
 * @len:     size of the user buffer in bytes
 * @optval:  user buffer receiving the option value
 * @optlen:  user location receiving the number of bytes written
 *
 * Returns 0 on success, -EINVAL for listening sockets or when @len is
 * negative or too small, -ENOPROTOOPT for unknown options and -EFAULT if
 * copying to user space fails.
 */
static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
				  u32 __user *optval, int __user *optlen)
{
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
	const void *val;

	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return -EINVAL;

	switch (optname) {
	case DCCP_SOCKOPT_CCID_RX_INFO:
		/*
		 * Test the sign separately: comparing the int directly with
		 * sizeof() converts a negative length to a huge unsigned
		 * value, which would slip through the size check.
		 */
		if (len < 0 || (size_t)len < sizeof(hcrx->ccid3hcrx_tfrc))
			return -EINVAL;
		len = sizeof(hcrx->ccid3hcrx_tfrc);
		val = &hcrx->ccid3hcrx_tfrc;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, val, len))
		return -EFAULT;

	return 0;
}

1214
static struct ccid_operations ccid3 = {
I
Ian McDonald 已提交
1215
	.ccid_id		   = DCCPC_CCID3,
1216 1217
	.ccid_name		   = "ccid3",
	.ccid_owner		   = THIS_MODULE,
1218
	.ccid_hc_tx_obj_size	   = sizeof(struct ccid3_hc_tx_sock),
1219 1220 1221 1222 1223 1224
	.ccid_hc_tx_init	   = ccid3_hc_tx_init,
	.ccid_hc_tx_exit	   = ccid3_hc_tx_exit,
	.ccid_hc_tx_send_packet	   = ccid3_hc_tx_send_packet,
	.ccid_hc_tx_packet_sent	   = ccid3_hc_tx_packet_sent,
	.ccid_hc_tx_packet_recv	   = ccid3_hc_tx_packet_recv,
	.ccid_hc_tx_parse_options  = ccid3_hc_tx_parse_options,
1225
	.ccid_hc_rx_obj_size	   = sizeof(struct ccid3_hc_rx_sock),
1226 1227 1228 1229
	.ccid_hc_rx_init	   = ccid3_hc_rx_init,
	.ccid_hc_rx_exit	   = ccid3_hc_rx_exit,
	.ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
	.ccid_hc_rx_packet_recv	   = ccid3_hc_rx_packet_recv,
1230 1231
	.ccid_hc_rx_get_info	   = ccid3_hc_rx_get_info,
	.ccid_hc_tx_get_info	   = ccid3_hc_tx_get_info,
1232 1233
	.ccid_hc_rx_getsockopt	   = ccid3_hc_rx_getsockopt,
	.ccid_hc_tx_getsockopt	   = ccid3_hc_tx_getsockopt,
1234
};
1235

1236
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
/* Switch consulted by ccid3_pr_debug(); exported with mode 0444 */
module_param(ccid3_debug, int, 0444);
MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
#endif /* CONFIG_IP_DCCP_CCID3_DEBUG */
1240 1241 1242

static __init int ccid3_module_init(void)
{
1243
	int rc = -ENOBUFS;
1244

1245 1246
	ccid3_rx_hist = dccp_rx_hist_new("ccid3");
	if (ccid3_rx_hist == NULL)
1247 1248
		goto out;

1249 1250 1251
	ccid3_tx_hist = dccp_tx_hist_new("ccid3");
	if (ccid3_tx_hist == NULL)
		goto out_free_rx;
1252

1253 1254
	ccid3_li_hist = dccp_li_hist_new("ccid3");
	if (ccid3_li_hist == NULL)
1255
		goto out_free_tx;
1256 1257

	rc = ccid_register(&ccid3);
1258
	if (rc != 0)
1259 1260 1261
		goto out_free_loss_interval_history;
out:
	return rc;
1262

1263
out_free_loss_interval_history:
1264 1265
	dccp_li_hist_delete(ccid3_li_hist);
	ccid3_li_hist = NULL;
1266 1267 1268 1269 1270 1271
out_free_tx:
	dccp_tx_hist_delete(ccid3_tx_hist);
	ccid3_tx_hist = NULL;
out_free_rx:
	dccp_rx_hist_delete(ccid3_rx_hist);
	ccid3_rx_hist = NULL;
1272 1273 1274 1275 1276 1277 1278 1279
	goto out;
}
module_init(ccid3_module_init);

static __exit void ccid3_module_exit(void)
{
	ccid_unregister(&ccid3);

1280 1281 1282
	if (ccid3_tx_hist != NULL) {
		dccp_tx_hist_delete(ccid3_tx_hist);
		ccid3_tx_hist = NULL;
1283
	}
1284 1285 1286
	if (ccid3_rx_hist != NULL) {
		dccp_rx_hist_delete(ccid3_rx_hist);
		ccid3_rx_hist = NULL;
1287
	}
1288 1289 1290
	if (ccid3_li_hist != NULL) {
		dccp_li_hist_delete(ccid3_li_hist);
		ccid3_li_hist = NULL;
1291 1292 1293 1294
	}
}
module_exit(ccid3_module_exit);

1295
/* Module metadata */
MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
MODULE_LICENSE("GPL");
MODULE_ALIAS("net-dccp-ccid-3");