ccid3.c 36.9 KB
Newer Older
1 2 3 4
/*
 *  net/dccp/ccids/ccid3.c
 *
 *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5
 *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
 *
 *  An implementation of the DCCP protocol
 *
 *  This code has been developed by the University of Waikato WAND
 *  research group. For further information please see http://www.wand.net.nz/
 *
 *  This code also uses code from Lulea University, rereleased as GPL by its
 *  authors:
 *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
 *
 *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
 *  and to make it work as a loadable module in the DCCP stack written by
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
 *
 *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "../ccid.h"
#include "../dccp.h"
39
#include "lib/packet_history.h"
40
#include "lib/loss_interval.h"
41
#include "lib/tfrc.h"
42 43
#include "ccid3.h"

44 45 46
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static int ccid3_debug;
#define ccid3_pr_debug(format, a...)	DCCP_PR_DEBUG(ccid3_debug, format, ##a)
47 48 49 50
#else
#define ccid3_pr_debug(format, a...)
#endif

51 52
static struct dccp_tx_hist *ccid3_tx_hist;
static struct dccp_rx_hist *ccid3_rx_hist;
53
static struct dccp_li_hist *ccid3_li_hist;
54

55
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
56 57 58 59 60 61 62 63 64 65 66 67 68
/*
 * Translate a sender (TX) half-connection state into a printable name.
 * Used only as an argument to the debug print helpers.
 */
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
	/* Only string literals live here, so keep the whole table read-only */
	static const char * const ccid3_state_names[] = {
	[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
	[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
	[TFRC_SSTATE_FBACK]    = "FBACK",
	[TFRC_SSTATE_TERM]     = "TERM",
	};

	return ccid3_state_names[state];
}
#endif

69 70
static void ccid3_hc_tx_set_state(struct sock *sk,
				  enum ccid3_hc_tx_states state)
71
{
72
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
73 74 75
	enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
76 77
		       dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
		       ccid3_tx_state_name(state));
78 79 80 81
	WARN_ON(state == oldstate);
	hctx->ccid3hctx_state = state;
}

82 83 84 85 86
/*
 * Recalculate scheduled nominal send time t_nom, inter-packet interval
 * t_ipi, and delta value. Should be called after each change to X.
 */
static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
87
{
88 89
	timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);

90 91 92
	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
	hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
					   hctx->ccid3hctx_x >> 6);
93

94 95 96 97
	/* Update nominal send time with regard to the new t_ipi */
	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);

	/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
98 99
	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
					   TFRC_OPSYS_HALF_TIME_GRAN);
100 101 102 103
}
/*
 * Update X by
 *    If (p > 0)
104
 *       X_calc = calcX(s, R, p);
105 106 107 108 109
 *       X = max(min(X_calc, 2 * X_recv), s / t_mbi);
 *    Else
 *       If (now - tld >= R)
 *          X = max(min(2 * X, 2 * X_recv), s / R);
 *          tld = now;
110
 *
111 112 113 114
 * Note: X and X_recv are both stored in units of 64 * bytes/second, to support
 *       fine-grained resolution of sending rates. This requires scaling by 2^6
 *       throughout the code. Only X_calc is unscaled (in bytes/second).
 *
115 116
 * If X has changed, we also update the scheduled send time t_now,
 * the inter-packet interval t_ipi, and the delta value.
117
 */
118 119
static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)

120
{
121
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
122
	const  __u64 old_x = hctx->ccid3hctx_x;
123

124
	if (hctx->ccid3hctx_p > 0) {
125 126 127 128 129

		hctx->ccid3hctx_x = min_t(u64, hctx->ccid3hctx_x_calc << 6,
					       hctx->ccid3hctx_x_recv * 2  );
		hctx->ccid3hctx_x = max_t(u64, hctx->ccid3hctx_x,
					  (hctx->ccid3hctx_s << 6)/TFRC_T_MBI);
130

G
Gerrit Renker 已提交
131 132
	} else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) -
			(suseconds_t)hctx->ccid3hctx_rtt >= 0 ) {
133 134 135 136 137

		hctx->ccid3hctx_x = max(2 * min(hctx->ccid3hctx_x,
						hctx->ccid3hctx_x_recv),
					scaled_div(hctx->ccid3hctx_s << 6,
						   hctx->ccid3hctx_rtt    ));
138
		hctx->ccid3hctx_t_ld = *now;
139
	}
140

141 142
	if (hctx->ccid3hctx_x != old_x)
		ccid3_update_send_time(hctx);
143 144
}

145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
/*
 * 	Maintain the sender's mean packet size `s' (cf. RFC 4342, 5.3 and
 * 	RFC 3448, 4.1): the first sample is taken as-is, later samples are
 * 	folded in with weight 1/10.
 * 	@len: DCCP packet payload size in bytes
 */
static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
{
	if (unlikely(len == 0)) {
		ccid3_pr_debug("Packet payload length is 0 - not updating\n");
		return;
	}

	if (hctx->ccid3hctx_s == 0)
		hctx->ccid3hctx_s = len;
	else
		hctx->ccid3hctx_s = (9 * hctx->ccid3hctx_s + len) / 10;

	/*
	 * Possible optimisation, deliberately not done: recompute the sending
	 * rate (and with it t_ipi, delta and t_nom) whenever `s' changes.
	 * That would be non-standard and its benefit is unclear.
	 */
}

164 165 166
static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
167
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
168
	unsigned long t_nfb = USEC_PER_SEC / 5;
169 170 171 172 173

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		/* XXX: set some sensible MIB */
174
		goto restart_timer;
175 176 177 178 179 180 181
	}

	ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
		       ccid3_tx_state_name(hctx->ccid3hctx_state));
	
	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_FBACK:
182
		/* RFC 3448, 4.4: Halve send rate directly */
183
		hctx->ccid3hctx_x = max_t(u32, hctx->ccid3hctx_x / 2,
184
					  (hctx->ccid3hctx_s << 6)/TFRC_T_MBI);
185

186
		ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %u "
187 188 189
			       "bytes/s\n",
			       dccp_role(sk), sk,
			       ccid3_tx_state_name(hctx->ccid3hctx_state),
190
			       (unsigned)(hctx->ccid3hctx_x >> 6));
191 192
		/* The value of R is still undefined and so we can not recompute
		 * the timout value. Keep initial value as per [RFC 4342, 5]. */
193
		t_nfb = TFRC_INITIAL_TIMEOUT;
194
		ccid3_update_send_time(hctx);
195 196
		break;
	case TFRC_SSTATE_FBACK:
197 198
		/*
		 * Check if IDLE since last timeout and recv rate is less than
199
		 * 4 packets (in units of 64*bytes/sec) per RTT
200 201
		 */
		if (!hctx->ccid3hctx_idle ||
202 203
		    (hctx->ccid3hctx_x_recv >= 4 *
		     scaled_div(hctx->ccid3hctx_s << 6, hctx->ccid3hctx_rtt))) {
204 205
			struct timeval now;

206 207
			ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n",
				       dccp_role(sk), sk,
208 209
				       ccid3_tx_state_name(hctx->ccid3hctx_state));

210 211 212 213
			/*
			 *  Modify the cached value of X_recv [RFC 3448, 4.4]
			 *
			 *  If (p == 0 || X_calc > 2 * X_recv)
214 215 216
			 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
			 *  Else
			 *    X_recv = X_calc / 4;
217 218
			 *
			 *  Note that X_recv is scaled by 2^6 while X_calc is not
219
			 */
220
			BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
221

222
			if (hctx->ccid3hctx_p  == 0 ||
223 224 225 226 227 228 229
			    hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))  {

				hctx->ccid3hctx_x_recv =
					max_t(u64, hctx->ccid3hctx_x_recv / 2,
					      	  (hctx->ccid3hctx_s << 6) /
					      			(2*TFRC_T_MBI));

230 231 232
				if (hctx->ccid3hctx_p == 0)
					dccp_timestamp(sk, &now);
			} else
233
				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc << 4;
234

235
			/* Now recalculate X [RFC 3448, 4.3, step (4)] */
236
			ccid3_hc_tx_update_x(sk, &now);
237
		}
238 239
		/*
		 * Schedule no feedback timer to expire in
240 241
		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
		 * See comments in packet_recv() regarding the value of t_RTO.
242
		 */
243
		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
244
		break;
245 246 247 248
	case TFRC_SSTATE_NO_SENT:
		DCCP_BUG("Illegal %s state NO_SENT, sk=%p", dccp_role(sk), sk);
		/* fall through */
	case TFRC_SSTATE_TERM:
249 250 251 252
		goto out;
	}

	hctx->ccid3hctx_idle = 1;
253 254 255

restart_timer:
	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
256
		           jiffies + usecs_to_jiffies(t_nfb));
257 258 259 260 261
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

262 263 264 265 266 267
/*
 * returns
 *   > 0: delay (in msecs) that should pass before actually sending
 *   = 0: can send immediately
 *   < 0: error condition; do not send packet
 */
268
static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
269 270
{
	struct dccp_sock *dp = dccp_sk(sk);
271
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
272
	struct dccp_tx_hist_entry *new_packet;
273
	struct timeval now;
G
Gerrit Renker 已提交
274
	suseconds_t delay;
275

276
	BUG_ON(hctx == NULL);
277

278
	/*
279 280 281
	 * This function is called only for Data and DataAck packets. Sending
	 * zero-sized Data(Ack)s is theoretically possible, but for congestion
	 * control this case is pathological - ignore it.
282
	 */
283
	if (unlikely(skb->len == 0))
284
		return -EBADMSG;
285 286

	/* See if last packet allocated was not sent */
287 288
	new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
	if (new_packet == NULL || new_packet->dccphtx_sent) {
289
		new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist,
290
						    GFP_ATOMIC);
291

292
		if (unlikely(new_packet == NULL)) {
293 294
			DCCP_WARN("%s, sk=%p, not enough mem to add to history,"
				  "send refused\n", dccp_role(sk), sk);
295
			return -ENOBUFS;
296 297
		}

298
		dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet);
299 300
	}

301
	dccp_timestamp(sk, &now);
302 303 304

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_SENT:
305 306
		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
			       jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
307 308 309 310
		hctx->ccid3hctx_last_win_count	 = 0;
		hctx->ccid3hctx_t_last_win_count = now;
		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);

311
		/* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
312
		ccid3_hc_tx_update_s(hctx, skb->len);
313
		hctx->ccid3hctx_x = hctx->ccid3hctx_s << 6;
314

315 316 317 318 319 320
		/* First timeout, according to [RFC 3448, 4.2], is 1 second */
		hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
		/* Initial delta: minimum of 0.5 sec and t_gran/2 */
		hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN;

		/* Set t_0 for initial packet */
321 322 323 324
		hctx->ccid3hctx_t_nom = now;
		break;
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
325 326 327 328 329 330 331 332 333
		delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
		/*
		 * 	Scheduling of packet transmissions [RFC 3448, 4.6]
		 *
		 * if (t_now > t_nom - delta)
		 *       // send the packet now
		 * else
		 *       // send the packet in (t_nom - t_now) milliseconds.
		 */
G
Gerrit Renker 已提交
334
		if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0)
335
			return delay / 1000L;
336
		break;
337 338
	case TFRC_SSTATE_TERM:
		DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk);
339
		return -EINVAL;
340 341
	}

342 343 344 345 346 347 348
	/* prepare to send now (add options etc.) */
	dp->dccps_hc_tx_insert_options = 1;
	new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval =
				    hctx->ccid3hctx_last_win_count;
	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);

	return 0;
349 350
}

351
static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
352
{
353 354
	const struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
355
	struct timeval now;
G
Gerrit Renker 已提交
356
	suseconds_t quarter_rtt;
357
	struct dccp_tx_hist_entry *packet;
358

359
	BUG_ON(hctx == NULL);
360

361
	dccp_timestamp(sk, &now);
362

363
	ccid3_hc_tx_update_s(hctx, len);
364

365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
	packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
	if (unlikely(packet == NULL)) {
		DCCP_WARN("packet doesn't exist in history!\n");
		return;
	}
	if (unlikely(packet->dccphtx_sent)) {
		DCCP_WARN("no unsent packet in history!\n");
		return;
	}
	packet->dccphtx_tstamp = now;
	packet->dccphtx_seqno  = dp->dccps_gss;
	/*
	 * Check if win_count have changed
	 * Algorithm in "8.1. Window Counter Value" in RFC 4342.
	 */
	quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count);
	if (likely(hctx->ccid3hctx_rtt > 8))
		quarter_rtt /= hctx->ccid3hctx_rtt / 4;
383

384 385 386 387 388 389 390 391 392 393
	if (quarter_rtt > 0) {
		hctx->ccid3hctx_t_last_win_count = now;
		hctx->ccid3hctx_last_win_count	 = (hctx->ccid3hctx_last_win_count +
						    min_t(unsigned long, quarter_rtt, 5)) % 16;
		ccid3_pr_debug("%s, sk=%p, window changed from "
			       "%u to %u!\n",
			       dccp_role(sk), sk,
			       packet->dccphtx_ccval,
			       hctx->ccid3hctx_last_win_count);
	}
394

395 396 397
	hctx->ccid3hctx_idle = 0;
	packet->dccphtx_rtt  = hctx->ccid3hctx_rtt;
	packet->dccphtx_sent = 1;
398 399 400 401
}

static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
402 403
	const struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
404
	struct ccid3_options_received *opt_recv;
405
	struct dccp_tx_hist_entry *packet;
406
	struct timeval now;
407
	unsigned long t_nfb;
408
	u32 pinv;
409
	long r_sample, t_elapsed;
410

411
	BUG_ON(hctx == NULL);
412 413 414 415 416 417 418 419 420 421 422

	/* we are only interested in ACKs */
	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
		return;

	opt_recv = &hctx->ccid3hctx_options_received;

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
423
		/* get packet from history to look up t_recvdata */
424 425
		packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
						 DCCP_SKB_CB(skb)->dccpd_ack_seq);
426
		if (unlikely(packet == NULL)) {
427
			DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist "
428
				  "in history!\n",  dccp_role(sk), sk,
429
			    (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
430
				  dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
431 432 433
			return;
		}

434 435
		/* Update receive rate in units of 64 * bytes/second */
		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate << 6;
436 437 438

		/* Update loss event rate */
		pinv = opt_recv->ccid3or_loss_event_rate;
439
		if (pinv == ~0U || pinv == 0)	       /* see RFC 4342, 8.5   */
440
			hctx->ccid3hctx_p = 0;
441
		else				       /* can not exceed 100% */
442
 			hctx->ccid3hctx_p = 1000000 / pinv;
443

444
		dccp_timestamp(sk, &now);
445 446 447 448 449 450 451 452

		/*
		 * Calculate new round trip sample as per [RFC 3448, 4.3] by
		 * 	R_sample  =  (now - t_recvdata) - t_elapsed
		 */
		r_sample  = timeval_delta(&now, &packet->dccphtx_tstamp);
		t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;

G
Gerrit Renker 已提交
453 454
		DCCP_BUG_ON(r_sample < 0);
		if (unlikely(r_sample <= t_elapsed))
455
			DCCP_WARN("WARNING: r_sample=%ldus <= t_elapsed=%ldus\n",
456
				  r_sample, t_elapsed);
457 458
		else
			r_sample -= t_elapsed;
459 460 461 462 463 464 465 466 467 468

		/* Update RTT estimate by 
		 * If (No feedback recv)
		 *    R = R_sample;
		 * Else
		 *    R = q * R + (1 - q) * R_sample;
		 *
		 * q is a constant, RFC 3448 recomments 0.9
		 */
		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
469 470 471 472
			/*
			 * Larger Initial Windows [RFC 4342, sec. 5]
			 * We deviate in that we use `s' instead of `MSS'.
			 */
473
			u16 w_init = min(    4 * hctx->ccid3hctx_s,
474 475
					 max(2 * hctx->ccid3hctx_s, 4380));
			hctx->ccid3hctx_rtt  = r_sample;
476
			hctx->ccid3hctx_x    = scaled_div(w_init<< 6, r_sample);
477 478 479
			hctx->ccid3hctx_t_ld = now;

			ccid3_update_send_time(hctx);
480

481 482 483
			ccid3_pr_debug("%s(%p), s=%u, w_init=%u, "
				       "R_sample=%ldus, X=%u\n", dccp_role(sk),
				       sk, hctx->ccid3hctx_s, w_init, r_sample,
484
				       (unsigned)(hctx->ccid3hctx_x >> 6));
485

486 487 488 489
			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
		} else {
			hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
					           (u32)r_sample        ) / 10;
490

491 492 493 494 495 496
			/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
			if (hctx->ccid3hctx_p > 0)
				hctx->ccid3hctx_x_calc =
					tfrc_calc_x(hctx->ccid3hctx_s,
						    hctx->ccid3hctx_rtt,
						    hctx->ccid3hctx_p);
497
			ccid3_hc_tx_update_x(sk, &now);
498

499 500 501 502 503
			ccid3_pr_debug("%s(%p), RTT=%uus (sample=%ldus), s=%u, "
				       "p=%u, X_calc=%u, X=%u\n", dccp_role(sk),
				       sk, hctx->ccid3hctx_rtt, r_sample,
				       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
				       hctx->ccid3hctx_x_calc,
504
				       (unsigned)(hctx->ccid3hctx_x >> 6));
505 506 507 508 509 510
		}

		/* unschedule no feedback timer */
		sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);

		/* remove all packets older than the one acked from history */
511 512
		dccp_tx_hist_purge_older(ccid3_tx_hist,
					 &hctx->ccid3hctx_hist, packet);
513 514
		/*
		 * As we have calculated new ipi, delta, t_nom it is possible that
515
		 * we now can send a packet, so wake up dccp_wait_for_ccid
516 517
		 */
		sk->sk_write_space(sk);
518

519 520 521 522 523 524
		/*
		 * Update timeout interval for the nofeedback timer.
		 * We use a configuration option to increase the lower bound.
		 * This can help avoid triggering the nofeedback timer too often
		 * ('spinning') on LANs with small RTTs.
		 */
525
		hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
526 527
						   CONFIG_IP_DCCP_CCID3_RTO *
						   (USEC_PER_SEC/1000)	     );
528 529
		/*
		 * Schedule no feedback timer to expire in
530
		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
531
		 */
532
		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
533
			
534 535 536
		ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to "
			       "expire in %lu jiffies (%luus)\n",
			       dccp_role(sk), sk,
537
			       usecs_to_jiffies(t_nfb), t_nfb);
538 539

		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, 
540
				   jiffies + usecs_to_jiffies(t_nfb));
541 542

		/* set idle flag */
543
		hctx->ccid3hctx_idle = 1;
544
		break;
G
Gerrit Renker 已提交
545
	case TFRC_SSTATE_NO_SENT:
546 547
		/* XXX when implementing bidirectional rx/tx check this again */
		DCCP_WARN("Illegal ACK received - no packet sent\n");
G
Gerrit Renker 已提交
548 549
		/* fall through */
	case TFRC_SSTATE_TERM:		/* ignore feedback when closing */
550 551 552 553 554
		break;
	}
}

static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
555 556
				     unsigned char len, u16 idx,
				     unsigned char *value)
557 558
{
	int rc = 0;
559 560
	const struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
561 562
	struct ccid3_options_received *opt_recv;

563
	BUG_ON(hctx == NULL);
564 565 566 567 568 569 570 571 572 573 574 575 576

	opt_recv = &hctx->ccid3hctx_options_received;

	if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
		opt_recv->ccid3or_seqno		     = dp->dccps_gsr;
		opt_recv->ccid3or_loss_event_rate    = ~0;
		opt_recv->ccid3or_loss_intervals_idx = 0;
		opt_recv->ccid3or_loss_intervals_len = 0;
		opt_recv->ccid3or_receive_rate	     = 0;
	}

	switch (option) {
	case TFRC_OPT_LOSS_EVENT_RATE:
577
		if (unlikely(len != 4)) {
578 579 580
			DCCP_WARN("%s, sk=%p, invalid len %d "
				  "for TFRC_OPT_LOSS_EVENT_RATE\n",
				  dccp_role(sk), sk, len);
581 582
			rc = -EINVAL;
		} else {
583
			opt_recv->ccid3or_loss_event_rate = ntohl(*(__be32 *)value);
584 585 586 587 588 589 590 591 592 593 594 595 596 597
			ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_loss_event_rate);
		}
		break;
	case TFRC_OPT_LOSS_INTERVALS:
		opt_recv->ccid3or_loss_intervals_idx = idx;
		opt_recv->ccid3or_loss_intervals_len = len;
		ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
			       dccp_role(sk), sk,
			       opt_recv->ccid3or_loss_intervals_idx,
			       opt_recv->ccid3or_loss_intervals_len);
		break;
	case TFRC_OPT_RECEIVE_RATE:
598
		if (unlikely(len != 4)) {
599 600 601
			DCCP_WARN("%s, sk=%p, invalid len %d "
				  "for TFRC_OPT_RECEIVE_RATE\n",
				  dccp_role(sk), sk, len);
602 603
			rc = -EINVAL;
		} else {
604
			opt_recv->ccid3or_receive_rate = ntohl(*(__be32 *)value);
605 606 607 608 609 610 611 612 613 614
			ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_receive_rate);
		}
		break;
	}

	return rc;
}

615
static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
616
{
617
	struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
618

619
	hctx->ccid3hctx_s     = 0;
620
	hctx->ccid3hctx_rtt   = 0;
621 622
	hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
	INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
623 624 625

	hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
	hctx->ccid3hctx_no_feedback_timer.data     = (unsigned long)sk;
626 627 628 629 630 631 632
	init_timer(&hctx->ccid3hctx_no_feedback_timer);

	return 0;
}

static void ccid3_hc_tx_exit(struct sock *sk)
{
633
	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
634 635 636 637 638 639 640

	BUG_ON(hctx == NULL);

	ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);

	/* Empty packet history */
641
	dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
642 643 644 645 646 647
}

/*
 * RX Half Connection methods
 */

648
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
649 650 651 652 653 654 655 656 657 658 659 660
/*
 * Translate a receiver (RX) half-connection state into a printable name.
 * Used only as an argument to the debug print helpers.
 */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	/* Only string literals live here, so keep the whole table read-only */
	static const char * const ccid3_rx_state_names[] = {
	[TFRC_RSTATE_NO_DATA] = "NO_DATA",
	[TFRC_RSTATE_DATA]    = "DATA",
	[TFRC_RSTATE_TERM]    = "TERM",
	};

	return ccid3_rx_state_names[state];
}
#endif

661 662
static void ccid3_hc_rx_set_state(struct sock *sk,
				  enum ccid3_hc_rx_states state)
663
{
664
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
665 666 667
	enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
668 669
		       dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
		       ccid3_rx_state_name(state));
670 671 672 673
	WARN_ON(state == oldstate);
	hcrx->ccid3hcrx_state = state;
}

674 675 676 677 678 679 680 681 682
/*
 * Maintain the receiver's mean packet size `s': the first sample is taken
 * as-is, later samples are folded in with weight 1/10.
 * @len: payload length of the received packet; 0 leaves `s' untouched
 */
static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len)
{
	/* don't update on empty packets (e.g. ACKs) */
	if (unlikely(len == 0)) {
		ccid3_pr_debug("Packet payload length is 0 - not updating\n");
		return;
	}

	if (hcrx->ccid3hcrx_s == 0)
		hcrx->ccid3hcrx_s = len;
	else
		hcrx->ccid3hcrx_s = (9 * hcrx->ccid3hcrx_s + len) / 10;
}

683 684
static void ccid3_hc_rx_send_feedback(struct sock *sk)
{
685
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
686
	struct dccp_sock *dp = dccp_sk(sk);
687
	struct dccp_rx_hist_entry *packet;
688
	struct timeval now;
G
Gerrit Renker 已提交
689
	suseconds_t delta;
690 691 692

	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);

693
	dccp_timestamp(sk, &now);
694

695 696 697 698
	switch (hcrx->ccid3hcrx_state) {
	case TFRC_RSTATE_NO_DATA:
		hcrx->ccid3hcrx_x_recv = 0;
		break;
G
Gerrit Renker 已提交
699 700 701 702
	case TFRC_RSTATE_DATA:
		delta = timeval_delta(&now,
				      &hcrx->ccid3hcrx_tstamp_last_feedback);
		DCCP_BUG_ON(delta < 0);
703 704
		hcrx->ccid3hcrx_x_recv =
			scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
705
		break;
706 707
	case TFRC_RSTATE_TERM:
		DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk);
708 709 710
		return;
	}

711
	packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
712
	if (unlikely(packet == NULL)) {
713 714
		DCCP_WARN("%s, sk=%p, no data packet in history!\n",
			  dccp_role(sk), sk);
715 716 717
		return;
	}

718
	hcrx->ccid3hcrx_tstamp_last_feedback = now;
I
Ian McDonald 已提交
719
	hcrx->ccid3hcrx_ccval_last_counter   = packet->dccphrx_ccval;
720 721
	hcrx->ccid3hcrx_bytes_recv	     = 0;

G
Gerrit Renker 已提交
722 723 724 725
	/* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */
	delta = timeval_delta(&now, &packet->dccphrx_tstamp);
	DCCP_BUG_ON(delta < 0);
	hcrx->ccid3hcrx_elapsed_time = delta / 10;
726

727
	if (hcrx->ccid3hcrx_p == 0)
728 729 730 731 732
		hcrx->ccid3hcrx_pinv = ~0U;	/* see RFC 4342, 8.5 */
	else if (hcrx->ccid3hcrx_p > 1000000) {
		DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
		hcrx->ccid3hcrx_pinv = 1;	/* use 100% in this case */
	} else
733
		hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
734

735
	dp->dccps_hc_rx_insert_options = 1;
736 737 738
	dccp_send_ack(sk);
}

739
static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
740
{
741
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
742
	__be32 x_recv, pinv;
743

744 745 746
	BUG_ON(hcrx == NULL);

	if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
747
		return 0;
748

I
Ian McDonald 已提交
749
	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
750 751

	if (dccp_packet_without_ack(skb))
752 753
		return 0;

754 755
	x_recv = htonl(hcrx->ccid3hcrx_x_recv);
	pinv   = htonl(hcrx->ccid3hcrx_pinv);
756 757 758 759 760 761 762 763 764 765 766 767

	if ((hcrx->ccid3hcrx_elapsed_time != 0 &&
	     dccp_insert_option_elapsed_time(sk, skb,
					     hcrx->ccid3hcrx_elapsed_time)) ||
	    dccp_insert_option_timestamp(sk, skb) ||
	    dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
		    	       &pinv, sizeof(pinv)) ||
	    dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
		    	       &x_recv, sizeof(x_recv)))
		return -1;

	return 0;
768 769 770 771 772 773 774 775
}

/* calculate first loss interval
 *
 * returns estimated loss interval in usecs */

static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
{
776
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
777
	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
G
Gerrit Renker 已提交
778 779
	u32 x_recv, p;
	suseconds_t rtt, delta;
780
	struct timeval tstamp = { 0, };
781 782 783
	int interval = 0;
	int win_count = 0;
	int step = 0;
784
	u64 fval;
785

786 787 788
	list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
				 dccphrx_node) {
		if (dccp_rx_hist_entry_data_packet(entry)) {
789 790 791 792
			tail = entry;

			switch (step) {
			case 0:
793
				tstamp	  = entry->dccphrx_tstamp;
794
				win_count = entry->dccphrx_ccval;
795 796 797
				step = 1;
				break;
			case 1:
798
				interval = win_count - entry->dccphrx_ccval;
799 800 801 802 803 804 805 806 807
				if (interval < 0)
					interval += TFRC_WIN_COUNT_LIMIT;
				if (interval > 4)
					goto found;
				break;
			}
		}
	}

808
	if (unlikely(step == 0)) {
809 810
		DCCP_WARN("%s, sk=%p, packet history has no data packets!\n",
			  dccp_role(sk), sk);
811 812 813
		return ~0;
	}

814
	if (unlikely(interval == 0)) {
815 816
		DCCP_WARN("%s, sk=%p, Could not find a win_count interval > 0."
			  "Defaulting to 1\n", dccp_role(sk), sk);
817 818 819
		interval = 1;
	}
found:
I
Ian McDonald 已提交
820
	if (!tail) {
821
		DCCP_CRIT("tail is null\n");
I
Ian McDonald 已提交
822 823
		return ~0;
	}
G
Gerrit Renker 已提交
824 825 826 827 828 829

	delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
	DCCP_BUG_ON(delta < 0);

	rtt = delta * 4 / interval;
	ccid3_pr_debug("%s, sk=%p, approximated RTT to %ldus\n",
830
		       dccp_role(sk), sk, rtt);
831

832 833 834 835 836 837 838 839 840 841 842
	/*
	 * Determine the length of the first loss interval via inverse lookup.
	 * Assume that X_recv can be computed by the throughput equation
	 *      	    s
	 * 	X_recv = --------
	 * 		 R * fval
	 * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
	 */
	if (rtt == 0) {			/* would result in divide-by-zero */
		DCCP_WARN("RTT==0, returning 1/p = 1\n");
		return 1000000;
843
	}
844

845 846
	dccp_timestamp(sk, &tstamp);
	delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
G
Gerrit Renker 已提交
847
	DCCP_BUG_ON(delta <= 0);
848

G
Gerrit Renker 已提交
849
	x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
850 851 852 853 854 855
	if (x_recv == 0) {		/* would also trigger divide-by-zero */
		DCCP_WARN("X_recv==0\n");
		if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
			DCCP_BUG("stored value of X_recv is zero");
			return 1000000;
		}
I
Ian McDonald 已提交
856 857
	}

858 859
	fval = scaled_div(hcrx->ccid3hcrx_s, rtt);
	fval = scaled_div32(fval, x_recv);
860
	p = tfrc_calc_x_reverse_lookup(fval);
861

862 863
	ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied "
		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
864 865 866 867 868 869 870 871 872

	if (p == 0)
		return ~0;
	else
		return 1000000 / p; 
}

/*
 * Record a new loss event in the receiver's loss interval history.
 * @seq_loss: sequence number associated with the loss
 * @win_loss: window counter value associated with the loss
 *
 * On the very first loss the history is created and the length of its head
 * interval is estimated via ccid3_hc_rx_calc_first_li(). On later losses a
 * new entry is put at the head of the list and the oldest entry at the
 * tail is released, so the list length stays the same.
 */
static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
{
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
	struct dccp_li_hist_entry *head;
	u64 seq_temp;

	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
		if (!dccp_li_hist_interval_new(ccid3_li_hist,
		   &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
			return;

		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
		   struct dccp_li_hist_entry, dccplih_node);
		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
	} else {
		struct dccp_li_hist_entry *entry, *oldest;

		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
		   struct dccp_li_hist_entry, dccplih_node);
		/* FIXME win count check removed as was wrong */
		/* should make this check with receive history */
		/* and compare there as per section 10.2 of RFC4342 */

		/* new loss event detected */
		/* calculate last interval length */
		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
		entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC);

		if (entry == NULL) {
			DCCP_BUG("out of memory - can not allocate entry");
			return;
		}

		list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist);

		/*
		 * Drop the oldest interval. Convert the list node back to its
		 * containing entry before freeing, so that the slab object is
		 * released by its own address wherever the node sits in the
		 * structure.
		 */
		oldest = list_entry(hcrx->ccid3hcrx_li_hist.prev,
		   struct dccp_li_hist_entry, dccplih_node);
		list_del(&oldest->dccplih_node);
		kmem_cache_free(ccid3_li_hist->dccplih_slab, oldest);

		/* Create the newest interval */
		entry->dccplih_seqno = seq_loss;
		entry->dccplih_interval = seq_temp;
		entry->dccplih_win_count = win_loss;
	}
}

I
Ian McDonald 已提交
918 919
static int ccid3_hc_rx_detect_loss(struct sock *sk,
                                    struct dccp_rx_hist_entry *packet)
920
{
921
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
I
Ian McDonald 已提交
922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937
	struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
	u64 seqno = packet->dccphrx_seqno;
	u64 tmp_seqno;
	int loss = 0;
	u8 ccval;


	tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;

	if (!rx_hist ||
	   follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
		hcrx->ccid3hcrx_seqno_nonloss = seqno;
		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
		goto detect_out;
	}

938

I
Ian McDonald 已提交
939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967
	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
	   > TFRC_RECV_NUM_LATE_LOSS) {
		loss = 1;
		ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
		   hcrx->ccid3hcrx_ccval_nonloss);
		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
		dccp_inc_seqno(&tmp_seqno);
		hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
		dccp_inc_seqno(&tmp_seqno);
		while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
		   tmp_seqno, &ccval)) {
		   	hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
			hcrx->ccid3hcrx_ccval_nonloss = ccval;
			dccp_inc_seqno(&tmp_seqno);
		}
	}

	/* FIXME - this code could be simplified with above while */
	/* but works at moment */
	if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
		hcrx->ccid3hcrx_seqno_nonloss = seqno;
		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
	}

detect_out:
	dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
		   &hcrx->ccid3hcrx_li_hist, packet,
		   hcrx->ccid3hcrx_seqno_nonloss);
	return loss;
968 969 970 971
}

static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
972
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
973
	const struct dccp_options_received *opt_recv;
974
	struct dccp_rx_hist_entry *packet;
975
	struct timeval now;
G
Gerrit Renker 已提交
976 977
	u32 p_prev, rtt_prev;
	suseconds_t r_sample, t_elapsed;
978
	int loss, payload_size;
979

980
	BUG_ON(hcrx == NULL);
981

982
	opt_recv = &dccp_sk(sk)->dccps_options_received;
983

984 985 986 987 988
	switch (DCCP_SKB_CB(skb)->dccpd_type) {
	case DCCP_PKT_ACK:
		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
			return;
	case DCCP_PKT_DATAACK:
989
		if (opt_recv->dccpor_timestamp_echo == 0)
990
			break;
I
Ian McDonald 已提交
991
		rtt_prev = hcrx->ccid3hcrx_rtt;
992
		dccp_timestamp(sk, &now);
993 994 995 996
		timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
		r_sample = timeval_usecs(&now);
		t_elapsed = opt_recv->dccpor_elapsed_time * 10;

G
Gerrit Renker 已提交
997
		DCCP_BUG_ON(r_sample < 0);
998
		if (unlikely(r_sample <= t_elapsed))
G
Gerrit Renker 已提交
999
			DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n",
1000
				  r_sample, t_elapsed);
1001 1002 1003 1004 1005 1006 1007 1008 1009
		else
			r_sample -= t_elapsed;

		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
			hcrx->ccid3hcrx_rtt = r_sample;
		else
			hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
					      r_sample / 10;

I
Ian McDonald 已提交
1010 1011
		if (rtt_prev != hcrx->ccid3hcrx_rtt)
			ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n",
1012 1013
				       dccp_role(sk), hcrx->ccid3hcrx_rtt,
				       opt_recv->dccpor_elapsed_time);
1014 1015 1016
		break;
	case DCCP_PKT_DATA:
		break;
1017
	default: /* We're not interested in other packet types, move along */
1018 1019 1020
		return;
	}

1021
	packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp,
1022
					skb, GFP_ATOMIC);
1023
	if (unlikely(packet == NULL)) {
1024 1025
		DCCP_WARN("%s, sk=%p, Not enough mem to add rx packet "
			  "to history, consider it lost!\n", dccp_role(sk), sk);
1026 1027 1028
		return;
	}

I
Ian McDonald 已提交
1029
	loss = ccid3_hc_rx_detect_loss(sk, packet);
1030 1031 1032 1033

	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
		return;

1034 1035 1036
	payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4;
	ccid3_hc_rx_update_s(hcrx, payload_size);

1037 1038
	switch (hcrx->ccid3hcrx_state) {
	case TFRC_RSTATE_NO_DATA:
1039 1040 1041 1042
		ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial "
			       "feedback\n",
			       dccp_role(sk), sk,
			       dccp_state_name(sk->sk_state), skb);
1043 1044 1045 1046
		ccid3_hc_rx_send_feedback(sk);
		ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
		return;
	case TFRC_RSTATE_DATA:
1047
		hcrx->ccid3hcrx_bytes_recv += payload_size;
I
Ian McDonald 已提交
1048
		if (loss)
1049 1050
			break;

1051
		dccp_timestamp(sk, &now);
G
Gerrit Renker 已提交
1052 1053
		if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) -
					(suseconds_t)hcrx->ccid3hcrx_rtt >= 0) {
1054 1055
			hcrx->ccid3hcrx_tstamp_last_ack = now;
			ccid3_hc_rx_send_feedback(sk);
1056
		}
1057
		return;
1058 1059
	case TFRC_RSTATE_TERM:
		DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk);
1060 1061 1062 1063
		return;
	}

	/* Dealing with packet loss */
1064 1065
	ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));
1066 1067 1068 1069

	p_prev = hcrx->ccid3hcrx_p;
	
	/* Calculate loss event rate */
I
Ian McDonald 已提交
1070 1071 1072
	if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
		u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);

1073
		/* Scaling up by 1000000 as fixed decimal */
I
Ian McDonald 已提交
1074 1075
		if (i_mean != 0)
			hcrx->ccid3hcrx_p = 1000000 / i_mean;
1076 1077
	} else
		DCCP_BUG("empty loss history");
1078 1079 1080 1081 1082 1083 1084

	if (hcrx->ccid3hcrx_p > p_prev) {
		ccid3_hc_rx_send_feedback(sk);
		return;
	}
}

1085
static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
1086
{
1087
	struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
1088 1089 1090 1091 1092

	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);

	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
	INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
1093
	INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
1094
	dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack);
1095
	hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
1096
	hcrx->ccid3hcrx_s   = 0;
1097
	hcrx->ccid3hcrx_rtt = 0;
1098 1099 1100 1101 1102
	return 0;
}

static void ccid3_hc_rx_exit(struct sock *sk)
{
1103
	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1104

1105
	BUG_ON(hcrx == NULL);
1106 1107 1108 1109

	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);

	/* Empty packet history */
1110
	dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
1111 1112

	/* Empty loss interval history */
1113
	dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
1114 1115
}

1116 1117
static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
{
1118
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1119

1120 1121 1122 1123
	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return;

1124
	BUG_ON(hcrx == NULL);
1125 1126 1127 1128 1129 1130 1131 1132

	info->tcpi_ca_state	= hcrx->ccid3hcrx_state;
	info->tcpi_options	|= TCPI_OPT_TIMESTAMPS;
	info->tcpi_rcv_rtt	= hcrx->ccid3hcrx_rtt;
}

static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
{
1133
	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
1134

1135 1136 1137 1138
	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return;

1139
	BUG_ON(hctx == NULL);
1140 1141 1142 1143 1144

	info->tcpi_rto = hctx->ccid3hctx_t_rto;
	info->tcpi_rtt = hctx->ccid3hctx_rtt;
}

1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198
/*
 * ccid3_hc_rx_getsockopt  -  receiver-side CCID3 socket option query
 * @sk:      socket being queried
 * @optname: option to read; only DCCP_SOCKOPT_CCID_RX_INFO is handled
 * @len:     size of the user buffer at @optval, in bytes
 * @optval:  user buffer receiving the option value
 * @optlen:  user location receiving the number of bytes written
 *
 * Returns 0 on success, -EINVAL for listening sockets or when @len is
 * negative or smaller than the value to be copied, -ENOPROTOOPT for any other
 * @optname, and -EFAULT if writing to user space fails.
 */
static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
				  u32 __user *optval, int __user *optlen)
{
	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
	const void *val;

	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return -EINVAL;

	switch (optname) {
	case DCCP_SOCKOPT_CCID_RX_INFO:
		/*
		 * @len is signed while sizeof() is not: a bare comparison
		 * would convert a negative @len to a huge unsigned value and
		 * let it pass, so reject negative lengths explicitly.
		 */
		if (len < 0 || (size_t)len < sizeof(hcrx->ccid3hcrx_tfrc))
			return -EINVAL;
		len = sizeof(hcrx->ccid3hcrx_tfrc);
		val = &hcrx->ccid3hcrx_tfrc;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, val, len))
		return -EFAULT;

	return 0;
}

/*
 * ccid3_hc_tx_getsockopt  -  sender-side CCID3 socket option query
 * @sk:      socket being queried
 * @optname: option to read; only DCCP_SOCKOPT_CCID_TX_INFO is handled
 * @len:     size of the user buffer at @optval, in bytes
 * @optval:  user buffer receiving the option value
 * @optlen:  user location receiving the number of bytes written
 *
 * Returns 0 on success, -EINVAL for listening sockets or when @len is
 * negative or smaller than the value to be copied, -ENOPROTOOPT for any other
 * @optname, and -EFAULT if writing to user space fails.
 */
static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
				  u32 __user *optval, int __user *optlen)
{
	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
	const void *val;

	/* Listen socks doesn't have a private CCID block */
	if (sk->sk_state == DCCP_LISTEN)
		return -EINVAL;

	switch (optname) {
	case DCCP_SOCKOPT_CCID_TX_INFO:
		/*
		 * @len is signed while sizeof() is not: a bare comparison
		 * would convert a negative @len to a huge unsigned value and
		 * let it pass, so reject negative lengths explicitly.
		 */
		if (len < 0 || (size_t)len < sizeof(hctx->ccid3hctx_tfrc))
			return -EINVAL;
		len = sizeof(hctx->ccid3hctx_tfrc);
		val = &hctx->ccid3hctx_tfrc;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, val, len))
		return -EFAULT;

	return 0;
}

1199
static struct ccid_operations ccid3 = {
I
Ian McDonald 已提交
1200
	.ccid_id		   = DCCPC_CCID3,
1201 1202
	.ccid_name		   = "ccid3",
	.ccid_owner		   = THIS_MODULE,
1203
	.ccid_hc_tx_obj_size	   = sizeof(struct ccid3_hc_tx_sock),
1204 1205 1206 1207 1208 1209
	.ccid_hc_tx_init	   = ccid3_hc_tx_init,
	.ccid_hc_tx_exit	   = ccid3_hc_tx_exit,
	.ccid_hc_tx_send_packet	   = ccid3_hc_tx_send_packet,
	.ccid_hc_tx_packet_sent	   = ccid3_hc_tx_packet_sent,
	.ccid_hc_tx_packet_recv	   = ccid3_hc_tx_packet_recv,
	.ccid_hc_tx_parse_options  = ccid3_hc_tx_parse_options,
1210
	.ccid_hc_rx_obj_size	   = sizeof(struct ccid3_hc_rx_sock),
1211 1212 1213 1214
	.ccid_hc_rx_init	   = ccid3_hc_rx_init,
	.ccid_hc_rx_exit	   = ccid3_hc_rx_exit,
	.ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
	.ccid_hc_rx_packet_recv	   = ccid3_hc_rx_packet_recv,
1215 1216
	.ccid_hc_rx_get_info	   = ccid3_hc_rx_get_info,
	.ccid_hc_tx_get_info	   = ccid3_hc_tx_get_info,
1217 1218
	.ccid_hc_rx_getsockopt	   = ccid3_hc_rx_getsockopt,
	.ccid_hc_tx_getsockopt	   = ccid3_hc_tx_getsockopt,
1219 1220
};
 
1221
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
/* Expose the ccid3_debug switch as a module parameter (mode 0444) */
module_param(ccid3_debug, int, 0444);
MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
#endif /* CONFIG_IP_DCCP_CCID3_DEBUG */
1225 1226 1227

static __init int ccid3_module_init(void)
{
1228
	int rc = -ENOBUFS;
1229

1230 1231
	ccid3_rx_hist = dccp_rx_hist_new("ccid3");
	if (ccid3_rx_hist == NULL)
1232 1233
		goto out;

1234 1235 1236
	ccid3_tx_hist = dccp_tx_hist_new("ccid3");
	if (ccid3_tx_hist == NULL)
		goto out_free_rx;
1237

1238 1239
	ccid3_li_hist = dccp_li_hist_new("ccid3");
	if (ccid3_li_hist == NULL)
1240
		goto out_free_tx;
1241 1242 1243 1244 1245 1246

	rc = ccid_register(&ccid3);
	if (rc != 0) 
		goto out_free_loss_interval_history;
out:
	return rc;
1247

1248
out_free_loss_interval_history:
1249 1250
	dccp_li_hist_delete(ccid3_li_hist);
	ccid3_li_hist = NULL;
1251 1252 1253 1254 1255 1256
out_free_tx:
	dccp_tx_hist_delete(ccid3_tx_hist);
	ccid3_tx_hist = NULL;
out_free_rx:
	dccp_rx_hist_delete(ccid3_rx_hist);
	ccid3_rx_hist = NULL;
1257 1258 1259 1260 1261 1262 1263 1264
	goto out;
}
module_init(ccid3_module_init);

static __exit void ccid3_module_exit(void)
{
	ccid_unregister(&ccid3);

1265 1266 1267
	if (ccid3_tx_hist != NULL) {
		dccp_tx_hist_delete(ccid3_tx_hist);
		ccid3_tx_hist = NULL;
1268
	}
1269 1270 1271
	if (ccid3_rx_hist != NULL) {
		dccp_rx_hist_delete(ccid3_rx_hist);
		ccid3_rx_hist = NULL;
1272
	}
1273 1274 1275
	if (ccid3_li_hist != NULL) {
		dccp_li_hist_delete(ccid3_li_hist);
		ccid3_li_hist = NULL;
1276 1277 1278 1279
	}
}
module_exit(ccid3_module_exit);

1280
/* Module metadata */
MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
MODULE_LICENSE("GPL");
MODULE_ALIAS("net-dccp-ccid-3");