packet_history.c 14.3 KB
Newer Older
1
/*
I
Ian McDonald 已提交
2
 *  net/dccp/packet_history.c
3
 *
4 5
 *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
 *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
6 7 8 9 10
 *
 *  An implementation of the DCCP protocol
 *
 *  This code has been developed by the University of Waikato WAND
 *  research group. For further information please see http://www.wand.net.nz/
11
 *  or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
 *
 *  This code also uses code from Lulea University, rereleased as GPL by its
 *  authors:
 *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
 *
 *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
 *  and to make it work as a loadable module in the DCCP stack written by
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
 *
 *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/string.h>
39
#include <linux/slab.h>
40
#include "packet_history.h"
41
#include "../../dccp.h"
42

43
/*
44
 * Transmitter History Routines
45
 */
46
static struct kmem_cache *tfrc_tx_hist_slab;
47

48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
int __init tfrc_tx_packet_history_init(void)
{
	tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist",
					      sizeof(struct tfrc_tx_hist_entry),
					      0, SLAB_HWCACHE_ALIGN, NULL);
	return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0;
}

void tfrc_tx_packet_history_exit(void)
{
	if (tfrc_tx_hist_slab != NULL) {
		kmem_cache_destroy(tfrc_tx_hist_slab);
		tfrc_tx_hist_slab = NULL;
	}
}

64
int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
65
{
66
	struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
67 68 69 70 71 72 73 74

	if (entry == NULL)
		return -ENOBUFS;
	entry->seqno = seqno;
	entry->stamp = ktime_get_real();
	entry->next  = *headp;
	*headp	     = entry;
	return 0;
75
}
76
EXPORT_SYMBOL_GPL(tfrc_tx_hist_add);
77

78
void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
79
{
80
	struct tfrc_tx_hist_entry *head = *headp;
81

82 83
	while (head != NULL) {
		struct tfrc_tx_hist_entry *next = head->next;
84

85
		kmem_cache_free(tfrc_tx_hist_slab, head);
86
		head = next;
87 88
	}

89 90 91
	*headp = NULL;
}
EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
92

G
Gerrit Renker 已提交
93 94 95 96 97
/*
 * 	Receiver History Routines
 */
static struct kmem_cache *tfrc_rx_hist_slab;

98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
int __init tfrc_rx_packet_history_init(void)
{
	tfrc_rx_hist_slab = kmem_cache_create("tfrc_rxh_cache",
					      sizeof(struct tfrc_rx_hist_entry),
					      0, SLAB_HWCACHE_ALIGN, NULL);
	return tfrc_rx_hist_slab == NULL ? -ENOBUFS : 0;
}

void tfrc_rx_packet_history_exit(void)
{
	if (tfrc_rx_hist_slab != NULL) {
		kmem_cache_destroy(tfrc_rx_hist_slab);
		tfrc_rx_hist_slab = NULL;
	}
}

114 115
static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry,
					       const struct sk_buff *skb,
116
					       const u64 ndp)
117
{
118 119 120 121 122 123 124
	const struct dccp_hdr *dh = dccp_hdr(skb);

	entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
	entry->tfrchrx_ccval = dh->dccph_ccval;
	entry->tfrchrx_type  = dh->dccph_type;
	entry->tfrchrx_ndp   = ndp;
	entry->tfrchrx_tstamp = ktime_get_real();
125
}
126 127 128

void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
			     const struct sk_buff *skb,
129
			     const u64 ndp)
130 131 132 133 134
{
	struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);

	tfrc_rx_hist_entry_from_skb(entry, skb, ndp);
}
135
EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
136

137 138 139 140 141 142 143 144
/* has the packet contained in skb been seen before? */
int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
{
	const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq;
	int i;

	if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0)
		return 1;
145

146 147 148
	for (i = 1; i <= h->loss_count; i++)
		if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq)
			return 1;
149

150
	return 0;
151
}
152
EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
153

154 155 156 157 158 159 160 161 162

static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
{
	struct tfrc_rx_hist_entry *tmp = h->ring[a];

	h->ring[a] = h->ring[b];
	h->ring[b] = tmp;
}

163 164
static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
{
165 166 167
	__tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a),
			       tfrc_rx_hist_index(h, b));
}
168

169 170 171 172 173 174 175 176 177 178
/**
 * tfrc_rx_hist_resume_rtt_sampling  -  Prepare RX history for RTT sampling
 * This is called after loss detection has finished, when the history entry
 * with the index of `loss_count' holds the highest-received sequence number.
 * RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt).
 */
static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h)
{
	__tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count));
	h->loss_count = h->loss_start = 0;
179 180 181 182 183 184 185
}

/*
 * Private helper functions for loss detection.
 *
 * In the descriptions, `Si' refers to the sequence number of entry number i,
 * whose NDP count is `Ni' (lower case is used for variables).
186 187 188
 * Note: All __xxx_loss functions expect that a test against duplicates has been
 *       performed already: the seqno of the skb must not be less than the seqno
 *       of loss_prev; and it must not equal that of any valid history entry.
189
 */
190 191 192 193 194 195 196 197 198 199 200
static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
{
	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
	    s1 = DCCP_SKB_CB(skb)->dccpd_seq;

	if (!dccp_loss_free(s0, s1, n1)) {	/* gap between S0 and S1 */
		h->loss_count = 1;
		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
	}
}

201 202 203 204 205 206
static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
{
	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
	    s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
	    s2 = DCCP_SKB_CB(skb)->dccpd_seq;

207
	if (likely(dccp_delta_seqno(s1, s2) > 0)) {	/* S1  <  S2 */
208 209 210 211 212 213 214
		h->loss_count = 2;
		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2);
		return;
	}

	/* S0  <  S2  <  S1 */

215 216
	if (dccp_loss_free(s0, s2, n2)) {
		u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;
217

218
		if (dccp_loss_free(s2, s1, n1)) {
219
			/* hole is filled: S0, S2, and S1 are consecutive */
220
			tfrc_rx_hist_resume_rtt_sampling(h);
221 222 223 224
		} else
			/* gap between S2 and S1: just update loss_prev */
			tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);

225
	} else {	/* gap between S0 and S2 */
226
		/*
227
		 * Reorder history to insert S2 between S0 and S1
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
		 */
		tfrc_rx_hist_swap(h, 0, 3);
		h->loss_start = tfrc_rx_hist_index(h, 3);
		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n2);
		h->loss_count = 2;
	}
}

/* return 1 if a new loss event has been identified */
static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
{
	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
	    s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
	    s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
	    s3 = DCCP_SKB_CB(skb)->dccpd_seq;

244
	if (likely(dccp_delta_seqno(s2, s3) > 0)) {	/* S2  <  S3 */
245 246 247 248 249 250 251
		h->loss_count = 3;
		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3);
		return 1;
	}

	/* S3  <  S2 */

252
	if (dccp_delta_seqno(s1, s3) > 0) {		/* S1  <  S3  <  S2 */
253
		/*
254
		 * Reorder history to insert S3 between S1 and S2
255 256 257 258 259 260 261 262 263
		 */
		tfrc_rx_hist_swap(h, 2, 3);
		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3);
		h->loss_count = 3;
		return 1;
	}

	/* S0  <  S3  <  S1 */

264 265
	if (dccp_loss_free(s0, s3, n3)) {
		u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;
266

267
		if (dccp_loss_free(s3, s1, n1)) {
268
			/* hole between S0 and S1 filled by S3 */
269
			u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp;
270

271
			if (dccp_loss_free(s1, s2, n2)) {
272
				/* entire hole filled by S0, S3, S1, S2 */
273
				tfrc_rx_hist_resume_rtt_sampling(h);
274 275 276 277 278 279 280 281 282 283 284 285 286
			} else {
				/* gap remains between S1 and S2 */
				h->loss_start = tfrc_rx_hist_index(h, 1);
				h->loss_count = 1;
			}

		} else /* gap exists between S3 and S1, loss_count stays at 2 */
			tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n3);

		return 0;
	}

	/*
287 288
	 * The remaining case:  S0  <  S3  <  S1  <  S2;  gap between S0 and S3
	 * Reorder history to insert S3 between S0 and S1.
289 290 291 292 293 294 295 296 297 298 299 300 301
	 */
	tfrc_rx_hist_swap(h, 0, 3);
	h->loss_start = tfrc_rx_hist_index(h, 3);
	tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n3);
	h->loss_count = 3;

	return 1;
}

/* recycle RX history records to continue loss detection if necessary */
static void __three_after_loss(struct tfrc_rx_hist *h)
{
	/*
302 303 304 305
	 * At this stage we know already that there is a gap between S0 and S1
	 * (since S0 was the highest sequence number received before detecting
	 * the loss). To recycle the loss record, it is	thus only necessary to
	 * check for other possible gaps between S1/S2 and between S2/S3.
306
	 */
307 308 309 310
	u64 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
	    s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
	    s3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_seqno;
	u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp,
311 312
	    n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp;

313
	if (dccp_loss_free(s1, s2, n2)) {
314

315 316
		if (dccp_loss_free(s2, s3, n3)) {
			/* no gap between S2 and S3: entire hole is filled */
317
			tfrc_rx_hist_resume_rtt_sampling(h);
318 319 320 321 322 323
		} else {
			/* gap between S2 and S3 */
			h->loss_start = tfrc_rx_hist_index(h, 2);
			h->loss_count = 1;
		}

324
	} else {	/* gap between S1 and S2 */
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
		h->loss_start = tfrc_rx_hist_index(h, 1);
		h->loss_count = 2;
	}
}

/**
 *  tfrc_rx_handle_loss  -  Loss detection and further processing
 *  @h:		    The non-empty RX history object
 *  @lh:	    Loss Intervals database to update
 *  @skb:	    Currently received packet
 *  @ndp:	    The NDP count belonging to @skb
 *  @calc_first_li: Caller-dependent computation of first loss interval in @lh
 *  @sk:	    Used by @calc_first_li (see tfrc_lh_interval_add)
 *  Chooses action according to pending loss, updates LI database when a new
 *  loss was detected, and does required post-processing. Returns 1 when caller
 *  should send feedback, 0 otherwise.
341 342 343
 *  Since it also takes care of reordering during loss detection and updates the
 *  records accordingly, the caller should not perform any more RX history
 *  operations when loss_count is greater than 0 after calling this function.
344 345 346
 */
int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
			struct tfrc_loss_hist *lh,
347
			struct sk_buff *skb, const u64 ndp,
348 349 350 351
			u32 (*calc_first_li)(struct sock *), struct sock *sk)
{
	int is_new_loss = 0;

352 353 354
	if (tfrc_rx_hist_duplicate(h, skb))
		return 0;

355 356
	if (h->loss_count == 0) {
		__do_track_loss(h, skb, ndp);
357
		tfrc_rx_hist_sample_rtt(h, skb);
358
	} else if (h->loss_count == 1) {
359 360 361 362 363 364 365 366 367 368
		__one_after_loss(h, skb, ndp);
	} else if (h->loss_count != 2) {
		DCCP_BUG("invalid loss_count %d", h->loss_count);
	} else if (__two_after_loss(h, skb, ndp)) {
		/*
		 * Update Loss Interval database and recycle RX records
		 */
		is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
		__three_after_loss(h);
	}
369

370 371 372 373 374 375 376 377 378 379
	/*
	 * Update moving-average of `s' and the sum of received payload bytes.
	 */
	if (dccp_data_packet(skb)) {
		const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;

		h->packet_size = tfrc_ewma(h->packet_size, payload, 9);
		h->bytes_recvd += payload;
	}

380 381 382 383
	/* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */
	if (!is_new_loss)
		tfrc_lh_update_i_mean(lh, skb);

384 385 386 387
	return is_new_loss;
}
EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);

388
void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
389
{
390 391 392 393 394 395
	int i;

	for (i = 0; i <= TFRC_NDUPACK; ++i)
		if (h->ring[i] != NULL) {
			kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
			h->ring[i] = NULL;
396 397
		}
}
398
EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
399

400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
{
	int i;

	memset(h, 0, sizeof(*h));

	for (i = 0; i <= TFRC_NDUPACK; i++) {
		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
		if (h->ring[i] == NULL) {
			tfrc_rx_hist_purge(h);
			return -ENOBUFS;
		}
	}
	return 0;
}

int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk)
{
	if (tfrc_rx_hist_alloc(h))
		return -ENOBUFS;
	/*
	 * Initialise first entry with GSR to start loss detection as early as
	 * possible. Code using this must not use any other fields. The entry
	 * will be overwritten once the CCID updates its received packets.
	 */
	tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr;
	return 0;
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);

430 431
/**
 * tfrc_rx_hist_sample_rtt  -  Sample RTT from timestamp / CCVal
432 433 434 435
 * Based on ideas presented in RFC 4342, 8.1. This function expects that no loss
 * is pending and uses the following history entries (via rtt_sample_prev):
 * - h->ring[0]  contains the most recent history entry prior to @skb;
 * - h->ring[1]  is an unused `dummy' entry when the current difference is 0;
436
 */
437
void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
438
{
439 440
	struct tfrc_rx_hist_entry *last = h->ring[0];
	u32 sample, delta_v;
441 442 443 444 445 446 447 448 449 450 451 452 453

	/*
	 * When not to sample:
	 * - on non-data packets
	 *   (RFC 4342, 8.1: CCVal only fully defined for data packets);
	 * - when no data packets have been received yet
	 *   (FIXME: using sampled packet size as indicator here);
	 * - as long as there are gaps in the sequence space (pending loss).
	 */
	if (!dccp_data_packet(skb) || h->packet_size == 0 ||
	    tfrc_rx_hist_loss_pending(h))
		return;

454
	h->rtt_sample_prev = 0;		/* reset previous candidate */
455

456 457 458 459
	delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval);
	if (delta_v == 0) {		/* less than RTT/4 difference */
		h->rtt_sample_prev = 1;
		return;
460
	}
461
	sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp)));
462

463 464 465 466 467 468 469 470 471 472
	if (delta_v <= 4)		/* between RTT/4 and RTT */
		sample *= 4 / delta_v;
	else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2))
		/*
		* Optimisation: CCVal difference is greater than 1 RTT, yet the
		* sample is less than the local RTT estimate; which means that
		* the RTT estimate is too high.
		* To avoid noise, it is not done if the sample is below RTT/2.
		*/
		return;
473

474 475
	/* Use a lower weight than usual to increase responsiveness */
	h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5);
476 477
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);