/* RxRPC packet reception
 *
 * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

12 13
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

14 15 16 17 18 19 20 21
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
#include <linux/udp.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/icmp.h>
22
#include <linux/gfp.h>
23 24 25
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
26
#include <net/udp.h>
27
#include <net/net_namespace.h>
28 29
#include "ar-internal.h"

30 31 32 33 34 35 36 37 38
/*
 * Abort a call with RX_PROTOCOL_ERROR / EBADMSG.
 *
 * If rxrpc_abort_call() returns non-zero, the abort event bit is set on the
 * call and the call is queued; otherwise nothing further is done.
 */
static void rxrpc_proto_abort(const char *why,
			      struct rxrpc_call *call, rxrpc_seq_t seq)
{
	if (!rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG))
		return;

	set_bit(RXRPC_CALL_EV_ABORT, &call->events);
	rxrpc_queue_call(call);
}

D
David Howells 已提交
39 40 41 42 43
/*
 * Do TCP-style congestion management [RFC 5681].
 *
 * Called with the summary gathered from an ACK packet.  Works on local copies
 * of the call's congestion window (cong_cwnd) and cumulative ACK count
 * (cong_cumul_acks), runs the state machine selected by call->cong_mode and
 * writes the results back to the call before tracing the outcome.  If the
 * state machine decides a retransmission is needed, the RESEND event is set
 * and the call is queued.
 */
static void rxrpc_congestion_management(struct rxrpc_call *call,
					struct sk_buff *skb,
					struct rxrpc_ack_summary *summary,
					rxrpc_serial_t acked_serial)
{
	enum rxrpc_congest_change change = rxrpc_cong_no_change;
	unsigned int cumulative_acks = call->cong_cumul_acks;
	unsigned int cwnd = call->cong_cwnd;
	bool resend = false;

	/* Packets between the hard-ACK point and the Tx top that were not
	 * counted as ACK'd in this summary.
	 */
	summary->flight_size =
		(call->tx_top - call->tx_hard_ack) - summary->nr_acks;

	if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) {
		summary->retrans_timeo = true;
		call->cong_ssthresh = max_t(unsigned int,
					    summary->flight_size / 2, 2);
		cwnd = 1;
		/* NOTE(review): cwnd was just set to 1 and cong_ssthresh to at
		 * least 2, so this condition cannot be true as written.
		 */
		if (cwnd >= call->cong_ssthresh &&
		    call->cong_mode == RXRPC_CALL_SLOW_START) {
			call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
			call->cong_tstamp = skb->tstamp;
			cumulative_acks = 0;
		}
	}

	/* Accumulate newly ACK'd packets, saturating at 255. */
	cumulative_acks += summary->nr_new_acks;
	cumulative_acks += summary->nr_rot_new_acks;
	if (cumulative_acks > 255)
		cumulative_acks = 255;

	/* Snapshot the state as it stood on entry to the state machine; note
	 * that summary->cwnd is the call's stored window, not the local copy.
	 */
	summary->mode = call->cong_mode;
	summary->cwnd = call->cong_cwnd;
	summary->ssthresh = call->cong_ssthresh;
	summary->cumulative_acks = cumulative_acks;
	summary->dup_acks = call->cong_dup_acks;

	switch (call->cong_mode) {
	case RXRPC_CALL_SLOW_START:
		if (summary->nr_nacks > 0)
			goto packet_loss_detected;
		if (summary->cumulative_acks > 0)
			cwnd += 1;
		if (cwnd >= call->cong_ssthresh) {
			call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
			call->cong_tstamp = skb->tstamp;
		}
		goto out;

	case RXRPC_CALL_CONGEST_AVOIDANCE:
		if (summary->nr_nacks > 0)
			goto packet_loss_detected;

		/* We analyse the number of packets that get ACK'd per RTT
		 * period and increase the window if we managed to fill it.
		 */
		if (call->peer->rtt_usage == 0)
			goto out;
		if (ktime_before(skb->tstamp,
				 ktime_add_ns(call->cong_tstamp,
					      call->peer->rtt)))
			goto out_no_clear_ca;
		change = rxrpc_cong_rtt_window_end;
		call->cong_tstamp = skb->tstamp;
		if (cumulative_acks >= cwnd)
			cwnd++;
		goto out;

	case RXRPC_CALL_PACKET_LOSS:
		if (summary->nr_nacks == 0)
			goto resume_normality;

		if (summary->new_low_nack) {
			change = rxrpc_cong_new_low_nack;
			call->cong_dup_acks = 1;
			if (call->cong_extra > 1)
				call->cong_extra = 1;
			goto send_extra_data;
		}

		/* Begin retransmission on the third duplicate ACK. */
		call->cong_dup_acks++;
		if (call->cong_dup_acks < 3)
			goto send_extra_data;

		change = rxrpc_cong_begin_retransmission;
		call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT;
		call->cong_ssthresh = max_t(unsigned int,
					    summary->flight_size / 2, 2);
		cwnd = call->cong_ssthresh + 3;
		call->cong_extra = 0;
		call->cong_dup_acks = 0;
		resend = true;
		goto out;

	case RXRPC_CALL_FAST_RETRANSMIT:
		if (!summary->new_low_nack) {
			if (summary->nr_new_acks == 0)
				cwnd += 1;
			call->cong_dup_acks++;
			if (call->cong_dup_acks == 2) {
				change = rxrpc_cong_retransmit_again;
				call->cong_dup_acks = 0;
				resend = true;
			}
		} else {
			change = rxrpc_cong_progress;
			cwnd = call->cong_ssthresh;
			if (summary->nr_nacks == 0)
				goto resume_normality;
		}
		goto out;

	default:
		BUG();
		goto out;
	}

resume_normality:
	change = rxrpc_cong_cleared_nacks;
	call->cong_dup_acks = 0;
	call->cong_extra = 0;
	call->cong_tstamp = skb->tstamp;
	if (cwnd < call->cong_ssthresh)
		call->cong_mode = RXRPC_CALL_SLOW_START;
	else
		call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
out:
	cumulative_acks = 0;
out_no_clear_ca:
	/* Clamp the window before storing it back on the call. */
	if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1)
		cwnd = RXRPC_RXTX_BUFF_SIZE - 1;
	call->cong_cwnd = cwnd;
	call->cong_cumul_acks = cumulative_acks;
	trace_rxrpc_congest(call, summary, acked_serial, change);
	if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
		rxrpc_queue_call(call);
	return;

packet_loss_detected:
	change = rxrpc_cong_saw_nack;
	call->cong_mode = RXRPC_CALL_PACKET_LOSS;
	call->cong_dup_acks = 0;
	goto send_extra_data;

send_extra_data:
	/* Send some previously unsent DATA if we have some to advance the ACK
	 * state.
	 */
	if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
	    RXRPC_TX_ANNO_LAST ||
	    summary->nr_acks != call->tx_top - call->tx_hard_ack) {
		call->cong_extra++;
		wake_up(&call->waitq);
	}
	goto out_no_clear_ca;
}

199 200 201 202 203 204 205 206
/*
 * Ping the other end to fill our RTT cache and to retrieve the rwind
 * and MTU parameters.
 */
static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb,
			    int skew)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
207
	ktime_t now = skb->tstamp;
208

209 210 211
	if (call->peer->rtt_usage < 3 ||
	    ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
		rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
212 213
				  true, true,
				  rxrpc_propose_ack_ping_for_params);
214 215
}

216
/*
217
 * Apply a hard ACK by advancing the Tx window.
218
 */
219 220
static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
				   struct rxrpc_ack_summary *summary)
221
{
222 223
	struct sk_buff *skb, *list = NULL;
	int ix;
224
	u8 annotation;
225

226 227 228 229 230 231 232
	if (call->acks_lowest_nak == call->tx_hard_ack) {
		call->acks_lowest_nak = to;
	} else if (before_eq(call->acks_lowest_nak, to)) {
		summary->new_low_nack = true;
		call->acks_lowest_nak = to;
	}

233
	spin_lock(&call->lock);
234

235 236 237 238
	while (before(call->tx_hard_ack, to)) {
		call->tx_hard_ack++;
		ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK;
		skb = call->rxtx_buffer[ix];
239
		annotation = call->rxtx_annotations[ix];
D
David Howells 已提交
240
		rxrpc_see_skb(skb, rxrpc_skb_tx_rotated);
241 242 243 244
		call->rxtx_buffer[ix] = NULL;
		call->rxtx_annotations[ix] = 0;
		skb->next = list;
		list = skb;
245 246 247

		if (annotation & RXRPC_TX_ANNO_LAST)
			set_bit(RXRPC_CALL_TX_LAST, &call->flags);
248 249
		if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK)
			summary->nr_rot_new_acks++;
250
	}
251

252
	spin_unlock(&call->lock);
253

254 255 256
	trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ?
				    rxrpc_transmit_rotate_last :
				    rxrpc_transmit_rotate));
257 258
	wake_up(&call->waitq);

259 260 261 262
	while (list) {
		skb = list;
		list = skb->next;
		skb->next = NULL;
D
David Howells 已提交
263
		rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
264
	}
265
}
266

267 268 269 270 271 272
/*
 * End the transmission phase of a call.
 *
 * This occurs when we get an ACKALL packet, the first DATA packet of a reply,
 * or a final ACK packet.
 */
273 274
static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
			       const char *abort_why)
275
{
276

277
	ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
278

279
	write_lock(&call->state_lock);
280

281
	switch (call->state) {
282
	case RXRPC_CALL_CLIENT_SEND_REQUEST:
283
	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
284 285 286 287
		if (reply_begun)
			call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
		else
			call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
288
		break;
289

290 291 292 293
	case RXRPC_CALL_SERVER_AWAIT_ACK:
		__rxrpc_call_completed(call);
		rxrpc_notify_socket(call);
		break;
294 295 296

	default:
		goto bad_state;
297 298
	}

299
	write_unlock(&call->state_lock);
300
	if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
301 302
		rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true,
				  rxrpc_propose_ack_client_tx_end);
303 304 305 306
		trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
	} else {
		trace_rxrpc_transmit(call, rxrpc_transmit_end);
	}
307 308
	_leave(" = ok");
	return true;
309 310 311 312 313 314 315 316 317 318 319 320 321

bad_state:
	write_unlock(&call->state_lock);
	kdebug("end_tx %s", rxrpc_call_states[call->state]);
	rxrpc_proto_abort(abort_why, call, call->tx_top);
	return false;
}

/*
 * Begin the reply reception phase of a call.
 */
static bool rxrpc_receiving_reply(struct rxrpc_call *call)
{
322
	struct rxrpc_ack_summary summary = { 0 };
323 324
	rxrpc_seq_t top = READ_ONCE(call->tx_top);

325 326 327 328 329 330
	if (call->ackr_reason) {
		spin_lock_bh(&call->lock);
		call->ackr_reason = 0;
		call->resend_at = call->expire_at;
		call->ack_at = call->expire_at;
		spin_unlock_bh(&call->lock);
331 332
		rxrpc_set_timer(call, rxrpc_timer_init_for_reply,
				ktime_get_real());
333 334
	}

335
	if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags))
336
		rxrpc_rotate_tx_window(call, top, &summary);
337 338 339 340 341 342 343 344
	if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
		rxrpc_proto_abort("TXL", call, top);
		return false;
	}
	if (!rxrpc_end_tx_phase(call, true, "ETD"))
		return false;
	call->tx_phase = false;
	return true;
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
}

/*
 * Scan a jumbo packet to validate its structure and to work out how many
 * subpackets it contains.
 *
 * A jumbo packet is a collection of consecutive packets glued together with
 * little headers between that indicate how to change the initial header for
 * each subpacket.
 *
 * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but
 * the last are RXRPC_JUMBO_DATALEN in size.  The last subpacket may be of any
 * size.
 */
static bool rxrpc_validate_jumbo(struct sk_buff *skb)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
362
	unsigned int offset = sizeof(struct rxrpc_wire_header);
363
	unsigned int len = skb->len;
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
	int nr_jumbo = 1;
	u8 flags = sp->hdr.flags;

	do {
		nr_jumbo++;
		if (len - offset < RXRPC_JUMBO_SUBPKTLEN)
			goto protocol_error;
		if (flags & RXRPC_LAST_PACKET)
			goto protocol_error;
		offset += RXRPC_JUMBO_DATALEN;
		if (skb_copy_bits(skb, offset, &flags, 1) < 0)
			goto protocol_error;
		offset += sizeof(struct rxrpc_jumbo_header);
	} while (flags & RXRPC_JUMBO_PACKET);

	sp->nr_jumbo = nr_jumbo;
	return true;
381

382 383
protocol_error:
	return false;
384 385 386
}

/*
387 388 389 390 391 392 393 394 395 396 397
 * Handle reception of a duplicate packet.
 *
 * We have to take care to avoid an attack here whereby we're given a series of
 * jumbograms, each with a sequence number one before the preceding one and
 * filled up to maximum UDP size.  If they never send us the first packet in
 * the sequence, they can cause us to have to hold on to around 2MiB of kernel
 * space until the call times out.
 *
 * We limit the space usage by only accepting three duplicate jumbo packets per
 * call.  After that, we tell the other side we're no longer accepting jumbos
 * (that information is encoded in the ACK packet).
398
 */
399
static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq,
400
				 u8 annotation, bool *_jumbo_bad)
401
{
402 403 404
	/* Discard normal packets that are duplicates. */
	if (annotation == 0)
		return;
405

406 407 408 409
	/* Skip jumbo subpackets that are duplicates.  When we've had three or
	 * more partially duplicate jumbo packets, we refuse to take any more
	 * jumbos for this call.
	 */
410 411 412
	if (!*_jumbo_bad) {
		call->nr_jumbo_bad++;
		*_jumbo_bad = true;
413 414
	}
}
415

416 417 418 419 420 421 422
/*
 * Process a DATA packet, adding the packet to the Rx ring.
 *
 * Handles both plain DATA packets and jumbo packets; for a jumbo packet the
 * function loops over the subpackets (via the next_subpacket label), bumping
 * the sequence number and serial for each one.  Each new (sub)packet takes
 * its own reference on @skb when queued.  At the end, at most one ACK is
 * proposed, carrying the last reason selected while processing, and the
 * socket is notified if the packet's first sequence number is the one
 * directly after the Rx hard-ACK point.
 */
static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
			     u16 skew)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	unsigned int offset = sizeof(struct rxrpc_wire_header);
	unsigned int ix;
	rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
	rxrpc_seq_t seq = sp->hdr.seq, hard_ack;
	bool immediate_ack = false, jumbo_bad = false, queued;
	u16 len;
	u8 ack = 0, flags, annotation = 0;

	_enter("{%u,%u},{%u,%u}",
	       call->rx_hard_ack, call->rx_top, skb->len, seq);

	_proto("Rx DATA %%%u { #%u f=%02x }",
	       sp->hdr.serial, seq, sp->hdr.flags);

	if (call->state >= RXRPC_CALL_COMPLETE)
		return;

	/* Received data implicitly ACKs all of the request packets we sent
	 * when we're acting as a client.
	 */
	if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
	     call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
	    !rxrpc_receiving_reply(call))
		return;

	call->ackr_prev_seq = seq;

	/* Reject anything beyond the end of the receive window. */
	hard_ack = READ_ONCE(call->rx_hard_ack);
	if (after(seq, hard_ack + call->rx_winsize)) {
		ack = RXRPC_ACK_EXCEEDS_WINDOW;
		ack_serial = serial;
		goto ack;
	}

	flags = sp->hdr.flags;
	if (flags & RXRPC_JUMBO_PACKET) {
		/* Refuse jumbo packets once too many bad ones were seen. */
		if (call->nr_jumbo_bad > 3) {
			ack = RXRPC_ACK_NOSPACE;
			ack_serial = serial;
			goto ack;
		}
		/* Non-zero annotation marks a jumbo subpacket; it is
		 * incremented for each subsequent subpacket below.
		 */
		annotation = 1;
	}

next_subpacket:
	queued = false;
	ix = seq & RXRPC_RXTX_BUFF_MASK;
	len = skb->len;
	if (flags & RXRPC_JUMBO_PACKET)
		len = RXRPC_JUMBO_DATALEN;

	/* Once the last packet has been seen, a different packet may not
	 * claim to be last, and no non-last packet may sit at or beyond it.
	 */
	if (flags & RXRPC_LAST_PACKET) {
		if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
		    seq != call->rx_top)
			return rxrpc_proto_abort("LSN", call, seq);
	} else {
		if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
		    after_eq(seq, call->rx_top))
			return rxrpc_proto_abort("LSA", call, seq);
	}

	/* Already hard-ACK'd: it's a duplicate. */
	if (before_eq(seq, hard_ack)) {
		ack = RXRPC_ACK_DUPLICATE;
		ack_serial = serial;
		goto skip;
	}

	if (flags & RXRPC_REQUEST_ACK && !ack) {
		ack = RXRPC_ACK_REQUESTED;
		ack_serial = serial;
	}

	/* Slot already occupied: duplicate of a packet still in the ring. */
	if (call->rxtx_buffer[ix]) {
		rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad);
		if (ack != RXRPC_ACK_DUPLICATE) {
			ack = RXRPC_ACK_DUPLICATE;
			ack_serial = serial;
		}
		immediate_ack = true;
		goto skip;
	}

	/* Queue the packet.  We use a couple of memory barriers here as we
	 * need to make sure that rx_top is perceived to be set after the
	 * buffer pointer and that the buffer pointer is set after the
	 * annotation and the skb data.
	 *
	 * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window()
	 * and also rxrpc_fill_out_ack().
	 */
	rxrpc_get_skb(skb, rxrpc_skb_rx_got);
	call->rxtx_annotations[ix] = annotation;
	smp_wmb();
	call->rxtx_buffer[ix] = skb;
	if (after(seq, call->rx_top)) {
		smp_store_release(&call->rx_top, seq);
	} else if (before(seq, call->rx_top)) {
		/* Send an immediate ACK if we fill in a hole */
		if (!ack) {
			ack = RXRPC_ACK_DELAY;
			ack_serial = serial;
		}
		immediate_ack = true;
	}
	if (flags & RXRPC_LAST_PACKET) {
		set_bit(RXRPC_CALL_RX_LAST, &call->flags);
		trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq);
	} else {
		trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq);
	}
	queued = true;

	if (after_eq(seq, call->rx_expect_next)) {
		if (after(seq, call->rx_expect_next)) {
			_net("OOS %u > %u", seq, call->rx_expect_next);
			ack = RXRPC_ACK_OUT_OF_SEQUENCE;
			ack_serial = serial;
		}
		call->rx_expect_next = seq + 1;
	}

skip:
	offset += len;
	if (flags & RXRPC_JUMBO_PACKET) {
		/* Step to the next subpacket: read its flags from the jumbo
		 * header that follows the data just processed.
		 */
		if (skb_copy_bits(skb, offset, &flags, 1) < 0)
			return rxrpc_proto_abort("XJF", call, seq);
		offset += sizeof(struct rxrpc_jumbo_header);
		seq++;
		serial++;
		annotation++;
		/* NOTE(review): JLAST is OR'd in when the new flags still have
		 * RXRPC_JUMBO_PACKET set, i.e. when another subpacket follows
		 * this one - confirm that this is the intended polarity.
		 */
		if (flags & RXRPC_JUMBO_PACKET)
			annotation |= RXRPC_RX_ANNO_JLAST;
		if (after(seq, hard_ack + call->rx_winsize)) {
			ack = RXRPC_ACK_EXCEEDS_WINDOW;
			ack_serial = serial;
			if (!jumbo_bad) {
				call->nr_jumbo_bad++;
				jumbo_bad = true;
			}
			goto ack;
		}

		_proto("Rx DATA Jumbo %%%u", serial);
		goto next_subpacket;
	}

	if (queued && flags & RXRPC_LAST_PACKET && !ack) {
		ack = RXRPC_ACK_DELAY;
		ack_serial = serial;
	}

ack:
	if (ack)
		rxrpc_propose_ACK(call, ack, skew, ack_serial,
				  immediate_ack, true,
				  rxrpc_propose_ack_input_data);

	if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1)
		rxrpc_notify_socket(call);
	_leave(" [queued]");
}

585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616
/*
 * Process a requested ACK.
 *
 * Look through the call's ring buffer for the packet whose serial number is
 * @orig_serial.  If one is found, its timestamp is used as the send time for
 * an RTT sample; if none is found, nothing is recorded.
 */
static void rxrpc_input_requested_ack(struct rxrpc_call *call,
				      ktime_t resp_time,
				      rxrpc_serial_t orig_serial,
				      rxrpc_serial_t ack_serial)
{
	struct rxrpc_skb_priv *priv;
	struct sk_buff *candidate;
	ktime_t sent_at;
	int slot;

	for (slot = 0; slot < RXRPC_RXTX_BUFF_SIZE; slot++) {
		candidate = call->rxtx_buffer[slot];
		if (candidate) {
			priv = rxrpc_skb(candidate);
			if (priv->hdr.serial == orig_serial) {
				/* Read the timestamp only after the serial
				 * number has been matched.
				 */
				smp_rmb();
				sent_at = candidate->tstamp;
				rxrpc_peer_add_rtt(call,
						   rxrpc_rtt_rx_requested_ack,
						   orig_serial, ack_serial,
						   sent_at, resp_time);
				return;
			}
		}
	}
}

617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
/*
 * Process a ping response.
 *
 * An RTT sample is recorded only when a ping is outstanding on the call and
 * @orig_serial matches the serial of that ping exactly.  A response to an
 * older ping is ignored; a response to a later serial clears the pinging
 * flag but records nothing.
 */
static void rxrpc_input_ping_response(struct rxrpc_call *call,
				      ktime_t resp_time,
				      rxrpc_serial_t orig_serial,
				      rxrpc_serial_t ack_serial)
{
	ktime_t sent_time = call->ackr_ping_time;
	rxrpc_serial_t sent_serial;

	/* The ping time is read before the ping serial. */
	smp_rmb();
	sent_serial = call->ackr_ping;

	if (!test_bit(RXRPC_CALL_PINGING, &call->flags))
		return;
	if (before(orig_serial, sent_serial))
		return;

	clear_bit(RXRPC_CALL_PINGING, &call->flags);

	if (!after(orig_serial, sent_serial))
		rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response,
				   orig_serial, ack_serial,
				   sent_time, resp_time);
}

643
/*
644
 * Process the extra information that may be appended to an ACK packet
645
 */
646 647
static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
				struct rxrpc_ackinfo *ackinfo)
648
{
649 650 651
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	struct rxrpc_peer *peer;
	unsigned int mtu;
652
	u32 rwind = ntohl(ackinfo->rwind);
653 654 655 656

	_proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
	       sp->hdr.serial,
	       ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
657
	       rwind, ntohl(ackinfo->jumbo_max));
658

659 660 661
	if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
		rwind = RXRPC_RXTX_BUFF_SIZE - 1;
	call->tx_winsize = rwind;
662 663
	if (call->cong_ssthresh > rwind)
		call->cong_ssthresh = rwind;
664 665 666 667 668 669 670 671 672 673 674 675

	mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU));

	peer = call->peer;
	if (mtu < peer->maxdata) {
		spin_lock_bh(&peer->lock);
		peer->maxdata = mtu;
		peer->mtu = mtu + peer->hdrsize;
		spin_unlock_bh(&peer->lock);
		_net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
	}
}
676

677 678 679 680 681 682 683 684 685 686
/*
 * Process individual soft ACKs.
 *
 * Each ACK in the array corresponds to one packet and can be either an ACK or
 * a NAK.  If we get find an explicitly NAK'd packet we resend immediately;
 * packets that lie beyond the end of the ACK list are scheduled for resend by
 * the timer on the basis that the peer might just not have processed them at
 * the time the ACK was sent.
 */
static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
687 688
				  rxrpc_seq_t seq, int nr_acks,
				  struct rxrpc_ack_summary *summary)
689 690
{
	int ix;
D
David Howells 已提交
691
	u8 annotation, anno_type;
692 693 694

	for (; nr_acks > 0; nr_acks--, seq++) {
		ix = seq & RXRPC_RXTX_BUFF_MASK;
D
David Howells 已提交
695 696 697
		annotation = call->rxtx_annotations[ix];
		anno_type = annotation & RXRPC_TX_ANNO_MASK;
		annotation &= ~RXRPC_TX_ANNO_MASK;
698
		switch (*acks++) {
699
		case RXRPC_ACK_TYPE_ACK:
700
			summary->nr_acks++;
D
David Howells 已提交
701 702
			if (anno_type == RXRPC_TX_ANNO_ACK)
				continue;
703
			summary->nr_new_acks++;
D
David Howells 已提交
704 705
			call->rxtx_annotations[ix] =
				RXRPC_TX_ANNO_ACK | annotation;
706 707
			break;
		case RXRPC_ACK_TYPE_NACK:
708 709 710 711 712 713
			if (!summary->nr_nacks &&
			    call->acks_lowest_nak != seq) {
				call->acks_lowest_nak = seq;
				summary->new_low_nack = true;
			}
			summary->nr_nacks++;
D
David Howells 已提交
714
			if (anno_type == RXRPC_TX_ANNO_NAK)
715
				continue;
716
			summary->nr_new_nacks++;
717 718
			if (anno_type == RXRPC_TX_ANNO_RETRANS)
				continue;
D
David Howells 已提交
719 720
			call->rxtx_annotations[ix] =
				RXRPC_TX_ANNO_NAK | annotation;
721 722 723
			break;
		default:
			return rxrpc_proto_abort("SFT", call, 0);
724 725 726 727 728
		}
	}
}

/*
729 730 731 732 733 734 735 736
 * Process an ACK packet.
 *
 * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet
 * in the ACK array.  Anything before that is hard-ACK'd and may be discarded.
 *
 * A hard-ACK means that a packet has been processed and may be discarded; a
 * soft-ACK means that the packet may be discarded and retransmission
 * requested.  A phase is complete when all packets are hard-ACK'd.
737
 */
738 739
static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
			    u16 skew)
740
{
741
	struct rxrpc_ack_summary summary = { 0 };
742
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
743 744 745 746 747
	union {
		struct rxrpc_ackpacket ack;
		struct rxrpc_ackinfo info;
		u8 acks[RXRPC_MAXACKS];
	} buf;
748
	rxrpc_serial_t acked_serial;
749
	rxrpc_seq_t first_soft_ack, hard_ack;
750
	int nr_acks, offset, ioffset;
751 752 753

	_enter("");

754 755
	offset = sizeof(struct rxrpc_wire_header);
	if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) {
756 757
		_debug("extraction failure");
		return rxrpc_proto_abort("XAK", call, 0);
758
	}
759
	offset += sizeof(buf.ack);
760

761
	acked_serial = ntohl(buf.ack.serial);
762 763 764
	first_soft_ack = ntohl(buf.ack.firstPacket);
	hard_ack = first_soft_ack - 1;
	nr_acks = buf.ack.nAcks;
765 766
	summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ?
			      buf.ack.reason : RXRPC_ACK__INVALID);
767

768
	trace_rxrpc_rx_ack(call, first_soft_ack, summary.ack_reason, nr_acks);
769

770 771 772 773 774
	_proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
	       sp->hdr.serial,
	       ntohs(buf.ack.maxSkew),
	       first_soft_ack,
	       ntohl(buf.ack.previousPacket),
775
	       acked_serial,
776
	       rxrpc_ack_names[summary.ack_reason],
777 778
	       buf.ack.nAcks);

779 780 781
	if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE)
		rxrpc_input_ping_response(call, skb->tstamp, acked_serial,
					  sp->hdr.serial);
782 783 784
	if (buf.ack.reason == RXRPC_ACK_REQUESTED)
		rxrpc_input_requested_ack(call, skb->tstamp, acked_serial,
					  sp->hdr.serial);
785

786 787 788
	if (buf.ack.reason == RXRPC_ACK_PING) {
		_proto("Rx ACK %%%u PING Request", sp->hdr.serial);
		rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
789 790
				  skew, sp->hdr.serial, true, true,
				  rxrpc_propose_ack_respond_to_ping);
791
	} else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
792
		rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED,
793 794
				  skew, sp->hdr.serial, true, true,
				  rxrpc_propose_ack_respond_to_ack);
795 796
	}

797 798 799
	ioffset = offset + nr_acks + 3;
	if (skb->len >= ioffset + sizeof(buf.info)) {
		if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0)
800 801 802
			return rxrpc_proto_abort("XAI", call, 0);
		rxrpc_input_ackinfo(call, skb, &buf.info);
	}
803

804 805
	if (first_soft_ack == 0)
		return rxrpc_proto_abort("AK0", call, 0);
806

807 808 809 810 811 812 813
	/* Ignore ACKs unless we are or have just been transmitting. */
	switch (call->state) {
	case RXRPC_CALL_CLIENT_SEND_REQUEST:
	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
	case RXRPC_CALL_SERVER_SEND_REPLY:
	case RXRPC_CALL_SERVER_AWAIT_ACK:
		break;
814
	default:
815 816
		return;
	}
817

818
	/* Discard any out-of-order or duplicate ACKs. */
819
	if (before_eq(sp->hdr.serial, call->acks_latest)) {
820 821 822 823
		_debug("discard ACK %d <= %d",
		       sp->hdr.serial, call->acks_latest);
		return;
	}
D
David Howells 已提交
824
	call->acks_latest_ts = skb->tstamp;
825
	call->acks_latest = sp->hdr.serial;
826

827 828 829
	if (before(hard_ack, call->tx_hard_ack) ||
	    after(hard_ack, call->tx_top))
		return rxrpc_proto_abort("AKW", call, 0);
830 831
	if (nr_acks > call->tx_top - hard_ack)
		return rxrpc_proto_abort("AKN", call, 0);
832

833
	if (after(hard_ack, call->tx_hard_ack))
834
		rxrpc_rotate_tx_window(call, hard_ack, &summary);
835

836
	if (nr_acks > 0) {
837
		if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0)
838
			return rxrpc_proto_abort("XSA", call, 0);
839 840
		rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks,
				      &summary);
841 842 843 844
	}

	if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
		rxrpc_end_tx_phase(call, false, "ETA");
845
		return;
846
	}
847

848 849 850 851 852 853
	if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
	    RXRPC_TX_ANNO_LAST &&
	    summary.nr_acks == call->tx_top - hard_ack)
		rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
				  false, true,
				  rxrpc_propose_ack_ping_for_lost_reply);
D
David Howells 已提交
854

855
	return rxrpc_congestion_management(call, skb, &summary, acked_serial);
856 857 858
}

/*
859
 * Process an ACKALL packet.
860
 */
861
static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
862
{
863
	struct rxrpc_ack_summary summary = { 0 };
864
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
865

866
	_proto("Rx ACKALL %%%u", sp->hdr.serial);
867

868
	rxrpc_rotate_tx_window(call, call->tx_top, &summary);
869 870
	if (test_bit(RXRPC_CALL_TX_LAST, &call->flags))
		rxrpc_end_tx_phase(call, false, "ETL");
871
}
872

873 874 875 876 877 878 879 880
/*
 * Process an ABORT packet.
 */
static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	__be32 wtmp;
	u32 abort_code = RX_CALL_DEAD;
881

882
	_enter("");
883

884
	if (skb->len >= 4 &&
885 886
	    skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
			  &wtmp, sizeof(wtmp)) >= 0)
887
		abort_code = ntohl(wtmp);
888

889
	_proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
890

891 892 893
	if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
				      abort_code, ECONNABORTED))
		rxrpc_notify_socket(call);
894 895 896
}

/*
897
 * Process an incoming call packet.
898
 */
899 900
static void rxrpc_input_call_packet(struct rxrpc_call *call,
				    struct sk_buff *skb, u16 skew)
901
{
902
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
903

904
	_enter("%p,%p", call, skb);
905

906 907 908 909
	switch (sp->hdr.type) {
	case RXRPC_PACKET_TYPE_DATA:
		rxrpc_input_data(call, skb, skew);
		break;
910

911 912
	case RXRPC_PACKET_TYPE_ACK:
		rxrpc_input_ack(call, skb, skew);
913 914
		break;

915 916
	case RXRPC_PACKET_TYPE_BUSY:
		_proto("Rx BUSY %%%u", sp->hdr.serial);
917

918 919 920 921 922
		/* Just ignore BUSY packets from the server; the retry and
		 * lifespan timers will take care of business.  BUSY packets
		 * from the client don't make sense.
		 */
		break;
923

924 925 926
	case RXRPC_PACKET_TYPE_ABORT:
		rxrpc_input_abort(call, skb);
		break;
927

928 929 930
	case RXRPC_PACKET_TYPE_ACKALL:
		rxrpc_input_ackall(call, skb);
		break;
931

932 933 934
	default:
		_proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial);
		break;
935
	}
936

937 938 939 940 941
	_leave("");
}

/*
 * post connection-level events to the connection
942 943
 * - this includes challenges, responses, some aborts and call terminal packet
 *   retransmission.
944
 */
945
static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
946 947 948 949 950
				      struct sk_buff *skb)
{
	_enter("%p,%p", conn, skb);

	skb_queue_tail(&conn->rx_queue, skb);
951
	rxrpc_queue_conn(conn);
952 953
}

954 955 956 957 958 959 960 961 962 963
/*
 * post endpoint-level events to the local endpoint
 * - this includes debug and version messages
 *
 * The packet is appended to the endpoint's event_queue and the endpoint is
 * then queued for processing.
 */
static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
				       struct sk_buff *skb)
{
	_enter("%p,%p", local, skb);

	skb_queue_tail(&local->event_queue, skb);
	rxrpc_queue_local(local);
}

967 968 969 970 971 972 973 974 975 976 977
/*
 * Hand a packet to the local endpoint for transport-level abort: the packet
 * is appended to the endpoint's reject queue and the endpoint is queued for
 * processing.
 */
static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
	struct sk_buff_head *rejects;

	CHECK_SLAB_OKAY(&local->usage);

	rejects = &local->reject_queue;
	skb_queue_tail(rejects, skb);
	rxrpc_queue_local(local);
}

978 979 980 981 982 983 984 985 986
/*
 * Extract the wire header from a packet and translate the byte order.
 */
static noinline
int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
{
	struct rxrpc_wire_header whdr;

	/* dig out the RxRPC connection details */
987
	if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004
		return -EBADMSG;

	memset(sp, 0, sizeof(*sp));
	sp->hdr.epoch		= ntohl(whdr.epoch);
	sp->hdr.cid		= ntohl(whdr.cid);
	sp->hdr.callNumber	= ntohl(whdr.callNumber);
	sp->hdr.seq		= ntohl(whdr.seq);
	sp->hdr.serial		= ntohl(whdr.serial);
	sp->hdr.flags		= whdr.flags;
	sp->hdr.type		= whdr.type;
	sp->hdr.userStatus	= whdr.userStatus;
	sp->hdr.securityIndex	= whdr.securityIndex;
	sp->hdr._rsvd		= ntohs(whdr._rsvd);
	sp->hdr.serviceId	= ntohs(whdr.serviceId);
	return 0;
}

1005 1006 1007
/*
 * handle data received on the local endpoint
 * - may be called in interrupt context
1008 1009 1010 1011
 *
 * The socket is locked by the caller and this prevents the socket from being
 * shut down and the local endpoint from going away, thus sk_user_data will not
 * be cleared until this function returns.
1012
 */
1013
void rxrpc_data_ready(struct sock *udp_sk)
1014
{
1015
	struct rxrpc_connection *conn;
1016 1017
	struct rxrpc_channel *chan;
	struct rxrpc_call *call;
1018
	struct rxrpc_skb_priv *sp;
1019
	struct rxrpc_local *local = udp_sk->sk_user_data;
1020
	struct sk_buff *skb;
1021
	unsigned int channel;
1022
	int ret, skew;
1023

1024
	_enter("%p", udp_sk);
1025 1026 1027

	ASSERT(!irqs_disabled());

1028
	skb = skb_recv_datagram(udp_sk, 0, 1, &ret);
1029 1030 1031 1032 1033 1034 1035
	if (!skb) {
		if (ret == -EAGAIN)
			return;
		_debug("UDP socket error %d", ret);
		return;
	}

D
David Howells 已提交
1036
	rxrpc_new_skb(skb, rxrpc_skb_rx_received);
1037 1038 1039 1040 1041

	_net("recv skb %p", skb);

	/* we'll probably need to checksum it (didn't call sock_recvmsg) */
	if (skb_checksum_complete(skb)) {
D
David Howells 已提交
1042
		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
1043
		__UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0);
1044 1045 1046 1047
		_leave(" [CSUM failed]");
		return;
	}

1048
	__UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);
1049

1050 1051 1052
	/* The socket buffer we have is owned by UDP, with UDP's data all over
	 * it, but we really want our own data there.
	 */
1053 1054 1055
	skb_orphan(skb);
	sp = rxrpc_skb(skb);

1056 1057 1058 1059
	/* dig out the RxRPC connection details */
	if (rxrpc_extract_header(sp, skb) < 0)
		goto bad_message;

1060 1061 1062
	if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
		static int lose;
		if ((lose++ & 7) == 7) {
1063
			trace_rxrpc_rx_lose(sp);
1064 1065 1066 1067 1068
			rxrpc_lose_skb(skb, rxrpc_skb_rx_lost);
			return;
		}
	}

1069
	trace_rxrpc_rx_packet(sp);
1070 1071 1072

	_net("Rx RxRPC %s ep=%x call=%x:%x",
	     sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
1073
	     sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber);
1074

1075 1076
	if (sp->hdr.type >= RXRPC_N_PACKET_TYPES ||
	    !((RXRPC_SUPPORTED_PACKET_TYPES >> sp->hdr.type) & 1)) {
1077 1078 1079 1080
		_proto("Rx Bad Packet Type %u", sp->hdr.type);
		goto bad_message;
	}

1081 1082
	switch (sp->hdr.type) {
	case RXRPC_PACKET_TYPE_VERSION:
1083 1084
		rxrpc_post_packet_to_local(local, skb);
		goto out;
1085

1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097
	case RXRPC_PACKET_TYPE_BUSY:
		if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
			goto discard;

	case RXRPC_PACKET_TYPE_DATA:
		if (sp->hdr.callNumber == 0)
			goto bad_message;
		if (sp->hdr.flags & RXRPC_JUMBO_PACKET &&
		    !rxrpc_validate_jumbo(skb))
			goto bad_message;
		break;
	}
1098

1099 1100 1101
	rcu_read_lock();

	conn = rxrpc_find_connection_rcu(local, skb);
1102 1103 1104
	if (conn) {
		if (sp->hdr.securityIndex != conn->security_ix)
			goto wrong_security;
1105

1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121
		if (sp->hdr.callNumber == 0) {
			/* Connection-level packet */
			_debug("CONN %p {%d}", conn, conn->debug_id);
			rxrpc_post_packet_to_conn(conn, skb);
			goto out_unlock;
		}

		/* Note the serial number skew here */
		skew = (int)sp->hdr.serial - (int)conn->hi_serial;
		if (skew >= 0) {
			if (skew > 0)
				conn->hi_serial = sp->hdr.serial;
		} else {
			skew = -skew;
			skew = min(skew, 65535);
		}
1122

1123
		/* Call-bound packets are routed by connection channel. */
1124 1125
		channel = sp->hdr.cid & RXRPC_CHANNELMASK;
		chan = &conn->channels[channel];
1126 1127 1128 1129 1130 1131

		/* Ignore really old calls */
		if (sp->hdr.callNumber < chan->last_call)
			goto discard_unlock;

		if (sp->hdr.callNumber == chan->last_call) {
1132 1133
			/* For the previous service call, if completed successfully, we
			 * discard all further packets.
1134
			 */
D
David Howells 已提交
1135
			if (rxrpc_conn_is_service(conn) &&
1136 1137 1138 1139
			    (chan->last_type == RXRPC_PACKET_TYPE_ACK ||
			     sp->hdr.type == RXRPC_PACKET_TYPE_ABORT))
				goto discard_unlock;

1140 1141
			/* But otherwise we need to retransmit the final packet from
			 * data cached in the connection record.
1142 1143 1144 1145
			 */
			rxrpc_post_packet_to_conn(conn, skb);
			goto out_unlock;
		}
1146

1147
		call = rcu_dereference(chan->call);
1148 1149 1150 1151
	} else {
		skew = 0;
		call = NULL;
	}
1152

1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164
	if (!call || atomic_read(&call->usage) == 0) {
		if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) ||
		    sp->hdr.callNumber == 0 ||
		    sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
			goto bad_message_unlock;
		if (sp->hdr.seq != 1)
			goto discard_unlock;
		call = rxrpc_new_incoming_call(local, conn, skb);
		if (!call) {
			rcu_read_unlock();
			goto reject_packet;
		}
1165
		rxrpc_send_ping(call, skb, skew);
1166
	}
1167

1168 1169 1170
	rxrpc_input_call_packet(call, skb, skew);
	goto discard_unlock;

1171
discard_unlock:
1172
	rcu_read_unlock();
1173
discard:
D
David Howells 已提交
1174
	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
1175
out:
1176
	trace_rxrpc_rx_done(0, 0);
1177 1178
	return;

1179
out_unlock:
1180
	rcu_read_unlock();
1181
	goto out;
1182

1183 1184 1185 1186 1187 1188
wrong_security:
	rcu_read_unlock();
	trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
			  RXKADINCONSISTENCY, EBADMSG);
	skb->priority = RXKADINCONSISTENCY;
	goto post_abort;
1189

1190 1191
bad_message_unlock:
	rcu_read_unlock();
1192
bad_message:
1193 1194
	trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
			  RX_PROTOCOL_ERROR, EBADMSG);
1195
	skb->priority = RX_PROTOCOL_ERROR;
1196 1197
post_abort:
	skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
1198 1199
reject_packet:
	trace_rxrpc_rx_done(skb->mark, skb->priority);
1200 1201 1202
	rxrpc_reject_packet(local, skb);
	_leave(" [badmsg]");
}