output.c 13.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
/* RxRPC packet transmission
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

12 13
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

14
#include <linux/net.h>
15
#include <linux/gfp.h>
16
#include <linux/skbuff.h>
17
#include <linux/export.h>
18 19 20 21
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"

22
struct rxrpc_ack_buffer {
23
	struct rxrpc_wire_header whdr;
24 25 26
	struct rxrpc_ackpacket ack;
	u8 acks[255];
	u8 pad[3];
27 28 29
	struct rxrpc_ackinfo ackinfo;
};

30 31 32 33 34
/*
 * Buffer used to assemble an outgoing ABORT packet: the wire header followed
 * directly by the abort code.  rxrpc_send_abort_packet() sends the whole
 * structure as a single iovec.
 */
struct rxrpc_abort_buffer {
	struct rxrpc_wire_header whdr;
	__be32 abort_code;	/* Filled in with htonl(call->abort_code) */
};

D
David Howells 已提交
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
/*
 * Schedule the next keepalive ping for a call.
 *
 * The ping falls due one sixth of the call's ->next_rx_timo interval after
 * the current jiffies value.  Pinging tells the far side that we are still
 * interested in the call and helps keep the route through any intervening
 * firewall open.
 *
 * A response to the ping will also stop the ->expect_rx_by timer from
 * expiring.
 */
static void rxrpc_set_keepalive(struct rxrpc_call *call)
{
	const unsigned long start = jiffies;
	const unsigned long interval = call->next_rx_timo / 6;
	const unsigned long deadline = start + interval;

	/* Publish the deadline, then pull the call timer in if need be. */
	WRITE_ONCE(call->keepalive_at, deadline);
	rxrpc_reduce_call_timer(call, deadline, start,
				rxrpc_timer_set_for_keepalive);
}

53 54 55
/*
 * Fill out an ACK packet.
 */
56 57
static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
				 struct rxrpc_call *call,
58
				 struct rxrpc_ack_buffer *pkt,
59
				 rxrpc_seq_t *_hard_ack,
60 61
				 rxrpc_seq_t *_top,
				 u8 reason)
62
{
63
	rxrpc_serial_t serial;
64 65
	rxrpc_seq_t hard_ack, top, seq;
	int ix;
66 67 68
	u32 mtu, jmax;
	u8 *ackp = pkt->acks;

69
	/* Barrier against rxrpc_input_data(). */
70
	serial = call->ackr_serial;
71 72
	hard_ack = READ_ONCE(call->rx_hard_ack);
	top = smp_load_acquire(&call->rx_top);
73 74
	*_hard_ack = hard_ack;
	*_top = top;
75

76
	pkt->ack.bufferSpace	= htons(8);
77 78
	pkt->ack.maxSkew	= htons(call->ackr_skew);
	pkt->ack.firstPacket	= htonl(hard_ack + 1);
79
	pkt->ack.previousPacket	= htonl(call->ackr_prev_seq);
80
	pkt->ack.serial		= htonl(serial);
81
	pkt->ack.reason		= reason;
82 83
	pkt->ack.nAcks		= top - hard_ack;

84
	if (reason == RXRPC_ACK_PING)
85 86
		pkt->whdr.flags |= RXRPC_REQUEST_ACK;

87 88 89 90 91 92 93 94 95 96 97
	if (after(top, hard_ack)) {
		seq = hard_ack + 1;
		do {
			ix = seq & RXRPC_RXTX_BUFF_MASK;
			if (call->rxtx_buffer[ix])
				*ackp++ = RXRPC_ACK_TYPE_ACK;
			else
				*ackp++ = RXRPC_ACK_TYPE_NACK;
			seq++;
		} while (before_eq(seq, top));
	}
98

99 100
	mtu = conn->params.peer->if_mtu;
	mtu -= conn->params.peer->hdrsize;
101
	jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
102 103
	pkt->ackinfo.rxMTU	= htonl(rxrpc_rx_mtu);
	pkt->ackinfo.maxMTU	= htonl(mtu);
104
	pkt->ackinfo.rwind	= htonl(call->rx_winsize);
105 106 107 108 109
	pkt->ackinfo.jumbo_max	= htonl(jmax);

	*ackp++ = 0;
	*ackp++ = 0;
	*ackp++ = 0;
110
	return top - hard_ack + 3;
111 112 113
}

/*
114
 * Send an ACK call packet.
115
 */
116 117
int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
			  rxrpc_serial_t *_serial)
118 119
{
	struct rxrpc_connection *conn = NULL;
120
	struct rxrpc_ack_buffer *pkt;
121 122 123
	struct msghdr msg;
	struct kvec iov[2];
	rxrpc_serial_t serial;
124
	rxrpc_seq_t hard_ack, top;
125
	size_t len, n;
126
	int ret;
127
	u8 reason;
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151

	spin_lock_bh(&call->lock);
	if (call->conn)
		conn = rxrpc_get_connection_maybe(call->conn);
	spin_unlock_bh(&call->lock);
	if (!conn)
		return -ECONNRESET;

	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
	if (!pkt) {
		rxrpc_put_connection(conn);
		return -ENOMEM;
	}

	msg.msg_name	= &call->peer->srx.transport;
	msg.msg_namelen	= call->peer->srx.transport_len;
	msg.msg_control	= NULL;
	msg.msg_controllen = 0;
	msg.msg_flags	= 0;

	pkt->whdr.epoch		= htonl(conn->proto.epoch);
	pkt->whdr.cid		= htonl(call->cid);
	pkt->whdr.callNumber	= htonl(call->call_id);
	pkt->whdr.seq		= 0;
152 153
	pkt->whdr.type		= RXRPC_PACKET_TYPE_ACK;
	pkt->whdr.flags		= RXRPC_SLOW_START_OK | conn->out_clientflag;
154 155 156 157 158
	pkt->whdr.userStatus	= 0;
	pkt->whdr.securityIndex	= call->security_ix;
	pkt->whdr._rsvd		= 0;
	pkt->whdr.serviceId	= htons(call->service_id);

159
	spin_lock_bh(&call->lock);
160 161 162 163 164 165 166 167 168 169
	if (ping) {
		reason = RXRPC_ACK_PING;
	} else {
		reason = call->ackr_reason;
		if (!call->ackr_reason) {
			spin_unlock_bh(&call->lock);
			ret = 0;
			goto out;
		}
		call->ackr_reason = 0;
170
	}
171
	n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
172 173 174 175 176 177 178 179

	spin_unlock_bh(&call->lock);

	iov[0].iov_base	= pkt;
	iov[0].iov_len	= sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
	iov[1].iov_base = &pkt->ackinfo;
	iov[1].iov_len	= sizeof(pkt->ackinfo);
	len = iov[0].iov_len + iov[1].iov_len;
180

181 182
	serial = atomic_inc_return(&conn->serial);
	pkt->whdr.serial = htonl(serial);
183 184 185 186
	trace_rxrpc_tx_ack(call, serial,
			   ntohl(pkt->ack.firstPacket),
			   ntohl(pkt->ack.serial),
			   pkt->ack.reason, pkt->ack.nAcks);
187 188
	if (_serial)
		*_serial = serial;
189

190
	if (ping) {
191
		call->ping_serial = serial;
192 193 194 195 196 197 198 199
		smp_wmb();
		/* We need to stick a time in before we send the packet in case
		 * the reply gets back before kernel_sendmsg() completes - but
		 * asking UDP to send the packet can take a relatively long
		 * time, so we update the time after, on the assumption that
		 * the packet transmission is more likely to happen towards the
		 * end of the kernel_sendmsg() call.
		 */
200
		call->ping_time = ktime_get_real();
201 202 203
		set_bit(RXRPC_CALL_PINGING, &call->flags);
		trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial);
	}
204 205

	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
206
	if (ping)
207
		call->ping_time = ktime_get_real();
208

209
	if (call->state < RXRPC_CALL_COMPLETE) {
210
		if (ret < 0) {
211 212
			if (ping)
				clear_bit(RXRPC_CALL_PINGING, &call->flags);
213 214 215
			rxrpc_propose_ACK(call, pkt->ack.reason,
					  ntohs(pkt->ack.maxSkew),
					  ntohl(pkt->ack.serial),
216 217
					  true, true,
					  rxrpc_propose_ack_retry_tx);
218 219 220 221 222 223 224
		} else {
			spin_lock_bh(&call->lock);
			if (after(hard_ack, call->ackr_consumed))
				call->ackr_consumed = hard_ack;
			if (after(top, call->ackr_seen))
				call->ackr_seen = top;
			spin_unlock_bh(&call->lock);
225
		}
D
David Howells 已提交
226 227

		rxrpc_set_keepalive(call);
228 229
	}

230 231 232 233 234 235
out:
	rxrpc_put_connection(conn);
	kfree(pkt);
	return ret;
}

236 237 238 239 240 241 242 243 244 245 246 247
/*
 * Send an ABORT call packet.
 */
int rxrpc_send_abort_packet(struct rxrpc_call *call)
{
	struct rxrpc_connection *conn = NULL;
	struct rxrpc_abort_buffer pkt;
	struct msghdr msg;
	struct kvec iov[1];
	rxrpc_serial_t serial;
	int ret;

D
David Howells 已提交
248 249 250 251 252 253 254 255 256 257
	/* Don't bother sending aborts for a client call once the server has
	 * hard-ACK'd all of its request data.  After that point, we're not
	 * going to stop the operation proceeding, and whilst we might limit
	 * the reply, it's not worth it if we can send a new call on the same
	 * channel instead, thereby closing off this call.
	 */
	if (rxrpc_is_client_call(call) &&
	    test_bit(RXRPC_CALL_TX_LAST, &call->flags))
		return 0;

258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
	spin_lock_bh(&call->lock);
	if (call->conn)
		conn = rxrpc_get_connection_maybe(call->conn);
	spin_unlock_bh(&call->lock);
	if (!conn)
		return -ECONNRESET;

	msg.msg_name	= &call->peer->srx.transport;
	msg.msg_namelen	= call->peer->srx.transport_len;
	msg.msg_control	= NULL;
	msg.msg_controllen = 0;
	msg.msg_flags	= 0;

	pkt.whdr.epoch		= htonl(conn->proto.epoch);
	pkt.whdr.cid		= htonl(call->cid);
	pkt.whdr.callNumber	= htonl(call->call_id);
	pkt.whdr.seq		= 0;
	pkt.whdr.type		= RXRPC_PACKET_TYPE_ABORT;
	pkt.whdr.flags		= conn->out_clientflag;
	pkt.whdr.userStatus	= 0;
	pkt.whdr.securityIndex	= call->security_ix;
	pkt.whdr._rsvd		= 0;
	pkt.whdr.serviceId	= htons(call->service_id);
	pkt.abort_code		= htonl(call->abort_code);

	iov[0].iov_base	= &pkt;
	iov[0].iov_len	= sizeof(pkt);

	serial = atomic_inc_return(&conn->serial);
	pkt.whdr.serial = htonl(serial);

	ret = kernel_sendmsg(conn->params.local->socket,
			     &msg, iov, 1, sizeof(pkt));

	rxrpc_put_connection(conn);
	return ret;
}

296 297 298
/*
 * send a packet through the transport endpoint
 */
299 300
int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
			   bool retrans)
301
{
302 303 304
	struct rxrpc_connection *conn = call->conn;
	struct rxrpc_wire_header whdr;
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
305
	struct msghdr msg;
306 307 308
	struct kvec iov[2];
	rxrpc_serial_t serial;
	size_t len;
309
	bool lost = false;
310 311 312 313
	int ret, opt;

	_enter(",{%d}", skb->len);

314 315
	/* Each transmission of a Tx packet needs a new serial number */
	serial = atomic_inc_return(&conn->serial);
316

317 318 319 320 321 322 323 324 325 326 327 328
	whdr.epoch	= htonl(conn->proto.epoch);
	whdr.cid	= htonl(call->cid);
	whdr.callNumber	= htonl(call->call_id);
	whdr.seq	= htonl(sp->hdr.seq);
	whdr.serial	= htonl(serial);
	whdr.type	= RXRPC_PACKET_TYPE_DATA;
	whdr.flags	= sp->hdr.flags;
	whdr.userStatus	= 0;
	whdr.securityIndex = call->security_ix;
	whdr._rsvd	= htons(sp->hdr._rsvd);
	whdr.serviceId	= htons(call->service_id);

329 330 331 332
	if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
	    sp->hdr.seq == 1)
		whdr.userStatus	= RXRPC_USERSTATUS_SERVICE_UPGRADE;

333 334 335 336 337 338 339 340
	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = skb->head;
	iov[1].iov_len = skb->len;
	len = iov[0].iov_len + iov[1].iov_len;

	msg.msg_name = &call->peer->srx.transport;
	msg.msg_namelen = call->peer->srx.transport_len;
341 342 343 344
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

D
David Howells 已提交
345 346 347
	/* If our RTT cache needs working on, request an ACK.  Also request
	 * ACKs if a DATA packet appears to have been lost.
	 */
348
	if (!(sp->hdr.flags & RXRPC_LAST_PACKET) &&
349 350
	    (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) ||
	     retrans ||
351 352 353 354
	     call->cong_mode == RXRPC_CALL_SLOW_START ||
	     (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
	     ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
			  ktime_get_real())))
355 356
		whdr.flags |= RXRPC_REQUEST_ACK;

357 358 359
	if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
		static int lose;
		if ((lose++ & 7) == 7) {
360 361 362
			ret = 0;
			lost = true;
			goto done;
363 364 365
		}
	}

366 367
	_proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq);

368 369
	/* send the packet with the don't fragment bit set if we currently
	 * think it's small enough */
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
	if (iov[1].iov_len >= call->peer->maxdata)
		goto send_fragmentable;

	down_read(&conn->params.local->defrag_sem);
	/* send the packet by UDP
	 * - returns -EMSGSIZE if UDP would have to fragment the packet
	 *   to go out of the interface
	 *   - in which case, we'll have processed the ICMP error
	 *     message and update the peer record
	 */
	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);

	up_read(&conn->params.local->defrag_sem);
	if (ret == -EMSGSIZE)
		goto send_fragmentable;

done:
387 388
	trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
			    retrans, lost);
389
	if (ret >= 0) {
390 391
		ktime_t now = ktime_get_real();
		skb->tstamp = now;
392
		smp_wmb();
393
		sp->hdr.serial = serial;
394 395
		if (whdr.flags & RXRPC_REQUEST_ACK) {
			call->peer->rtt_last_req = now;
396
			trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
397 398 399 400 401 402 403 404 405 406 407 408
			if (call->peer->rtt_usage > 1) {
				unsigned long nowj = jiffies, ack_lost_at;

				ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt);
				if (ack_lost_at < 1)
					ack_lost_at = 1;

				ack_lost_at += nowj;
				WRITE_ONCE(call->ack_lost_at, ack_lost_at);
				rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
							rxrpc_timer_set_for_lost_ack);
			}
409
		}
410
	}
D
David Howells 已提交
411 412 413

	rxrpc_set_keepalive(call);

414 415
	_leave(" = %d [%u]", ret, call->peer->maxdata);
	return ret;
416 417 418 419 420

send_fragmentable:
	/* attempt to send this message with fragmentation enabled */
	_debug("send fragment");

421 422 423 424 425 426 427 428 429
	down_write(&conn->params.local->defrag_sem);

	switch (conn->params.local->srx.transport.family) {
	case AF_INET:
		opt = IP_PMTUDISC_DONT;
		ret = kernel_setsockopt(conn->params.local->socket,
					SOL_IP, IP_MTU_DISCOVER,
					(char *)&opt, sizeof(opt));
		if (ret == 0) {
430 431
			ret = kernel_sendmsg(conn->params.local->socket, &msg,
					     iov, 2, len);
432 433 434 435 436 437 438

			opt = IP_PMTUDISC_DO;
			kernel_setsockopt(conn->params.local->socket, SOL_IP,
					  IP_MTU_DISCOVER,
					  (char *)&opt, sizeof(opt));
		}
		break;
D
David Howells 已提交
439

440
#ifdef CONFIG_AF_RXRPC_IPV6
D
David Howells 已提交
441 442 443 444 445 446 447
	case AF_INET6:
		opt = IPV6_PMTUDISC_DONT;
		ret = kernel_setsockopt(conn->params.local->socket,
					SOL_IPV6, IPV6_MTU_DISCOVER,
					(char *)&opt, sizeof(opt));
		if (ret == 0) {
			ret = kernel_sendmsg(conn->params.local->socket, &msg,
448
					     iov, 2, len);
D
David Howells 已提交
449 450 451 452 453 454 455

			opt = IPV6_PMTUDISC_DO;
			kernel_setsockopt(conn->params.local->socket,
					  SOL_IPV6, IPV6_MTU_DISCOVER,
					  (char *)&opt, sizeof(opt));
		}
		break;
456
#endif
457 458
	}

459
	up_write(&conn->params.local->defrag_sem);
460
	goto done;
461
}
462 463 464 465 466 467

/*
 * reject packets through the local endpoint
 */
void rxrpc_reject_packets(struct rxrpc_local *local)
{
468
	struct sockaddr_rxrpc srx;
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484
	struct rxrpc_skb_priv *sp;
	struct rxrpc_wire_header whdr;
	struct sk_buff *skb;
	struct msghdr msg;
	struct kvec iov[2];
	size_t size;
	__be32 code;

	_enter("%d", local->debug_id);

	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = &code;
	iov[1].iov_len = sizeof(code);
	size = sizeof(whdr) + sizeof(code);

485
	msg.msg_name = &srx.transport;
486 487 488 489 490 491 492 493
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	memset(&whdr, 0, sizeof(whdr));
	whdr.type = RXRPC_PACKET_TYPE_ABORT;

	while ((skb = skb_dequeue(&local->reject_queue))) {
D
David Howells 已提交
494
		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
495
		sp = rxrpc_skb(skb);
496

D
David Howells 已提交
497
		if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) {
498 499
			msg.msg_namelen = srx.transport_len;

500 501 502 503 504 505 506 507 508 509 510 511 512
			code = htonl(skb->priority);

			whdr.epoch	= htonl(sp->hdr.epoch);
			whdr.cid	= htonl(sp->hdr.cid);
			whdr.callNumber	= htonl(sp->hdr.callNumber);
			whdr.serviceId	= htons(sp->hdr.serviceId);
			whdr.flags	= sp->hdr.flags;
			whdr.flags	^= RXRPC_CLIENT_INITIATED;
			whdr.flags	&= RXRPC_CLIENT_INITIATED;

			kernel_sendmsg(local->socket, &msg, iov, 2, size);
		}

D
David Howells 已提交
513
		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
514 515 516 517
	}

	_leave("");
}