/* RxRPC packet transmission
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/net.h>
#include <linux/gfp.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"

22
struct rxrpc_ack_buffer {
23
	struct rxrpc_wire_header whdr;
24 25 26
	struct rxrpc_ackpacket ack;
	u8 acks[255];
	u8 pad[3];
27 28 29
	struct rxrpc_ackinfo ackinfo;
};

30 31 32 33 34
struct rxrpc_abort_buffer {
	struct rxrpc_wire_header whdr;
	__be32 abort_code;
};

35 36 37 38
/*
 * Fill out an ACK packet.
 */
static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
39
				 struct rxrpc_ack_buffer *pkt,
40
				 rxrpc_seq_t *_hard_ack,
41 42
				 rxrpc_seq_t *_top,
				 u8 reason)
43
{
44
	rxrpc_serial_t serial;
45 46
	rxrpc_seq_t hard_ack, top, seq;
	int ix;
47 48 49
	u32 mtu, jmax;
	u8 *ackp = pkt->acks;

50
	/* Barrier against rxrpc_input_data(). */
51
	serial = call->ackr_serial;
52 53
	hard_ack = READ_ONCE(call->rx_hard_ack);
	top = smp_load_acquire(&call->rx_top);
54 55
	*_hard_ack = hard_ack;
	*_top = top;
56

57
	pkt->ack.bufferSpace	= htons(8);
58 59
	pkt->ack.maxSkew	= htons(call->ackr_skew);
	pkt->ack.firstPacket	= htonl(hard_ack + 1);
60
	pkt->ack.previousPacket	= htonl(call->ackr_prev_seq);
61
	pkt->ack.serial		= htonl(serial);
62
	pkt->ack.reason		= reason;
63 64
	pkt->ack.nAcks		= top - hard_ack;

65
	if (reason == RXRPC_ACK_PING)
66 67
		pkt->whdr.flags |= RXRPC_REQUEST_ACK;

68 69 70 71 72 73 74 75 76 77 78
	if (after(top, hard_ack)) {
		seq = hard_ack + 1;
		do {
			ix = seq & RXRPC_RXTX_BUFF_MASK;
			if (call->rxtx_buffer[ix])
				*ackp++ = RXRPC_ACK_TYPE_ACK;
			else
				*ackp++ = RXRPC_ACK_TYPE_NACK;
			seq++;
		} while (before_eq(seq, top));
	}
79

80 81
	mtu = call->conn->params.peer->if_mtu;
	mtu -= call->conn->params.peer->hdrsize;
82
	jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
83 84
	pkt->ackinfo.rxMTU	= htonl(rxrpc_rx_mtu);
	pkt->ackinfo.maxMTU	= htonl(mtu);
85
	pkt->ackinfo.rwind	= htonl(call->rx_winsize);
86 87 88 89 90
	pkt->ackinfo.jumbo_max	= htonl(jmax);

	*ackp++ = 0;
	*ackp++ = 0;
	*ackp++ = 0;
91
	return top - hard_ack + 3;
92 93 94
}

/*
95
 * Send an ACK call packet.
96
 */
97
int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
98 99
{
	struct rxrpc_connection *conn = NULL;
100
	struct rxrpc_ack_buffer *pkt;
101 102 103
	struct msghdr msg;
	struct kvec iov[2];
	rxrpc_serial_t serial;
104
	rxrpc_seq_t hard_ack, top;
105
	size_t len, n;
106
	int ret;
107
	u8 reason;
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131

	spin_lock_bh(&call->lock);
	if (call->conn)
		conn = rxrpc_get_connection_maybe(call->conn);
	spin_unlock_bh(&call->lock);
	if (!conn)
		return -ECONNRESET;

	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
	if (!pkt) {
		rxrpc_put_connection(conn);
		return -ENOMEM;
	}

	msg.msg_name	= &call->peer->srx.transport;
	msg.msg_namelen	= call->peer->srx.transport_len;
	msg.msg_control	= NULL;
	msg.msg_controllen = 0;
	msg.msg_flags	= 0;

	pkt->whdr.epoch		= htonl(conn->proto.epoch);
	pkt->whdr.cid		= htonl(call->cid);
	pkt->whdr.callNumber	= htonl(call->call_id);
	pkt->whdr.seq		= 0;
132 133
	pkt->whdr.type		= RXRPC_PACKET_TYPE_ACK;
	pkt->whdr.flags		= RXRPC_SLOW_START_OK | conn->out_clientflag;
134 135 136 137 138
	pkt->whdr.userStatus	= 0;
	pkt->whdr.securityIndex	= call->security_ix;
	pkt->whdr._rsvd		= 0;
	pkt->whdr.serviceId	= htons(call->service_id);

139
	spin_lock_bh(&call->lock);
140 141 142 143 144 145 146 147 148 149
	if (ping) {
		reason = RXRPC_ACK_PING;
	} else {
		reason = call->ackr_reason;
		if (!call->ackr_reason) {
			spin_unlock_bh(&call->lock);
			ret = 0;
			goto out;
		}
		call->ackr_reason = 0;
150
	}
151
	n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason);
152 153 154 155 156 157 158 159

	spin_unlock_bh(&call->lock);

	iov[0].iov_base	= pkt;
	iov[0].iov_len	= sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
	iov[1].iov_base = &pkt->ackinfo;
	iov[1].iov_len	= sizeof(pkt->ackinfo);
	len = iov[0].iov_len + iov[1].iov_len;
160

161 162
	serial = atomic_inc_return(&conn->serial);
	pkt->whdr.serial = htonl(serial);
163 164 165 166
	trace_rxrpc_tx_ack(call, serial,
			   ntohl(pkt->ack.firstPacket),
			   ntohl(pkt->ack.serial),
			   pkt->ack.reason, pkt->ack.nAcks);
167

168
	if (ping) {
169
		call->ping_serial = serial;
170 171 172 173 174 175 176 177
		smp_wmb();
		/* We need to stick a time in before we send the packet in case
		 * the reply gets back before kernel_sendmsg() completes - but
		 * asking UDP to send the packet can take a relatively long
		 * time, so we update the time after, on the assumption that
		 * the packet transmission is more likely to happen towards the
		 * end of the kernel_sendmsg() call.
		 */
178
		call->ping_time = ktime_get_real();
179 180 181
		set_bit(RXRPC_CALL_PINGING, &call->flags);
		trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial);
	}
182 183

	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
184
	if (ping)
185
		call->ping_time = ktime_get_real();
186

187
	if (call->state < RXRPC_CALL_COMPLETE) {
188
		if (ret < 0) {
189 190
			if (ping)
				clear_bit(RXRPC_CALL_PINGING, &call->flags);
191 192 193
			rxrpc_propose_ACK(call, pkt->ack.reason,
					  ntohs(pkt->ack.maxSkew),
					  ntohl(pkt->ack.serial),
194 195
					  true, true,
					  rxrpc_propose_ack_retry_tx);
196 197 198 199 200 201 202
		} else {
			spin_lock_bh(&call->lock);
			if (after(hard_ack, call->ackr_consumed))
				call->ackr_consumed = hard_ack;
			if (after(top, call->ackr_seen))
				call->ackr_seen = top;
			spin_unlock_bh(&call->lock);
203 204 205
		}
	}

206 207 208 209 210 211
out:
	rxrpc_put_connection(conn);
	kfree(pkt);
	return ret;
}

212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
/*
 * Send an ABORT call packet.
 */
int rxrpc_send_abort_packet(struct rxrpc_call *call)
{
	struct rxrpc_connection *conn = NULL;
	struct rxrpc_abort_buffer pkt;
	struct msghdr msg;
	struct kvec iov[1];
	rxrpc_serial_t serial;
	int ret;

	spin_lock_bh(&call->lock);
	if (call->conn)
		conn = rxrpc_get_connection_maybe(call->conn);
	spin_unlock_bh(&call->lock);
	if (!conn)
		return -ECONNRESET;

	msg.msg_name	= &call->peer->srx.transport;
	msg.msg_namelen	= call->peer->srx.transport_len;
	msg.msg_control	= NULL;
	msg.msg_controllen = 0;
	msg.msg_flags	= 0;

	pkt.whdr.epoch		= htonl(conn->proto.epoch);
	pkt.whdr.cid		= htonl(call->cid);
	pkt.whdr.callNumber	= htonl(call->call_id);
	pkt.whdr.seq		= 0;
	pkt.whdr.type		= RXRPC_PACKET_TYPE_ABORT;
	pkt.whdr.flags		= conn->out_clientflag;
	pkt.whdr.userStatus	= 0;
	pkt.whdr.securityIndex	= call->security_ix;
	pkt.whdr._rsvd		= 0;
	pkt.whdr.serviceId	= htons(call->service_id);
	pkt.abort_code		= htonl(call->abort_code);

	iov[0].iov_base	= &pkt;
	iov[0].iov_len	= sizeof(pkt);

	serial = atomic_inc_return(&conn->serial);
	pkt.whdr.serial = htonl(serial);

	ret = kernel_sendmsg(conn->params.local->socket,
			     &msg, iov, 1, sizeof(pkt));

	rxrpc_put_connection(conn);
	return ret;
}

262 263 264
/*
 * send a packet through the transport endpoint
 */
265 266
int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
			   bool retrans)
267
{
268 269 270
	struct rxrpc_connection *conn = call->conn;
	struct rxrpc_wire_header whdr;
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
271
	struct msghdr msg;
272 273 274
	struct kvec iov[2];
	rxrpc_serial_t serial;
	size_t len;
275
	bool lost = false;
276 277 278 279
	int ret, opt;

	_enter(",{%d}", skb->len);

280 281
	/* Each transmission of a Tx packet needs a new serial number */
	serial = atomic_inc_return(&conn->serial);
282

283 284 285 286 287 288 289 290 291 292 293 294
	whdr.epoch	= htonl(conn->proto.epoch);
	whdr.cid	= htonl(call->cid);
	whdr.callNumber	= htonl(call->call_id);
	whdr.seq	= htonl(sp->hdr.seq);
	whdr.serial	= htonl(serial);
	whdr.type	= RXRPC_PACKET_TYPE_DATA;
	whdr.flags	= sp->hdr.flags;
	whdr.userStatus	= 0;
	whdr.securityIndex = call->security_ix;
	whdr._rsvd	= htons(sp->hdr._rsvd);
	whdr.serviceId	= htons(call->service_id);

295 296 297 298
	if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
	    sp->hdr.seq == 1)
		whdr.userStatus	= RXRPC_USERSTATUS_SERVICE_UPGRADE;

299 300 301 302 303 304 305 306
	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = skb->head;
	iov[1].iov_len = skb->len;
	len = iov[0].iov_len + iov[1].iov_len;

	msg.msg_name = &call->peer->srx.transport;
	msg.msg_namelen = call->peer->srx.transport_len;
307 308 309 310
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

D
David Howells 已提交
311 312 313
	/* If our RTT cache needs working on, request an ACK.  Also request
	 * ACKs if a DATA packet appears to have been lost.
	 */
314 315 316 317 318 319
	if (!(sp->hdr.flags & RXRPC_LAST_PACKET) &&
	    (retrans ||
	     call->cong_mode == RXRPC_CALL_SLOW_START ||
	     (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
	     ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
			  ktime_get_real())))
320 321
		whdr.flags |= RXRPC_REQUEST_ACK;

322 323 324
	if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
		static int lose;
		if ((lose++ & 7) == 7) {
325 326 327
			ret = 0;
			lost = true;
			goto done;
328 329 330
		}
	}

331 332
	_proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq);

333 334
	/* send the packet with the don't fragment bit set if we currently
	 * think it's small enough */
335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
	if (iov[1].iov_len >= call->peer->maxdata)
		goto send_fragmentable;

	down_read(&conn->params.local->defrag_sem);
	/* send the packet by UDP
	 * - returns -EMSGSIZE if UDP would have to fragment the packet
	 *   to go out of the interface
	 *   - in which case, we'll have processed the ICMP error
	 *     message and update the peer record
	 */
	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);

	up_read(&conn->params.local->defrag_sem);
	if (ret == -EMSGSIZE)
		goto send_fragmentable;

done:
352 353
	trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
			    retrans, lost);
354
	if (ret >= 0) {
355 356
		ktime_t now = ktime_get_real();
		skb->tstamp = now;
357
		smp_wmb();
358
		sp->hdr.serial = serial;
359 360
		if (whdr.flags & RXRPC_REQUEST_ACK) {
			call->peer->rtt_last_req = now;
361
			trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
362
		}
363
	}
364 365
	_leave(" = %d [%u]", ret, call->peer->maxdata);
	return ret;
366 367 368 369 370

send_fragmentable:
	/* attempt to send this message with fragmentation enabled */
	_debug("send fragment");

371 372 373 374 375 376 377 378 379
	down_write(&conn->params.local->defrag_sem);

	switch (conn->params.local->srx.transport.family) {
	case AF_INET:
		opt = IP_PMTUDISC_DONT;
		ret = kernel_setsockopt(conn->params.local->socket,
					SOL_IP, IP_MTU_DISCOVER,
					(char *)&opt, sizeof(opt));
		if (ret == 0) {
380 381
			ret = kernel_sendmsg(conn->params.local->socket, &msg,
					     iov, 2, len);
382 383 384 385 386 387 388

			opt = IP_PMTUDISC_DO;
			kernel_setsockopt(conn->params.local->socket, SOL_IP,
					  IP_MTU_DISCOVER,
					  (char *)&opt, sizeof(opt));
		}
		break;
D
David Howells 已提交
389

390
#ifdef CONFIG_AF_RXRPC_IPV6
D
David Howells 已提交
391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
	case AF_INET6:
		opt = IPV6_PMTUDISC_DONT;
		ret = kernel_setsockopt(conn->params.local->socket,
					SOL_IPV6, IPV6_MTU_DISCOVER,
					(char *)&opt, sizeof(opt));
		if (ret == 0) {
			ret = kernel_sendmsg(conn->params.local->socket, &msg,
					     iov, 1, iov[0].iov_len);

			opt = IPV6_PMTUDISC_DO;
			kernel_setsockopt(conn->params.local->socket,
					  SOL_IPV6, IPV6_MTU_DISCOVER,
					  (char *)&opt, sizeof(opt));
		}
		break;
406
#endif
407 408
	}

409
	up_write(&conn->params.local->defrag_sem);
410
	goto done;
411
}
412 413 414 415 416 417

/*
 * reject packets through the local endpoint
 */
void rxrpc_reject_packets(struct rxrpc_local *local)
{
418
	struct sockaddr_rxrpc srx;
419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434
	struct rxrpc_skb_priv *sp;
	struct rxrpc_wire_header whdr;
	struct sk_buff *skb;
	struct msghdr msg;
	struct kvec iov[2];
	size_t size;
	__be32 code;

	_enter("%d", local->debug_id);

	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = &code;
	iov[1].iov_len = sizeof(code);
	size = sizeof(whdr) + sizeof(code);

435
	msg.msg_name = &srx.transport;
436 437 438 439 440 441 442 443
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	memset(&whdr, 0, sizeof(whdr));
	whdr.type = RXRPC_PACKET_TYPE_ABORT;

	while ((skb = skb_dequeue(&local->reject_queue))) {
D
David Howells 已提交
444
		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
445
		sp = rxrpc_skb(skb);
446

D
David Howells 已提交
447
		if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) {
448 449
			msg.msg_namelen = srx.transport_len;

450 451 452 453 454 455 456 457 458 459 460 461 462
			code = htonl(skb->priority);

			whdr.epoch	= htonl(sp->hdr.epoch);
			whdr.cid	= htonl(sp->hdr.cid);
			whdr.callNumber	= htonl(sp->hdr.callNumber);
			whdr.serviceId	= htons(sp->hdr.serviceId);
			whdr.flags	= sp->hdr.flags;
			whdr.flags	^= RXRPC_CLIENT_INITIATED;
			whdr.flags	&= RXRPC_CLIENT_INITIATED;

			kernel_sendmsg(local->socket, &msg, iov, 2, size);
		}

D
David Howells 已提交
463
		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
464 465 466 467
	}

	_leave("");
}