/* Local endpoint object management
 *
 * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */

12 13
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

14 15 16
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
17
#include <linux/slab.h>
18 19
#include <linux/udp.h>
#include <linux/ip.h>
20
#include <linux/hashtable.h>
21
#include <net/sock.h>
D
David Howells 已提交
22
#include <net/udp.h>
23 24 25
#include <net/af_rxrpc.h>
#include "ar-internal.h"

26 27
/* Forward declarations: the work item handler and the call_rcu() callback. */
static void rxrpc_local_processor(struct work_struct *work);
static void rxrpc_local_rcu(struct rcu_head *rcu);
28 29

/*
30 31 32 33 34 35 36
 * Compare a local to an address.  Return -ve, 0 or +ve to indicate less than,
 * same or greater than.
 *
 * We explicitly don't compare the RxRPC service ID as we want to reject
 * conflicting uses by differing services.  Further, we don't want to share
 * addresses with different options (IPv6), so we don't compare those bits
 * either.
37
 */
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
				const struct sockaddr_rxrpc *srx)
{
	long diff;

	diff = ((local->srx.transport_type - srx->transport_type) ?:
		(local->srx.transport_len - srx->transport_len) ?:
		(local->srx.transport.family - srx->transport.family));
	if (diff != 0)
		return diff;

	switch (srx->transport.family) {
	case AF_INET:
		/* If the choice of UDP port is left up to the transport, then
		 * the endpoint record doesn't match.
		 */
		return ((u16 __force)local->srx.transport.sin.sin_port -
			(u16 __force)srx->transport.sin.sin_port) ?:
			memcmp(&local->srx.transport.sin.sin_addr,
			       &srx->transport.sin.sin_addr,
			       sizeof(struct in_addr));
59
#ifdef CONFIG_AF_RXRPC_IPV6
D
David Howells 已提交
60 61 62 63 64 65 66 67 68
	case AF_INET6:
		/* If the choice of UDP6 port is left up to the transport, then
		 * the endpoint record doesn't match.
		 */
		return ((u16 __force)local->srx.transport.sin6.sin6_port -
			(u16 __force)srx->transport.sin6.sin6_port) ?:
			memcmp(&local->srx.transport.sin6.sin6_addr,
			       &srx->transport.sin6.sin6_addr,
			       sizeof(struct in6_addr));
69
#endif
70 71 72 73 74 75 76 77
	default:
		BUG();
	}
}

/*
 * Allocate a new local endpoint.
 */
78 79
static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
					     const struct sockaddr_rxrpc *srx)
80 81 82 83 84
{
	struct rxrpc_local *local;

	local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
	if (local) {
85
		atomic_set(&local->usage, 1);
86
		atomic_set(&local->active_users, 1);
87
		local->rxnet = rxnet;
88
		INIT_LIST_HEAD(&local->link);
89
		INIT_WORK(&local->processor, rxrpc_local_processor);
90 91
		init_rwsem(&local->defrag_sem);
		skb_queue_head_init(&local->reject_queue);
92
		skb_queue_head_init(&local->event_queue);
93 94
		local->client_conns = RB_ROOT;
		spin_lock_init(&local->client_conns_lock);
95 96 97 98
		spin_lock_init(&local->lock);
		rwlock_init(&local->services_lock);
		local->debug_id = atomic_inc_return(&rxrpc_debug_id);
		memcpy(&local->srx, srx, sizeof(*srx));
99
		local->srx.srx_service = 0;
100
		trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL);
101 102 103 104 105 106 107 108
	}

	_leave(" = %p", local);
	return local;
}

/*
 * create the local socket
109
 * - must be called with rxrpc_local_mutex locked
110
 */
111
static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
112
{
D
David Howells 已提交
113
	struct sock *usk;
114 115
	int ret, opt;

D
David Howells 已提交
116 117
	_enter("%p{%d,%d}",
	       local, local->srx.transport_type, local->srx.transport.family);
118 119

	/* create a socket to represent the local endpoint */
120
	ret = sock_create_kern(net, local->srx.transport.family,
121
			       local->srx.transport_type, 0, &local->socket);
122 123 124 125 126
	if (ret < 0) {
		_leave(" = %d [socket]", ret);
		return ret;
	}

D
David Howells 已提交
127
	/* set the socket up */
D
David Howells 已提交
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
	usk = local->socket->sk;
	inet_sk(usk)->mc_loop = 0;

	/* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
	inet_inc_convert_csum(usk);

	rcu_assign_sk_user_data(usk, local);

	udp_sk(usk)->encap_type = UDP_ENCAP_RXRPC;
	udp_sk(usk)->encap_rcv = rxrpc_input_packet;
	udp_sk(usk)->encap_destroy = NULL;
	udp_sk(usk)->gro_receive = NULL;
	udp_sk(usk)->gro_complete = NULL;

	udp_encap_enable();
143
#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6)
D
David Howells 已提交
144 145 146 147
	if (local->srx.transport.family == AF_INET6)
		udpv6_encap_enable();
#endif
	usk->sk_error_report = rxrpc_error_report;
D
David Howells 已提交
148

149 150 151 152
	/* if a local address was supplied then bind it */
	if (local->srx.transport_len > sizeof(sa_family_t)) {
		_debug("bind");
		ret = kernel_bind(local->socket,
153
				  (struct sockaddr *)&local->srx.transport,
154 155
				  local->srx.transport_len);
		if (ret < 0) {
156
			_debug("bind failed %d", ret);
157 158 159 160
			goto error;
		}
	}

161
	switch (local->srx.transport.family) {
162 163
	case AF_INET6:
		/* we want to receive ICMPv6 errors */
164
		opt = 1;
165
		ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR,
166 167 168 169 170
					(char *) &opt, sizeof(opt));
		if (ret < 0) {
			_debug("setsockopt failed");
			goto error;
		}
171

172
		/* we want to set the don't fragment bit */
173 174
		opt = IPV6_PMTUDISC_DO;
		ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER,
175 176 177 178 179 180
					(char *) &opt, sizeof(opt));
		if (ret < 0) {
			_debug("setsockopt failed");
			goto error;
		}

181 182 183 184 185
		/* Fall through and set IPv4 options too otherwise we don't get
		 * errors from IPv4 packets sent through the IPv6 socket.
		 */

	case AF_INET:
186 187
		/* we want to receive ICMP errors */
		opt = 1;
188
		ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
189 190 191 192 193 194 195
					(char *) &opt, sizeof(opt));
		if (ret < 0) {
			_debug("setsockopt failed");
			goto error;
		}

		/* we want to set the don't fragment bit */
196 197
		opt = IP_PMTUDISC_DO;
		ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
198 199 200 201 202
					(char *) &opt, sizeof(opt));
		if (ret < 0) {
			_debug("setsockopt failed");
			goto error;
		}
D
David Howells 已提交
203 204 205 206 207 208 209 210 211

		/* We want receive timestamps. */
		opt = 1;
		ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS,
					(char *)&opt, sizeof(opt));
		if (ret < 0) {
			_debug("setsockopt failed");
			goto error;
		}
212 213 214 215
		break;

	default:
		BUG();
216 217 218 219 220 221
	}

	_leave(" = 0");
	return 0;

error:
222
	kernel_sock_shutdown(local->socket, SHUT_RDWR);
223 224 225 226 227 228 229 230 231
	local->socket->sk->sk_user_data = NULL;
	sock_release(local->socket);
	local->socket = NULL;

	_leave(" = %d", ret);
	return ret;
}

/*
232
 * Look up or create a new local endpoint using the specified local address.
233
 */
234 235
struct rxrpc_local *rxrpc_lookup_local(struct net *net,
				       const struct sockaddr_rxrpc *srx)
236 237
{
	struct rxrpc_local *local;
238
	struct rxrpc_net *rxnet = rxrpc_net(net);
239 240 241
	struct list_head *cursor;
	const char *age;
	long diff;
242 243
	int ret;

D
David Howells 已提交
244 245
	_enter("{%d,%d,%pISp}",
	       srx->transport_type, srx->transport.family, &srx->transport);
246

247
	mutex_lock(&rxnet->local_mutex);
248

249 250
	for (cursor = rxnet->local_endpoints.next;
	     cursor != &rxnet->local_endpoints;
251 252
	     cursor = cursor->next) {
		local = list_entry(cursor, struct rxrpc_local, link);
253

254 255
		diff = rxrpc_local_cmp_key(local, srx);
		if (diff < 0)
256
			continue;
257 258 259 260 261 262 263 264 265 266 267 268
		if (diff > 0)
			break;

		/* Services aren't allowed to share transport sockets, so
		 * reject that here.  It is possible that the object is dying -
		 * but it may also still have the local transport address that
		 * we want bound.
		 */
		if (srx->srx_service) {
			local = NULL;
			goto addr_in_use;
		}
269

270 271 272 273
		/* Found a match.  We replace a dying object.  Attempting to
		 * bind the transport socket may still fail if we're attempting
		 * to use a local address that the dying object is still using.
		 */
274
		if (!rxrpc_use_local(local))
275
			break;
276

277 278 279
		age = "old";
		goto found;
	}
280

281
	local = rxrpc_alloc_local(rxnet, srx);
282 283
	if (!local)
		goto nomem;
284

285
	ret = rxrpc_open_socket(local, net);
286 287 288
	if (ret < 0)
		goto sock_error;

289
	if (cursor != &rxnet->local_endpoints)
290
		list_replace_init(cursor, &local->link);
291 292
	else
		list_add_tail(&local->link, cursor);
293
	age = "new";
294

295
found:
296
	mutex_unlock(&rxnet->local_mutex);
297

D
David Howells 已提交
298 299
	_net("LOCAL %s %d {%pISp}",
	     age, local->debug_id, &local->srx.transport);
300

301
	_leave(" = %p", local);
302 303
	return local;

304 305 306
nomem:
	ret = -ENOMEM;
sock_error:
307
	mutex_unlock(&rxnet->local_mutex);
308 309
	if (local)
		call_rcu(&local->rcu, rxrpc_local_rcu);
310 311
	_leave(" = %d", ret);
	return ERR_PTR(ret);
312

313
addr_in_use:
314
	mutex_unlock(&rxnet->local_mutex);
315 316 317
	_leave(" = -EADDRINUSE");
	return ERR_PTR(-EADDRINUSE);
}
318

319 320 321 322 323 324 325 326 327
/*
 * Get a ref on a local endpoint.
 */
struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
{
	const void *here = __builtin_return_address(0);
	int n;

	n = atomic_inc_return(&local->usage);
328
	trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here);
329 330 331 332 333 334 335 336 337 338 339
	return local;
}

/*
 * Get a ref on a local endpoint unless its usage has already reached 0.
 */
struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
{
	const void *here = __builtin_return_address(0);

	if (local) {
340
		int n = atomic_fetch_add_unless(&local->usage, 1, 0);
341
		if (n > 0)
342 343
			trace_rxrpc_local(local->debug_id, rxrpc_local_got,
					  n + 1, here);
344 345 346 347 348 349 350
		else
			local = NULL;
	}
	return local;
}

/*
351
 * Queue a local endpoint and pass the caller's reference to the work item.
352 353 354 355
 */
void rxrpc_queue_local(struct rxrpc_local *local)
{
	const void *here = __builtin_return_address(0);
356 357
	unsigned int debug_id = local->debug_id;
	int n = atomic_read(&local->usage);
358 359

	if (rxrpc_queue_work(&local->processor))
360
		trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here);
361 362
	else
		rxrpc_put_local(local);
363 364
}

365 366 367 368 369 370 371 372 373 374
/*
 * Drop a ref on a local endpoint.
 */
void rxrpc_put_local(struct rxrpc_local *local)
{
	const void *here = __builtin_return_address(0);
	int n;

	if (local) {
		n = atomic_dec_return(&local->usage);
375
		trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here);
376 377

		if (n == 0)
378
			call_rcu(&local->rcu, rxrpc_local_rcu);
379 380 381
	}
}

382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
/*
 * Start using a local endpoint.
 *
 * This takes both a usage ref and an active use.  It fails, returning NULL
 * and leaving the counts as they were, if either count has already reached
 * 0 (or if @local is NULL).  On success @local is returned.
 */
struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local)
{
	local = rxrpc_get_local_maybe(local);

	/* Got the ref; now try for the active use, backing out if there are
	 * no active users left.
	 */
	if (local &&
	    atomic_fetch_add_unless(&local->active_users, 1, 0) == 0) {
		rxrpc_put_local(local);
		local = NULL;
	}

	return local;
}

/*
 * Cease using a local endpoint.
 *
 * The caller's active use is given up along with its ref.  If that was the
 * last active use, the ref is passed to the work processor, which starts the
 * closure of the transport; otherwise the ref is simply dropped.
 */
void rxrpc_unuse_local(struct rxrpc_local *local)
{
	if (atomic_dec_return(&local->active_users) != 0) {
		rxrpc_put_local(local);
		return;
	}

	/* Last active user gone: let the work item tear the endpoint down. */
	rxrpc_queue_local(local);
}

417
/*
418 419 420 421 422
 * Destroy a local endpoint's socket and then hand the record to RCU to dispose
 * of.
 *
 * Closing the socket cannot be done from bottom half context or RCU callback
 * context because it might sleep.
423
 */
424
static void rxrpc_local_destroyer(struct rxrpc_local *local)
425
{
426
	struct socket *socket = local->socket;
427
	struct rxrpc_net *rxnet = local->rxnet;
428

429
	_enter("%d", local->debug_id);
430

431
	mutex_lock(&rxnet->local_mutex);
432
	list_del_init(&local->link);
433
	mutex_unlock(&rxnet->local_mutex);
434

435
	ASSERT(RB_EMPTY_ROOT(&local->client_conns));
436
	ASSERT(!local->service);
437 438 439 440 441 442 443 444 445 446 447 448 449

	if (socket) {
		local->socket = NULL;
		kernel_sock_shutdown(socket, SHUT_RDWR);
		socket->sk->sk_user_data = NULL;
		sock_release(socket);
	}

	/* At this point, there should be no more packets coming in to the
	 * local endpoint.
	 */
	rxrpc_purge_queue(&local->reject_queue);
	rxrpc_purge_queue(&local->event_queue);
450 451 452
}

/*
453 454
 * Process events on an endpoint.  The work item carries a ref which
 * we must release.
455
 */
456
static void rxrpc_local_processor(struct work_struct *work)
457 458
{
	struct rxrpc_local *local =
459 460
		container_of(work, struct rxrpc_local, processor);
	bool again;
461

462
	trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
463
			  atomic_read(&local->usage), NULL);
464

465 466
	do {
		again = false;
467 468 469 470
		if (atomic_read(&local->active_users) == 0) {
			rxrpc_local_destroyer(local);
			break;
		}
471

472 473 474 475
		if (!skb_queue_empty(&local->reject_queue)) {
			rxrpc_reject_packets(local);
			again = true;
		}
476

477 478 479 480 481
		if (!skb_queue_empty(&local->event_queue)) {
			rxrpc_process_local_events(local);
			again = true;
		}
	} while (again);
482 483

	rxrpc_put_local(local);
484
}
485

486 487 488 489 490 491
/*
 * Destroy a local endpoint after the RCU grace period expires.
 */
static void rxrpc_local_rcu(struct rcu_head *rcu)
{
	struct rxrpc_local *local = container_of(rcu, struct rxrpc_local, rcu);
492

493
	_enter("%d", local->debug_id);
494

495
	ASSERT(!work_pending(&local->processor));
496 497 498 499 500 501 502

	_net("DESTROY LOCAL %d", local->debug_id);
	kfree(local);
	_leave("");
}

/*
503
 * Verify the local endpoint list is empty by this point.
504
 */
505
void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet)
506
{
507
	struct rxrpc_local *local;
508 509 510

	_enter("");

511
	flush_workqueue(rxrpc_workqueue);
512

513 514 515
	if (!list_empty(&rxnet->local_endpoints)) {
		mutex_lock(&rxnet->local_mutex);
		list_for_each_entry(local, &rxnet->local_endpoints, link) {
516 517 518
			pr_err("AF_RXRPC: Leaked local %p {%d}\n",
			       local, atomic_read(&local->usage));
		}
519
		mutex_unlock(&rxnet->local_mutex);
520
		BUG();
521 522
	}
}