net_namespace.c 21.3 KB
Newer Older
J
Joe Perches 已提交
1 2
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

3 4 5 6 7 8
#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
9
#include <linux/sched.h>
10
#include <linux/idr.h>
11
#include <linux/rculist.h>
12
#include <linux/nsproxy.h>
13 14
#include <linux/fs.h>
#include <linux/proc_ns.h>
15
#include <linux/file.h>
16
#include <linux/export.h>
17
#include <linux/user_namespace.h>
18 19 20 21
#include <linux/net_namespace.h>
#include <linux/rtnetlink.h>
#include <net/sock.h>
#include <net/netlink.h>
22
#include <net/net_namespace.h>
23
#include <net/netns/generic.h>
24 25 26 27 28 29 30

/*
 *	Our network namespace constructor/destructor lists
 */

/* All registered pernet_operations, in the order their init methods run. */
static LIST_HEAD(pernet_list);
/*
 * Insertion point used by register_pernet_subsys(): subsystems are added
 * just before this entry.  It starts out as the list head and is moved to
 * the first registered device by register_pernet_device().
 */
static struct list_head *first_device = &pernet_list;
31
/*
 * Held in this file around pernet_operations (un)registration and around
 * namespace setup and cleanup.
 */
DEFINE_MUTEX(net_mutex);
32 33

/*
 * List of network namespaces.  In this file it is modified with the RCU
 * list primitives while rtnl_lock() is held.
 */
LIST_HEAD(net_namespace_list);
A
Alexey Dobriyan 已提交
34
EXPORT_SYMBOL_GPL(net_namespace_list);
35

36 37 38
/* The initial network namespace; it is set up by net_ns_init(). */
struct net init_net = {
	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
39
EXPORT_SYMBOL(init_net);
40

41 42
/* Starting value of max_gen_ptrs: pointer slots in a fresh net_generic. */
#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

E
Eric Dumazet 已提交
43 44 45 46 47 48 49 50 51 52 53 54 55 56
/*
 * Number of pointer slots given to newly allocated net_generic arrays.
 * Raised by register_pernet_operations() when a larger id is handed out.
 */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

/*
 * Allocate a zeroed net_generic array with room for max_gen_ptrs
 * pointers and record that capacity in ->len.
 *
 * Returns the new array, or NULL if the allocation fails.
 */
static struct net_generic *net_alloc_generic(void)
{
	size_t bytes = offsetof(struct net_generic, ptr[max_gen_ptrs]);
	struct net_generic *gen = kzalloc(bytes, GFP_KERNEL);

	if (!gen)
		return NULL;

	gen->len = max_gen_ptrs;
	return gen;
}

57 58 59 60 61 62 63
static int net_assign_generic(struct net *net, int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id == 0);

E
Eric Dumazet 已提交
64 65 66
	old_ng = rcu_dereference_protected(net->gen,
					   lockdep_is_held(&net_mutex));
	ng = old_ng;
67 68 69
	if (old_ng->len >= id)
		goto assign;

E
Eric Dumazet 已提交
70
	ng = net_alloc_generic();
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * The net_generic explores the net->gen array inside rcu
	 * read section. Besides once set the net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));

	rcu_assign_pointer(net->gen, ng);
88
	kfree_rcu(old_ng, rcu);
89 90 91 92 93
assign:
	ng->ptr[id - 1] = data;
	return 0;
}

94 95
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
96 97 98
	int err = -ENOMEM;
	void *data = NULL;

99
	if (ops->id && ops->size) {
100
		data = kzalloc(ops->size, GFP_KERNEL);
101
		if (!data)
102
			goto out;
103 104

		err = net_assign_generic(net, *ops->id, data);
105 106
		if (err)
			goto cleanup;
107
	}
108
	err = 0;
109
	if (ops->init)
110 111 112 113 114 115 116 117 118
		err = ops->init(net);
	if (!err)
		return 0;

cleanup:
	kfree(data);

out:
	return err;
119 120 121 122 123 124 125 126 127 128
}

/*
 * Free the private area that @ops owns in @net.  Does nothing unless
 * @ops has both an id and a size.
 */
static void ops_free(const struct pernet_operations *ops, struct net *net)
{
	if (!ops->id || !ops->size)
		return;

	kfree(net_generic(net, *ops->id));
}

129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
/*
 * Call @ops' exit method on every namespace queued on @net_exit_list
 * (linked through ->exit_list), then its batched exit method once for
 * the whole list.  Either method may be absent.
 */
static void ops_exit_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *ns;

	if (ops->exit)
		list_for_each_entry(ns, net_exit_list, exit_list)
			ops->exit(ns);

	if (ops->exit_batch)
		ops->exit_batch(net_exit_list);
}

/*
 * Release @ops' private area in every namespace queued on
 * @net_exit_list.  Does nothing for ops without an id or a size.
 */
static void ops_free_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *ns;

	if (!ops->size || !ops->id)
		return;

	list_for_each_entry(ns, net_exit_list, exit_list)
		ops_free(ops, ns);
}

N
Nicolas Dichtel 已提交
151 152
/* Defined further down; declared here because alloc_netid() calls it. */
static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
			      int id);
153 154
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
N
Nicolas Dichtel 已提交
155
	int min = 0, max = 0, id;
156 157 158 159 160 161 162 163

	ASSERT_RTNL();

	if (reqid >= 0) {
		min = reqid;
		max = reqid + 1;
	}

N
Nicolas Dichtel 已提交
164 165 166 167 168
	id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
	if (id >= 0)
		rtnl_net_notifyid(net, peer, RTM_NEWNSID, id);

	return id;
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
}

/* Callback for idr_for_each(). When net equals peer it returns the id so
 * that idr_for_each() stops. Id 0 cannot be returned as-is (a zero return
 * does not stop idr_for_each()), so the magic value NET_ID_ZERO (-1)
 * stands in for it.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (!net_eq(net, peer))
		return 0;

	return id ? id : NET_ID_ZERO;
}

/*
 * Look up the id that @net has assigned to @peer.  When none exists and
 * @alloc is true, a new one is allocated through alloc_netid().
 *
 * Returns the id (>= 0), -ENOENT when there is none and @alloc is
 * false, or the error from alloc_netid().
 */
static int __peernet2id(struct net *net, struct net *peer, bool alloc)
{
	int found = idr_for_each(&net->netns_ids, net_eq_idr, peer);

	ASSERT_RTNL();

	/* Magic value for id 0. */
	if (found == NET_ID_ZERO)
		return 0;
	if (found > 0)
		return found;

	return alloc ? alloc_netid(net, peer, -1) : -ENOENT;
}

/* This function returns the id of a peer netns. If no id is assigned, one will
 * be allocated and returned.
 */
int peernet2id(struct net *net, struct net *peer)
{
207 208
	bool alloc = atomic_read(&peer->count) == 0 ? false : true;
	int id;
209

210
	id = __peernet2id(net, peer, alloc);
211 212
	return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
}
213
EXPORT_SYMBOL(peernet2id);
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230

/*
 * Look up the peer namespace that @net knows under @id and take a
 * reference on it.
 *
 * An id is removed from the idr only by cleanup_net(), which runs from a
 * workqueue after __put_net() has queued the namespace.  A peer can
 * therefore still be found here after its reference count has dropped to
 * zero.  maybe_get_net() takes a reference only while the count is
 * non-zero, so a dying namespace is reported as not found instead of
 * being resurrected.
 *
 * Returns the referenced peer, or NULL if @id is negative, unknown, or
 * refers to a namespace that is being destroyed.
 */
struct net *get_net_ns_by_id(struct net *net, int id)
{
	struct net *peer;

	if (id < 0)
		return NULL;

	rcu_read_lock();
	peer = idr_find(&net->netns_ids, id);
	if (peer)
		peer = maybe_get_net(peer);
	rcu_read_unlock();

	return peer;
}

231 232 233
/*
 * setup_net runs the initializers for the network namespace object.
 */
234
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
235 236
{
	/* Must be called with net_mutex held */
237
	const struct pernet_operations *ops, *saved_ops;
238
	int error = 0;
239
	LIST_HEAD(net_exit_list);
240 241

	atomic_set(&net->count, 1);
242
	atomic_set(&net->passive, 1);
243
	net->dev_base_seq = 1;
244
	net->user_ns = user_ns;
245
	idr_init(&net->netns_ids);
246

247
	list_for_each_entry(ops, &pernet_list, list) {
248 249 250
		error = ops_init(ops, net);
		if (error < 0)
			goto out_undo;
251 252 253
	}
out:
	return error;
254

255 256 257 258
out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
259
	list_add(&net->exit_list, &net_exit_list);
260
	saved_ops = ops;
261 262 263
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

264 265
	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
266
		ops_free_list(ops, &net_exit_list);
267 268

	rcu_barrier();
269 270 271
	goto out;
}

272

273 274 275 276
#ifdef CONFIG_NET_NS
/* Slab cache that struct net objects come from; created in net_ns_init(). */
static struct kmem_cache *net_cachep;
/* Workqueue on which __put_net() queues net_cleanup_work. */
static struct workqueue_struct *netns_wq;

277
static struct net *net_alloc(void)
278
{
279 280 281 282 283 284 285 286
	struct net *net = NULL;
	struct net_generic *ng;

	ng = net_alloc_generic();
	if (!ng)
		goto out;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
287
	if (!net)
288
		goto out_free;
289

290 291 292 293 294 295 296 297 298 299 300
	rcu_assign_pointer(net->gen, ng);
out:
	return net;

out_free:
	kfree(ng);
	goto out;
}

static void net_free(struct net *net)
{
E
Eric Dumazet 已提交
301
	kfree(rcu_access_pointer(net->gen));
302 303 304
	kmem_cache_free(net_cachep, net);
}

305 306 307 308 309 310 311
/*
 * Drop one passive reference on the namespace @p and free it when that
 * was the last one.  A NULL @p is ignored.
 */
void net_drop_ns(void *p)
{
	struct net *net = p;

	if (!net)
		return;

	if (atomic_dec_and_test(&net->passive))
		net_free(net);
}

312 313
struct net *copy_net_ns(unsigned long flags,
			struct user_namespace *user_ns, struct net *old_net)
314
{
315 316
	struct net *net;
	int rv;
317

318 319 320
	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);

321 322 323
	net = net_alloc();
	if (!net)
		return ERR_PTR(-ENOMEM);
324 325 326

	get_user_ns(user_ns);

327
	mutex_lock(&net_mutex);
328
	rv = setup_net(net, user_ns);
329
	if (rv == 0) {
330
		rtnl_lock();
331
		list_add_tail_rcu(&net->list, &net_namespace_list);
332 333
		rtnl_unlock();
	}
334
	mutex_unlock(&net_mutex);
335
	if (rv < 0) {
336
		put_user_ns(user_ns);
337
		net_drop_ns(net);
338 339 340 341
		return ERR_PTR(rv);
	}
	return net;
}
342

343 344 345
/* Namespaces queued by __put_net() and waiting for cleanup_net(). */
static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */

346 347
static void cleanup_net(struct work_struct *work)
{
348
	const struct pernet_operations *ops;
349
	struct net *net, *tmp;
X
xiao jin 已提交
350
	struct list_head net_kill_list;
351
	LIST_HEAD(net_exit_list);
352

353 354 355 356
	/* Atomically snapshot the list of namespaces to cleanup */
	spin_lock_irq(&cleanup_list_lock);
	list_replace_init(&cleanup_list, &net_kill_list);
	spin_unlock_irq(&cleanup_list_lock);
357 358 359 360 361

	mutex_lock(&net_mutex);

	/* Don't let anyone else find us. */
	rtnl_lock();
362
	list_for_each_entry(net, &net_kill_list, cleanup_list) {
363
		list_del_rcu(&net->list);
364
		list_add_tail(&net->exit_list, &net_exit_list);
365 366 367
		for_each_net(tmp) {
			int id = __peernet2id(tmp, net, false);

N
Nicolas Dichtel 已提交
368 369
			if (id >= 0) {
				rtnl_net_notifyid(tmp, net, RTM_DELNSID, id);
370
				idr_remove(&tmp->netns_ids, id);
N
Nicolas Dichtel 已提交
371
			}
372 373 374
		}
		idr_destroy(&net->netns_ids);

375
	}
376 377
	rtnl_unlock();

378 379 380 381 382 383 384
	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 */
	synchronize_rcu();

385
	/* Run all of the network namespace exit methods */
386 387 388
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

389
	/* Free the net generic variables */
390 391
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);
392 393 394 395 396 397 398 399 400

	mutex_unlock(&net_mutex);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Finally it is safe to free my network namespace structure */
401 402
	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		list_del_init(&net->exit_list);
403
		put_user_ns(net->user_ns);
404
		net_drop_ns(net);
405
	}
406
}
407
/* Work item that runs cleanup_net(); queued by __put_net(). */
static DECLARE_WORK(net_cleanup_work, cleanup_net);
408 409 410 411

/*
 * Queue @net for destruction.  The namespace is put on cleanup_list and
 * the actual teardown is left to cleanup_net() on the netns workqueue.
 */
void __put_net(struct net *net)
{
	/* Cleanup the network namespace in process context */
	unsigned long irqflags;

	spin_lock_irqsave(&cleanup_list_lock, irqflags);
	list_add(&net->cleanup_list, &cleanup_list);
	spin_unlock_irqrestore(&cleanup_list_lock, irqflags);

	queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);

422 423 424
struct net *get_net_ns_by_fd(int fd)
{
	struct file *file;
425
	struct ns_common *ns;
426 427 428
	struct net *net;

	file = proc_ns_fget(fd);
429 430
	if (IS_ERR(file))
		return ERR_CAST(file);
431

A
Al Viro 已提交
432
	ns = get_proc_ns(file_inode(file));
433 434
	if (ns->ops == &netns_operations)
		net = get_net(container_of(ns, struct net, ns));
435 436
	else
		net = ERR_PTR(-EINVAL);
437

438
	fput(file);
439 440 441
	return net;
}

442
#else
443 444 445 446
/* Variant built without CONFIG_NET_NS: the lookup always fails with -EINVAL. */
struct net *get_net_ns_by_fd(int fd)
{
	return ERR_PTR(-EINVAL);
}
447
#endif
448
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
449

450 451 452 453 454 455 456 457 458 459 460
/*
 * Return, with a reference taken, the network namespace of the task
 * whose pid is @pid, or ERR_PTR(-ESRCH) when there is no such task or
 * the task has no nsproxy.
 */
struct net *get_net_ns_by_pid(pid_t pid)
{
	struct net *net = ERR_PTR(-ESRCH);
	struct task_struct *task;

	/* Lookup the network namespace */
	rcu_read_lock();
	task = find_task_by_vpid(pid);
	if (task) {
		struct nsproxy *nsp;

		task_lock(task);
		nsp = task->nsproxy;
		if (nsp)
			net = get_net(nsp->net_ns);
		task_unlock(task);
	}
	rcu_read_unlock();

	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

472 473
static __net_init int net_ns_net_init(struct net *net)
{
474 475 476
#ifdef CONFIG_NET_NS
	net->ns.ops = &netns_operations;
#endif
A
Al Viro 已提交
477
	return ns_alloc_inum(&net->ns);
478 479 480 481
}

static __net_exit void net_ns_net_exit(struct net *net)
{
A
Al Viro 已提交
482
	ns_free_inum(&net->ns);
483 484 485 486 487 488 489
}

/* Pernet operations registered by net_ns_init(); they manage ns inode numbers. */
static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};

490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541
/*
 * Attribute policy handed to nlmsg_parse() by the nsid handlers.  The
 * table is only ever read, so it is const.
 */
static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
	[NETNSA_NSID]		= { .type = NLA_S32 },
	[NETNSA_PID]		= { .type = NLA_U32 },
	[NETNSA_FD]		= { .type = NLA_U32 },
};

/*
 * RTM_NEWNSID handler: assign the id in NETNSA_NSID to the peer
 * namespace named by NETNSA_PID or NETNSA_FD, as seen from the
 * requesting socket's namespace.
 *
 * Returns 0 on success, -EINVAL when a required attribute is missing,
 * -EEXIST when the peer already has an id, or another negative error.
 */
static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct net *peer;
	int nsid, err;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (!tb[NETNSA_NSID])
		return -EINVAL;
	nsid = nla_get_s32(tb[NETNSA_NSID]);

	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;
	if (IS_ERR(peer))
		return PTR_ERR(peer);

	if (__peernet2id(net, peer, false) >= 0) {
		err = -EEXIST;
	} else {
		/* alloc_netid() returns the new id; report success as 0. */
		err = alloc_netid(net, peer, nsid);
		if (err > 0)
			err = 0;
	}

	put_net(peer);
	return err;
}

/*
 * Payload size of an nsid message: the aligned rtgenmsg header plus one
 * s32 attribute.
 */
static int rtnl_net_get_size(void)
{
	int hdr = NLMSG_ALIGN(sizeof(struct rtgenmsg));
	int nsid_attr = nla_total_size(sizeof(s32));	/* NETNSA_NSID */

	return hdr + nsid_attr;
}

/*
 * Append one nsid message of type @cmd to @skb.  A non-negative @nsid is
 * reported as given; otherwise the id @net holds for @peer is looked up
 * and NETNSA_NSID_NOT_ASSIGNED is reported when there is none.
 * Must be called with RTNL held.
 *
 * Returns 0 on success or -EMSGSIZE when @skb has no room.
 */
static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
			 int cmd, struct net *net, struct net *peer,
			 int nsid)
{
	struct rtgenmsg *rth;
	struct nlmsghdr *nlh;
	int id = nsid;

	ASSERT_RTNL();

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
	if (!nlh)
		return -EMSGSIZE;

	rth = nlmsg_data(nlh);
	rth->rtgen_family = AF_UNSPEC;

	if (id < 0) {
		id = __peernet2id(net, peer, false);
		if (id < 0)
			id = NETNSA_NSID_NOT_ASSIGNED;
	}

	if (nla_put_s32(skb, NETNSA_NSID, id)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct sk_buff *msg;
	struct net *peer;
582
	int err;
583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;

	if (IS_ERR(peer))
		return PTR_ERR(peer);

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
N
Nicolas Dichtel 已提交
605
			    RTM_GETNSID, net, peer, -1);
606 607 608 609 610 611 612 613 614 615 616 617 618
	if (err < 0)
		goto err_out;

	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
	goto out;

err_out:
	nlmsg_free(msg);
out:
	put_net(peer);
	return err;
}

N
Nicolas Dichtel 已提交
619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664
/* State passed through idr_for_each() to rtnl_net_dumpid_one(). */
struct rtnl_net_dump_cb {
	struct net *net;		/* namespace whose netns_ids are dumped */
	struct sk_buff *skb;		/* buffer the messages are written into */
	struct netlink_callback *cb;	/* dump request: source of portid and seq */
	int idx;			/* count of idr entries visited so far */
	int s_idx;			/* entries below this index are skipped */
};

/*
 * idr_for_each() callback for rtnl_net_dumpid(): emit one RTM_NEWNSID
 * message for @peer with id @id, unless the entry lies before the
 * resume index.  A negative return stops the walk without counting the
 * entry.
 */
static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
	struct rtnl_net_dump_cb *state = data;
	int err;

	if (state->idx >= state->s_idx) {
		err = rtnl_net_fill(state->skb,
				    NETLINK_CB(state->cb->skb).portid,
				    state->cb->nlh->nlmsg_seq, NLM_F_MULTI,
				    RTM_NEWNSID, state->net, peer, id);
		if (err < 0)
			return err;
	}

	state->idx++;
	return 0;
}

static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct rtnl_net_dump_cb net_cb = {
		.net = net,
		.skb = skb,
		.cb = cb,
		.idx = 0,
		.s_idx = cb->args[0],
	};

	ASSERT_RTNL();

	idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);

	cb->args[0] = net_cb.idx;
	return skb->len;
}

N
Nicolas Dichtel 已提交
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687
/*
 * Send a @cmd notification carrying @id for @peer to the RTNLGRP_NSID
 * group of @net.  If the message cannot be built, the error is recorded
 * with rtnl_set_sk_err() instead.
 */
static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
			      int id)
{
	struct sk_buff *msg;
	int err;

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		rtnl_set_sk_err(net, RTNLGRP_NSID, -ENOMEM);
		return;
	}

	err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id);
	if (err < 0) {
		nlmsg_free(msg);
		rtnl_set_sk_err(net, RTNLGRP_NSID, err);
		return;
	}

	rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
}

688 689
/*
 * Boot-time initialisation of the namespace core: create the slab cache
 * and cleanup workqueue (CONFIG_NET_NS only), set up init_net, put it on
 * net_namespace_list, and register net_ns_ops and the nsid rtnetlink
 * handlers.
 *
 * Any failure here panics, because nothing can work without the initial
 * namespace.  That includes a failure to register net_ns_ops, whose
 * return value was previously ignored.
 */
static int __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
					SMP_CACHE_BYTES,
					SLAB_PANIC, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

	mutex_lock(&net_mutex);
	if (setup_net(&init_net, &init_user_ns))
		panic("Could not setup the initial network namespace");

	rtnl_lock();
	list_add_tail_rcu(&init_net.list, &net_namespace_list);
	rtnl_unlock();

	mutex_unlock(&net_mutex);

	if (register_pernet_subsys(&net_ns_ops))
		panic("Could not register network namespace subsystems");

	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
		      NULL);

	return 0;
}

pure_initcall(net_ns_init);

730
#ifdef CONFIG_NET_NS
731 732
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
733
{
734
	struct net *net;
735
	int error;
736
	LIST_HEAD(net_exit_list);
737 738

	list_add_tail(&ops->list, list);
739
	if (ops->init || (ops->id && ops->size)) {
740
		for_each_net(net) {
741
			error = ops_init(ops, net);
742 743
			if (error)
				goto out_undo;
744
			list_add_tail(&net->exit_list, &net_exit_list);
745 746
		}
	}
747
	return 0;
748 749 750 751

out_undo:
	/* If I have an error cleanup all namespaces I initialized */
	list_del(&ops->list);
752 753
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
754
	return error;
755 756
}

757
static void __unregister_pernet_operations(struct pernet_operations *ops)
758 759
{
	struct net *net;
760
	LIST_HEAD(net_exit_list);
761 762

	list_del(&ops->list);
763 764 765 766
	for_each_net(net)
		list_add_tail(&net->exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
767 768
}

769 770
#else

771 772
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
773
{
774
	return ops_init(ops, &init_net);
775 776
}

777
static void __unregister_pernet_operations(struct pernet_operations *ops)
778
{
779 780 781 782
	LIST_HEAD(net_exit_list);
	list_add(&init_net.exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
783
}
784 785

#endif /* CONFIG_NET_NS */
786

787 788
/* Allocator for the values stored in pernet_operations->id; ids start at 1. */
static DEFINE_IDA(net_generic_ids);

789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	if (ops->id) {
again:
		error = ida_get_new_above(&net_generic_ids, 1, ops->id);
		if (error < 0) {
			if (error == -EAGAIN) {
				ida_pre_get(&net_generic_ids, GFP_KERNEL);
				goto again;
			}
			return error;
		}
E
Eric Dumazet 已提交
804
		max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
805 806
	}
	error = __register_pernet_operations(list, ops);
807 808 809 810 811
	if (error) {
		rcu_barrier();
		if (ops->id)
			ida_remove(&net_generic_ids, *ops->id);
	}
812 813 814 815 816 817 818 819

	return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	
	__unregister_pernet_operations(ops);
820
	rcu_barrier();
821 822 823 824
	if (ops->id)
		ida_remove(&net_generic_ids, *ops->id);
}

825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858
/**
 *      register_pernet_subsys - register a network namespace subsystem
 *	@ops:  pernet operations structure for the subsystem
 *
 *	Register a subsystem whose init and exit functions are called
 *	when network namespaces are created and destroyed respectively.
 *
 *	On registration the init function is called for every existing
 *	network namespace, which gives kernel modules a race free view
 *	of the set of network namespaces.
 *
 *	When a new network namespace is created, all init methods are
 *	called in the order in which they were registered.
 *
 *	When a network namespace is destroyed, all exit methods are
 *	called in the reverse of the order in which they were
 *	registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int ret;

	mutex_lock(&net_mutex);
	ret = register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);

/**
 *      unregister_pernet_subsys - unregister a network namespace subsystem
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list to be
859
 *	used when network namespaces are created or destroyed.  In
860 861 862
 *	addition run the exit method for all existing network
 *	namespaces.
 */
863
void unregister_pernet_subsys(struct pernet_operations *ops)
864 865
{
	mutex_lock(&net_mutex);
866
	unregister_pernet_operations(ops);
867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

/**
 *      register_pernet_device - register a network namespace device
 *	@ops:  pernet operations structure for the subsystem
 *
 *	Register a device whose init and exit functions are called
 *	when network namespaces are created and destroyed respectively.
 *
 *	On registration the init function is called for every existing
 *	network namespace, which gives kernel modules a race free view
 *	of the set of network namespaces.
 *
 *	When a new network namespace is created, all init methods are
 *	called in the order in which they were registered.
 *
 *	When a network namespace is destroyed, all exit methods are
 *	called in the reverse of the order in which they were
 *	registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int ret;

	mutex_lock(&net_mutex);
	ret = register_pernet_operations(&pernet_list, ops);
	/* The first device registered becomes the subsys insertion point. */
	if (ret == 0 && first_device == &pernet_list)
		first_device = &ops->list;
	mutex_unlock(&net_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 *      unregister_pernet_device - unregister a network namespace netdevice
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list that is
 *	used when network namespaces are created or destroyed, and run
 *	the exit method for all existing network namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);

	/* Move the subsys insertion point off the entry being removed. */
	if (first_device == &ops->list)
		first_device = ops->list.next;

	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
920 921

#ifdef CONFIG_NET_NS
922
static struct ns_common *netns_get(struct task_struct *task)
923
{
924 925 926
	struct net *net = NULL;
	struct nsproxy *nsproxy;

927 928
	task_lock(task);
	nsproxy = task->nsproxy;
929 930
	if (nsproxy)
		net = get_net(nsproxy->net_ns);
931
	task_unlock(task);
932

933 934 935 936 937 938
	return net ? &net->ns : NULL;
}

/* Map an embedded ns_common back to the struct net that contains it. */
static inline struct net *to_net_ns(struct ns_common *ns)
{
	struct net *net = container_of(ns, struct net, ns);

	return net;
}

941
/* proc_ns "put" operation: drop the reference taken by netns_get(). */
static void netns_put(struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	put_net(net);
}

946
static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
947
{
948
	struct net *net = to_net_ns(ns);
949

950
	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
951
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
952 953
		return -EPERM;

954
	put_net(nsproxy->net_ns);
955
	nsproxy->net_ns = get_net(net);
956 957 958 959 960 961 962 963 964 965 966
	return 0;
}

/* proc_ns operations for network namespaces ("net", CLONE_NEWNET). */
const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
};
#endif