net_namespace.c 20.1 KB
Newer Older
J
Joe Perches 已提交
1 2
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

3 4 5 6 7 8
#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
9
#include <linux/sched.h>
10
#include <linux/idr.h>
11
#include <linux/rculist.h>
12
#include <linux/nsproxy.h>
13 14
#include <linux/fs.h>
#include <linux/proc_ns.h>
15
#include <linux/file.h>
16
#include <linux/export.h>
17
#include <linux/user_namespace.h>
18 19 20 21
#include <linux/net_namespace.h>
#include <linux/rtnetlink.h>
#include <net/sock.h>
#include <net/netlink.h>
22
#include <net/net_namespace.h>
23
#include <net/netns/generic.h>
24 25 26 27 28 29 30

/*
 *	Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
/* Boundary inside pernet_list: subsystem ops are registered before this
 * point, device ops after it (see register_pernet_subsys/_device).
 */
static struct list_head *first_device = &pernet_list;
/* Serializes namespace setup/teardown and pernet (un)registration. */
DEFINE_MUTEX(net_mutex);

/* All live namespaces: traversed under RCU, modified under RTNL. */
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

/* The initial network namespace, statically allocated and initialized
 * at boot by net_ns_init().
 */
struct net init_net = {
	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

E
Eric Dumazet 已提交
43 44 45 46 47 48 49 50 51 52 53 54 55 56
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

static struct net_generic *net_alloc_generic(void)
{
	struct net_generic *ng;
	size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);

	ng = kzalloc(generic_size, GFP_KERNEL);
	if (ng)
		ng->len = max_gen_ptrs;

	return ng;
}

57 58 59 60 61 62 63
/* Store @data in slot @id of @net's generic pointer array, growing the
 * array (RCU-safely) if @id is beyond its current length.  Caller must
 * hold net_mutex; id 0 is reserved and never assigned.
 */
static int net_assign_generic(struct net *net, int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id == 0);

	old_ng = rcu_dereference_protected(net->gen,
					   lockdep_is_held(&net_mutex));
	ng = old_ng;
	if (old_ng->len >= id)
		goto assign;

	ng = net_alloc_generic();
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * The net_generic explores the net->gen array inside rcu
	 * read section. Besides once set the net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));

	rcu_assign_pointer(net->gen, ng);
	kfree_rcu(old_ng, rcu);
assign:
	/* Slot numbering is 1-based: id N lives in ptr[N - 1]. */
	ng->ptr[id - 1] = data;
	return 0;
}

94 95
/* Run @ops' init for namespace @net.  If @ops declares an id/size pair,
 * first allocate its per-net private area and publish it via
 * net_assign_generic().  Returns 0 or a negative errno; on failure the
 * private area (if allocated) is freed again.
 */
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
	int err = -ENOMEM;
	void *data = NULL;

	if (ops->id && ops->size) {
		data = kzalloc(ops->size, GFP_KERNEL);
		if (!data)
			goto out;

		err = net_assign_generic(net, *ops->id, data);
		if (err)
			goto cleanup;
	}
	err = 0;
	if (ops->init)
		err = ops->init(net);
	if (!err)
		return 0;

cleanup:
	kfree(data);

out:
	return err;
}

/* Free the per-net private area that ops_init() attached for @ops,
 * if @ops declared one (id + size).
 */
static void ops_free(const struct pernet_operations *ops, struct net *net)
{
	if (ops->id && ops->size)
		kfree(net_generic(net, *ops->id));
}

129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
/* Invoke @ops' per-net exit hook for every namespace on @net_exit_list,
 * then its batch exit hook (if any) once for the whole list.
 */
static void ops_exit_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *cur;

	if (ops->exit) {
		list_for_each_entry(cur, net_exit_list, exit_list)
			ops->exit(cur);
	}
	if (ops->exit_batch)
		ops->exit_batch(net_exit_list);
}

/* Release @ops' private data in every namespace on @net_exit_list;
 * a no-op for ops without an id/size pair.
 */
static void ops_free_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *cur;

	if (ops->size && ops->id) {
		list_for_each_entry(cur, net_exit_list, exit_list)
			ops_free(ops, cur);
	}
}

151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/* Reserve an id for @peer in @net's netns_ids idr.  A non-negative
 * @reqid requests that exact id; a negative one lets the idr choose.
 * Returns the id, or a negative errno.  Caller must hold RTNL.
 */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
	int lo = 0, hi = 0;	/* hi == 0 means "no upper bound" to idr_alloc */

	ASSERT_RTNL();

	if (reqid >= 0) {
		lo = reqid;
		hi = reqid + 1;
	}

	return idr_alloc(&net->netns_ids, peer, lo, hi, GFP_KERNEL);
}

/* idr_for_each() callback: when @net matches @peer, return its id so
 * the iteration stops.  Returning 0 would keep the walk going, so the
 * valid id 0 is encoded as the magic value NET_ID_ZERO (-1).
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (!net_eq(net, peer))
		return 0;
	return id ? id : NET_ID_ZERO;
}

/* Find the id of @peer inside @net, optionally allocating one when not
 * found (@alloc).  Returns the id (>= 0), or -ENOENT when absent and
 * @alloc is false.  Caller must hold RTNL.
 */
static int __peernet2id(struct net *net, struct net *peer, bool alloc)
{
	int id;

	ASSERT_RTNL();

	id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
	if (id > 0)
		return id;
	if (id == NET_ID_ZERO)	/* magic encoding of the valid id 0 */
		return 0;

	return alloc ? alloc_netid(net, peer, -1) : -ENOENT;
}

/* This function returns the id of a peer netns. If no id is assigned, one will
 * be allocated and returned.  On allocation failure the netlink sentinel
 * NETNSA_NSID_NOT_ASSIGNED is returned instead of an errno.
 */
int peernet2id(struct net *net, struct net *peer)
{
	int id = __peernet2id(net, peer, true);

	if (id < 0)
		id = NETNSA_NSID_NOT_ASSIGNED;
	return id;
}
EXPORT_SYMBOL(peernet2id);
206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222

/* Look up the peer namespace registered under nsid @id in @net and take
 * a reference on it.  Returns NULL for a negative or unassigned id.
 */
struct net *get_net_ns_by_id(struct net *net, int id)
{
	struct net *found = NULL;

	if (id < 0)
		return NULL;

	rcu_read_lock();
	found = idr_find(&net->netns_ids, id);
	if (found)
		get_net(found);
	rcu_read_unlock();

	return found;
}

223 224 225
/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
	/* Must be called with net_mutex held */
	const struct pernet_operations *ops, *saved_ops;
	int error = 0;
	LIST_HEAD(net_exit_list);

	atomic_set(&net->count, 1);	/* the creator's active reference */
	atomic_set(&net->passive, 1);	/* dropped via net_drop_ns() */
	net->dev_base_seq = 1;
	net->user_ns = user_ns;
	idr_init(&net->netns_ids);

#ifdef NETNS_REFCNT_DEBUG
	atomic_set(&net->use_count, 0);
#endif

	list_for_each_entry(ops, &pernet_list, list) {
		error = ops_init(ops, net);
		if (error < 0)
			goto out_undo;
	}
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_add(&net->exit_list, &net_exit_list);
	saved_ops = ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	/* Second reverse pass: free the per-net data only after every
	 * exit hook has run.
	 */
	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	/* Let pending RCU callbacks from the exit hooks finish before
	 * returning the half-built namespace to the caller for freeing.
	 */
	rcu_barrier();
	goto out;
}

268

269 270 271 272
#ifdef CONFIG_NET_NS
/* Slab cache for struct net allocations (created in net_ns_init()). */
static struct kmem_cache *net_cachep;
/* Single-threaded workqueue on which cleanup_net() runs. */
static struct workqueue_struct *netns_wq;

273
static struct net *net_alloc(void)
274
{
275 276 277 278 279 280 281 282
	struct net *net = NULL;
	struct net_generic *ng;

	ng = net_alloc_generic();
	if (!ng)
		goto out;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
283
	if (!net)
284
		goto out_free;
285

286 287 288 289 290 291 292 293 294 295 296
	rcu_assign_pointer(net->gen, ng);
out:
	return net;

out_free:
	kfree(ng);
	goto out;
}

/* Final destruction of a namespace, reached once both the active and
 * passive refcounts have dropped (see net_drop_ns()).
 */
static void net_free(struct net *net)
{
#ifdef NETNS_REFCNT_DEBUG
	/* Refcount-debug build: a leaked hold leaves the namespace
	 * allocated instead of freeing it out from under the holder.
	 */
	if (unlikely(atomic_read(&net->use_count) != 0)) {
		pr_emerg("network namespace not free! Usage: %d\n",
			 atomic_read(&net->use_count));
		return;
	}
#endif
	/* Callers ensure no RCU readers remain (cleanup_net() runs
	 * rcu_barrier() first), so gen can be kfree'd directly.
	 */
	kfree(rcu_access_pointer(net->gen));
	kmem_cache_free(net_cachep, net);
}

308 309 310 311 312 313 314
/* Drop one passive reference on the namespace @p (may be NULL); the
 * namespace is freed when the last passive reference goes away.
 */
void net_drop_ns(void *p)
{
	struct net *ns = p;

	if (!ns)
		return;
	if (atomic_dec_and_test(&ns->passive))
		net_free(ns);
}

315 316
/* clone/unshare entry point: without CLONE_NEWNET the old namespace is
 * shared (with an extra reference); otherwise a new one is allocated,
 * initialized under net_mutex and published on net_namespace_list
 * under RTNL.  Returns the namespace or an ERR_PTR.
 */
struct net *copy_net_ns(unsigned long flags,
			struct user_namespace *user_ns, struct net *old_net)
{
	struct net *net;
	int rv;

	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);

	net = net_alloc();
	if (!net)
		return ERR_PTR(-ENOMEM);

	get_user_ns(user_ns);	/* the namespace holds a ref on its owner */

	mutex_lock(&net_mutex);
	rv = setup_net(net, user_ns);
	if (rv == 0) {
		rtnl_lock();
		list_add_tail_rcu(&net->list, &net_namespace_list);
		rtnl_unlock();
	}
	mutex_unlock(&net_mutex);
	if (rv < 0) {
		put_user_ns(user_ns);
		net_drop_ns(net);	/* drops the initial passive ref */
		return ERR_PTR(rv);
	}
	return net;
}
345

346 347 348
/* Protects cleanup_list; __put_net() takes it with irqs saved. */
static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */

349 350
/* Workqueue handler tearing down every namespace queued on
 * cleanup_list by __put_net(): unlink them, wait out RCU readers, run
 * all pernet exit hooks, drop peer nsids, and free the structures.
 */
static void cleanup_net(struct work_struct *work)
{
	const struct pernet_operations *ops;
	struct net *net, *tmp, *peer;
	struct list_head net_kill_list;	/* initialized by list_replace_init() */
	LIST_HEAD(net_exit_list);

	/* Atomically snapshot the list of namespaces to cleanup */
	spin_lock_irq(&cleanup_list_lock);
	list_replace_init(&cleanup_list, &net_kill_list);
	spin_unlock_irq(&cleanup_list_lock);

	mutex_lock(&net_mutex);

	/* Don't let anyone else find us. */
	rtnl_lock();
	list_for_each_entry(net, &net_kill_list, cleanup_list) {
		list_del_rcu(&net->list);
		list_add_tail(&net->exit_list, &net_exit_list);
	}
	rtnl_unlock();

	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 */
	synchronize_rcu();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	/* Free the net generic variables */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	mutex_unlock(&net_mutex);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	rtnl_lock();
	/* Finally it is safe to free my network namespace structure */
	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		/* Unreference net from all peers (no need to loop over
		 * net_exit_list because idr_destroy() will be called for each
		 * element of this list.
		 */
		for_each_net(peer) {
			int id = __peernet2id(peer, net, false);

			if (id >= 0)
				idr_remove(&peer->netns_ids, id);
		}
		idr_destroy(&net->netns_ids);

		list_del_init(&net->exit_list);
		put_user_ns(net->user_ns);
		net_drop_ns(net);
	}
	rtnl_unlock();
}
static DECLARE_WORK(net_cleanup_work, cleanup_net);
415 416 417 418

/* Called when the last active reference to @net is dropped.  Defers
 * teardown to the netns workqueue; may run in any context, hence the
 * irqsave locking on cleanup_list.
 */
void __put_net(struct net *net)
{
	/* Cleanup the network namespace in process context */
	unsigned long flags;

	spin_lock_irqsave(&cleanup_list_lock, flags);
	list_add(&net->cleanup_list, &cleanup_list);
	spin_unlock_irqrestore(&cleanup_list_lock, flags);

	queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);

429 430 431
/* Resolve an open nsfs fd (e.g. /proc/<pid>/ns/net) to its struct net,
 * taking a reference.  Returns ERR_PTR(-EINVAL) if the fd refers to a
 * namespace of another type, or propagates proc_ns_fget() errors.
 */
struct net *get_net_ns_by_fd(int fd)
{
	struct file *file;
	struct ns_common *ns;
	struct net *net;

	file = proc_ns_fget(fd);
	if (IS_ERR(file))
		return ERR_CAST(file);

	ns = get_proc_ns(file_inode(file));
	if (ns->ops == &netns_operations)
		net = get_net(container_of(ns, struct net, ns));
	else
		net = ERR_PTR(-EINVAL);

	fput(file);
	return net;
}

449
#else
/* !CONFIG_NET_NS: namespaces cannot be referenced through an fd. */
struct net *get_net_ns_by_fd(int fd)
{
	return ERR_PTR(-EINVAL);
}
#endif
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
456

457 458 459 460 461 462 463 464 465 466 467
/* Look up the net namespace of the task with virtual pid @pid, taking a
 * reference.  Returns ERR_PTR(-ESRCH) if the task does not exist or no
 * longer has an nsproxy.
 */
struct net *get_net_ns_by_pid(pid_t pid)
{
	struct task_struct *tsk;
	struct net *net;

	/* Lookup the network namespace */
	net = ERR_PTR(-ESRCH);
	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk) {
		struct nsproxy *nsproxy;

		/* task_lock() keeps tsk->nsproxy stable while we take a ref */
		task_lock(tsk);
		nsproxy = tsk->nsproxy;
		if (nsproxy)
			net = get_net(nsproxy->net_ns);
		task_unlock(tsk);
	}
	rcu_read_unlock();
	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

479 480
/* Per-namespace init: assign a proc inode number so the namespace can
 * be referenced via /proc/<pid>/ns/net.
 */
static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
	net->ns.ops = &netns_operations;
#endif
	return ns_alloc_inum(&net->ns);
}

/* Per-namespace exit: return the proc inode number. */
static __net_exit void net_ns_net_exit(struct net *net)
{
	ns_free_inum(&net->ns);
}

static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};

497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620
/* Netlink attribute policy for RTM_NEWNSID / RTM_GETNSID messages. */
static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
	[NETNSA_NSID]		= { .type = NLA_S32 },
	[NETNSA_PID]		= { .type = NLA_U32 },
	[NETNSA_FD]		= { .type = NLA_U32 },
};

/* RTM_NEWNSID handler: assign the requested nsid to the peer namespace
 * (identified by NETNSA_PID or NETNSA_FD) as seen from the sender's
 * namespace.  Fails with -EEXIST if the peer already has an id here.
 */
static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct net *peer;
	int nsid, err;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (!tb[NETNSA_NSID])
		return -EINVAL;
	nsid = nla_get_s32(tb[NETNSA_NSID]);

	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;
	if (IS_ERR(peer))
		return PTR_ERR(peer);

	if (__peernet2id(net, peer, false) >= 0) {
		err = -EEXIST;
		goto out;
	}

	err = alloc_netid(net, peer, nsid);
	if (err > 0)	/* idr_alloc() returns the new id on success */
		err = 0;
out:
	put_net(peer);	/* drop the lookup's reference */
	return err;
}

/* Size of an RTM_GETNSID reply payload: the rtgenmsg header plus one
 * s32 NETNSA_NSID attribute.
 */
static int rtnl_net_get_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
	       nla_total_size(sizeof(s32));	/* NETNSA_NSID */
}

/* Build one RTM_*NSID message into @skb carrying the nsid of @peer as
 * seen from @net (NETNSA_NSID_NOT_ASSIGNED when none has been set).
 * Returns 0 or -EMSGSIZE if the skb has no room.  Caller holds RTNL.
 */
static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
			 int cmd, struct net *net, struct net *peer)
{
	struct rtgenmsg *rth;
	struct nlmsghdr *nlh;
	int id;

	ASSERT_RTNL();

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
	if (!nlh)
		return -EMSGSIZE;

	rth = nlmsg_data(nlh);
	rth->rtgen_family = AF_UNSPEC;

	/* Report only; never allocate an id on a read path. */
	id = __peernet2id(net, peer, false);
	if (id < 0)
		id = NETNSA_NSID_NOT_ASSIGNED;
	if (nla_put_s32(skb, NETNSA_NSID, id))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* RTM_GETNSID handler: reply with the nsid of the peer namespace
 * (identified by NETNSA_PID or NETNSA_FD) as seen from the sender's
 * namespace; NETNSA_NSID_NOT_ASSIGNED when no id has been allocated.
 *
 * Fix: drop the dead "err = -ENOBUFS" initializer — err was always
 * overwritten by nlmsg_parse() before its first use.
 */
static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct sk_buff *msg;
	struct net *peer;
	int err;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;

	if (IS_ERR(peer))
		return PTR_ERR(peer);

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
			    RTM_GETNSID, net, peer);
	if (err < 0)
		goto err_out;

	/* rtnl_unicast() consumes msg on both success and failure. */
	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
	goto out;

err_out:
	nlmsg_free(msg);
out:
	put_net(peer);	/* drop the lookup's reference */
	return err;
}

621 622
/* Boot-time setup: create the net slab cache and the cleanup
 * workqueue, initialize and publish init_net, register the nsfs pernet
 * ops and the RTM_NEWNSID/RTM_GETNSID netlink handlers.
 */
static int __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
					SMP_CACHE_BYTES,
					SLAB_PANIC, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	/* init_net is static, so its generic array is attached here
	 * rather than via net_alloc().
	 */
	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

	mutex_lock(&net_mutex);
	if (setup_net(&init_net, &init_user_ns))
		panic("Could not setup the initial network namespace");

	rtnl_lock();
	list_add_tail_rcu(&init_net.list, &net_namespace_list);
	rtnl_unlock();

	mutex_unlock(&net_mutex);

	register_pernet_subsys(&net_ns_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL);

	return 0;
}

pure_initcall(net_ns_init);

662
#ifdef CONFIG_NET_NS
/* Link @ops into @list and run its init in every existing namespace.
 * On failure, unlink and undo the inits already performed.  Caller
 * holds net_mutex.
 */
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	struct net *net;
	int error;
	LIST_HEAD(net_exit_list);

	list_add_tail(&ops->list, list);
	if (ops->init || (ops->id && ops->size)) {
		for_each_net(net) {
			error = ops_init(ops, net);
			if (error)
				goto out_undo;
			/* Remember which namespaces were initialized so
			 * only those are unwound on failure.
			 */
			list_add_tail(&net->exit_list, &net_exit_list);
		}
	}
	return 0;

out_undo:
	/* If I have an error cleanup all namespaces I initialized */
	list_del(&ops->list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
	return error;
}

689
static void __unregister_pernet_operations(struct pernet_operations *ops)
690 691
{
	struct net *net;
692
	LIST_HEAD(net_exit_list);
693 694

	list_del(&ops->list);
695 696 697 698
	for_each_net(net)
		list_add_tail(&net->exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
699 700
}

701 702
#else

/* !CONFIG_NET_NS: only init_net exists, so "every namespace" is it. */
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	return ops_init(ops, &init_net);
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	LIST_HEAD(net_exit_list);
	list_add(&init_net.exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
}

#endif /* CONFIG_NET_NS */
718

719 720
/* ids handed out for pernet_operations->id (net_generic slot numbers). */
static DEFINE_IDA(net_generic_ids);

/* Allocate a generic id for @ops (if it wants one), grow max_gen_ptrs
 * accordingly, and register the ops.  Caller holds net_mutex.
 */
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	if (ops->id) {
again:
		/* ida retry idiom: preload memory and try again when
		 * the ida needs to grow.
		 */
		error = ida_get_new_above(&net_generic_ids, 1, ops->id);
		if (error < 0) {
			if (error == -EAGAIN) {
				ida_pre_get(&net_generic_ids, GFP_KERNEL);
				goto again;
			}
			return error;
		}
		/* Future net_generic arrays must cover the new id. */
		max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
	}
	error = __register_pernet_operations(list, ops);
	if (error) {
		/* Wait out RCU users before recycling the id. */
		rcu_barrier();
		if (ops->id)
			ida_remove(&net_generic_ids, *ops->id);
	}

	return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	
	__unregister_pernet_operations(ops);
752
	rcu_barrier();
753 754 755 756
	if (ops->id)
		ida_remove(&net_generic_ids, *ops->id);
}

757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790
/**
 *      register_pernet_subsys - register a network namespace subsystem
 *	@ops:  pernet operations structure for the subsystem
 *
 *	Register a subsystem which has init and exit functions
 *	that are called when network namespaces are created and
 *	destroyed respectively.
 *
 *	When registered all network namespace init functions are
 *	called for every existing network namespace.  Allowing kernel
 *	modules to have a race free view of the set of network namespaces.
 *
 *	When a new network namespace is created all of the init
 *	methods are called in the order in which they were registered.
 *
 *	When a network namespace is destroyed all of the exit methods
 *	are called in the reverse of the order with which they were
 *	registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	/* Subsystems go before first_device so they init before, and
	 * exit after, all pernet devices.
	 */
	error =  register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);

/**
 *      unregister_pernet_subsys - unregister a network namespace subsystem
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list to be
 *	used when network namespaces are created or destroyed.  In
 *	addition run the exit method for all existing network
 *	namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

/**
 *      register_pernet_device - register a network namespace device
 *	@ops:  pernet operations structure for the subsystem
 *
 *	Register a device which has init and exit functions
 *	that are called when network namespaces are created and
 *	destroyed respectively.
 *
 *	When registered all network namespace init functions are
 *	called for every existing network namespace.  Allowing kernel
 *	modules to have a race free view of the set of network namespaces.
 *
 *	When a new network namespace is created all of the init
 *	methods are called in the order in which they were registered.
 *
 *	When a network namespace is destroyed all of the exit methods
 *	are called in the reverse of the order with which they were
 *	registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	error = register_pernet_operations(&pernet_list, ops);
	/* Track the first device ops so subsystems can be inserted
	 * before all devices (see register_pernet_subsys()).
	 */
	if (!error && (first_device == &pernet_list))
		first_device = &ops->list;
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 *      unregister_pernet_device - unregister a network namespace netdevice
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list to be
 *	used when network namespaces are created or destroyed.  In
 *	addition run the exit method for all existing network
 *	namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	/* Keep first_device valid if this was the first device ops. */
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
852 853

#ifdef CONFIG_NET_NS
/* proc_ns operation: take a reference on @task's net namespace, or
 * return NULL when the task has no nsproxy any more.
 */
static struct ns_common *netns_get(struct task_struct *task)
{
	struct net *net = NULL;
	struct nsproxy *nsproxy;

	/* task_lock() keeps task->nsproxy stable while we take a ref */
	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy)
		net = get_net(nsproxy->net_ns);
	task_unlock(task);

	return net ? &net->ns : NULL;
}

/* Map an ns_common back to its containing struct net. */
static inline struct net *to_net_ns(struct ns_common *ns)
{
	return container_of(ns, struct net, ns);
}

873
/* proc_ns operation: release the reference taken by netns_get(). */
static void netns_put(struct ns_common *ns)
{
	put_net(to_net_ns(ns));
}

878
/* setns(2) into @ns: requires CAP_SYS_ADMIN over both the target
 * namespace's owning user-ns and the caller's own user-ns; swaps the
 * net namespace referenced by @nsproxy.
 */
static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

	put_net(nsproxy->net_ns);
	nsproxy->net_ns = get_net(net);
	return 0;
}

/* nsfs hooks backing /proc/<pid>/ns/net. */
const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
};
#endif