/* Network namespace management: creation, teardown, pernet operations
 * registration and netns id (nsid) handling.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
#include <linux/fs.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/*
 *	Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
/* Boundary inside pernet_list: subsystems register before *first_device,
 * devices after it, so device init runs after all subsystem init.
 */
static struct list_head *first_device = &pernet_list;
/* Serializes pernet registration and namespace setup/teardown. */
DEFINE_MUTEX(net_mutex);

/* All live network namespaces; RCU-traversable, modified under RTNL. */
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

/* The initial network namespace, statically allocated. */
struct net init_net = {
	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

E
Eric Dumazet 已提交
42 43 44 45 46 47 48 49 50 51 52 53 54 55
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

static struct net_generic *net_alloc_generic(void)
{
	struct net_generic *ng;
	size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);

	ng = kzalloc(generic_size, GFP_KERNEL);
	if (ng)
		ng->len = max_gen_ptrs;

	return ng;
}

56 57 58 59 60 61 62
static int net_assign_generic(struct net *net, int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id == 0);

E
Eric Dumazet 已提交
63 64 65
	old_ng = rcu_dereference_protected(net->gen,
					   lockdep_is_held(&net_mutex));
	ng = old_ng;
66 67 68
	if (old_ng->len >= id)
		goto assign;

E
Eric Dumazet 已提交
69
	ng = net_alloc_generic();
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * The net_generic explores the net->gen array inside rcu
	 * read section. Besides once set the net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));

	rcu_assign_pointer(net->gen, ng);
87
	kfree_rcu(old_ng, rcu);
88 89 90 91 92
assign:
	ng->ptr[id - 1] = data;
	return 0;
}

93 94
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
95 96 97
	int err = -ENOMEM;
	void *data = NULL;

98
	if (ops->id && ops->size) {
99
		data = kzalloc(ops->size, GFP_KERNEL);
100
		if (!data)
101
			goto out;
102 103

		err = net_assign_generic(net, *ops->id, data);
104 105
		if (err)
			goto cleanup;
106
	}
107
	err = 0;
108
	if (ops->init)
109 110 111 112 113 114 115 116 117
		err = ops->init(net);
	if (!err)
		return 0;

cleanup:
	kfree(data);

out:
	return err;
118 119 120 121 122 123 124 125 126 127
}

/* Free the per-namespace private data that ops_init() allocated for
 * @net, if any.  The stale generic slot is left in place; the whole
 * net_generic array is torn down with the namespace.
 */
static void ops_free(const struct pernet_operations *ops, struct net *net)
{
	if (ops->id && ops->size) {
		int id = *ops->id;
		kfree(net_generic(net, id));
	}
}

128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
static void ops_exit_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;
	if (ops->exit) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops->exit(net);
	}
	if (ops->exit_batch)
		ops->exit_batch(net_exit_list);
}

/* Free one pernet_operations' private data for every namespace on
 * @net_exit_list.  Must run after the corresponding ops_exit_list().
 */
static void ops_free_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;
	if (ops->size && ops->id) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops_free(ops, net);
	}
}

150 151
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
152
	int min = 0, max = 0;
153 154 155 156 157 158 159 160

	ASSERT_RTNL();

	if (reqid >= 0) {
		min = reqid;
		max = reqid + 1;
	}

161
	return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
}

/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * returns the id 0 (idr_for_each() will not stop), we return the magic value
 * NET_ID_ZERO (-1) for it.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (net_eq(net, peer))
		return id ? : NET_ID_ZERO;
	return 0;
}

177
static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
178 179
{
	int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
180
	bool alloc_it = *alloc;
181 182 183

	ASSERT_RTNL();

184 185
	*alloc = false;

186 187 188 189 190 191
	/* Magic value for id 0. */
	if (id == NET_ID_ZERO)
		return 0;
	if (id > 0)
		return id;

192
	if (alloc_it) {
193
		id = alloc_netid(net, peer, -1);
194
		*alloc = true;
195 196
		return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
	}
197

198
	return NETNSA_NSID_NOT_ASSIGNED;
199 200
}

201 202 203 204 205 206 207 208
static int __peernet2id(struct net *net, struct net *peer)
{
	bool no = false;

	return __peernet2id_alloc(net, peer, &no);
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id);
209 210 211
/* This function returns the id of a peer netns. If no id is assigned, one will
 * be allocated and returned.
 */
212
int peernet2id_alloc(struct net *net, struct net *peer)
213
{
214
	bool alloc = atomic_read(&peer->count) == 0 ? false : true;
215
	int id;
216

217 218 219 220
	id = __peernet2id_alloc(net, peer, &alloc);
	if (alloc && id >= 0)
		rtnl_net_notifyid(net, RTM_NEWNSID, id);
	return id;
221
}
222
EXPORT_SYMBOL(peernet2id_alloc);
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239

struct net *get_net_ns_by_id(struct net *net, int id)
{
	struct net *peer;

	if (id < 0)
		return NULL;

	rcu_read_lock();
	peer = idr_find(&net->netns_ids, id);
	if (peer)
		get_net(peer);
	rcu_read_unlock();

	return peer;
}

240 241 242
/*
 * setup_net runs the initializers for the network namespace object.
 */
243
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
244 245
{
	/* Must be called with net_mutex held */
246
	const struct pernet_operations *ops, *saved_ops;
247
	int error = 0;
248
	LIST_HEAD(net_exit_list);
249 250

	atomic_set(&net->count, 1);
251
	atomic_set(&net->passive, 1);
252
	net->dev_base_seq = 1;
253
	net->user_ns = user_ns;
254
	idr_init(&net->netns_ids);
255

256
	list_for_each_entry(ops, &pernet_list, list) {
257 258 259
		error = ops_init(ops, net);
		if (error < 0)
			goto out_undo;
260 261 262
	}
out:
	return error;
263

264 265 266 267
out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
268
	list_add(&net->exit_list, &net_exit_list);
269
	saved_ops = ops;
270 271 272
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

273 274
	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
275
		ops_free_list(ops, &net_exit_list);
276 277

	rcu_barrier();
278 279 280
	goto out;
}


#ifdef CONFIG_NET_NS
/* Slab cache for struct net and the workqueue running cleanup_net(). */
static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;

286
static struct net *net_alloc(void)
287
{
288 289 290 291 292 293 294 295
	struct net *net = NULL;
	struct net_generic *ng;

	ng = net_alloc_generic();
	if (!ng)
		goto out;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
296
	if (!net)
297
		goto out_free;
298

299 300 301 302 303 304 305 306 307 308 309
	rcu_assign_pointer(net->gen, ng);
out:
	return net;

out_free:
	kfree(ng);
	goto out;
}

static void net_free(struct net *net)
{
E
Eric Dumazet 已提交
310
	kfree(rcu_access_pointer(net->gen));
311 312 313
	kmem_cache_free(net_cachep, net);
}

314 315 316 317 318 319 320
void net_drop_ns(void *p)
{
	struct net *ns = p;
	if (ns && atomic_dec_and_test(&ns->passive))
		net_free(ns);
}

321 322
struct net *copy_net_ns(unsigned long flags,
			struct user_namespace *user_ns, struct net *old_net)
323
{
324 325
	struct net *net;
	int rv;
326

327 328 329
	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);

330 331 332
	net = net_alloc();
	if (!net)
		return ERR_PTR(-ENOMEM);
333 334 335

	get_user_ns(user_ns);

336
	mutex_lock(&net_mutex);
337
	rv = setup_net(net, user_ns);
338
	if (rv == 0) {
339
		rtnl_lock();
340
		list_add_tail_rcu(&net->list, &net_namespace_list);
341 342
		rtnl_unlock();
	}
343
	mutex_unlock(&net_mutex);
344
	if (rv < 0) {
345
		put_user_ns(user_ns);
346
		net_drop_ns(net);
347 348 349 350
		return ERR_PTR(rv);
	}
	return net;
}
351

352 353 354
static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */

355 356
static void cleanup_net(struct work_struct *work)
{
357
	const struct pernet_operations *ops;
358
	struct net *net, *tmp;
X
xiao jin 已提交
359
	struct list_head net_kill_list;
360
	LIST_HEAD(net_exit_list);
361

362 363 364 365
	/* Atomically snapshot the list of namespaces to cleanup */
	spin_lock_irq(&cleanup_list_lock);
	list_replace_init(&cleanup_list, &net_kill_list);
	spin_unlock_irq(&cleanup_list_lock);
366 367 368 369 370

	mutex_lock(&net_mutex);

	/* Don't let anyone else find us. */
	rtnl_lock();
371
	list_for_each_entry(net, &net_kill_list, cleanup_list) {
372
		list_del_rcu(&net->list);
373
		list_add_tail(&net->exit_list, &net_exit_list);
374
		for_each_net(tmp) {
375
			int id = __peernet2id(tmp, net);
376

N
Nicolas Dichtel 已提交
377
			if (id >= 0) {
378
				rtnl_net_notifyid(tmp, RTM_DELNSID, id);
379
				idr_remove(&tmp->netns_ids, id);
N
Nicolas Dichtel 已提交
380
			}
381 382 383
		}
		idr_destroy(&net->netns_ids);

384
	}
385 386
	rtnl_unlock();

387 388 389 390 391 392 393
	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 */
	synchronize_rcu();

394
	/* Run all of the network namespace exit methods */
395 396 397
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

398
	/* Free the net generic variables */
399 400
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);
401 402 403 404 405 406 407 408 409

	mutex_unlock(&net_mutex);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Finally it is safe to free my network namespace structure */
410 411
	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		list_del_init(&net->exit_list);
412
		put_user_ns(net->user_ns);
413
		net_drop_ns(net);
414
	}
415
}
416
static DECLARE_WORK(net_cleanup_work, cleanup_net);
417 418 419 420

/* Called when the last full reference to @net is dropped; queue the
 * namespace for teardown on the netns workqueue (process context).
 */
void __put_net(struct net *net)
{
	/* Cleanup the network namespace in process context */
	unsigned long flags;

	spin_lock_irqsave(&cleanup_list_lock, flags);
	list_add(&net->cleanup_list, &cleanup_list);
	spin_unlock_irqrestore(&cleanup_list_lock, flags);

	queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);

431 432 433
struct net *get_net_ns_by_fd(int fd)
{
	struct file *file;
434
	struct ns_common *ns;
435 436 437
	struct net *net;

	file = proc_ns_fget(fd);
438 439
	if (IS_ERR(file))
		return ERR_CAST(file);
440

A
Al Viro 已提交
441
	ns = get_proc_ns(file_inode(file));
442 443
	if (ns->ops == &netns_operations)
		net = get_net(container_of(ns, struct net, ns));
444 445
	else
		net = ERR_PTR(-EINVAL);
446

447
	fput(file);
448 449 450
	return net;
}

451
#else
452 453 454 455
struct net *get_net_ns_by_fd(int fd)
{
	return ERR_PTR(-EINVAL);
}
456
#endif
457
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
458

459 460 461 462 463 464 465 466 467 468 469
struct net *get_net_ns_by_pid(pid_t pid)
{
	struct task_struct *tsk;
	struct net *net;

	/* Lookup the network namespace */
	net = ERR_PTR(-ESRCH);
	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk) {
		struct nsproxy *nsproxy;
470 471
		task_lock(tsk);
		nsproxy = tsk->nsproxy;
472 473
		if (nsproxy)
			net = get_net(nsproxy->net_ns);
474
		task_unlock(tsk);
475 476 477 478 479 480
	}
	rcu_read_unlock();
	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

481 482
static __net_init int net_ns_net_init(struct net *net)
{
483 484 485
#ifdef CONFIG_NET_NS
	net->ns.ops = &netns_operations;
#endif
A
Al Viro 已提交
486
	return ns_alloc_inum(&net->ns);
487 488 489 490
}

static __net_exit void net_ns_net_exit(struct net *net)
{
A
Al Viro 已提交
491
	ns_free_inum(&net->ns);
492 493 494 495 496 497 498
}

static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};

499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
	[NETNSA_NSID]		= { .type = NLA_S32 },
	[NETNSA_PID]		= { .type = NLA_U32 },
	[NETNSA_FD]		= { .type = NLA_U32 },
};

static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct net *peer;
	int nsid, err;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (!tb[NETNSA_NSID])
		return -EINVAL;
	nsid = nla_get_s32(tb[NETNSA_NSID]);

	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;
	if (IS_ERR(peer))
		return PTR_ERR(peer);

530
	if (__peernet2id(net, peer) >= 0) {
531 532 533 534 535
		err = -EEXIST;
		goto out;
	}

	err = alloc_netid(net, peer, nsid);
536 537
	if (err >= 0) {
		rtnl_net_notifyid(net, RTM_NEWNSID, err);
538
		err = 0;
539
	}
540 541 542 543 544 545 546 547 548 549 550 551 552
out:
	put_net(peer);
	return err;
}

/* Size of an RTM_*NSID message payload: header plus the NSID attr. */
static int rtnl_net_get_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtgenmsg))
	       + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
	       ;
}

/* Build one nsid netlink message (@cmd with NETNSA_NSID = @nsid) into
 * @skb.  Returns 0 or -EMSGSIZE if the skb is full.
 */
static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
			 int cmd, struct net *net, int nsid)
{
	struct nlmsghdr *nlh;
	struct rtgenmsg *rth;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
	if (!nlh)
		return -EMSGSIZE;

	rth = nlmsg_data(nlh);
	rth->rtgen_family = AF_UNSPEC;

	if (nla_put_s32(skb, NETNSA_NSID, nsid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct sk_buff *msg;
	struct net *peer;
582
	int err, id;
583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;

	if (IS_ERR(peer))
		return PTR_ERR(peer);

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

604
	id = __peernet2id(net, peer);
605
	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
606
			    RTM_GETNSID, net, id);
607 608 609 610 611 612 613 614 615 616 617 618 619
	if (err < 0)
		goto err_out;

	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
	goto out;

err_out:
	nlmsg_free(msg);
out:
	put_net(peer);
	return err;
}

N
Nicolas Dichtel 已提交
620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637
struct rtnl_net_dump_cb {
	struct net *net;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	int idx;
	int s_idx;
};

static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
	struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
	int ret;

	if (net_cb->idx < net_cb->s_idx)
		goto cont;

	ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
			    net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
638
			    RTM_NEWNSID, net_cb->net, id);
N
Nicolas Dichtel 已提交
639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
	if (ret < 0)
		return ret;

cont:
	net_cb->idx++;
	return 0;
}

/* RTM_GETNSID dump handler: walk this namespace's netns_ids idr and
 * emit one message per assigned nsid, resumable via cb->args[0].
 */
static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct rtnl_net_dump_cb net_cb = {
		.net = net,
		.skb = skb,
		.cb = cb,
		.idx = 0,
		.s_idx = cb->args[0],
	};

	ASSERT_RTNL();

	idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);

	cb->args[0] = net_cb.idx;
	return skb->len;
}

666
static void rtnl_net_notifyid(struct net *net, int cmd, int id)
N
Nicolas Dichtel 已提交
667 668 669 670 671 672 673 674
{
	struct sk_buff *msg;
	int err = -ENOMEM;

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg)
		goto out;

675
	err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
N
Nicolas Dichtel 已提交
676 677 678 679 680 681 682 683 684 685 686 687
	if (err < 0)
		goto err_out;

	rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
	return;

err_out:
	nlmsg_free(msg);
out:
	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}

688 689
static int __init net_ns_init(void)
{
690
	struct net_generic *ng;
691

692
#ifdef CONFIG_NET_NS
693 694 695
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
					SMP_CACHE_BYTES,
					SLAB_PANIC, NULL);
696 697 698 699 700

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
701
#endif
702

703 704 705 706 707 708
	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

709
	mutex_lock(&net_mutex);
710
	if (setup_net(&init_net, &init_user_ns))
S
Stephen Hemminger 已提交
711
		panic("Could not setup the initial network namespace");
712

713
	rtnl_lock();
714
	list_add_tail_rcu(&init_net.list, &net_namespace_list);
715
	rtnl_unlock();
716 717 718

	mutex_unlock(&net_mutex);

719 720
	register_pernet_subsys(&net_ns_ops);

721
	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
N
Nicolas Dichtel 已提交
722 723
	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
		      NULL);
724

725 726 727 728 729
	return 0;
}

pure_initcall(net_ns_init);


#ifdef CONFIG_NET_NS
/* Add @ops to @list and run its init for every existing namespace;
 * on failure, unwind the namespaces already initialized.
 * Caller holds net_mutex.
 */
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	struct net *net;
	int error;
	LIST_HEAD(net_exit_list);

	list_add_tail(&ops->list, list);
	if (ops->init || (ops->id && ops->size)) {
		for_each_net(net) {
			error = ops_init(ops, net);
			if (error)
				goto out_undo;
			list_add_tail(&net->exit_list, &net_exit_list);
		}
	}
	return 0;

out_undo:
	/* If I have an error cleanup all namespaces I initialized */
	list_del(&ops->list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
	return error;
}

757
static void __unregister_pernet_operations(struct pernet_operations *ops)
758 759
{
	struct net *net;
760
	LIST_HEAD(net_exit_list);
761 762

	list_del(&ops->list);
763 764 765 766
	for_each_net(net)
		list_add_tail(&net->exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
767 768
}

769 770
#else

771 772
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
773
{
774
	return ops_init(ops, &init_net);
775 776
}

777
static void __unregister_pernet_operations(struct pernet_operations *ops)
778
{
779 780 781 782
	LIST_HEAD(net_exit_list);
	list_add(&init_net.exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
783
}
784 785

#endif /* CONFIG_NET_NS */
786

787 788
static DEFINE_IDA(net_generic_ids);

789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	if (ops->id) {
again:
		error = ida_get_new_above(&net_generic_ids, 1, ops->id);
		if (error < 0) {
			if (error == -EAGAIN) {
				ida_pre_get(&net_generic_ids, GFP_KERNEL);
				goto again;
			}
			return error;
		}
E
Eric Dumazet 已提交
804
		max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
805 806
	}
	error = __register_pernet_operations(list, ops);
807 808 809 810 811
	if (error) {
		rcu_barrier();
		if (ops->id)
			ida_remove(&net_generic_ids, *ops->id);
	}
812 813 814 815 816 817 818 819

	return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	
	__unregister_pernet_operations(ops);
820
	rcu_barrier();
821 822 823 824
	if (ops->id)
		ida_remove(&net_generic_ids, *ops->id);
}

825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858
/**
 *      register_pernet_subsys - register a network namespace subsystem
 *	@ops:  pernet operations structure for the subsystem
 *
 *	Register a subsystem which has init and exit functions
 *	that are called when network namespaces are created and
 *	destroyed respectively.
 *
 *	When registered all network namespace init functions are
 *	called for every existing network namespace.  Allowing kernel
 *	modules to have a race free view of the set of network namespaces.
 *
 *	When a new network namespace is created all of the init
 *	methods are called in the order in which they were registered.
 *
 *	When a network namespace is destroyed all of the exit methods
 *	are called in the reverse of the order with which they were
 *	registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	error =  register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);

/**
 *      unregister_pernet_subsys - unregister a network namespace subsystem
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list to be
859
 *	used when network namespaces are created or destroyed.  In
860 861 862
 *	addition run the exit method for all existing network
 *	namespaces.
 */
863
void unregister_pernet_subsys(struct pernet_operations *ops)
864 865
{
	mutex_lock(&net_mutex);
866
	unregister_pernet_operations(ops);
867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

/**
 *      register_pernet_device - register a network namespace device
 *	@ops:  pernet operations structure for the subsystem
 *
 *	Register a device which has init and exit functions
 *	that are called when network namespaces are created and
 *	destroyed respectively.
 *
 *	When registered all network namespace init functions are
 *	called for every existing network namespace.  Allowing kernel
 *	modules to have a race free view of the set of network namespaces.
 *
 *	When a new network namespace is created all of the init
 *	methods are called in the order in which they were registered.
 *
 *	When a network namespace is destroyed all of the exit methods
 *	are called in the reverse of the order with which they were
 *	registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	error = register_pernet_operations(&pernet_list, ops);
	if (!error && (first_device == &pernet_list))
		first_device = &ops->list;
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 *      unregister_pernet_device - unregister a network namespace netdevice
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list to be
 *	used when network namespaces are created or destroyed.  In
 *	addition run the exit method for all existing network
 *	namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

#ifdef CONFIG_NET_NS
/* nsfs ->get hook: take a reference on @task's network namespace.
 * Returns NULL if the task's nsproxy is already gone.
 */
static struct ns_common *netns_get(struct task_struct *task)
{
	struct net *net = NULL;
	struct nsproxy *nsproxy;

	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy)
		net = get_net(nsproxy->net_ns);
	task_unlock(task);

	return net ? &net->ns : NULL;
}

/* Map an ns_common embedded in a struct net back to its container. */
static inline struct net *to_net_ns(struct ns_common *ns)
{
	return container_of(ns, struct net, ns);
}

/* nsfs ->put hook: drop the reference taken by netns_get(). */
static void netns_put(struct ns_common *ns)
{
	put_net(to_net_ns(ns));
}

946
static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
947
{
948
	struct net *net = to_net_ns(ns);
949

950
	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
951
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
952 953
		return -EPERM;

954
	put_net(nsproxy->net_ns);
955
	nsproxy->net_ns = get_net(net);
956 957 958 959 960 961 962 963 964 965 966
	return 0;
}

/* nsfs operations exposing network namespaces via /proc/<pid>/ns/net. */
const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
};
#endif