net_namespace.c 23.7 KB
Newer Older
J
Joe Perches 已提交
1 2
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

3 4 5 6 7 8
#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
9
#include <linux/sched.h>
10
#include <linux/idr.h>
11
#include <linux/rculist.h>
12
#include <linux/nsproxy.h>
13 14
#include <linux/fs.h>
#include <linux/proc_ns.h>
15
#include <linux/file.h>
16
#include <linux/export.h>
17
#include <linux/user_namespace.h>
18 19 20
#include <linux/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
21
#include <net/net_namespace.h>
22
#include <net/netns/generic.h>
23 24 25 26 27 28 29

/*
 *	Our network namespace constructor/destructor lists
 */

/* All registered pernet_operations, walked by setup_net()/cleanup_net(). */
static LIST_HEAD(pernet_list);
/* Device ops are registered from this point onwards in pernet_list
 * (see register_pernet_device()); subsystems go before it.
 */
static struct list_head *first_device = &pernet_list;
/* Serialises pernet (un)registration and namespace setup/teardown. */
DEFINE_MUTEX(net_mutex);

/* Every live struct net; writers hold rtnl, readers may use RCU. */
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

/* The initial network namespace; always present, never freed. */
struct net init_net = {
	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

/* Set once setup_net(&init_net, ...) has completed (see net_ns_init()). */
static bool init_net_initialized;

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

E
Eric Dumazet 已提交
44 45 46 47 48 49 50 51 52 53 54 55 56 57
/* Highest pernet id handed out so far; sizes new net->gen arrays. */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

/*
 * Allocate a zeroed net_generic array with room for every currently
 * registered pernet id (max_gen_ptrs slots).  Returns NULL on failure.
 */
static struct net_generic *net_alloc_generic(void)
{
	struct net_generic *ng;
	size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);

	ng = kzalloc(generic_size, GFP_KERNEL);
	if (ng)
		ng->len = max_gen_ptrs;

	return ng;
}

58
/*
 * Install @data as @net's per-net pointer for generic id @id, growing the
 * net->gen array (RCU-swapping in a larger copy) when @id exceeds its
 * current length.  Ids are 1-based; caller must hold net_mutex.
 * Returns 0 or -ENOMEM.
 */
static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id == 0);

	old_ng = rcu_dereference_protected(net->gen,
					   lockdep_is_held(&net_mutex));
	/* Fast path: the existing array is already big enough. */
	if (old_ng->len >= id) {
		old_ng->ptr[id - 1] = data;
		return 0;
	}

	ng = net_alloc_generic();
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * The net_generic explores the net->gen array inside rcu
	 * read section. Besides once set the net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
	ng->ptr[id - 1] = data;

	rcu_assign_pointer(net->gen, ng);
	kfree_rcu(old_ng, rcu);
	return 0;
}

95 96
/*
 * Run one pernet_operations' constructor for @net: allocate and register
 * the per-net generic data (when ->id and ->size are set), then call
 * ->init().  On ->init() failure the data is freed again.
 * Returns 0 or a negative errno.
 */
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
	int err = -ENOMEM;
	void *data = NULL;

	if (ops->id && ops->size) {
		data = kzalloc(ops->size, GFP_KERNEL);
		if (!data)
			goto out;

		err = net_assign_generic(net, *ops->id, data);
		if (err)
			goto cleanup;
	}
	err = 0;
	if (ops->init)
		err = ops->init(net);
	if (!err)
		return 0;

cleanup:
	kfree(data);

out:
	return err;
}

/* Free the per-net generic data that ops_init() allocated for @ops. */
static void ops_free(const struct pernet_operations *ops, struct net *net)
{
	if (ops->id && ops->size) {
		kfree(net_generic(net, *ops->id));
	}
}

/* Call ->exit() for every net on @net_exit_list, then the batched variant. */
static void ops_exit_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;
	if (ops->exit) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops->exit(net);
	}
	if (ops->exit_batch)
		ops->exit_batch(net_exit_list);
}

/* Free the per-net generic data for every net on @net_exit_list. */
static void ops_free_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;
	if (ops->size && ops->id) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops_free(ops, net);
	}
}

151
/* should be called with nsid_lock held */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
	int min = 0, max = 0;

	/* A non-negative reqid pins the allocation to exactly that id;
	 * otherwise min = max = 0 lets idr_alloc() pick the lowest free one
	 * (max == 0 means "no upper bound" to idr_alloc).
	 */
	if (reqid >= 0) {
		min = reqid;
		max = reqid + 1;
	}

	return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
}

/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * return the id 0 (idr_for_each() would not stop), we return the magic value
 * NET_ID_ZERO (-1) for it.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (net_eq(net, peer))
		return id ? : NET_ID_ZERO;
	return 0;
}

177 178 179 180
/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc
 * is set to true, thus the caller knows that the new id must be notified via
 * rtnl.
 */
static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
{
	/* Linear scan of the idr for an existing mapping to @peer. */
	int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
	bool alloc_it = *alloc;

	*alloc = false;

	/* Magic value for id 0. */
	if (id == NET_ID_ZERO)
		return 0;
	if (id > 0)
		return id;

	if (alloc_it) {
		id = alloc_netid(net, peer, -1);
		*alloc = true;
		/* On allocation failure report "not assigned", not -errno. */
		return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
	}

	return NETNSA_NSID_NOT_ASSIGNED;
}

/* should be called with nsid_lock held */
static int __peernet2id(struct net *net, struct net *peer)
{
	bool no = false;

	/* Lookup only; never allocates a new id. */
	return __peernet2id_alloc(net, peer, &no);
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id);
/* This function returns the id of a peer netns. If no id is assigned, one will
 * be allocated and returned.
 */
int peernet2id_alloc(struct net *net, struct net *peer)
{
	unsigned long flags;
	bool alloc;
	int id;

	/* A namespace whose refcount is already zero never gets a fresh id. */
	if (atomic_read(&net->count) == 0)
		return NETNSA_NSID_NOT_ASSIGNED;
	spin_lock_irqsave(&net->nsid_lock, flags);
	/* Only allocate an id while the peer is still alive. */
	alloc = atomic_read(&peer->count) == 0 ? false : true;
	id = __peernet2id_alloc(net, peer, &alloc);
	spin_unlock_irqrestore(&net->nsid_lock, flags);
	/* Notify listeners about a newly assigned id, outside the lock. */
	if (alloc && id >= 0)
		rtnl_net_notifyid(net, RTM_NEWNSID, id);
	return id;
}

/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
	unsigned long flags;
	int id;

	spin_lock_irqsave(&net->nsid_lock, flags);
	id = __peernet2id(net, peer);
	spin_unlock_irqrestore(&net->nsid_lock, flags);
	return id;
}
EXPORT_SYMBOL(peernet2id);

/* This function returns true if the peer netns has an id assigned into the
 * current netns.
 */
bool peernet_has_id(struct net *net, struct net *peer)
{
	return peernet2id(net, peer) >= 0;
}

253 254
struct net *get_net_ns_by_id(struct net *net, int id)
{
255
	unsigned long flags;
256 257 258 259 260 261
	struct net *peer;

	if (id < 0)
		return NULL;

	rcu_read_lock();
262
	spin_lock_irqsave(&net->nsid_lock, flags);
263 264 265
	peer = idr_find(&net->netns_ids, id);
	if (peer)
		get_net(peer);
266
	spin_unlock_irqrestore(&net->nsid_lock, flags);
267 268 269 270 271
	rcu_read_unlock();

	return peer;
}

272 273 274
/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
	/* Must be called with net_mutex held */
	const struct pernet_operations *ops, *saved_ops;
	int error = 0;
	LIST_HEAD(net_exit_list);

	atomic_set(&net->count, 1);	/* the creator's reference */
	atomic_set(&net->passive, 1);	/* dropped via net_drop_ns() */
	net->dev_base_seq = 1;
	net->user_ns = user_ns;
	idr_init(&net->netns_ids);
	spin_lock_init(&net->nsid_lock);

	/* Run every registered constructor in registration order. */
	list_for_each_entry(ops, &pernet_list, list) {
		error = ops_init(ops, net);
		if (error < 0)
			goto out_undo;
	}
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_add(&net->exit_list, &net_exit_list);
	saved_ops = ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	/* Wait for pending RCU callbacks before the partial net goes away. */
	rcu_barrier();
	goto out;
}

314

315
#ifdef CONFIG_NET_NS
/* Charge one net namespace against @ns's UCOUNT_NET_NAMESPACES limit;
 * returns NULL when the limit would be exceeded.
 */
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
	return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
}

/* Undo inc_net_namespaces(). */
static void dec_net_namespaces(struct ucounts *ucounts)
{
	dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}

/* Slab for struct net, and the workqueue that runs cleanup_net(). */
static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;

329
/* Allocate a zeroed struct net with a fresh generic array attached;
 * returns NULL on allocation failure.
 */
static struct net *net_alloc(void)
{
	struct net *net = NULL;
	struct net_generic *ng;

	ng = net_alloc_generic();
	if (!ng)
		goto out;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
	if (!net)
		goto out_free;

	rcu_assign_pointer(net->gen, ng);
out:
	return net;

out_free:
	kfree(ng);
	goto out;
}

/* Release the memory of a fully torn-down namespace. */
static void net_free(struct net *net)
{
	kfree(rcu_access_pointer(net->gen));
	kmem_cache_free(net_cachep, net);
}

/* Drop the "passive" reference; the last drop frees the namespace. */
void net_drop_ns(void *p)
{
	struct net *ns = p;
	if (ns && atomic_dec_and_test(&ns->passive))
		net_free(ns);
}

364 365
/*
 * Create (or share) a network namespace for a clone/unshare request.
 * Without CLONE_NEWNET the old namespace is simply re-referenced;
 * otherwise a new one is allocated, charged against @user_ns's ucount
 * limit, initialised under net_mutex, and linked into
 * net_namespace_list.  Returns the namespace or an ERR_PTR().
 */
struct net *copy_net_ns(unsigned long flags,
			struct user_namespace *user_ns, struct net *old_net)
{
	struct ucounts *ucounts;
	struct net *net;
	int rv;

	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);

	ucounts = inc_net_namespaces(user_ns);
	if (!ucounts)
		return ERR_PTR(-ENOSPC);

	net = net_alloc();
	if (!net) {
		dec_net_namespaces(ucounts);
		return ERR_PTR(-ENOMEM);
	}

	get_user_ns(user_ns);

	rv = mutex_lock_killable(&net_mutex);
	if (rv < 0) {
		/* Interrupted by a fatal signal: roll everything back. */
		net_free(net);
		dec_net_namespaces(ucounts);
		put_user_ns(user_ns);
		return ERR_PTR(rv);
	}

	net->ucounts = ucounts;
	rv = setup_net(net, user_ns);
	if (rv == 0) {
		/* Publish the new namespace; list writers hold rtnl. */
		rtnl_lock();
		list_add_tail_rcu(&net->list, &net_namespace_list);
		rtnl_unlock();
	}
	mutex_unlock(&net_mutex);
	if (rv < 0) {
		dec_net_namespaces(ucounts);
		put_user_ns(user_ns);
		net_drop_ns(net);	/* drops the passive ref, freeing net */
		return ERR_PTR(rv);
	}
	return net;
}
410

411 412 413
/* Namespaces queued for teardown by __put_net(). */
static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */

/*
 * Workqueue handler tearing down every namespace queued on cleanup_list:
 * unhash them, retract their nsids from all surviving namespaces, run
 * the pernet exit methods, and release the remaining references.
 */
static void cleanup_net(struct work_struct *work)
{
	const struct pernet_operations *ops;
	struct net *net, *tmp;
	struct list_head net_kill_list;
	LIST_HEAD(net_exit_list);

	/* Atomically snapshot the list of namespaces to cleanup */
	spin_lock_irq(&cleanup_list_lock);
	list_replace_init(&cleanup_list, &net_kill_list);
	spin_unlock_irq(&cleanup_list_lock);

	mutex_lock(&net_mutex);

	/* Don't let anyone else find us. */
	rtnl_lock();
	list_for_each_entry(net, &net_kill_list, cleanup_list) {
		list_del_rcu(&net->list);
		list_add_tail(&net->exit_list, &net_exit_list);
		/* Remove this net's id from every surviving namespace and
		 * notify userspace about each removal.
		 */
		for_each_net(tmp) {
			int id;

			spin_lock_irq(&tmp->nsid_lock);
			id = __peernet2id(tmp, net);
			if (id >= 0)
				idr_remove(&tmp->netns_ids, id);
			spin_unlock_irq(&tmp->nsid_lock);
			if (id >= 0)
				rtnl_net_notifyid(tmp, RTM_DELNSID, id);
		}
		spin_lock_irq(&net->nsid_lock);
		idr_destroy(&net->netns_ids);
		spin_unlock_irq(&net->nsid_lock);

	}
	rtnl_unlock();

	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 */
	synchronize_rcu();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	/* Free the net generic variables */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	mutex_unlock(&net_mutex);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Finally it is safe to free my network namespace structure */
	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		list_del_init(&net->exit_list);
		dec_net_namespaces(net->ucounts);
		put_user_ns(net->user_ns);
		net_drop_ns(net);
	}
}
static DECLARE_WORK(net_cleanup_work, cleanup_net);

/* Called when net->count reaches zero: defer teardown to process context. */
void __put_net(struct net *net)
{
	/* Cleanup the network namespace in process context */
	unsigned long flags;

	spin_lock_irqsave(&cleanup_list_lock, flags);
	list_add(&net->cleanup_list, &cleanup_list);
	spin_unlock_irqrestore(&cleanup_list_lock, flags);

	queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);

496 497 498
/* Resolve a namespace file descriptor (e.g. an open /proc/<pid>/ns/net)
 * to its struct net, taking a reference.  Returns ERR_PTR(-EINVAL) when
 * the fd does not refer to a network namespace.
 */
struct net *get_net_ns_by_fd(int fd)
{
	struct file *file;
	struct ns_common *ns;
	struct net *net;

	file = proc_ns_fget(fd);
	if (IS_ERR(file))
		return ERR_CAST(file);

	ns = get_proc_ns(file_inode(file));
	/* Only accept fds that actually refer to a network namespace. */
	if (ns->ops == &netns_operations)
		net = get_net(container_of(ns, struct net, ns));
	else
		net = ERR_PTR(-EINVAL);

	fput(file);
	return net;
}

#else
/* Without CONFIG_NET_NS there are no namespace fds to resolve. */
struct net *get_net_ns_by_fd(int fd)
{
	return ERR_PTR(-EINVAL);
}
#endif
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
523

524 525 526 527 528 529 530 531 532 533 534
/* Resolve a pid to that task's network namespace, with a reference held.
 * Returns ERR_PTR(-ESRCH) when the task or its nsproxy is gone.
 */
struct net *get_net_ns_by_pid(pid_t pid)
{
	struct task_struct *tsk;
	struct net *net;

	/* Lookup the network namespace */
	net = ERR_PTR(-ESRCH);
	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk) {
		struct nsproxy *nsproxy;
		/* task_lock pins tsk->nsproxy while we take the net ref. */
		task_lock(tsk);
		nsproxy = tsk->nsproxy;
		if (nsproxy)
			net = get_net(nsproxy->net_ns);
		task_unlock(tsk);
	}
	rcu_read_unlock();
	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

/* Per-net init: allocate the nsfs inode number for this namespace. */
static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
	net->ns.ops = &netns_operations;
#endif
	return ns_alloc_inum(&net->ns);
}

/* Per-net exit: release the nsfs inode number. */
static __net_exit void net_ns_net_exit(struct net *net)
{
	ns_free_inum(&net->ns);
}

static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};

S
stephen hemminger 已提交
564
/* Attribute policy for the RTM_*NSID netlink messages. */
static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
	[NETNSA_NSID]		= { .type = NLA_S32 },
	[NETNSA_PID]		= { .type = NLA_U32 },
	[NETNSA_FD]		= { .type = NLA_U32 },
};

/* RTM_NEWNSID handler: assign the requested nsid to a peer namespace
 * identified by pid or namespace fd.  Fails with -EEXIST when the peer
 * already has an id in this namespace.
 */
static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	unsigned long flags;
	struct net *peer;
	int nsid, err;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (!tb[NETNSA_NSID])
		return -EINVAL;
	nsid = nla_get_s32(tb[NETNSA_NSID]);

	/* The peer may be named either by pid or by namespace fd. */
	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;
	if (IS_ERR(peer))
		return PTR_ERR(peer);

	spin_lock_irqsave(&net->nsid_lock, flags);
	if (__peernet2id(net, peer) >= 0) {
		spin_unlock_irqrestore(&net->nsid_lock, flags);
		err = -EEXIST;
		goto out;
	}

	err = alloc_netid(net, peer, nsid);
	spin_unlock_irqrestore(&net->nsid_lock, flags);
	if (err >= 0) {
		/* alloc_netid() returned the assigned id; broadcast it. */
		rtnl_net_notifyid(net, RTM_NEWNSID, err);
		err = 0;
	}
out:
	put_net(peer);
	return err;
}

/* Payload size of one nsid message: rtgenmsg header + NETNSA_NSID attr. */
static int rtnl_net_get_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtgenmsg))
	       + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
	       ;
}

/* Build one nsid netlink message into @skb.  Returns 0 or -EMSGSIZE. */
static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
			 int cmd, struct net *net, int nsid)
{
	struct nlmsghdr *nlh;
	struct rtgenmsg *rth;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
	if (!nlh)
		return -EMSGSIZE;

	rth = nlmsg_data(nlh);
	rth->rtgen_family = AF_UNSPEC;

	if (nla_put_s32(skb, NETNSA_NSID, nsid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* RTM_GETNSID handler: report the nsid of the peer namespace identified
 * by pid or namespace fd back to the requester.
 */
static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct sk_buff *msg;
	struct net *peer;
	int err, id;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;

	if (IS_ERR(peer))
		return PTR_ERR(peer);

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

	/* peernet2id() yields NETNSA_NSID_NOT_ASSIGNED when no id exists. */
	id = peernet2id(net, peer);
	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
			    RTM_NEWNSID, net, id);
	if (err < 0)
		goto err_out;

	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
	goto out;

err_out:
	nlmsg_free(msg);
out:
	put_net(peer);
	return err;
}

N
Nicolas Dichtel 已提交
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706
/* State threaded through idr_for_each() while dumping nsids. */
struct rtnl_net_dump_cb {
	struct net *net;	/* namespace whose idr is being dumped */
	struct sk_buff *skb;	/* dump reply under construction */
	struct netlink_callback *cb;
	int idx;		/* current walk position */
	int s_idx;		/* resume point from the previous pass */
};

/* idr_for_each() callback: emit one RTM_NEWNSID record per mapping,
 * skipping entries already dumped in a previous pass (below s_idx).
 */
static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
	struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
	int ret;

	if (net_cb->idx < net_cb->s_idx)
		goto cont;

	ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
			    net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			    RTM_NEWNSID, net_cb->net, id);
	if (ret < 0)
		return ret;

cont:
	net_cb->idx++;
	return 0;
}

/* RTM_GETNSID dump handler: walk this namespace's nsid idr. */
static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct rtnl_net_dump_cb net_cb = {
		.net = net,
		.skb = skb,
		.cb = cb,
		.idx = 0,
		.s_idx = cb->args[0],
	};
	unsigned long flags;

	spin_lock_irqsave(&net->nsid_lock, flags);
	idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
	spin_unlock_irqrestore(&net->nsid_lock, flags);

	/* Remember how far we got for the next dump pass. */
	cb->args[0] = net_cb.idx;
	return skb->len;
}

736
/* Broadcast an RTM_NEWNSID/RTM_DELNSID notification on RTNLGRP_NSID. */
static void rtnl_net_notifyid(struct net *net, int cmd, int id)
{
	struct sk_buff *msg;
	int err = -ENOMEM;

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg)
		goto out;

	err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
	if (err < 0)
		goto err_out;

	rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
	return;

err_out:
	nlmsg_free(msg);
out:
	/* Record the failure on the rtnetlink socket group. */
	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}

758 759
/* Boot-time setup: create the struct net slab and the cleanup workqueue
 * (CONFIG_NET_NS only), initialise init_net under net_mutex, and register
 * the nsid rtnetlink handlers.  Panics on failure - networking cannot
 * come up without the initial namespace.
 */
static int __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
					SMP_CACHE_BYTES,
					SLAB_PANIC, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

	mutex_lock(&net_mutex);
	if (setup_net(&init_net, &init_user_ns))
		panic("Could not setup the initial network namespace");

	init_net_initialized = true;

	/* Publish init_net; list writers hold rtnl. */
	rtnl_lock();
	list_add_tail_rcu(&init_net.list, &net_namespace_list);
	rtnl_unlock();

	mutex_unlock(&net_mutex);

	register_pernet_subsys(&net_ns_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
		      NULL);

	return 0;
}

pure_initcall(net_ns_init);

802
#ifdef CONFIG_NET_NS
803 804
/* Register @ops on @list and run its constructor for every existing
 * namespace, rolling all of them back if any init fails.
 * Caller holds net_mutex.
 */
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	struct net *net;
	int error;
	LIST_HEAD(net_exit_list);

	list_add_tail(&ops->list, list);
	if (ops->init || (ops->id && ops->size)) {
		for_each_net(net) {
			error = ops_init(ops, net);
			if (error)
				goto out_undo;
			list_add_tail(&net->exit_list, &net_exit_list);
		}
	}
	return 0;

out_undo:
	/* If I have an error cleanup all namespaces I initialized */
	list_del(&ops->list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
	return error;
}

/* Unregister @ops, running its exit path for every existing namespace.
 * Caller holds net_mutex.
 */
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	struct net *net;
	LIST_HEAD(net_exit_list);

	list_del(&ops->list);
	for_each_net(net)
		list_add_tail(&net->exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
}

841 842
#else

843 844
/* !CONFIG_NET_NS: only init_net exists.  Before init_net is set up just
 * queue the ops on the list; afterwards run the constructor directly.
 */
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_add_tail(&ops->list, list);
		return 0;
	}

	return ops_init(ops, &init_net);
}

/* !CONFIG_NET_NS counterpart: dequeue, or tear down init_net's state. */
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_del(&ops->list);
	} else {
		LIST_HEAD(net_exit_list);
		list_add(&init_net.exit_list, &net_exit_list);
		ops_exit_list(ops, &net_exit_list);
		ops_free_list(ops, &net_exit_list);
	}
}
865 866

#endif /* CONFIG_NET_NS */
867

868 869
/* ida handing out the 1-based net->gen slot ids used via ops->id. */
static DEFINE_IDA(net_generic_ids);

/* Allocate a generic-array id for @ops (when it wants one), grow
 * max_gen_ptrs to cover it, and register the operations, releasing the
 * id again on failure.  Caller holds net_mutex.
 */
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	if (ops->id) {
again:
		error = ida_get_new_above(&net_generic_ids, 1, ops->id);
		if (error < 0) {
			if (error == -EAGAIN) {
				/* Preload and retry, per the ida API contract. */
				ida_pre_get(&net_generic_ids, GFP_KERNEL);
				goto again;
			}
			return error;
		}
		/* Ids are 1-based and stored at ptr[id - 1], so a length
		 * equal to the id is sufficient.
		 */
		max_gen_ptrs = max(max_gen_ptrs, *ops->id);
	}
	error = __register_pernet_operations(list, ops);
	if (error) {
		rcu_barrier();
		if (ops->id)
			ida_remove(&net_generic_ids, *ops->id);
	}

	return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	
	__unregister_pernet_operations(ops);
901
	rcu_barrier();
902 903 904 905
	if (ops->id)
		ida_remove(&net_generic_ids, *ops->id);
}

906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939
/**
 *      register_pernet_subsys - register a network namespace subsystem
 *	@ops:  pernet operations structure for the subsystem
 *
 *	Register a subsystem which has init and exit functions
 *	that are called when network namespaces are created and
 *	destroyed respectively.
 *
 *	When registered all network namespace init functions are
 *	called for every existing network namespace.  Allowing kernel
 *	modules to have a race free view of the set of network namespaces.
 *
 *	When a new network namespace is created all of the init
 *	methods are called in the order in which they were registered.
 *
 *	When a network namespace is destroyed all of the exit methods
 *	are called in the reverse of the order with which they were
 *	registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	/* Subsystems are inserted before the first device ops. */
	error =  register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);

/**
 *      unregister_pernet_subsys - unregister a network namespace subsystem
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list to be
 *	used when network namespaces are created or destroyed.  In
 *	addition run the exit method for all existing network
 *	namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

/**
 *      register_pernet_device - register a network namespace device
 *	@ops:  pernet operations structure for the subsystem
 *
 *	Register a device which has init and exit functions
 *	that are called when network namespaces are created and
 *	destroyed respectively.
 *
 *	When registered all network namespace init functions are
 *	called for every existing network namespace.  Allowing kernel
 *	modules to have a race free view of the set of network namespaces.
 *
 *	When a new network namespace is created all of the init
 *	methods are called in the order in which they were registered.
 *
 *	When a network namespace is destroyed all of the exit methods
 *	are called in the reverse of the order with which they were
 *	registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	error = register_pernet_operations(&pernet_list, ops);
	/* The first device ops registered anchors the device sublist. */
	if (!error && (first_device == &pernet_list))
		first_device = &ops->list;
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 *      unregister_pernet_device - unregister a network namespace netdevice
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list to be
 *	used when network namespaces are created or destroyed.  In
 *	addition run the exit method for all existing network
 *	namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	/* Keep first_device valid when the anchoring ops goes away. */
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
1001 1002

#ifdef CONFIG_NET_NS
1003
/* nsfs hook: return @task's network namespace with a reference held,
 * or NULL if the task has no nsproxy (is exiting).
 */
static struct ns_common *netns_get(struct task_struct *task)
{
	struct net *net = NULL;
	struct nsproxy *nsproxy;

	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy)
		net = get_net(nsproxy->net_ns);
	task_unlock(task);

	return net ? &net->ns : NULL;
}

/* Map an ns_common embedded in a struct net back to that net. */
static inline struct net *to_net_ns(struct ns_common *ns)
{
	return container_of(ns, struct net, ns);
}

/* nsfs hook: drop the reference taken by netns_get(). */
static void netns_put(struct ns_common *ns)
{
	put_net(to_net_ns(ns));
}

/* setns(2) into @ns: requires CAP_SYS_ADMIN over both the target net's
 * owning user namespace and the caller's own user namespace.
 */
static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

	put_net(nsproxy->net_ns);
	nsproxy->net_ns = get_net(net);
	return 0;
}

/* nsfs hook: report the user namespace that owns this net namespace. */
static struct user_namespace *netns_owner(struct ns_common *ns)
{
	return to_net_ns(ns)->user_ns;
}

const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
	.owner		= netns_owner,
};
#endif