ip6_flowlabel.c 19.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 *	ip6_flowlabel.c		IPv6 flowlabel manager.
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

12
#include <linux/capability.h>
L
Linus Torvalds 已提交
13 14 15 16 17 18 19 20 21 22
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/route.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
23
#include <linux/slab.h>
24
#include <linux/export.h>
25
#include <linux/pid_namespace.h>
L
Linus Torvalds 已提交
26

27
#include <net/net_namespace.h>
L
Linus Torvalds 已提交
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/transp_v6.h>

#include <asm/uaccess.h>

#define FL_MIN_LINGER	6	/* Minimal linger. It is set to 6sec specified
				   in old IPv6 RFC. Well, it was reasonable value.
				 */
#define FL_MAX_LINGER	60	/* Maximal linger timeout */

/* FL hash table */

#define FL_MAX_PER_SOCK	32
#define FL_MAX_SIZE	4096
#define FL_HASH_MASK	255
#define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)

static atomic_t fl_size = ATOMIC_INIT(0);
54
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
L
Linus Torvalds 已提交
55 56

static void ip6_fl_gc(unsigned long dummy);
57
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
L
Linus Torvalds 已提交
58 59 60

/* FL hash table lock: it protects only of GC */

61
static DEFINE_SPINLOCK(ip6_fl_lock);
L
Linus Torvalds 已提交
62 63 64

/* Big socket sock */

65
static DEFINE_SPINLOCK(ip6_sk_fl_lock);
L
Linus Torvalds 已提交
66

67
#define for_each_fl_rcu(hash, fl)				\
68
	for (fl = rcu_dereference_bh(fl_ht[(hash)]);		\
69
	     fl != NULL;					\
70
	     fl = rcu_dereference_bh(fl->next))
71
#define for_each_fl_continue_rcu(fl)				\
72
	for (fl = rcu_dereference_bh(fl->next);			\
73
	     fl != NULL;					\
74
	     fl = rcu_dereference_bh(fl->next))
L
Linus Torvalds 已提交
75

76 77 78 79 80
#define for_each_sk_fl_rcu(np, sfl)				\
	for (sfl = rcu_dereference_bh(np->ipv6_fl_list);	\
	     sfl != NULL;					\
	     sfl = rcu_dereference_bh(sfl->next))

81
/*
 * Find the entry for @label belonging to @net in the global hash table.
 * No reference is taken. The chain is walked with rcu_dereference_bh(),
 * so the caller must provide the matching protection.
 * Returns the entry, or NULL if there is none.
 */
static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *entry;

	for_each_fl_rcu(FL_HASH(label), entry) {
		if (entry->label != label)
			continue;
		if (net_eq(entry->fl_net, net))
			return entry;
	}
	return NULL;
}

92
/*
 * Look up @label in @net and take a reference on it.
 * Returns the entry with its user count raised, or NULL if it does not
 * exist or its user count had already dropped to zero.
 */
static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *found;

	rcu_read_lock_bh();
	found = __fl_lookup(net, label);
	if (found != NULL) {
		/* never revive an entry that has no users left */
		if (!atomic_inc_not_zero(&found->users))
			found = NULL;
	}
	rcu_read_unlock_bh();

	return found;
}


static void fl_free(struct ip6_flowlabel *fl)
{
107
	if (fl) {
108 109
		if (fl->share == IPV6_FL_S_PROCESS)
			put_pid(fl->owner.pid);
110
		release_net(fl->fl_net);
L
Linus Torvalds 已提交
111
		kfree(fl->opt);
112
		kfree_rcu(fl, rcu);
113
	}
L
Linus Torvalds 已提交
114 115 116 117
}

static void fl_release(struct ip6_flowlabel *fl)
{
118
	spin_lock_bh(&ip6_fl_lock);
L
Linus Torvalds 已提交
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134

	fl->lastuse = jiffies;
	if (atomic_dec_and_test(&fl->users)) {
		unsigned long ttd = fl->lastuse + fl->linger;
		if (time_after(ttd, fl->expires))
			fl->expires = ttd;
		ttd = fl->expires;
		if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
			struct ipv6_txoptions *opt = fl->opt;
			fl->opt = NULL;
			kfree(opt);
		}
		if (!timer_pending(&ip6_fl_gc_timer) ||
		    time_after(ip6_fl_gc_timer.expires, ttd))
			mod_timer(&ip6_fl_gc_timer, ttd);
	}
135
	spin_unlock_bh(&ip6_fl_lock);
L
Linus Torvalds 已提交
136 137 138 139 140 141 142 143
}

static void ip6_fl_gc(unsigned long dummy)
{
	int i;
	unsigned long now = jiffies;
	unsigned long sched = 0;

144
	spin_lock(&ip6_fl_lock);
L
Linus Torvalds 已提交
145 146

	for (i=0; i<=FL_HASH_MASK; i++) {
147 148 149
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

L
Linus Torvalds 已提交
150
		flp = &fl_ht[i];
151 152
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
L
Linus Torvalds 已提交
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
			if (atomic_read(&fl->users) == 0) {
				unsigned long ttd = fl->lastuse + fl->linger;
				if (time_after(ttd, fl->expires))
					fl->expires = ttd;
				ttd = fl->expires;
				if (time_after_eq(now, ttd)) {
					*flp = fl->next;
					fl_free(fl);
					atomic_dec(&fl_size);
					continue;
				}
				if (!sched || time_before(ttd, sched))
					sched = ttd;
			}
			flp = &fl->next;
		}
	}
	if (!sched && atomic_read(&fl_size))
		sched = now + FL_MAX_LINGER;
	if (sched) {
173
		mod_timer(&ip6_fl_gc_timer, sched);
L
Linus Torvalds 已提交
174
	}
175
	spin_unlock(&ip6_fl_lock);
L
Linus Torvalds 已提交
176 177
}

178
/*
 * Remove every unused flow label belonging to @net from the hash table.
 * Entries that still have users are left in place.
 *
 * ip6_fl_lock is also taken by the GC timer handler (ip6_fl_gc), so it
 * must be acquired with bottom halves disabled here: otherwise the timer
 * could fire on this CPU while the lock is held and spin forever.
 */
static void __net_exit ip6_fl_purge(struct net *net)
{
	int i;

	spin_lock_bh(&ip6_fl_lock);
	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (net_eq(fl->fl_net, net) &&
			    atomic_read(&fl->users) == 0) {
				/* unlink; flp keeps pointing at the same slot */
				*flp = fl->next;
				fl_free(fl);
				atomic_dec(&fl_size);
				continue;
			}
			flp = &fl->next;
		}
	}
	spin_unlock_bh(&ip6_fl_lock);
}

/*
 * Insert @fl into the global hash table under @label.
 *
 * A zero @label means "pick one": random non-zero labels are drawn until
 * one is found that is not yet used in @net.
 *
 * Returns NULL when @fl was inserted. If a non-zero @label is already
 * present, returns the existing entry with its user count raised and
 * leaves @fl untouched apart from its label field.
 */
static struct ip6_flowlabel *fl_intern(struct net *net,
				       struct ip6_flowlabel *fl, __be32 label)
{
	struct ip6_flowlabel __rcu **head;
	struct ip6_flowlabel *lfl;

	fl->label = label & IPV6_FLOWLABEL_MASK;

	spin_lock_bh(&ip6_fl_lock);
	if (label != 0) {
		/*
		 * we dropped the ip6_fl_lock, so this entry could reappear
		 * and we need to recheck with it.
		 *
		 * OTOH no need to search the active socket first, like it is
		 * done in ipv6_flowlabel_opt - sock is locked, so new entry
		 * with the same label can only appear on another sock
		 */
		lfl = __fl_lookup(net, fl->label);
		if (lfl != NULL) {
			atomic_inc(&lfl->users);
			spin_unlock_bh(&ip6_fl_lock);
			return lfl;
		}
	} else {
		/* draw until the label is non-zero and unused in this net */
		do {
			fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
		} while (fl->label == 0 ||
			 __fl_lookup(net, fl->label) != NULL);
	}

	fl->lastuse = jiffies;
	head = &fl_ht[FL_HASH(fl->label)];
	fl->next = *head;
	rcu_assign_pointer(*head, fl);
	atomic_inc(&fl_size);
	spin_unlock_bh(&ip6_fl_lock);
	return NULL;
}



/* Socket flowlabel lists */

A
Al Viro 已提交
249
/*
 * Find the flow label @label among those attached to socket @sk and take
 * a reference on it.
 *
 * The list is walked under RCU only, so a concurrent IPV6_FL_A_PUT may
 * already have dropped the socket's reference. atomic_inc_not_zero() is
 * used (as in fl_lookup()) so that an entry whose user count has reached
 * zero is never revived; such an entry is treated as not found.
 *
 * Returns the referenced flow label, or NULL.
 */
struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
{
	struct ipv6_fl_socklist *sfl;
	struct ipv6_pinfo *np = inet6_sk(sk);

	label &= IPV6_FLOWLABEL_MASK;

	rcu_read_lock_bh();
	for_each_sk_fl_rcu(np, sfl) {
		struct ip6_flowlabel *fl = sfl->fl;

		if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
			fl->lastuse = jiffies;
			rcu_read_unlock_bh();
			return fl;
		}
	}
	rcu_read_unlock_bh();
	return NULL;
}

270 271
EXPORT_SYMBOL_GPL(fl6_sock_lookup);

L
Linus Torvalds 已提交
272 273 274 275 276
void fl6_free_socklist(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

277
	if (!rcu_access_pointer(np->ipv6_fl_list))
278 279
		return;

280 281 282 283 284
	spin_lock_bh(&ip6_sk_fl_lock);
	while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
		np->ipv6_fl_list = sfl->next;
		spin_unlock_bh(&ip6_sk_fl_lock);
285

L
Linus Torvalds 已提交
286
		fl_release(sfl->fl);
287 288 289
		kfree_rcu(sfl, rcu);

		spin_lock_bh(&ip6_sk_fl_lock);
L
Linus Torvalds 已提交
290
	}
291
	spin_unlock_bh(&ip6_sk_fl_lock);
L
Linus Torvalds 已提交
292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
}

/* Service routines */


/*
   It is the only difficult place. flowlabel enforces equal headers
   before and including routing header, however user may supply options
   following rthdr.
 */

struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
					 struct ip6_flowlabel * fl,
					 struct ipv6_txoptions * fopt)
{
307
	struct ipv6_txoptions * fl_opt = fl->opt;
308

309 310
	if (fopt == NULL || fopt->opt_flen == 0)
		return fl_opt;
311

L
Linus Torvalds 已提交
312 313
	if (fl_opt != NULL) {
		opt_space->hopopt = fl_opt->hopopt;
314
		opt_space->dst0opt = fl_opt->dst0opt;
L
Linus Torvalds 已提交
315 316 317 318 319 320 321 322 323 324 325 326 327 328
		opt_space->srcrt = fl_opt->srcrt;
		opt_space->opt_nflen = fl_opt->opt_nflen;
	} else {
		if (fopt->opt_nflen == 0)
			return fopt;
		opt_space->hopopt = NULL;
		opt_space->dst0opt = NULL;
		opt_space->srcrt = NULL;
		opt_space->opt_nflen = 0;
	}
	opt_space->dst1opt = fopt->dst1opt;
	opt_space->opt_flen = fopt->opt_flen;
	return opt_space;
}
329
EXPORT_SYMBOL_GPL(fl6_merge_options);
L
Linus Torvalds 已提交
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358

/*
 * Validate a timeout given in seconds and convert it to jiffies.
 * Values below FL_MIN_LINGER are raised to it. Values above
 * FL_MAX_LINGER need CAP_NET_ADMIN; without it 0 is returned.
 */
static unsigned long check_linger(unsigned long ttl)
{
	unsigned long secs = ttl;

	if (secs < FL_MIN_LINGER)
		secs = FL_MIN_LINGER;
	else if (secs > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
		return 0;

	return secs*HZ;
}

/*
 * Refresh the timers of @fl. @linger and @expires are in seconds.
 *
 * The linger and expiry of the label are only ever extended, never
 * shortened. Returns 0, or -EPERM when check_linger() rejects either
 * value.
 */
static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
{
	unsigned long linger_j;
	unsigned long expires_j;

	linger_j = check_linger(linger);
	if (linger_j == 0)
		return -EPERM;

	expires_j = check_linger(expires);
	if (expires_j == 0)
		return -EPERM;

	fl->lastuse = jiffies;

	if (time_before(fl->linger, linger_j))
		fl->linger = linger_j;

	/* the label must live at least as long as its linger period */
	if (time_before(expires_j, fl->linger))
		expires_j = fl->linger;

	if (time_before(fl->expires, fl->lastuse + expires_j))
		fl->expires = fl->lastuse + expires_j;

	return 0;
}

static struct ip6_flowlabel *
359 360
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
	  char __user *optval, int optlen, int *err_p)
L
Linus Torvalds 已提交
361
{
362
	struct ip6_flowlabel *fl = NULL;
L
Linus Torvalds 已提交
363 364 365 366
	int olen;
	int addr_type;
	int err;

367 368 369 370 371
	olen = optlen - CMSG_ALIGN(sizeof(*freq));
	err = -EINVAL;
	if (olen > 64 * 1024)
		goto done;

L
Linus Torvalds 已提交
372
	err = -ENOMEM;
373
	fl = kzalloc(sizeof(*fl), GFP_KERNEL);
L
Linus Torvalds 已提交
374 375 376 377 378
	if (fl == NULL)
		goto done;

	if (olen > 0) {
		struct msghdr msg;
379
		struct flowi6 flowi6;
L
Linus Torvalds 已提交
380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
		int junk;

		err = -ENOMEM;
		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
		if (fl->opt == NULL)
			goto done;

		memset(fl->opt, 0, sizeof(*fl->opt));
		fl->opt->tot_len = sizeof(*fl->opt) + olen;
		err = -EFAULT;
		if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
			goto done;

		msg.msg_controllen = olen;
		msg.msg_control = (void*)(fl->opt+1);
395
		memset(&flowi6, 0, sizeof(flowi6));
L
Linus Torvalds 已提交
396

397 398
		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
					    &junk, &junk, &junk);
L
Linus Torvalds 已提交
399 400 401 402 403 404 405 406 407 408 409
		if (err)
			goto done;
		err = -EINVAL;
		if (fl->opt->opt_flen)
			goto done;
		if (fl->opt->opt_nflen == 0) {
			kfree(fl->opt);
			fl->opt = NULL;
		}
	}

410
	fl->fl_net = hold_net(net);
L
Linus Torvalds 已提交
411 412 413 414 415 416
	fl->expires = jiffies;
	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
	if (err)
		goto done;
	fl->share = freq->flr_share;
	addr_type = ipv6_addr_type(&freq->flr_dst);
417 418
	if ((addr_type & IPV6_ADDR_MAPPED) ||
	    addr_type == IPV6_ADDR_ANY) {
419
		err = -EINVAL;
L
Linus Torvalds 已提交
420
		goto done;
421
	}
A
Alexey Dobriyan 已提交
422
	fl->dst = freq->flr_dst;
L
Linus Torvalds 已提交
423 424 425 426 427 428
	atomic_set(&fl->users, 1);
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_ANY:
		break;
	case IPV6_FL_S_PROCESS:
429
		fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
L
Linus Torvalds 已提交
430 431
		break;
	case IPV6_FL_S_USER:
432
		fl->owner.uid = current_euid();
L
Linus Torvalds 已提交
433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
		break;
	default:
		err = -EINVAL;
		goto done;
	}
	return fl;

done:
	fl_free(fl);
	*err_p = err;
	return NULL;
}

static int mem_check(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;
	int room = FL_MAX_SIZE - atomic_read(&fl_size);
	int count = 0;

	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
		return 0;

456
	for_each_sk_fl_rcu(np, sfl)
L
Linus Torvalds 已提交
457 458 459 460
		count++;

	if (room <= 0 ||
	    ((count >= FL_MAX_PER_SOCK ||
461 462
	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
	     !capable(CAP_NET_ADMIN)))
L
Linus Torvalds 已提交
463 464 465 466 467
		return -ENOBUFS;

	return 0;
}

468
static bool ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
L
Linus Torvalds 已提交
469 470
{
	if (h1 == h2)
471
		return false;
L
Linus Torvalds 已提交
472
	if (h1 == NULL || h2 == NULL)
473
		return true;
L
Linus Torvalds 已提交
474
	if (h1->hdrlen != h2->hdrlen)
475
		return true;
L
Linus Torvalds 已提交
476 477 478
	return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
}

479
static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
L
Linus Torvalds 已提交
480 481
{
	if (o1 == o2)
482
		return false;
L
Linus Torvalds 已提交
483
	if (o1 == NULL || o2 == NULL)
484
		return true;
L
Linus Torvalds 已提交
485
	if (o1->opt_nflen != o2->opt_nflen)
486
		return true;
L
Linus Torvalds 已提交
487
	if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
488
		return true;
L
Linus Torvalds 已提交
489
	if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
490
		return true;
L
Linus Torvalds 已提交
491
	if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
492 493
		return true;
	return false;
L
Linus Torvalds 已提交
494 495
}

496 497 498
/*
 * Attach flow label @fl to a socket: fill the preallocated list node
 * @sfl and publish it at the head of @np's flow label list.
 */
static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
		struct ip6_flowlabel *fl)
{
	/* @sfl is not visible to anyone until it is published below */
	sfl->fl = fl;

	spin_lock_bh(&ip6_sk_fl_lock);
	sfl->next = np->ipv6_fl_list;
	rcu_assign_pointer(np->ipv6_fl_list, sfl);
	spin_unlock_bh(&ip6_sk_fl_lock);
}

L
Linus Torvalds 已提交
506 507
/*
 * Flow label management request on socket @sk.
 *
 * @optval points to a struct in6_flowlabel_req, optionally followed by
 * ancillary option data; @optlen is the total length. The request's
 * flr_action selects:
 *
 *  IPV6_FL_A_PUT   - detach label flr_label from the socket.
 *  IPV6_FL_A_RENEW - extend the timers of a label attached to the
 *                    socket, or of any label in this net when the caller
 *                    has CAP_NET_ADMIN and flr_share is IPV6_FL_S_NONE.
 *  IPV6_FL_A_GET   - attach to an existing label if its sharing policy
 *                    allows it, or create a new one when
 *                    IPV6_FL_F_CREATE is set.
 *
 * Returns 0 on success or a negative errno.
 */
int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
{
	int uninitialized_var(err);
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_flowlabel_req freq;
	struct ipv6_fl_socklist *sfl1=NULL;
	struct ipv6_fl_socklist *sfl;
	struct ipv6_fl_socklist __rcu **sflp;
	struct ip6_flowlabel *fl, *fl1 = NULL;


	if (optlen < sizeof(freq))
		return -EINVAL;

	if (copy_from_user(&freq, optval, sizeof(freq)))
		return -EFAULT;

	switch (freq.flr_action) {
	case IPV6_FL_A_PUT:
		/*
		 * Writer side: the list is protected by ip6_sk_fl_lock, not
		 * by an RCU read section, hence rcu_dereference_protected().
		 */
		spin_lock_bh(&ip6_sk_fl_lock);
		for (sflp = &np->ipv6_fl_list;
		     (sfl = rcu_dereference_protected(*sflp,
						      lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
		     sflp = &sfl->next) {
			if (sfl->fl->label == freq.flr_label) {
				/* stop using the label as the socket default */
				if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
					np->flow_label &= ~IPV6_FLOWLABEL_MASK;
				*sflp = sfl->next;
				spin_unlock_bh(&ip6_sk_fl_lock);
				fl_release(sfl->fl);
				kfree_rcu(sfl, rcu);
				return 0;
			}
		}
		spin_unlock_bh(&ip6_sk_fl_lock);
		return -ESRCH;

	case IPV6_FL_A_RENEW:
		rcu_read_lock_bh();
		for_each_sk_fl_rcu(np, sfl) {
			if (sfl->fl->label == freq.flr_label) {
				err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
				rcu_read_unlock_bh();
				return err;
			}
		}
		rcu_read_unlock_bh();

		if (freq.flr_share == IPV6_FL_S_NONE &&
		    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
			fl = fl_lookup(net, freq.flr_label);
			if (fl) {
				err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
				fl_release(fl);
				return err;
			}
		}
		return -ESRCH;

	case IPV6_FL_A_GET:
		if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
			return -EINVAL;

		fl = fl_create(net, sk, &freq, optval, optlen, &err);
		if (fl == NULL)
			return err;
		/* allocation result is checked where the node is needed */
		sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);

		if (freq.flr_label) {
			err = -EEXIST;
			rcu_read_lock_bh();
			for_each_sk_fl_rcu(np, sfl) {
				if (sfl->fl->label == freq.flr_label) {
					if (freq.flr_flags&IPV6_FL_F_EXCL) {
						rcu_read_unlock_bh();
						goto done;
					}
					fl1 = sfl->fl;
					atomic_inc(&fl1->users);
					break;
				}
			}
			rcu_read_unlock_bh();

			if (fl1 == NULL)
				fl1 = fl_lookup(net, freq.flr_label);
			if (fl1) {
recheck:
				err = -EEXIST;
				if (freq.flr_flags&IPV6_FL_F_EXCL)
					goto release;
				/*
				 * Sharing is refused when the existing label
				 * is exclusive, when the sharing modes differ,
				 * or when the mode is per-process/per-user and
				 * the requester is NOT the same owner.
				 */
				err = -EPERM;
				if (fl1->share == IPV6_FL_S_EXCL ||
				    fl1->share != fl->share ||
				    ((fl1->share == IPV6_FL_S_PROCESS) &&
				     (fl1->owner.pid != fl->owner.pid)) ||
				    ((fl1->share == IPV6_FL_S_USER) &&
				     !uid_eq(fl1->owner.uid, fl->owner.uid)))
					goto release;

				err = -EINVAL;
				if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
				    ipv6_opt_cmp(fl1->opt, fl->opt))
					goto release;

				err = -ENOMEM;
				if (sfl1 == NULL)
					goto release;
				if (fl->linger > fl1->linger)
					fl1->linger = fl->linger;
				if ((long)(fl->expires - fl1->expires) > 0)
					fl1->expires = fl->expires;
				fl_link(np, sfl1, fl1);
				/* the template is no longer needed */
				fl_free(fl);
				return 0;

release:
				fl_release(fl1);
				goto done;
			}
		}
		err = -ENOENT;
		if (!(freq.flr_flags&IPV6_FL_F_CREATE))
			goto done;

		err = -ENOMEM;
		if (sfl1 == NULL || (err = mem_check(sk)) != 0)
			goto done;

		/* somebody else may have created the label meanwhile */
		fl1 = fl_intern(net, fl, freq.flr_label);
		if (fl1 != NULL)
			goto recheck;

		if (!freq.flr_label) {
			/* report the label that was picked for the caller */
			if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
					 &fl->label, sizeof(fl->label))) {
				/* Intentionally ignore fault. */
			}
		}

		fl_link(np, sfl1, fl);
		return 0;

	default:
		return -EINVAL;
	}

done:
	fl_free(fl);
	kfree(sfl1);
	return err;
}

#ifdef CONFIG_PROC_FS

struct ip6fl_iter_state {
662
	struct seq_net_private p;
663
	struct pid_namespace *pid_ns;
L
Linus Torvalds 已提交
664 665 666 667 668 669 670 671 672
	int bucket;
};

#define ip6fl_seq_private(seq)	((struct ip6fl_iter_state *)(seq)->private)

static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
{
	struct ip6_flowlabel *fl = NULL;
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
673
	struct net *net = seq_file_net(seq);
L
Linus Torvalds 已提交
674 675

	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
676 677 678 679
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
L
Linus Torvalds 已提交
680
	}
681 682
	fl = NULL;
out:
L
Linus Torvalds 已提交
683 684 685 686 687 688
	return fl;
}

static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
689
	struct net *net = seq_file_net(seq);
L
Linus Torvalds 已提交
690

691 692 693 694 695
	for_each_fl_continue_rcu(fl) {
		if (net_eq(fl->fl_net, net))
			goto out;
	}

696
try_again:
697 698 699 700 701 702
	if (++state->bucket <= FL_HASH_MASK) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
		goto try_again;
L
Linus Torvalds 已提交
703
	}
704 705 706
	fl = NULL;

out:
L
Linus Torvalds 已提交
707 708 709 710 711 712 713 714 715 716 717 718 719
	return fl;
}

/*
 * Return the flow label at zero-based position @pos of the walk, or NULL
 * when the walk has fewer entries.
 */
static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
{
	struct ip6_flowlabel *fl = ip6fl_get_first(seq);

	while (fl != NULL && pos != 0) {
		fl = ip6fl_get_next(seq, fl);
		if (fl != NULL)
			--pos;
	}
	/* the loop ends with either fl == NULL or pos == 0 */
	return fl;
}

/*
 * seq_file ->start: enter the RCU-bh read section (left again in
 * ip6fl_seq_stop()) and return the header token for position 0, or the
 * entry at *pos - 1 otherwise.
 */
static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock_bh();

	if (*pos == 0)
		return SEQ_START_TOKEN;
	return ip6fl_get_idx(seq, *pos - 1);
}

/* seq_file ->next: step to the following entry and bump *pos. */
static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip6_flowlabel *fl;

	fl = (v == SEQ_START_TOKEN) ? ip6fl_get_first(seq)
				    : ip6fl_get_next(seq, v);
	++*pos;
	return fl;
}

static void ip6fl_seq_stop(struct seq_file *seq, void *v)
739
	__releases(RCU)
L
Linus Torvalds 已提交
740
{
741
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
742 743
}

744
static int ip6fl_seq_show(struct seq_file *seq, void *v)
L
Linus Torvalds 已提交
745
{
746
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
747 748 749 750 751
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
			   "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
	else {
		struct ip6_flowlabel *fl = v;
L
Linus Torvalds 已提交
752
		seq_printf(seq,
753
			   "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
754
			   (unsigned int)ntohl(fl->label),
L
Linus Torvalds 已提交
755
			   fl->share,
756 757 758 759 760
			   ((fl->share == IPV6_FL_S_PROCESS) ?
			    pid_nr_ns(fl->owner.pid, state->pid_ns) :
			    ((fl->share == IPV6_FL_S_USER) ?
			     from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
			     0)),
L
Linus Torvalds 已提交
761 762 763
			   atomic_read(&fl->users),
			   fl->linger/HZ,
			   (long)(fl->expires - jiffies)/HZ,
764
			   &fl->dst,
L
Linus Torvalds 已提交
765 766 767 768 769
			   fl->opt ? fl->opt->opt_nflen : 0);
	}
	return 0;
}

770
static const struct seq_operations ip6fl_seq_ops = {
L
Linus Torvalds 已提交
771 772 773 774 775 776 777 778
	.start	=	ip6fl_seq_start,
	.next	=	ip6fl_seq_next,
	.stop	=	ip6fl_seq_stop,
	.show	=	ip6fl_seq_show,
};

/*
 * ->open: set up the per-net seq_file and pin the opener's active pid
 * namespace in the iterator state (dropped in ip6fl_seq_release()).
 */
static int ip6fl_seq_open(struct inode *inode, struct file *file)
{
	struct ip6fl_iter_state *state;
	struct seq_file *seq;
	int err;

	err = seq_open_net(inode, file, &ip6fl_seq_ops,
			   sizeof(struct ip6fl_iter_state));
	if (err)
		return err;

	seq = file->private_data;
	state = ip6fl_seq_private(seq);

	rcu_read_lock();
	state->pid_ns = get_pid_ns(task_active_pid_ns(current));
	rcu_read_unlock();

	return 0;
}

/* ->release: drop the pid namespace taken at open, then the seq_file. */
static int ip6fl_seq_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

	put_pid_ns(state->pid_ns);

	return seq_release_net(inode, file);
}

804
static const struct file_operations ip6fl_seq_fops = {
L
Linus Torvalds 已提交
805 806 807 808
	.owner		=	THIS_MODULE,
	.open		=	ip6fl_seq_open,
	.read		=	seq_read,
	.llseek		=	seq_lseek,
809
	.release	=	ip6fl_seq_release,
L
Linus Torvalds 已提交
810 811
};

812
/*
 * Create the read-only "ip6_flowlabel" proc entry for @net.
 * Returns 0, or -ENOMEM when the entry cannot be created.
 */
static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
	return proc_create("ip6_flowlabel", S_IRUGO, net->proc_net,
			   &ip6fl_seq_fops) ? 0 : -ENOMEM;
}
L
Linus Torvalds 已提交
819

820
/* Remove the "ip6_flowlabel" proc entry of @net. */
static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
	remove_proc_entry("ip6_flowlabel", net->proc_net);
}
#else
/* Without CONFIG_PROC_FS there is no proc entry to create: always succeeds. */
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
/* Without CONFIG_PROC_FS there is no proc entry to remove. */
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
L
Linus Torvalds 已提交
832
#endif
833

834
/*
 * Per-net exit hook: purge the unused flow labels of @net, then remove
 * its proc entry.
 */
static void __net_exit ip6_flowlabel_net_exit(struct net *net)
{
	ip6_fl_purge(net);
	ip6_flowlabel_proc_fini(net);
}

static struct pernet_operations ip6_flowlabel_net_ops = {
841
	.init = ip6_flowlabel_proc_init,
842 843 844
	.exit = ip6_flowlabel_net_exit,
};

845 846
int ip6_flowlabel_init(void)
{
847
	return register_pernet_subsys(&ip6_flowlabel_net_ops);
L
Linus Torvalds 已提交
848 849 850 851 852
}

void ip6_flowlabel_cleanup(void)
{
	del_timer(&ip6_fl_gc_timer);
853
	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
L
Linus Torvalds 已提交
854
}