ip6_flowlabel.c 19.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 *	ip6_flowlabel.c		IPv6 flowlabel manager.
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

12
#include <linux/capability.h>
L
Linus Torvalds 已提交
13 14 15 16 17 18 19 20 21 22
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/route.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
23
#include <linux/slab.h>
24
#include <linux/export.h>
25
#include <linux/pid_namespace.h>
L
Linus Torvalds 已提交
26

27
#include <net/net_namespace.h>
L
Linus Torvalds 已提交
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/transp_v6.h>

#include <asm/uaccess.h>

/*
 * Minimal linger, in seconds (check_linger() scales it by HZ). It is set to
 * the 6 seconds specified in the old IPv6 RFC, which was a reasonable value.
 */
#define FL_MIN_LINGER	6
/*
 * Maximal linger, in seconds, that check_linger() accepts from callers
 * lacking CAP_NET_ADMIN.
 */
#define FL_MAX_LINGER	60

/* FL hash table */

/* Per-socket label count at which mem_check() starts refusing requests. */
#define FL_MAX_PER_SOCK	32
/* Table-wide label budget that mem_check() measures fl_size against. */
#define FL_MAX_SIZE	4096
/* fl_ht has FL_HASH_MASK + 1 buckets. */
#define FL_HASH_MASK	255
/* Bucket index for a label given in network byte order. */
#define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)

static atomic_t fl_size = ATOMIC_INIT(0);
54
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
L
Linus Torvalds 已提交
55 56

static void ip6_fl_gc(unsigned long dummy);
57
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
L
Linus Torvalds 已提交
58 59 60

/* FL hash table lock: it protects only of GC */

61
static DEFINE_SPINLOCK(ip6_fl_lock);
L
Linus Torvalds 已提交
62 63 64

/* Big socket sock */

65
static DEFINE_SPINLOCK(ip6_sk_fl_lock);
L
Linus Torvalds 已提交
66

67
/*
 * Iteration helpers for the RCU-protected lists in this file.
 *
 * All of them fetch pointers with rcu_dereference_bh(), so callers are
 * expected to run with bottom halves disabled (rcu_read_lock_bh() or a
 * _bh spinlock). Every macro argument is parenthesized so that arbitrary
 * expressions can be passed safely.
 */

/* Walk every flow label in bucket @hash of fl_ht, using @fl as the cursor. */
#define for_each_fl_rcu(hash, fl)				\
	for ((fl) = rcu_dereference_bh(fl_ht[(hash)]);		\
	     (fl) != NULL;					\
	     (fl) = rcu_dereference_bh((fl)->next))

/* Resume a bucket walk at the entry following @fl. */
#define for_each_fl_continue_rcu(fl)				\
	for ((fl) = rcu_dereference_bh((fl)->next);		\
	     (fl) != NULL;					\
	     (fl) = rcu_dereference_bh((fl)->next))

/* Walk the flow label list attached to socket private data @np. */
#define for_each_sk_fl_rcu(np, sfl)				\
	for ((sfl) = rcu_dereference_bh((np)->ipv6_fl_list);	\
	     (sfl) != NULL;					\
	     (sfl) = rcu_dereference_bh((sfl)->next))

81
/*
 * Search the global hash table for @label within namespace @net.
 *
 * Returns the matching entry, or NULL if there is none. No reference is
 * taken on the returned entry.
 */
static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *cur;

	for_each_fl_rcu(FL_HASH(label), cur) {
		if (cur->label != label)
			continue;
		if (net_eq(cur->fl_net, net))
			return cur;
	}
	return NULL;
}

92
/*
 * Look up @label in @net and take a user reference on it.
 *
 * Returns the entry with ->users incremented, or NULL when the label is
 * absent or its user count has already reached zero.
 */
static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *found;

	rcu_read_lock_bh();
	found = __fl_lookup(net, label);
	if (found != NULL) {
		/* Never resurrect an entry whose user count is already zero. */
		if (!atomic_inc_not_zero(&found->users))
			found = NULL;
	}
	rcu_read_unlock_bh();

	return found;
}


static void fl_free(struct ip6_flowlabel *fl)
{
107
	if (fl) {
108 109
		if (fl->share == IPV6_FL_S_PROCESS)
			put_pid(fl->owner.pid);
110
		release_net(fl->fl_net);
L
Linus Torvalds 已提交
111
		kfree(fl->opt);
112
		kfree_rcu(fl, rcu);
113
	}
L
Linus Torvalds 已提交
114 115 116 117
}

static void fl_release(struct ip6_flowlabel *fl)
{
118
	spin_lock_bh(&ip6_fl_lock);
L
Linus Torvalds 已提交
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134

	fl->lastuse = jiffies;
	if (atomic_dec_and_test(&fl->users)) {
		unsigned long ttd = fl->lastuse + fl->linger;
		if (time_after(ttd, fl->expires))
			fl->expires = ttd;
		ttd = fl->expires;
		if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
			struct ipv6_txoptions *opt = fl->opt;
			fl->opt = NULL;
			kfree(opt);
		}
		if (!timer_pending(&ip6_fl_gc_timer) ||
		    time_after(ip6_fl_gc_timer.expires, ttd))
			mod_timer(&ip6_fl_gc_timer, ttd);
	}
135
	spin_unlock_bh(&ip6_fl_lock);
L
Linus Torvalds 已提交
136 137 138 139 140 141 142 143
}

static void ip6_fl_gc(unsigned long dummy)
{
	int i;
	unsigned long now = jiffies;
	unsigned long sched = 0;

144
	spin_lock(&ip6_fl_lock);
L
Linus Torvalds 已提交
145 146 147 148

	for (i=0; i<=FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl, **flp;
		flp = &fl_ht[i];
149 150
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
L
Linus Torvalds 已提交
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
			if (atomic_read(&fl->users) == 0) {
				unsigned long ttd = fl->lastuse + fl->linger;
				if (time_after(ttd, fl->expires))
					fl->expires = ttd;
				ttd = fl->expires;
				if (time_after_eq(now, ttd)) {
					*flp = fl->next;
					fl_free(fl);
					atomic_dec(&fl_size);
					continue;
				}
				if (!sched || time_before(ttd, sched))
					sched = ttd;
			}
			flp = &fl->next;
		}
	}
	if (!sched && atomic_read(&fl_size))
		sched = now + FL_MAX_LINGER;
	if (sched) {
171
		mod_timer(&ip6_fl_gc_timer, sched);
L
Linus Torvalds 已提交
172
	}
173
	spin_unlock(&ip6_fl_lock);
L
Linus Torvalds 已提交
174 175
}

176
/*
 * Namespace teardown: unlink and free every flow label that belongs to @net
 * and has no users left, regardless of its expiry time.
 */
static void __net_exit ip6_fl_purge(struct net *net)
{
	int bucket;

	spin_lock(&ip6_fl_lock);
	for (bucket = 0; bucket <= FL_HASH_MASK; bucket++) {
		struct ip6_flowlabel *fl, **linkp;

		linkp = &fl_ht[bucket];
		for (;;) {
			fl = rcu_dereference_protected(*linkp,
						       lockdep_is_held(&ip6_fl_lock));
			if (fl == NULL)
				break;

			if (!net_eq(fl->fl_net, net) ||
			    atomic_read(&fl->users) != 0) {
				linkp = &fl->next;
				continue;
			}

			*linkp = fl->next;
			fl_free(fl);
			atomic_dec(&fl_size);
		}
	}
	spin_unlock(&ip6_fl_lock);
}

/*
 * Insert @fl into the global hash table.
 *
 * @net:   namespace the label belongs to
 * @fl:    freshly created label to publish
 * @label: requested label value; 0 asks for a random unused non-zero label
 *
 * Returns NULL when @fl was inserted. If @label is non-zero and an entry
 * with that label already exists in @net, @fl is NOT inserted and the
 * existing entry is returned with its user count incremented.
 */
static struct ip6_flowlabel *fl_intern(struct net *net,
				       struct ip6_flowlabel *fl, __be32 label)
{
	struct ip6_flowlabel *existing;
	unsigned int slot;

	fl->label = label & IPV6_FLOWLABEL_MASK;

	spin_lock_bh(&ip6_fl_lock);
	if (label != 0) {
		/*
		 * The caller dropped every lock between its own lookup and
		 * this call, so the entry could have appeared in the meantime
		 * and must be rechecked here.
		 *
		 * OTOH there is no need to search the active socket first, as
		 * is done in ipv6_flowlabel_opt - the sock is locked, so a new
		 * entry with the same label can only appear on another sock.
		 */
		existing = __fl_lookup(net, fl->label);
		if (existing != NULL) {
			atomic_inc(&existing->users);
			spin_unlock_bh(&ip6_fl_lock);
			return existing;
		}
	} else {
		/* Draw random labels until one is non-zero and unused. */
		do {
			fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
		} while (fl->label == 0 ||
			 __fl_lookup(net, fl->label) != NULL);
	}

	fl->lastuse = jiffies;
	slot = FL_HASH(fl->label);
	fl->next = fl_ht[slot];
	rcu_assign_pointer(fl_ht[slot], fl);
	atomic_inc(&fl_size);
	spin_unlock_bh(&ip6_fl_lock);
	return NULL;
}



/* Socket flowlabel lists */

A
Al Viro 已提交
245
/*
 * Find @label among the flow labels attached to socket @sk.
 *
 * Bits outside IPV6_FLOWLABEL_MASK are ignored. On a match the label's
 * lastuse stamp is refreshed and its user count is incremented before it is
 * returned; NULL is returned when the socket does not hold the label.
 */
struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ip6_flowlabel *match = NULL;
	struct ipv6_fl_socklist *sfl;

	label &= IPV6_FLOWLABEL_MASK;

	rcu_read_lock_bh();
	for_each_sk_fl_rcu(np, sfl) {
		struct ip6_flowlabel *fl = sfl->fl;

		if (fl->label != label)
			continue;

		fl->lastuse = jiffies;
		atomic_inc(&fl->users);
		match = fl;
		break;
	}
	rcu_read_unlock_bh();

	return match;
}

EXPORT_SYMBOL_GPL(fl6_sock_lookup);

L
Linus Torvalds 已提交
268 269 270 271 272
void fl6_free_socklist(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

273
	if (!rcu_access_pointer(np->ipv6_fl_list))
274 275
		return;

276 277 278 279 280
	spin_lock_bh(&ip6_sk_fl_lock);
	while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
		np->ipv6_fl_list = sfl->next;
		spin_unlock_bh(&ip6_sk_fl_lock);
281

L
Linus Torvalds 已提交
282
		fl_release(sfl->fl);
283 284 285
		kfree_rcu(sfl, rcu);

		spin_lock_bh(&ip6_sk_fl_lock);
L
Linus Torvalds 已提交
286
	}
287
	spin_unlock_bh(&ip6_sk_fl_lock);
L
Linus Torvalds 已提交
288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
}

/* Service routines */


/*
   It is the only difficult place. flowlabel enforces equal headers
   before and including routing header, however user may supply options
   following rthdr.
 */

struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
					 struct ip6_flowlabel * fl,
					 struct ipv6_txoptions * fopt)
{
303
	struct ipv6_txoptions * fl_opt = fl->opt;
304

305 306
	if (fopt == NULL || fopt->opt_flen == 0)
		return fl_opt;
307

L
Linus Torvalds 已提交
308 309
	if (fl_opt != NULL) {
		opt_space->hopopt = fl_opt->hopopt;
310
		opt_space->dst0opt = fl_opt->dst0opt;
L
Linus Torvalds 已提交
311 312 313 314 315 316 317 318 319 320 321 322 323 324
		opt_space->srcrt = fl_opt->srcrt;
		opt_space->opt_nflen = fl_opt->opt_nflen;
	} else {
		if (fopt->opt_nflen == 0)
			return fopt;
		opt_space->hopopt = NULL;
		opt_space->dst0opt = NULL;
		opt_space->srcrt = NULL;
		opt_space->opt_nflen = 0;
	}
	opt_space->dst1opt = fopt->dst1opt;
	opt_space->opt_flen = fopt->opt_flen;
	return opt_space;
}
325
EXPORT_SYMBOL_GPL(fl6_merge_options);
L
Linus Torvalds 已提交
326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354

/*
 * Validate a linger/expiry request given in seconds and convert it to
 * jiffies.
 *
 * Values below FL_MIN_LINGER are raised to FL_MIN_LINGER. Values above
 * FL_MAX_LINGER are only accepted from CAP_NET_ADMIN callers; for anyone
 * else 0 is returned, which callers treat as "not permitted".
 */
static unsigned long check_linger(unsigned long ttl)
{
	unsigned long secs = ttl;

	if (secs < FL_MIN_LINGER)
		secs = FL_MIN_LINGER;
	else if (secs > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
		return 0;

	return secs*HZ;
}

/*
 * Extend the lifetime of @fl.
 *
 * @linger and @expires are in seconds and are vetted by check_linger().
 * The label's linger and expiry only ever grow: linger is raised to the
 * requested value, and expiry is raised to lastuse plus the larger of the
 * requested expiry and the label's linger.
 *
 * Returns 0 on success, -EPERM when check_linger() rejects either value.
 */
static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
{
	unsigned long new_expiry;

	linger = check_linger(linger);
	if (linger == 0)
		return -EPERM;

	expires = check_linger(expires);
	if (expires == 0)
		return -EPERM;

	fl->lastuse = jiffies;

	if (time_before(fl->linger, linger))
		fl->linger = linger;
	if (time_before(expires, fl->linger))
		expires = fl->linger;

	new_expiry = fl->lastuse + expires;
	if (time_before(fl->expires, new_expiry))
		fl->expires = new_expiry;

	return 0;
}

static struct ip6_flowlabel *
355 356
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
	  char __user *optval, int optlen, int *err_p)
L
Linus Torvalds 已提交
357
{
358
	struct ip6_flowlabel *fl = NULL;
L
Linus Torvalds 已提交
359 360 361 362
	int olen;
	int addr_type;
	int err;

363 364 365 366 367
	olen = optlen - CMSG_ALIGN(sizeof(*freq));
	err = -EINVAL;
	if (olen > 64 * 1024)
		goto done;

L
Linus Torvalds 已提交
368
	err = -ENOMEM;
369
	fl = kzalloc(sizeof(*fl), GFP_KERNEL);
L
Linus Torvalds 已提交
370 371 372 373 374
	if (fl == NULL)
		goto done;

	if (olen > 0) {
		struct msghdr msg;
375
		struct flowi6 flowi6;
L
Linus Torvalds 已提交
376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
		int junk;

		err = -ENOMEM;
		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
		if (fl->opt == NULL)
			goto done;

		memset(fl->opt, 0, sizeof(*fl->opt));
		fl->opt->tot_len = sizeof(*fl->opt) + olen;
		err = -EFAULT;
		if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
			goto done;

		msg.msg_controllen = olen;
		msg.msg_control = (void*)(fl->opt+1);
391
		memset(&flowi6, 0, sizeof(flowi6));
L
Linus Torvalds 已提交
392

393 394
		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
					    &junk, &junk, &junk);
L
Linus Torvalds 已提交
395 396 397 398 399 400 401 402 403 404 405
		if (err)
			goto done;
		err = -EINVAL;
		if (fl->opt->opt_flen)
			goto done;
		if (fl->opt->opt_nflen == 0) {
			kfree(fl->opt);
			fl->opt = NULL;
		}
	}

406
	fl->fl_net = hold_net(net);
L
Linus Torvalds 已提交
407 408 409 410 411 412
	fl->expires = jiffies;
	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
	if (err)
		goto done;
	fl->share = freq->flr_share;
	addr_type = ipv6_addr_type(&freq->flr_dst);
413 414
	if ((addr_type & IPV6_ADDR_MAPPED) ||
	    addr_type == IPV6_ADDR_ANY) {
415
		err = -EINVAL;
L
Linus Torvalds 已提交
416
		goto done;
417
	}
A
Alexey Dobriyan 已提交
418
	fl->dst = freq->flr_dst;
L
Linus Torvalds 已提交
419 420 421 422 423 424
	atomic_set(&fl->users, 1);
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_ANY:
		break;
	case IPV6_FL_S_PROCESS:
425
		fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
L
Linus Torvalds 已提交
426 427
		break;
	case IPV6_FL_S_USER:
428
		fl->owner.uid = current_euid();
L
Linus Torvalds 已提交
429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
		break;
	default:
		err = -EINVAL;
		goto done;
	}
	return fl;

done:
	fl_free(fl);
	*err_p = err;
	return NULL;
}

static int mem_check(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;
	int room = FL_MAX_SIZE - atomic_read(&fl_size);
	int count = 0;

	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
		return 0;

452
	for_each_sk_fl_rcu(np, sfl)
L
Linus Torvalds 已提交
453 454 455 456
		count++;

	if (room <= 0 ||
	    ((count >= FL_MAX_PER_SOCK ||
457 458
	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
	     !capable(CAP_NET_ADMIN)))
L
Linus Torvalds 已提交
459 460 461 462 463
		return -ENOBUFS;

	return 0;
}

464
static bool ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
L
Linus Torvalds 已提交
465 466
{
	if (h1 == h2)
467
		return false;
L
Linus Torvalds 已提交
468
	if (h1 == NULL || h2 == NULL)
469
		return true;
L
Linus Torvalds 已提交
470
	if (h1->hdrlen != h2->hdrlen)
471
		return true;
L
Linus Torvalds 已提交
472 473 474
	return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
}

475
static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
L
Linus Torvalds 已提交
476 477
{
	if (o1 == o2)
478
		return false;
L
Linus Torvalds 已提交
479
	if (o1 == NULL || o2 == NULL)
480
		return true;
L
Linus Torvalds 已提交
481
	if (o1->opt_nflen != o2->opt_nflen)
482
		return true;
L
Linus Torvalds 已提交
483
	if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
484
		return true;
L
Linus Torvalds 已提交
485
	if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
486
		return true;
L
Linus Torvalds 已提交
487
	if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
488 489
		return true;
	return false;
L
Linus Torvalds 已提交
490 491
}

492 493 494
/*
 * Attach flow label @fl to a socket by pushing list node @sfl onto the head
 * of @np's flow label list.
 */
static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
		struct ip6_flowlabel *fl)
{
	/* @sfl is not visible to anyone until it is published below. */
	sfl->fl = fl;

	spin_lock_bh(&ip6_sk_fl_lock);
	sfl->next = np->ipv6_fl_list;
	rcu_assign_pointer(np->ipv6_fl_list, sfl);
	spin_unlock_bh(&ip6_sk_fl_lock);
}

L
Linus Torvalds 已提交
502 503
/*
 * Handle a flow label manager request.
 *
 * @sk:     socket the request is made on
 * @optval: user buffer holding a struct in6_flowlabel_req, optionally
 *          followed by ancillary option data (consumed by fl_create())
 * @optlen: length of @optval in bytes
 *
 * Dispatches on freq.flr_action:
 *   IPV6_FL_A_PUT   - unlink the label from this socket and drop its
 *                     reference; -ESRCH if the socket does not hold it.
 *   IPV6_FL_A_RENEW - extend linger/expiry of a label held by this socket,
 *                     or, for a CAP_NET_ADMIN caller passing
 *                     IPV6_FL_S_NONE, of any label in the namespace.
 *   IPV6_FL_A_GET   - attach to an existing label when sharing rules
 *                     allow it, or create one when IPV6_FL_F_CREATE is set.
 *
 * Returns 0 on success or a negative errno.
 */
int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
{
	int uninitialized_var(err);
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_flowlabel_req freq;
	struct ipv6_fl_socklist *sfl1 = NULL;
	struct ipv6_fl_socklist *sfl;
	struct ipv6_fl_socklist __rcu **sflp;
	struct ip6_flowlabel *fl, *fl1 = NULL;

	if (optlen < sizeof(freq))
		return -EINVAL;

	if (copy_from_user(&freq, optval, sizeof(freq)))
		return -EFAULT;

	switch (freq.flr_action) {
	case IPV6_FL_A_PUT:
		/*
		 * This is the update side of the list: it runs under
		 * ip6_sk_fl_lock rather than inside an RCU read-side section,
		 * so the pointers are fetched with rcu_dereference_protected().
		 */
		spin_lock_bh(&ip6_sk_fl_lock);
		for (sflp = &np->ipv6_fl_list;
		     (sfl = rcu_dereference_protected(*sflp,
						      lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
		     sflp = &sfl->next) {
			if (sfl->fl->label == freq.flr_label) {
				/* Stop using the label as the socket default. */
				if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
					np->flow_label &= ~IPV6_FLOWLABEL_MASK;
				*sflp = sfl->next;
				spin_unlock_bh(&ip6_sk_fl_lock);
				fl_release(sfl->fl);
				kfree_rcu(sfl, rcu);
				return 0;
			}
		}
		spin_unlock_bh(&ip6_sk_fl_lock);
		return -ESRCH;

	case IPV6_FL_A_RENEW:
		rcu_read_lock_bh();
		for_each_sk_fl_rcu(np, sfl) {
			if (sfl->fl->label == freq.flr_label) {
				err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
				rcu_read_unlock_bh();
				return err;
			}
		}
		rcu_read_unlock_bh();

		/* Privileged callers may renew labels they are not attached to. */
		if (freq.flr_share == IPV6_FL_S_NONE &&
		    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
			fl = fl_lookup(net, freq.flr_label);
			if (fl) {
				err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
				fl_release(fl);
				return err;
			}
		}
		return -ESRCH;

	case IPV6_FL_A_GET:
		if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
			return -EINVAL;

		/*
		 * Build a candidate label from the request first; it is either
		 * inserted as a new label or used only to validate the request
		 * against an existing one and then freed.
		 */
		fl = fl_create(net, sk, &freq, optval, optlen, &err);
		if (fl == NULL)
			return err;
		/* Allocation failure is checked right before sfl1 is needed. */
		sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);

		if (freq.flr_label) {
			err = -EEXIST;
			rcu_read_lock_bh();
			for_each_sk_fl_rcu(np, sfl) {
				if (sfl->fl->label == freq.flr_label) {
					if (freq.flr_flags&IPV6_FL_F_EXCL) {
						rcu_read_unlock_bh();
						goto done;
					}
					fl1 = sfl->fl;
					atomic_inc(&fl1->users);
					break;
				}
			}
			rcu_read_unlock_bh();

			if (fl1 == NULL)
				fl1 = fl_lookup(net, freq.flr_label);
			if (fl1) {
recheck:
				err = -EEXIST;
				if (freq.flr_flags&IPV6_FL_F_EXCL)
					goto release;
				/*
				 * Sharing is refused when the existing label
				 * is exclusive, when the sharing modes differ,
				 * or when an owner-scoped label belongs to a
				 * different process / user than the requester.
				 */
				err = -EPERM;
				if (fl1->share == IPV6_FL_S_EXCL ||
				    fl1->share != fl->share ||
				    ((fl1->share == IPV6_FL_S_PROCESS) &&
				     (fl1->owner.pid != fl->owner.pid)) ||
				    ((fl1->share == IPV6_FL_S_USER) &&
				     !uid_eq(fl1->owner.uid, fl->owner.uid)))
					goto release;

				err = -EINVAL;
				if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
				    ipv6_opt_cmp(fl1->opt, fl->opt))
					goto release;

				err = -ENOMEM;
				if (sfl1 == NULL)
					goto release;
				/* Lifetimes of a shared label only ever grow. */
				if (fl->linger > fl1->linger)
					fl1->linger = fl->linger;
				if ((long)(fl->expires - fl1->expires) > 0)
					fl1->expires = fl->expires;
				fl_link(np, sfl1, fl1);
				fl_free(fl);
				return 0;

release:
				fl_release(fl1);
				goto done;
			}
		}
		err = -ENOENT;
		if (!(freq.flr_flags&IPV6_FL_F_CREATE))
			goto done;

		err = -ENOMEM;
		if (sfl1 == NULL || (err = mem_check(sk)) != 0)
			goto done;

		/*
		 * fl_intern() returns an existing entry (with a reference)
		 * if one was inserted concurrently; validate against it.
		 */
		fl1 = fl_intern(net, fl, freq.flr_label);
		if (fl1 != NULL)
			goto recheck;

		/* Report the randomly chosen label back to user space. */
		if (!freq.flr_label) {
			if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
					 &fl->label, sizeof(fl->label))) {
				/* Intentionally ignore fault. */
			}
		}

		fl_link(np, sfl1, fl);
		return 0;

	default:
		return -EINVAL;
	}

done:
	fl_free(fl);
	kfree(sfl1);
	return err;
}

#ifdef CONFIG_PROC_FS

struct ip6fl_iter_state {
657
	struct seq_net_private p;
658
	struct pid_namespace *pid_ns;
L
Linus Torvalds 已提交
659 660 661 662 663 664 665 666 667
	int bucket;
};

#define ip6fl_seq_private(seq)	((struct ip6fl_iter_state *)(seq)->private)

static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
{
	struct ip6_flowlabel *fl = NULL;
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
668
	struct net *net = seq_file_net(seq);
L
Linus Torvalds 已提交
669 670

	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
671 672 673 674
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
L
Linus Torvalds 已提交
675
	}
676 677
	fl = NULL;
out:
L
Linus Torvalds 已提交
678 679 680 681 682 683
	return fl;
}

static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
684
	struct net *net = seq_file_net(seq);
L
Linus Torvalds 已提交
685

686 687 688 689 690
	for_each_fl_continue_rcu(fl) {
		if (net_eq(fl->fl_net, net))
			goto out;
	}

691
try_again:
692 693 694 695 696 697
	if (++state->bucket <= FL_HASH_MASK) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
		goto try_again;
L
Linus Torvalds 已提交
698
	}
699 700 701
	fl = NULL;

out:
L
Linus Torvalds 已提交
702 703 704 705 706 707 708 709 710 711 712 713 714
	return fl;
}

/*
 * Return the flow label at zero-based position @pos in iteration order, or
 * NULL when fewer than @pos + 1 labels are visible.
 */
static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
{
	struct ip6_flowlabel *fl;

	/* The walk stops early with fl == NULL if the labels run out. */
	for (fl = ip6fl_get_first(seq); fl != NULL && pos != 0; --pos)
		fl = ip6fl_get_next(seq, fl);

	return fl;
}

/*
 * seq_file ->start: enter the RCU-bh read-side section (left again in
 * ip6fl_seq_stop()) and position the iterator. Position 0 is the header
 * token; position N > 0 is the label at index N - 1.
 */
static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock_bh();

	if (*pos == 0)
		return SEQ_START_TOKEN;
	return ip6fl_get_idx(seq, *pos - 1);
}

/*
 * seq_file ->next: advance from @v (the header token or a flow label) to
 * the following flow label and bump *@pos. Returns NULL at the end.
 */
static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip6_flowlabel *fl;

	fl = (v == SEQ_START_TOKEN) ? ip6fl_get_first(seq)
				    : ip6fl_get_next(seq, v);
	++*pos;
	return fl;
}

static void ip6fl_seq_stop(struct seq_file *seq, void *v)
734
	__releases(RCU)
L
Linus Torvalds 已提交
735
{
736
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
737 738
}

739
static int ip6fl_seq_show(struct seq_file *seq, void *v)
L
Linus Torvalds 已提交
740
{
741
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
742 743 744 745 746
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
			   "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
	else {
		struct ip6_flowlabel *fl = v;
L
Linus Torvalds 已提交
747
		seq_printf(seq,
748
			   "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
749
			   (unsigned int)ntohl(fl->label),
L
Linus Torvalds 已提交
750
			   fl->share,
751 752 753 754 755
			   ((fl->share == IPV6_FL_S_PROCESS) ?
			    pid_nr_ns(fl->owner.pid, state->pid_ns) :
			    ((fl->share == IPV6_FL_S_USER) ?
			     from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
			     0)),
L
Linus Torvalds 已提交
756 757 758
			   atomic_read(&fl->users),
			   fl->linger/HZ,
			   (long)(fl->expires - jiffies)/HZ,
759
			   &fl->dst,
L
Linus Torvalds 已提交
760 761 762 763 764
			   fl->opt ? fl->opt->opt_nflen : 0);
	}
	return 0;
}

765
static const struct seq_operations ip6fl_seq_ops = {
L
Linus Torvalds 已提交
766 767 768 769 770 771 772 773
	.start	=	ip6fl_seq_start,
	.next	=	ip6fl_seq_next,
	.stop	=	ip6fl_seq_stop,
	.show	=	ip6fl_seq_show,
};

/*
 * ->open for the proc file: set up the per-namespace seq_file and remember
 * (with a reference) the opener's active pid namespace for later pid
 * translation. The reference is dropped in ip6fl_seq_release().
 */
static int ip6fl_seq_open(struct inode *inode, struct file *file)
{
	struct ip6fl_iter_state *state;
	struct seq_file *seq;
	int err;

	err = seq_open_net(inode, file, &ip6fl_seq_ops,
			   sizeof(struct ip6fl_iter_state));
	if (err)
		return err;

	seq = file->private_data;
	state = ip6fl_seq_private(seq);

	rcu_read_lock();
	state->pid_ns = get_pid_ns(task_active_pid_ns(current));
	rcu_read_unlock();

	return 0;
}

/*
 * ->release for the proc file: drop the pid namespace reference taken at
 * open time, then tear down the seq_file.
 */
static int ip6fl_seq_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

	put_pid_ns(state->pid_ns);

	return seq_release_net(inode, file);
}

799
static const struct file_operations ip6fl_seq_fops = {
L
Linus Torvalds 已提交
800 801 802 803
	.owner		=	THIS_MODULE,
	.open		=	ip6fl_seq_open,
	.read		=	seq_read,
	.llseek		=	seq_lseek,
804
	.release	=	ip6fl_seq_release,
L
Linus Torvalds 已提交
805 806
};

807
/*
 * Create the world-readable "ip6_flowlabel" proc entry for namespace @net.
 * Returns 0 on success, -ENOMEM if the entry could not be created.
 */
static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
	return proc_net_fops_create(net, "ip6_flowlabel",
				    S_IRUGO, &ip6fl_seq_fops) ? 0 : -ENOMEM;
}
L
Linus Torvalds 已提交
814

815
/* Remove the "ip6_flowlabel" proc entry of namespace @net. */
static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
	proc_net_remove(net, "ip6_flowlabel");
}
#else
/* Stub used when CONFIG_PROC_FS is not set: nothing to create, report success. */
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
/* Stub used when CONFIG_PROC_FS is not set: nothing to remove. */
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
L
Linus Torvalds 已提交
827
#endif
828

829
/*
 * Per-namespace exit hook: free the namespace's unused flow labels, then
 * remove its proc entry.
 */
static void __net_exit ip6_flowlabel_net_exit(struct net *net)
{
	ip6_fl_purge(net);
	ip6_flowlabel_proc_fini(net);
}

static struct pernet_operations ip6_flowlabel_net_ops = {
836
	.init = ip6_flowlabel_proc_init,
837 838 839
	.exit = ip6_flowlabel_net_exit,
};

840 841
int ip6_flowlabel_init(void)
{
842
	return register_pernet_subsys(&ip6_flowlabel_net_ops);
L
Linus Torvalds 已提交
843 844 845 846 847
}

void ip6_flowlabel_cleanup(void)
{
	del_timer(&ip6_fl_gc_timer);
848
	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
L
Linus Torvalds 已提交
849
}