ip6mr.c 54.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

19
#include <linux/uaccess.h>
20 21 22 23 24 25 26 27 28 29 30 31 32 33
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
34
#include <linux/compat.h>
35 36 37 38 39 40 41
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
42
#include <net/fib_rules.h>
43 44 45 46

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
47
#include <linux/pim.h>
48 49
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
50
#include <linux/export.h>
D
Dave Jones 已提交
51
#include <net/ip6_checksum.h>
52
#include <linux/netconf.h>
53

54 55 56 57 58
/* An ip6mr fib rule carries no private state; only the common fields
 * (action, table id) are used for table selection.
 */
struct ip6mr_rule {
	struct fib_rule		common;
};

/* Result of an ip6mr rule lookup: the selected multicast routing table */
struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

/* Slab cache for struct mfc6_cache entries */
static struct kmem_cache *mrt_cachep __read_mostly;

/* Forward declarations; see the definitions below */
static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);
97 98

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Walk every multicast routing table of @net (RCU list traversal) */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

/* Return the table following @mrt in @net's list, or the first table
 * when @mrt is NULL; NULL once the end of the list is reached.
 */
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}

119
/* Find the multicast routing table with table id @id, or NULL */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

130
/* Run the ip6mr fib rules for flow @flp6 and return the selected
 * multicast routing table in *@mrt.  Returns 0 on success or the
 * negative errno produced by the matching rule.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

/* fib_rules .action callback: map the rule's action to an errno, or
 * resolve its table id into the lookup result for FR_ACT_TO_TBL.
 */
static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	/* -EAGAIN lets fib_rules_lookup() continue with the next rule */
	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

/* Every packet matches; ip6mr rules discriminate by action/table only */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

/* No private netlink attributes to parse */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

/* No private attributes, so any two rules compare equal */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

/* ip6mr rules carry no src/dst/tos selectors; report them as zero */
static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

203
/* Template cloned per-netns by fib_rules_register() in ip6mr_rules_init() */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

/* Per-netns init (multi-table build): register the rules ops, create
 * the default table and install a catch-all rule pointing at it.
 */
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	/* lowest priority (0x7fff) default rule targeting the default table */
	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

/* Per-netns teardown (multi-table build): free every table and
 * unregister the rules ops, all under RTNL.
 */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}
#else
/* Single-table build: the "iteration" visits the one table exactly once */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}

/* @id is ignored: there is only one table */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

/* No rules in this configuration; lookup always yields the one table */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
#endif

300 301 302 303 304 305 306 307 308 309 310
/* rhashtable compare callback: returns 0 when the entry's (group, origin)
 * pair matches the key, non-zero otherwise (rhashtable convention).
 */
static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

/* Hash table layout for the resolved MFC cache */
static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.locks_mul = 1,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

320 321 322 323 324 325 326 327
/* Hook invoked by mr_table_alloc() once a new table is initialized;
 * links it into the per-netns table list on multi-table builds.
 */
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

/* (*,*) wildcard key used by the generic mr_mfc "any" lookups */
static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};

338
/* Return the multicast routing table with id @id, creating it first if
 * it does not exist yet.  Returns NULL on allocation failure.
 */
static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt = ip6mr_get_table(net, id);

	if (!mrt)
		mrt = mr_table_alloc(net, id, &ip6mr_mr_table_ops,
				     ipmr_expire_process, ip6mr_new_table_set);
	return mrt;
}
349

350
/* Tear down one table: stop the expiry timer, flush every route and
 * vif, destroy the MFC hash and free the table itself.
 */
static void ip6mr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}
357 358

#ifdef CONFIG_PROC_FS
Y
Yuval Mintz 已提交
359 360
/* The /proc interfaces to multicast routing
 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	/* /proc only shows the default table */
	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	/* mrt_lock is held for the whole dump; dropped in ..._seq_stop() */
	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

/* Emit one /proc/net/ip6_mr_vif line (or the header row) */
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

408
/* seq_file plumbing for /proc/net/ip6_mr_vif */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct mr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

/* Begin a dump of /proc/net/ip6_mr_cache over the default table */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

/* Emit one /proc/net/ip6_mr_cache line (or the header row) */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

J
James Morris 已提交
482
/* seq_file plumbing for /proc/net/ip6_mr_cache */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct mr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

503 504 505 506 507 508 509
#ifdef CONFIG_IPV6_PIMSM_V2

/* Receive handler for IPPROTO_PIM: validate a PIM REGISTER message,
 * unwrap the encapsulated IPv6 multicast packet and re-inject it on
 * this table's register vif.  Always consumes @skb; returns 0.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/* must be a REGISTER (not null-register) with a valid checksum:
	 * either the header-only checksum or the full-packet one
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	/* take a reference on the register device under mrt_lock */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	/* strip the outer headers and re-inject the inner packet */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

570
/* inet6 protocol handler registered for IPPROTO_PIM */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

576 577
/* Transmit on the pim6reg device: instead of sending anywhere, bounce
 * the whole packet up to the pim6sd daemon as an MRT6MSG_WHOLEPKT
 * report, then free it.
 */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

603 604 605 606 607
/* The register vif has no underlying link device */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

613 614 615 616 617
/* netdev setup callback for the pim6reg pseudo-device */
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	/* room for outer IPv6 header plus PIM register header */
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

623
/* Create, register and bring up the per-table pim6reg device.
 * Returns the held device, or NULL on any failure.
 */
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	/* the default table keeps the historic "pim6reg" name */
	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif

656 657 658 659
/*
 *	Delete a VIF entry
 */

static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	/* detach the device under the write lock so readers never see
	 * a half-removed vif
	 */
	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* shrink maxvif down to the highest remaining vif */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* register vifs are torn down with the table unless the device
	 * itself is already going away (@notify)
	 */
	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

714
/* RCU callback: return a cache entry to the slab */
static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

/* Free a cache entry after an RCU grace period, since the data path
 * looks entries up under rcu_read_lock()
 */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}

726 727 728 729
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			/* version 0 marks a queued netlink request: turn it
			 * into an ETIMEDOUT error reply for the requester
			 */
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


754
/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	/* drop every expired entry; track the nearest future expiry */
	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	/* re-arm for the next pending entry, if any remain */
	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

780
/* Expiry timer callback: run the unresolved-queue sweep, retrying
 * shortly if the queue lock is currently contended.
 */
static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	/* reset to "no output interfaces"; 255 means "do not forward" */
	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

820
/* Create and activate the virtual interface described by @vifc.
 * @mrtsock: set when the request comes from the mroute socket, in
 * which case the vif is not marked VIFF_STATIC.
 */
static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

893
/* Exact (S,G) lookup in the resolved MFC cache */
static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	/* (*,*) wildcard when the group itself is unspecified */
	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

935
/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	/* allow an immediate assert: pretend the threshold already passed */
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	return c;
}

/* Allocate an entry for the unresolved queue (atomic context) */
static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	/* unresolved entries live for 10 seconds at most */
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			/* version 0 marks a queued netlink request: answer
			 * it now using the freshly resolved route
			 */
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* WHOLEPKT reports carry the complete packet; others only the
	 * IPv6 header plus the mrt6msg
	 */
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* the control socket may be closed concurrently; check under RCU */
	rcu_read_lock();
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

1089 1090 1091
/* Queue a packet for resolution. It gets locked cache entry! */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	/* is there already an unresolved entry for this (S,G)? */
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		/* cap the unresolved queue at 10 entries */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

1163
/* Delete one MFC entry matching (origin, group, parent) from @mrt.
 * Caller holds RTNL (entries are added/deleted only under RTNL);
 * readers are protected by RCU, hence list_del_rcu().
 * Returns 0, or -ENOENT when no matching entry exists.
 */
static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	/* Notify listeners, then free (ip6mr_cache_free is RCU-deferred
	 * in this file - see its definition; NOTE(review): confirm).
	 */
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	ip6mr_cache_free(c);
	return 0;
}

/* netdevice notifier: on NETDEV_UNREGISTER, drop every vif in every
 * multicast routing table of the device's netns that points at the
 * disappearing device.  Always returns NOTIFY_DONE.
 */
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				/* notify=1: we are in the notifier chain */
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

/* Netdevice notifier block; registered in ip6_mr_init() */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

1214 1215
/* Per-netns init: set up routing-rule tables and (optionally) the
 * /proc/net/ip6_mr_vif and /proc/net/ip6_mr_cache entries.
 * Unwinds in reverse order on failure (goto-cleanup ladder).
 */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}

/* Per-netns teardown: mirror of ip6mr_net_init() in reverse order. */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}

/* Per-network-namespace hooks for ip6mr; .async allows setup/teardown
 * without the global pernet serialization (kernel API of this era).
 */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
	.async = true,
};

W
Wang Chen 已提交
1257
/* Module init for IPv6 multicast routing.
 *
 * Sets up, in order: the mfc6_cache slab, the pernet subsystem, the
 * netdevice notifier, the PIM protocol handler (if configured) and the
 * RTNL dump handler.  On failure, unwinds in reverse order.
 *
 * Fix: previously unregister_netdevice_notifier() lived inside the
 * CONFIG_IPV6_PIMSM_V2 block, so when PIMSM was not configured and
 * rtnl_register_module() failed, the notifier was leaked.  The call is
 * now outside the #ifdef; only the add_proto_fail label stays inside.
 */
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

W
Wang Chen 已提交
1299 1300
/* Module teardown: strict reverse of the successful ip6_mr_init() path. */
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1309

1310
/* Add or update an MFC entry from a user-space MRT6_ADD_MFC(_PROXY)
 * request.  Caller holds RTNL.
 *
 * @mrtsock: non-zero when the request came from the mroute control
 *           socket itself; otherwise the entry is marked MFC_STATIC.
 * @parent:  expected parent vif of an existing entry, or -1.
 *
 * If a matching entry exists its thresholds/parent are updated in
 * place; otherwise a new entry is created, inserted into the hash and
 * cache list, and any queued unresolved packets for the same (S,G)
 * are released via ip6mr_cache_resolve().
 */
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* ttl 255 disables forwarding on a vif; requested vifs get ttl 1 */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		/* Existing entry: update under mrt_lock */
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	/* New entries must target a multicast group (or any-group proxy) */
	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* Stop the expiry timer once nothing is pending */
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		/* Flush the packets that were waiting on this route */
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

1399
/*
 *	Close the multicast socket, and clear the vif tables etc
 *
 * @all: when false, entries/vifs flagged STATIC (installed by an
 *       administrator rather than the daemon) are preserved.
 * Caller holds RTNL (required by mif6_delete and the rcu list ops).
 */
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	/* Batch-unregister the pim6reg devices collected above */
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
		list_del_rcu(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_cache_free((struct mfc6_cache *)c);
	}

	/* Drop any still-unresolved entries and their queued packets */
	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
					  RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

1435
/* MRT6_INIT handler: bind @sk as the single mroute control socket of
 * @mrt and enable mc_forwarding for the netns.  Returns -EADDRINUSE if
 * a control socket is already registered.  Sends a netconf
 * notification outside mrt_lock but still under RTNL.
 */
static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		/* Defer sk freeing to an RCU grace period (see sk_done) */
		sock_set_flag(sk, SOCK_RCU_FREE);
		net->ipv6.devconf_all->mc_forwarding++;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}

/* MRT6_DONE handler (also called on socket close): detach @sk from the
 * table it controls, disable mc_forwarding, and clean the non-static
 * routes/vifs.  Returns 0 if @sk controlled a table, -EACCES otherwise.
 */
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	/* Only ICMPv6 raw sockets may be mroute control sockets */
	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			write_lock_bh(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			/* Keep STATIC entries: all=false */
			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

Y
Yuval Mintz 已提交
1497
bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1498
{
1499
	struct mr_table *mrt;
1500
	struct flowi6 fl6 = {
1501
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1502 1503
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
1504 1505
	};

1506
	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1507
		return NULL;
1508

1509
	return rcu_access_pointer(mrt->mroute_sk);
1510
}
Y
Yuval Mintz 已提交
1511
EXPORT_SYMBOL(mroute6_is_socket);
1512

1513 1514 1515 1516 1517 1518 1519
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

1520
/* setsockopt() handler for the MRT6_* options on an ICMPv6 raw socket.
 *
 * Only the registered mroute control socket or a CAP_NET_ADMIN holder
 * may issue options other than MRT6_INIT.  Each case validates optlen,
 * copies the argument from user space, and dispatches under the
 * appropriate lock (RTNL for table/vif/MFC mutation).
 *
 * Fix: in the MRT6_TABLE case, raw6_sk(sk)->ip6mr_table was assigned
 * even when ip6mr_new_table() failed, leaving the socket bound to a
 * table id that was never created.  The id is now stored only on
 * successful table creation.
 */
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		/* Enabling PIM implies enabling asserts */
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

/* getsockopt() handler for the readable MRT6_* options.
 * MRT6_VERSION reports 0x0305; MRT6_PIM / MRT6_ASSERT report the
 * current table flags.  Copies at most sizeof(int) bytes, honouring the
 * caller-supplied length.
 */
int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	/* Never copy more than an int; a negative length is invalid */
	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

/*
 *	The IP multicast ioctl support routines.
 *
 * SIOCGETMIFCNT_IN6: per-vif packet/byte counters (under mrt_lock).
 * SIOCGETSGCNT_IN6:  per-(S,G) route counters (under RCU).
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			/* copy_to_user may sleep: only after unlocking */
			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803
#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
1804
	struct vif_device *vif;
1805 1806
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
1807
	struct mr_table *mrt;
1808 1809

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1810
	if (!mrt)
1811 1812 1813 1814 1815 1816 1817 1818 1819
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
1820 1821
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

1838
		rcu_read_lock();
1839 1840
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
1841 1842 1843
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1844
			rcu_read_unlock();
1845 1846 1847 1848 1849

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
1850
		rcu_read_unlock();
1851 1852 1853 1854 1855 1856
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif
1857

1858
/* Netfilter okfn for the forward path: bump per-netns forwarding
 * counters and hand the skb to the output path.
 */
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

1871
/*
 *	Processing handlers for ip6mr_forward
 *
 * Transmit @skb out of vif @vifi: route it, decrement hop_limit and
 * push it through the NF_INET_FORWARD hook.  A MIFF_REGISTER vif
 * instead reports the whole packet to the daemon (PIM register).
 * Consumes @skb on every path.
 */
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		/* Register vif: account and punt to user space */
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

1945
static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
1946 1947
{
	int ct;
1948 1949

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1950
		if (mrt->vif_table[ct].dev == dev)
1951 1952 1953 1954 1955
			break;
	}
	return ct;
}

1956
/* Core forwarding decision for a packet matching MFC entry @c.
 *
 * Accounts the packet, performs the wrong-interface (RPF) check with
 * optional PIM assert reporting, then replicates the skb to every
 * output vif whose ttl threshold permits it.  Consumes @skb.
 * Caller holds mrt_lock for read (see ip6_mr_input).
 */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    /* Rate-limit asserts to one per MFC_ASSERT_THRESH */
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				/* Clone for every vif but the last one */
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2,
						       c, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		/* Last target gets the original skb, no clone needed */
		ip6mr_forward2(net, mrt, skb, c, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}


/*
 *	Multicast packets for forwarding arrive here
 */

/*
 *	Multicast packets for forwarding arrive here
 *
 * Looks up the routing table for the skb's flow, then the MFC entry
 * for (saddr, daddr) with (*,G)/(*,*) fallbacks.  Packets with no
 * entry are queued for resolution; otherwise they are forwarded.
 * Consumes @skb on every path.
 */
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		/* No exact (S,G): try a (*,G)/(*,*) proxy entry */
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			/* Queue it and ask the daemon to resolve */
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

2112
/* Fill multicast route attributes into @rtm for an RTM_GETROUTE reply
 * on the skb's (src, dst) pair.  When no MFC entry exists, a minimal
 * dummy IPv6 header is built and queued as an unresolved entry so the
 * daemon can install the route; the netlink @portid is stashed in the
 * dummy skb for the eventual reply.
 */
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	/* Only the default table is consulted here */
	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Synthetic header: only saddr/daddr matter for lookup */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	return err;
}

2182
/* Build one RTM_{NEW,DEL,GET}ROUTE netlink message describing MFC
 * entry @c into @skb.  Returns 0 or -EMSGSIZE; the message is
 * cancelled on failure so the skb stays consistent for a dump.
 */
static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

Y
Yuval Mintz 已提交
2226 2227 2228 2229 2230 2231 2232 2233
static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags)
{
	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
				 cmd, flags);
}

2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248
static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
2249
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2250 2251 2252 2253 2254
		;

	return len;
}

/* Notify RTNLGRP_IPV6_MROUTE listeners that multicast route @mfc was
 * added/deleted/changed (@cmd is the RTM_* message type).  Allocation is
 * GFP_ATOMIC, so this is safe to call from non-sleeping context; on any
 * failure the error is recorded on the mcast group via rtnl_set_sk_err().
 */
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	/* mfc_parent >= MAXMIFS marks an unresolved entry, which needs a
	 * smaller message (see mr6_msgsize()).
	 */
	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);	/* kfree_skb(NULL) is a no-op */
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}

2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
					/* IP6MRA_CREPORT_SRC_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_DST_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

/* Broadcast an RTM_NEWCACHEREPORT to RTNLGRP_IPV6_MROUTE_R listeners for
 * the cache-miss/report packet @pkt.  @pkt's transport header must point
 * at the struct mrt6msg pseudo-header; the original packet payload that
 * follows it is copied into the IP6MRA_CREPORT_PKT attribute.  Uses
 * GFP_ATOMIC allocations, so callers may hold non-sleeping locks; on any
 * failure -ENOBUFS is recorded on the multicast group.
 */
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	/* Everything past the mrt6msg header is original packet data. */
	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	/* Reserve the attribute first, then copy the (possibly paged)
	 * packet bits into it with skb_copy_bits().
	 */
	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}

/* RTM_GETROUTE dump handler for RTNL_FAMILY_IP6MR: delegate to the
 * generic multicast-route dumper, supplying the IPv6 table iterator,
 * the per-entry fill adaptor, and the unresolved-cache lock.
 */
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
				_ip6mr_fill_mroute, &mfc_unres_lock);
}