/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/uaccess.h>
#include <linux/types.h>
31
#include <linux/capability.h>
L
Linus Torvalds 已提交
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
48
#include <linux/if_ether.h>
49
#include <linux/slab.h>
50
#include <net/net_namespace.h>
L
Linus Torvalds 已提交
51 52 53
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
54
#include <net/route.h>
L
Linus Torvalds 已提交
55 56 57 58 59 60 61
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
62
#include <linux/compat.h>
63
#include <linux/export.h>
64
#include <net/ip_tunnels.h>
L
Linus Torvalds 已提交
65
#include <net/checksum.h>
66
#include <net/netlink.h>
67
#include <net/fib_rules.h>
68
#include <linux/netconf.h>
L
Linus Torvalds 已提交
69 70 71 72 73

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

74
struct mr_table {
75
	struct list_head	list;
76
	possible_net_t		net;
77
	u32			id;
E
Eric Dumazet 已提交
78
	struct sock __rcu	*mroute_sk;
79 80 81 82 83 84
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
85 86
	bool			mroute_do_assert;
	bool			mroute_do_pim;
87 88 89 90 91
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

92 93 94 95 96 97 98 99
/* Multicast routing rule: carries nothing beyond the generic fib_rule. */
struct ipmr_rule {
	struct fib_rule		common;
};

/* Result of a rule lookup: the table selected by ipmr_rule_action(). */
struct ipmr_result {
	struct mr_table		*mrt;
};

L
Linus Torvalds 已提交
100
/* Big lock, protecting vif table, mrt cache and mroute socket state.
E
Eric Dumazet 已提交
101
 * Note that the changes are semaphored via rtnl_lock.
L
Linus Torvalds 已提交
102 103 104 105 106 107 108 109
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

110
#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
L
Linus Torvalds 已提交
111 112 113 114 115

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
E
Eric Dumazet 已提交
116 117 118 119 120
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
L
Linus Torvalds 已提交
121 122
 */

123
static struct kmem_cache *mrt_cachep __read_mostly;
L
Linus Torvalds 已提交
124

125
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
126 127
static void ipmr_free_table(struct mr_table *mrt);

128 129 130
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc_cache *cache,
			  int local);
131
static int ipmr_cache_report(struct mr_table *mrt,
132
			     struct sk_buff *pkt, vifi_t vifi, int assert);
133 134
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
135 136
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd);
137
static void mroute_clean_tables(struct mr_table *mrt);
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
/* Iterate over every table linked on net->ipv4.mr_tables (RCU list walk). */
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

/* Return the table of @net whose identifier equals @id, or NULL if none. */
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *tbl;
	struct mr_table *found = NULL;

	ipmr_for_each_table(tbl, net) {
		if (tbl->id != id)
			continue;
		found = tbl;
		break;
	}

	return found;
}

D
David S. Miller 已提交
155
static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
156 157 158
			   struct mr_table **mrt)
{
	int err;
159 160 161 162 163
	struct ipmr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};
164

D
David S. Miller 已提交
165 166
	err = fib_rules_lookup(net->ipv4.mr_rules_ops,
			       flowi4_to_flowi(flp4), 0, &arg);
167 168 169 170 171 172 173 174 175 176 177
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;
L
Linus Torvalds 已提交
178

179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

/* Rule match callback: unconditionally returns 1, ignoring all arguments. */
static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

/* Netlink attribute policy for ipmr rules: only the generic FRA_* entries. */
static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

/* Rule configure callback: nothing to set up, always returns 0. */
static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

/* Rule compare callback: unconditionally returns 1, ignoring all arguments. */
static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

/*
 * Rule fill callback: the rule header reports zero for the destination
 * length, source length and TOS.  Always returns 0.
 */
static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->tos = 0;
	frh->src_len = 0;
	frh->dst_len = 0;

	return 0;
}

228
/* Rule operations template passed to fib_rules_register() per namespace. */
static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};

/*
 * Per-namespace setup for the multiple-tables configuration: register the
 * rule operations, create the default table and add the default rule that
 * points at it.
 *
 * Returns 0 on success or a negative error; on failure everything set up
 * so far is torn down again.
 */
static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	ipmr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

/*
 * Per-namespace teardown for the multiple-tables configuration: unlink and
 * free every table, then unregister the rule operations.
 */
static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	/* _safe variant: each entry is unlinked and freed while iterating */
	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del(&mrt->list);
		ipmr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
/* Single-table build: visits net->ipv4.mrt once, or not at all if NULL. */
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

/* Single-table build: @id is ignored, the namespace's only table is returned. */
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

D
David S. Miller 已提交
294
static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
295 296 297 298 299 300 301 302 303 304 305 306 307 308
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

/*
 * Single-table build: create the default table and remember it in the
 * namespace.  Returns 0, or -ENOMEM when the table could not be created.
 */
static int __net_init ipmr_rules_init(struct net *net)
{
	struct mr_table *tbl = ipmr_new_table(net, RT_TABLE_DEFAULT);

	net->ipv4.mrt = tbl;
	if (!tbl)
		return -ENOMEM;

	return 0;
}

/* Single-table build: free the namespace's only table. */
static void __net_exit ipmr_rules_exit(struct net *net)
{
	ipmr_free_table(net->ipv4.mrt);
}
#endif

/*
 * Return the table with identifier @id in @net, creating it when it does
 * not exist yet.
 *
 * A new table starts zeroed, with empty cache lists, its expiry timer
 * bound to ipmr_expire_process() and, in PIM builds, no register vif.
 * In multiple-tables builds it is appended to net->ipv4.mr_tables.
 *
 * Returns NULL when the allocation fails.
 */
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}
L
Linus Torvalds 已提交
345

346 347 348 349 350 351 352
/*
 * Destroy a table: stop its expiry timer (waiting for a running handler),
 * let mroute_clean_tables() clean it, then release the memory.
 */
static void ipmr_free_table(struct mr_table *mrt)
{
	/* the timer handler dereferences mrt, so it must be gone before kfree() */
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt);
	kfree(mrt);
}

L
Linus Torvalds 已提交
353 354
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

355 356
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
357 358
	struct net *net = dev_net(dev);

359 360
	dev_close(dev);

361
	dev = __dev_get_by_name(net, "tunl0");
362
	if (dev) {
363
		const struct net_device_ops *ops = dev->netdev_ops;
364 365 366 367 368 369 370 371 372 373 374 375
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

376 377 378 379 380 381 382
		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
383 384 385
	}
}

L
Linus Torvalds 已提交
386
static
387
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
L
Linus Torvalds 已提交
388 389 390
{
	struct net_device  *dev;

391
	dev = __dev_get_by_name(net, "tunl0");
L
Linus Torvalds 已提交
392 393

	if (dev) {
394
		const struct net_device_ops *ops = dev->netdev_ops;
L
Linus Torvalds 已提交
395 396 397 398 399 400 401 402 403 404 405 406
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device  *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
S
Stephen Hemminger 已提交
407
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
L
Linus Torvalds 已提交
408

409 410 411 412 413 414
		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
E
Eric Dumazet 已提交
415
		} else {
416
			err = -EOPNOTSUPP;
E
Eric Dumazet 已提交
417
		}
L
Linus Torvalds 已提交
418 419
		dev = NULL;

420 421
		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
L
Linus Torvalds 已提交
422 423
			dev->flags |= IFF_MULTICAST;

424
			in_dev = __in_dev_get_rtnl(dev);
425
			if (in_dev == NULL)
L
Linus Torvalds 已提交
426
				goto failure;
427 428

			ipv4_devconf_setall(in_dev);
429
			neigh_parms_data_state_setall(in_dev->arp_parms);
430
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
L
Linus Torvalds 已提交
431 432 433

			if (dev_open(dev))
				goto failure;
434
			dev_hold(dev);
L
Linus Torvalds 已提交
435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

450
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
L
Linus Torvalds 已提交
451
{
452
	struct net *net = dev_net(dev);
453
	struct mr_table *mrt;
D
David S. Miller 已提交
454 455
	struct flowi4 fl4 = {
		.flowi4_oif	= dev->ifindex,
456
		.flowi4_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
D
David S. Miller 已提交
457
		.flowi4_mark	= skb->mark,
458 459 460
	};
	int err;

D
David S. Miller 已提交
461
	err = ipmr_fib_lookup(net, &fl4, &mrt);
462 463
	if (err < 0) {
		kfree_skb(skb);
464
		return err;
465
	}
466

L
Linus Torvalds 已提交
467
	read_lock(&mrt_lock);
468 469
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
470
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
L
Linus Torvalds 已提交
471 472
	read_unlock(&mrt_lock);
	kfree_skb(skb);
473
	return NETDEV_TX_OK;
L
Linus Torvalds 已提交
474 475
}

476 477 478 479 480
/* ndo_get_iflink handler of the PIM register device: always reports 0. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

481 482
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
483
	.ndo_get_iflink = reg_vif_get_iflink,
484 485
};

L
Linus Torvalds 已提交
486 487 488
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
489
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
L
Linus Torvalds 已提交
490
	dev->flags		= IFF_NOARP;
491
	dev->netdev_ops		= &reg_vif_netdev_ops;
L
Linus Torvalds 已提交
492
	dev->destructor		= free_netdev;
T
Tom Goff 已提交
493
	dev->features		|= NETIF_F_NETNS_LOCAL;
L
Linus Torvalds 已提交
494 495
}

496
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
L
Linus Torvalds 已提交
497 498 499
{
	struct net_device *dev;
	struct in_device *in_dev;
500
	char name[IFNAMSIZ];
L
Linus Torvalds 已提交
501

502 503 504 505
	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);
L
Linus Torvalds 已提交
506

507
	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
L
Linus Torvalds 已提交
508 509 510 511

	if (dev == NULL)
		return NULL;

T
Tom Goff 已提交
512 513
	dev_net_set(dev, net);

L
Linus Torvalds 已提交
514 515 516 517 518
	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

519
	rcu_read_lock();
E
Eric Dumazet 已提交
520 521
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev) {
522
		rcu_read_unlock();
L
Linus Torvalds 已提交
523
		goto failure;
524
	}
L
Linus Torvalds 已提交
525

526
	ipv4_devconf_setall(in_dev);
527
	neigh_parms_data_state_setall(in_dev->arp_parms);
528 529
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();
L
Linus Torvalds 已提交
530 531 532 533

	if (dev_open(dev))
		goto failure;

534 535
	dev_hold(dev);

L
Linus Torvalds 已提交
536 537 538 539 540 541 542 543 544 545 546 547
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

548 549
/**
 *	vif_delete - Delete a VIF entry
550
 *	@notify: Set to 1, if the caller is a notifier_call
L
Linus Torvalds 已提交
551
 */
552

553
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
554
		      struct list_head *head)
L
Linus Torvalds 已提交
555 556 557 558 559
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

560
	if (vifi < 0 || vifi >= mrt->maxvif)
L
Linus Torvalds 已提交
561 562
		return -EADDRNOTAVAIL;

563
	v = &mrt->vif_table[vifi];
L
Linus Torvalds 已提交
564 565 566 567 568 569 570 571 572 573 574

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
575 576
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
L
Linus Torvalds 已提交
577 578
#endif

E
Eric Dumazet 已提交
579
	if (vifi + 1 == mrt->maxvif) {
L
Linus Torvalds 已提交
580
		int tmp;
E
Eric Dumazet 已提交
581 582

		for (tmp = vifi - 1; tmp >= 0; tmp--) {
583
			if (VIF_EXISTS(mrt, tmp))
L
Linus Torvalds 已提交
584 585
				break;
		}
586
		mrt->maxvif = tmp+1;
L
Linus Torvalds 已提交
587 588 589 590 591 592
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

E
Eric Dumazet 已提交
593 594
	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
595
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
596 597 598
		inet_netconf_notify_devconf(dev_net(dev),
					    NETCONFA_MC_FORWARDING,
					    dev->ifindex, &in_dev->cnf);
L
Linus Torvalds 已提交
599 600 601
		ip_rt_multicast_event(in_dev);
	}

E
Eric Dumazet 已提交
602
	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
603
		unregister_netdevice_queue(dev, head);
L
Linus Torvalds 已提交
604 605 606 607 608

	dev_put(dev);
	return 0;
}

609
static void ipmr_cache_free_rcu(struct rcu_head *head)
610
{
611 612
	struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);

613 614 615
	kmem_cache_free(mrt_cachep, c);
}

616 617 618 619 620
/* Free cache entry @c via call_rcu(), i.e. deferred to ipmr_cache_free_rcu(). */
static inline void ipmr_cache_free(struct mfc_cache *c)
{
	call_rcu(&c->rcu, ipmr_cache_free_rcu);
}

L
Linus Torvalds 已提交
621
/* Destroy an unresolved cache entry, killing queued skbs
E
Eric Dumazet 已提交
622
 * and reporting error to netlink readers.
L
Linus Torvalds 已提交
623 624
 */

625
static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
L
Linus Torvalds 已提交
626
{
627
	struct net *net = read_pnet(&mrt->net);
L
Linus Torvalds 已提交
628
	struct sk_buff *skb;
629
	struct nlmsgerr *e;
L
Linus Torvalds 已提交
630

631
	atomic_dec(&mrt->cache_resolve_queue_len);
L
Linus Torvalds 已提交
632

J
Jianjun Kong 已提交
633
	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
634
		if (ip_hdr(skb)->version == 0) {
L
Linus Torvalds 已提交
635 636
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
637
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
L
Linus Torvalds 已提交
638
			skb_trim(skb, nlh->nlmsg_len);
639
			e = nlmsg_data(nlh);
640 641
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));
642

643
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
E
Eric Dumazet 已提交
644
		} else {
L
Linus Torvalds 已提交
645
			kfree_skb(skb);
E
Eric Dumazet 已提交
646
		}
L
Linus Torvalds 已提交
647 648
	}

649
	ipmr_cache_free(c);
L
Linus Torvalds 已提交
650 651 652
}


653
/* Timer process for the unresolved queue. */
L
Linus Torvalds 已提交
654

655
static void ipmr_expire_process(unsigned long arg)
L
Linus Torvalds 已提交
656
{
657
	struct mr_table *mrt = (struct mr_table *)arg;
L
Linus Torvalds 已提交
658 659
	unsigned long now;
	unsigned long expires;
660
	struct mfc_cache *c, *next;
L
Linus Torvalds 已提交
661 662

	if (!spin_trylock(&mfc_unres_lock)) {
663
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
L
Linus Torvalds 已提交
664 665 666
		return;
	}

667
	if (list_empty(&mrt->mfc_unres_queue))
L
Linus Torvalds 已提交
668 669 670 671 672
		goto out;

	now = jiffies;
	expires = 10*HZ;

673
	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
L
Linus Torvalds 已提交
674 675 676 677 678 679 680
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

681
		list_del(&c->list);
682
		mroute_netlink_event(mrt, c, RTM_DELROUTE);
683
		ipmr_destroy_unres(mrt, c);
L
Linus Torvalds 已提交
684 685
	}

686 687
	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
L
Linus Torvalds 已提交
688 689 690 691 692 693 694

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

695
static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
696
				   unsigned char *ttls)
L
Linus Torvalds 已提交
697 698 699 700 701 702 703
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

704 705
	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
706
		    ttls[vifi] && ttls[vifi] < 255) {
L
Linus Torvalds 已提交
707 708 709 710 711 712 713 714 715
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

716 717
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
L
Linus Torvalds 已提交
718 719
{
	int vifi = vifc->vifc_vifi;
720
	struct vif_device *v = &mrt->vif_table[vifi];
L
Linus Torvalds 已提交
721 722
	struct net_device *dev;
	struct in_device *in_dev;
723
	int err;
L
Linus Torvalds 已提交
724 725

	/* Is vif busy ? */
726
	if (VIF_EXISTS(mrt, vifi))
L
Linus Torvalds 已提交
727 728 729 730 731 732 733 734 735
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
736
		if (mrt->mroute_reg_vif_num >= 0)
L
Linus Torvalds 已提交
737
			return -EADDRINUSE;
738
		dev = ipmr_reg_vif(net, mrt);
L
Linus Torvalds 已提交
739 740
		if (!dev)
			return -ENOBUFS;
741 742 743
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
744
			dev_put(dev);
745 746
			return err;
		}
L
Linus Torvalds 已提交
747 748
		break;
#endif
749
	case VIFF_TUNNEL:
750
		dev = ipmr_new_tunnel(net, vifc);
L
Linus Torvalds 已提交
751 752
		if (!dev)
			return -ENOBUFS;
753 754 755
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
756
			dev_put(dev);
757 758
			return err;
		}
L
Linus Torvalds 已提交
759
		break;
760 761

	case VIFF_USE_IFINDEX:
L
Linus Torvalds 已提交
762
	case 0:
763 764
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
E
Eric Dumazet 已提交
765
			if (dev && __in_dev_get_rtnl(dev) == NULL) {
766 767 768
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
E
Eric Dumazet 已提交
769
		} else {
770
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
E
Eric Dumazet 已提交
771
		}
L
Linus Torvalds 已提交
772 773
		if (!dev)
			return -EADDRNOTAVAIL;
774
		err = dev_set_allmulti(dev, 1);
775 776
		if (err) {
			dev_put(dev);
777
			return err;
778
		}
L
Linus Torvalds 已提交
779 780 781 782 783
		break;
	default:
		return -EINVAL;
	}

E
Eric Dumazet 已提交
784 785
	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev) {
786
		dev_put(dev);
L
Linus Torvalds 已提交
787
		return -EADDRNOTAVAIL;
788
	}
789
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
790 791
	inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex,
				    &in_dev->cnf);
L
Linus Torvalds 已提交
792 793
	ip_rt_multicast_event(in_dev);

E
Eric Dumazet 已提交
794 795
	/* Fill in the VIF structures */

J
Jianjun Kong 已提交
796 797 798 799
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
L
Linus Torvalds 已提交
800 801
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
J
Jianjun Kong 已提交
802
	v->threshold = vifc->vifc_threshold;
L
Linus Torvalds 已提交
803 804 805 806 807
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
E
Eric Dumazet 已提交
808
	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
809
		v->link = dev_get_iflink(dev);
L
Linus Torvalds 已提交
810 811 812

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
J
Jianjun Kong 已提交
813
	v->dev = dev;
L
Linus Torvalds 已提交
814
#ifdef CONFIG_IP_PIMSM
E
Eric Dumazet 已提交
815
	if (v->flags & VIFF_REGISTER)
816
		mrt->mroute_reg_vif_num = vifi;
L
Linus Torvalds 已提交
817
#endif
818 819
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
L
Linus Torvalds 已提交
820 821 822 823
	write_unlock_bh(&mrt_lock);
	return 0;
}

824
/* called with rcu_read_lock() */
825
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
826 827
					 __be32 origin,
					 __be32 mcastgrp)
L
Linus Torvalds 已提交
828
{
J
Jianjun Kong 已提交
829
	int line = MFC_HASH(mcastgrp, origin);
L
Linus Torvalds 已提交
830 831
	struct mfc_cache *c;

832
	list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) {
833 834
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
L
Linus Torvalds 已提交
835
	}
836
	return NULL;
L
Linus Torvalds 已提交
837 838
}

839 840 841 842
/* Look for a (*,*,oif) entry */
static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
						    int vifi)
{
843
	int line = MFC_HASH(htonl(INADDR_ANY), htonl(INADDR_ANY));
844 845 846
	struct mfc_cache *c;

	list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list)
847 848
		if (c->mfc_origin == htonl(INADDR_ANY) &&
		    c->mfc_mcastgrp == htonl(INADDR_ANY) &&
849 850 851 852 853 854 855 856 857 858
		    c->mfc_un.res.ttls[vifi] < 255)
			return c;

	return NULL;
}

/* Look for a (*,G) entry
 *
 * A wildcard-origin entry for @mcastgrp is returned when its ttl for
 * @vifi is below 255, or when the (*,*) entry found for its parent vif
 * has a ttl below 255 for @vifi.  Otherwise, and whenever @mcastgrp is
 * the wildcard itself, the result of the (*,*,@vifi) lookup is returned.
 */
static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
					     __be32 mcastgrp, int vifi)
{
	int line = MFC_HASH(mcastgrp, htonl(INADDR_ANY));
	struct mfc_cache *c, *proxy;

	if (mcastgrp == htonl(INADDR_ANY))
		goto skip;

	list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list)
		if (c->mfc_origin == htonl(INADDR_ANY) &&
		    c->mfc_mcastgrp == mcastgrp) {
			if (c->mfc_un.res.ttls[vifi] < 255)
				return c;

			/* It's ok if the vifi is part of the static tree */
			proxy = ipmr_cache_find_any_parent(mrt,
							   c->mfc_parent);
			if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
				return c;
		}

skip:
	return ipmr_cache_find_any_parent(mrt, vifi);
}

L
Linus Torvalds 已提交
882 883 884
/*
 *	Allocate a multicast cache entry
 */
885
static struct mfc_cache *ipmr_cache_alloc(void)
L
Linus Torvalds 已提交
886
{
J
Jianjun Kong 已提交
887
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
888 889 890

	if (c)
		c->mfc_un.res.minvif = MAXVIFS;
L
Linus Torvalds 已提交
891 892 893
	return c;
}

894
static struct mfc_cache *ipmr_cache_alloc_unres(void)
L
Linus Torvalds 已提交
895
{
J
Jianjun Kong 已提交
896
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
897 898 899 900 901

	if (c) {
		skb_queue_head_init(&c->mfc_un.unres.unresolved);
		c->mfc_un.unres.expires = jiffies + 10*HZ;
	}
L
Linus Torvalds 已提交
902 903 904 905 906 907
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */
908

909 910
static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
L
Linus Torvalds 已提交
911 912
{
	struct sk_buff *skb;
913
	struct nlmsgerr *e;
L
Linus Torvalds 已提交
914

E
Eric Dumazet 已提交
915
	/* Play the pending entries through our router */
L
Linus Torvalds 已提交
916

J
Jianjun Kong 已提交
917
	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
918
		if (ip_hdr(skb)->version == 0) {
L
Linus Torvalds 已提交
919 920
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

921
			if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
E
Eric Dumazet 已提交
922 923
				nlh->nlmsg_len = skb_tail_pointer(skb) -
						 (u8 *)nlh;
L
Linus Torvalds 已提交
924 925
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
926
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
L
Linus Torvalds 已提交
927
				skb_trim(skb, nlh->nlmsg_len);
928
				e = nlmsg_data(nlh);
929 930
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
L
Linus Torvalds 已提交
931
			}
932

933
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
E
Eric Dumazet 已提交
934
		} else {
935
			ip_mr_forward(net, mrt, skb, c, 0);
E
Eric Dumazet 已提交
936
		}
L
Linus Torvalds 已提交
937 938 939 940 941 942 943 944 945
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
946

947
static int ipmr_cache_report(struct mr_table *mrt,
948
			     struct sk_buff *pkt, vifi_t vifi, int assert)
L
Linus Torvalds 已提交
949 950
{
	struct sk_buff *skb;
951
	const int ihl = ip_hdrlen(pkt);
L
Linus Torvalds 已提交
952 953
	struct igmphdr *igmp;
	struct igmpmsg *msg;
E
Eric Dumazet 已提交
954
	struct sock *mroute_sk;
L
Linus Torvalds 已提交
955 956 957 958 959 960 961 962 963
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

S
Stephen Hemminger 已提交
964
	if (!skb)
L
Linus Torvalds 已提交
965 966 967 968 969
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
E
Eric Dumazet 已提交
970 971 972
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
L
Linus Torvalds 已提交
973
		 */
974 975
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
976
		skb_reset_transport_header(skb);
977
		msg = (struct igmpmsg *)skb_network_header(skb);
978
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
L
Linus Torvalds 已提交
979 980
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
981
		msg->im_vif = mrt->mroute_reg_vif_num;
982 983 984
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
985
	} else
L
Linus Torvalds 已提交
986
#endif
987 988
	{

E
Eric Dumazet 已提交
989
	/* Copy the IP header */
L
Linus Torvalds 已提交
990

991
	skb_set_network_header(skb, skb->len);
992
	skb_put(skb, ihl);
993
	skb_copy_to_linear_data(skb, pkt->data, ihl);
E
Eric Dumazet 已提交
994
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
995
	msg = (struct igmpmsg *)skb_network_header(skb);
L
Linus Torvalds 已提交
996
	msg->im_vif = vifi;
E
Eric Dumazet 已提交
997
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
L
Linus Torvalds 已提交
998

E
Eric Dumazet 已提交
999
	/* Add our header */
L
Linus Torvalds 已提交
1000

E
Eric Dumazet 已提交
1001
	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
L
Linus Torvalds 已提交
1002 1003
	igmp->type	=
	msg->im_msgtype = assert;
E
Eric Dumazet 已提交
1004 1005
	igmp->code	= 0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
1006
	skb->transport_header = skb->network_header;
1007
	}
L
Linus Torvalds 已提交
1008

E
Eric Dumazet 已提交
1009 1010 1011 1012
	rcu_read_lock();
	mroute_sk = rcu_dereference(mrt->mroute_sk);
	if (mroute_sk == NULL) {
		rcu_read_unlock();
L
Linus Torvalds 已提交
1013 1014 1015 1016
		kfree_skb(skb);
		return -EINVAL;
	}

E
Eric Dumazet 已提交
1017 1018
	/* Deliver to mrouted */

E
Eric Dumazet 已提交
1019 1020
	ret = sock_queue_rcv_skb(mroute_sk, skb);
	rcu_read_unlock();
1021
	if (ret < 0) {
1022
		net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
L
Linus Torvalds 已提交
1023 1024 1025 1026 1027 1028 1029 1030 1031
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */
1032

L
Linus Torvalds 已提交
1033
static int
1034
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
L
Linus Torvalds 已提交
1035
{
1036
	bool found = false;
L
Linus Torvalds 已提交
1037 1038
	int err;
	struct mfc_cache *c;
1039
	const struct iphdr *iph = ip_hdr(skb);
L
Linus Torvalds 已提交
1040 1041

	spin_lock_bh(&mfc_unres_lock);
1042
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
1043
		if (c->mfc_mcastgrp == iph->daddr &&
1044 1045
		    c->mfc_origin == iph->saddr) {
			found = true;
L
Linus Torvalds 已提交
1046
			break;
1047
		}
L
Linus Torvalds 已提交
1048 1049
	}

1050
	if (!found) {
E
Eric Dumazet 已提交
1051
		/* Create a new entry if allowable */
L
Linus Torvalds 已提交
1052

1053
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1054
		    (c = ipmr_cache_alloc_unres()) == NULL) {
L
Linus Torvalds 已提交
1055 1056 1057 1058 1059 1060
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

E
Eric Dumazet 已提交
1061 1062
		/* Fill in the new cache entry */

1063 1064 1065
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;
L
Linus Torvalds 已提交
1066

E
Eric Dumazet 已提交
1067 1068
		/* Reflect first query at mrouted. */

1069
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
1070
		if (err < 0) {
1071
			/* If the report failed throw the cache entry
L
Linus Torvalds 已提交
1072 1073 1074 1075
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

1076
			ipmr_cache_free(c);
L
Linus Torvalds 已提交
1077 1078 1079 1080
			kfree_skb(skb);
			return err;
		}

1081 1082
		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);
1083
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
L
Linus Torvalds 已提交
1084

1085 1086
		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
L
Linus Torvalds 已提交
1087 1088
	}

E
Eric Dumazet 已提交
1089 1090 1091
	/* See if we can append the packet */

	if (c->mfc_un.unres.unresolved.qlen > 3) {
L
Linus Torvalds 已提交
1092 1093 1094
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
J
Jianjun Kong 已提交
1095
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
L
Linus Torvalds 已提交
1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

1107
static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
L
Linus Torvalds 已提交
1108 1109
{
	int line;
1110
	struct mfc_cache *c, *next;
L
Linus Torvalds 已提交
1111

J
Jianjun Kong 已提交
1112
	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
L
Linus Torvalds 已提交
1113

1114
	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
L
Linus Torvalds 已提交
1115
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1116 1117
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr &&
		    (parent == -1 || parent == c->mfc_parent)) {
1118
			list_del_rcu(&c->list);
1119
			mroute_netlink_event(mrt, c, RTM_DELROUTE);
1120
			ipmr_cache_free(c);
L
Linus Torvalds 已提交
1121 1122 1123 1124 1125 1126
			return 0;
		}
	}
	return -ENOENT;
}

1127
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1128
			struct mfcctl *mfc, int mrtsock, int parent)
L
Linus Torvalds 已提交
1129
{
1130
	bool found = false;
L
Linus Torvalds 已提交
1131
	int line;
1132
	struct mfc_cache *uc, *c;
L
Linus Torvalds 已提交
1133

1134 1135 1136
	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

J
Jianjun Kong 已提交
1137
	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
L
Linus Torvalds 已提交
1138

1139
	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
L
Linus Torvalds 已提交
1140
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1141 1142
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr &&
		    (parent == -1 || parent == c->mfc_parent)) {
1143
			found = true;
L
Linus Torvalds 已提交
1144
			break;
1145
		}
L
Linus Torvalds 已提交
1146 1147
	}

1148
	if (found) {
L
Linus Torvalds 已提交
1149 1150
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
1151
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
L
Linus Torvalds 已提交
1152 1153 1154
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
1155
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
L
Linus Torvalds 已提交
1156 1157 1158
		return 0;
	}

1159
	if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
1160
	    !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
L
Linus Torvalds 已提交
1161 1162
		return -EINVAL;

1163
	c = ipmr_cache_alloc();
J
Jianjun Kong 已提交
1164
	if (c == NULL)
L
Linus Torvalds 已提交
1165 1166
		return -ENOMEM;

J
Jianjun Kong 已提交
1167 1168 1169
	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
1170
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
L
Linus Torvalds 已提交
1171 1172 1173
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

1174
	list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);
L
Linus Torvalds 已提交
1175 1176 1177 1178 1179

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
1180
	found = false;
L
Linus Torvalds 已提交
1181
	spin_lock_bh(&mfc_unres_lock);
1182
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
1183
		if (uc->mfc_origin == c->mfc_origin &&
L
Linus Torvalds 已提交
1184
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
1185
			list_del(&uc->list);
1186
			atomic_dec(&mrt->cache_resolve_queue_len);
1187
			found = true;
L
Linus Torvalds 已提交
1188 1189 1190
			break;
		}
	}
1191 1192
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
L
Linus Torvalds 已提交
1193 1194
	spin_unlock_bh(&mfc_unres_lock);

1195
	if (found) {
1196
		ipmr_cache_resolve(net, mrt, uc, c);
1197
		ipmr_cache_free(uc);
L
Linus Torvalds 已提交
1198
	}
1199
	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
L
Linus Torvalds 已提交
1200 1201 1202 1203 1204 1205
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */
1206

1207
static void mroute_clean_tables(struct mr_table *mrt)
L
Linus Torvalds 已提交
1208 1209
{
	int i;
1210
	LIST_HEAD(list);
1211
	struct mfc_cache *c, *next;
1212

E
Eric Dumazet 已提交
1213 1214
	/* Shut down all active vif entries */

1215
	for (i = 0; i < mrt->maxvif; i++) {
E
Eric Dumazet 已提交
1216
		if (!(mrt->vif_table[i].flags & VIFF_STATIC))
1217
			vif_delete(mrt, i, 0, &list);
L
Linus Torvalds 已提交
1218
	}
1219
	unregister_netdevice_many(&list);
L
Linus Torvalds 已提交
1220

E
Eric Dumazet 已提交
1221 1222
	/* Wipe the cache */

1223
	for (i = 0; i < MFC_LINES; i++) {
1224
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1225
			if (c->mfc_flags & MFC_STATIC)
L
Linus Torvalds 已提交
1226
				continue;
1227
			list_del_rcu(&c->list);
1228
			mroute_netlink_event(mrt, c, RTM_DELROUTE);
1229
			ipmr_cache_free(c);
L
Linus Torvalds 已提交
1230 1231 1232
		}
	}

1233
	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
L
Linus Torvalds 已提交
1234
		spin_lock_bh(&mfc_unres_lock);
1235
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1236
			list_del(&c->list);
1237
			mroute_netlink_event(mrt, c, RTM_DELROUTE);
1238
			ipmr_destroy_unres(mrt, c);
L
Linus Torvalds 已提交
1239 1240 1241 1242 1243
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

E
Eric Dumazet 已提交
1244 1245 1246
/* Called from ip_ra_control() before an RCU grace period, so
 * we don't need to call synchronize_rcu() here.
 */
L
Linus Torvalds 已提交
1247 1248
static void mrtsock_destruct(struct sock *sk)
{
1249
	struct net *net = sock_net(sk);
1250
	struct mr_table *mrt;
1251

L
Linus Torvalds 已提交
1252
	rtnl_lock();
1253
	ipmr_for_each_table(mrt, net) {
E
Eric Dumazet 已提交
1254
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1255
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1256 1257 1258
			inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
1259
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1260 1261
			mroute_clean_tables(mrt);
		}
L
Linus Torvalds 已提交
1262 1263 1264 1265 1266 1267 1268 1269 1270 1271
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
1272

1273
/*
 *	Handle the MRT_* socket options on a raw IGMP socket.
 *
 *	Only SOCK_RAW sockets bound to IPPROTO_IGMP are accepted
 *	(-EOPNOTSUPP otherwise). Every option except MRT_INIT requires the
 *	caller to be the table's routing socket or to hold CAP_NET_ADMIN in
 *	the namespace's user_ns (-EACCES otherwise). Unknown options, and
 *	options compiled out by the #ifdefs below, yield -ENOPROTOOPT.
 *
 *	Returns 0 on success or a negative errno.
 */
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;
	struct vifctl vif;
	struct mfcctl mfc;
	int parent;
	int ret;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP)
		return -EOPNOTSUPP;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT &&
	    sk != rcu_access_pointer(mrt->mroute_sk) &&
	    !ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EACCES;

	switch (optname) {
	case MRT_INIT:
		if (optlen != sizeof(int))
			return -EINVAL;

		rtnl_lock();
		if (rtnl_dereference(mrt->mroute_sk)) {
			/* The table already has a routing socket */
			ret = -EADDRINUSE;
			goto out_unlock;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			rcu_assign_pointer(mrt->mroute_sk, sk);
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
			inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
		}
		goto out_unlock;

	case MRT_DONE:
		if (sk != rcu_access_pointer(mrt->mroute_sk))
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);

	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;

		rtnl_lock();
		if (optname == MRT_ADD_VIF)
			ret = vif_add(net, mrt, &vif,
				      sk == rtnl_dereference(mrt->mroute_sk));
		else
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		goto out_unlock;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
	case MRT_ADD_MFC_PROXY:
	case MRT_DEL_MFC_PROXY:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;

		/* The plain variants match any parent (-1); the _PROXY
		 * variants use the parent supplied by the caller.
		 */
		if (optname == MRT_ADD_MFC || optname == MRT_DEL_MFC)
			parent = -1;
		else
			parent = mfc.mfcc_parent;

		rtnl_lock();
		if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
			ret = ipmr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc,
					   sk == rtnl_dereference(mrt->mroute_sk),
					   parent);
		goto out_unlock;

		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int enable;

		if (optlen != sizeof(enable))
			return -EINVAL;
		if (get_user(enable, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = enable;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int enable;

		if (optlen != sizeof(enable))
			return -EINVAL;
		if (get_user(enable, (int __user *)optval))
			return -EFAULT;
		enable = !!enable;

		rtnl_lock();
		ret = 0;
		/* Changing the PIM setting also sets the assert flag to match */
		if (enable != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = enable;
			mrt->mroute_do_assert = enable;
		}
		goto out_unlock;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 table_id;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(table_id, (u32 __user *)optval))
			return -EFAULT;

		/* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (table_id != RT_TABLE_DEFAULT && table_id >= 1000000000)
			return -EINVAL;

		rtnl_lock();
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			/* The active routing socket may not switch tables */
			ret = -EBUSY;
		} else if (!ipmr_new_table(net, table_id)) {
			ret = -ENOMEM;
		} else {
			raw_sk(sk)->ipmr_table = table_id;
			ret = 0;
		}
		goto out_unlock;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}

out_unlock:
	rtnl_unlock();
	return ret;
}

/*
 *	Getsock opt support for the multicast routing system.
 */
1436

J
Jianjun Kong 已提交
1437
/*
 *	Read MRT_VERSION, MRT_ASSERT or (with CONFIG_IP_PIMSM) MRT_PIM.
 *
 *	Only SOCK_RAW sockets bound to IPPROTO_IGMP are accepted
 *	(-EOPNOTSUPP otherwise); any other option yields -ENOPROTOOPT.
 *	The length supplied through optlen is clamped to sizeof(int) and
 *	written back, and that many bytes of the value are copied to
 *	optval.
 *
 *	Returns 0 on success, -EINVAL for a negative length, -EFAULT when
 *	user memory cannot be accessed, or -ENOENT when the socket's table
 *	does not exist.
 */
int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP)
		return -EOPNOTSUPP;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (optname) {
	case MRT_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	/* Reject a negative length BEFORE clamping: min_t() compares as
	 * unsigned int, so a negative value would be clamped to
	 * sizeof(int) and the check could never fire afterwards.
	 */
	if (olr < 0)
		return -EINVAL;
	olr = min_t(unsigned int, olr, sizeof(int));

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */
1484

L
Linus Torvalds 已提交
1485 1486 1487 1488 1489 1490
/*
 *	Multicast routing ioctls.
 *
 *	SIOCGETVIFCNT copies the packet and byte counters of one vif to
 *	user space; SIOCGETSGCNT copies the packet, byte and wrong_if
 *	counters of one (source, group) cache entry. Both read the request
 *	from arg and write the result back to it.
 *
 *	Returns 0 on success, -EFAULT on a failed user copy, -EINVAL for a
 *	vif index not below mrt->maxvif, -EADDRNOTAVAIL when the vif or
 *	cache entry does not exist, -ENOENT when the socket's table does
 *	not exist, or -ENOIOCTLCMD for any other command.
 */
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;
	struct sioc_vif_req vr;
	struct sioc_sg_req sr;
	struct vif_device *vif;
	struct mfc_cache *c;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;

		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (!VIF_EXISTS(mrt, vr.vifi)) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		/* Snapshot the counters under the lock, copy out after it */
		vr.icount = vif->pkt_in;
		vr.ocount = vif->pkt_out;
		vr.ibytes = vif->bytes_in;
		vr.obytes = vif->bytes_out;
		read_unlock(&mrt_lock);

		return copy_to_user(arg, &vr, sizeof(vr)) ? -EFAULT : 0;

	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (!c) {
			rcu_read_unlock();
			return -EADDRNOTAVAIL;
		}
		sr.pktcnt = c->mfc_un.res.pkt;
		sr.bytecnt = c->mfc_un.res.bytes;
		sr.wrong_if = c->mfc_un.res.wrong_if;
		rcu_read_unlock();

		return copy_to_user(arg, &sr, sizeof(sr)) ? -EFAULT : 0;

	default:
		return -ENOIOCTLCMD;
	}
}

1542 1543 1544 1545 1546 1547 1548 1549 1550
#ifdef CONFIG_COMPAT
/* SIOCGETSGCNT request/reply as exchanged by ipmr_compat_ioctl(); the
 * counters are compat_ulong_t rather than the native type.
 */
struct compat_sioc_sg_req {
	struct in_addr src;		/* source address looked up */
	struct in_addr grp;		/* group address looked up */
	compat_ulong_t pktcnt;		/* filled from mfc_un.res.pkt */
	compat_ulong_t bytecnt;		/* filled from mfc_un.res.bytes */
	compat_ulong_t wrong_if;	/* filled from mfc_un.res.wrong_if */
};

1551 1552 1553 1554 1555 1556 1557 1558
/* SIOCGETVIFCNT request/reply as exchanged by ipmr_compat_ioctl(); the
 * counters are compat_ulong_t rather than the native type.
 */
struct compat_sioc_vif_req {
	vifi_t	vifi;		/* Which iface */
	compat_ulong_t icount;	/* filled from vif->pkt_in */
	compat_ulong_t ocount;	/* filled from vif->pkt_out */
	compat_ulong_t ibytes;	/* filled from vif->bytes_in */
	compat_ulong_t obytes;	/* filled from vif->bytes_out */
};

1559 1560
/*
 *	Compat variant of ipmr_ioctl(): same commands and error codes, but
 *	the requests use struct compat_sioc_vif_req and
 *	struct compat_sioc_sg_req.
 */
int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;
	struct compat_sioc_vif_req vr;
	struct compat_sioc_sg_req sr;
	struct vif_device *vif;
	struct mfc_cache *c;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;

		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (!VIF_EXISTS(mrt, vr.vifi)) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		/* Snapshot the counters under the lock, copy out after it */
		vr.icount = vif->pkt_in;
		vr.ocount = vif->pkt_out;
		vr.ibytes = vif->bytes_in;
		vr.obytes = vif->bytes_out;
		read_unlock(&mrt_lock);

		return copy_to_user(arg, &vr, sizeof(vr)) ? -EFAULT : 0;

	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (!c) {
			rcu_read_unlock();
			return -EADDRNOTAVAIL;
		}
		sr.pktcnt = c->mfc_un.res.pkt;
		sr.bytecnt = c->mfc_un.res.bytes;
		sr.wrong_if = c->mfc_un.res.wrong_if;
		rcu_read_unlock();

		return copy_to_user(arg, &sr, sizeof(sr)) ? -EFAULT : 0;

	default:
		return -ENOIOCTLCMD;
	}
}
#endif

L
Linus Torvalds 已提交
1617 1618 1619

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
1620
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1621
	struct net *net = dev_net(dev);
1622
	struct mr_table *mrt;
L
Linus Torvalds 已提交
1623 1624
	struct vif_device *v;
	int ct;
1625

L
Linus Torvalds 已提交
1626 1627
	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
1628 1629 1630 1631 1632

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
1633
				vif_delete(mrt, ct, 1, NULL);
1634
		}
L
Linus Torvalds 已提交
1635 1636 1637 1638 1639
	}
	return NOTIFY_DONE;
}


J
Jianjun Kong 已提交
1640
static struct notifier_block ip_mr_notifier = {
L
Linus Torvalds 已提交
1641 1642 1643 1644
	.notifier_call = ipmr_device_event,
};

/*
E
Eric Dumazet 已提交
1645
 *	Encapsulate a packet by attaching a valid IPIP header to it.
L
Linus Torvalds 已提交
1646 1647 1648
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */
1649

1650 1651
/*
 *	Prepend an outer IPv4 header (protocol IPPROTO_IPIP, no options)
 *	to skb. TOS and TTL are copied from the existing header, which
 *	becomes the transport header; saddr/daddr are the outer addresses.
 *	The IP options in the skb control block are cleared and the
 *	netfilter state is reset.
 */
static void ip_encap(struct net *net, struct sk_buff *skb,
		     __be32 saddr, __be32 daddr)
{
	const struct iphdr *inner = ip_hdr(skb);
	struct iphdr *outer;

	/* Make room in front and let the old header become the payload */
	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	outer = ip_hdr(skb);

	outer->version = 4;
	outer->ihl = 5;
	outer->tos = inner->tos;
	outer->ttl = inner->ttl;
	outer->frag_off = 0;
	outer->protocol = IPPROTO_IPIP;
	outer->saddr = saddr;
	outer->daddr = daddr;
	outer->tot_len = htons(skb->len);

	/* Identification and checksum go last, once the header is filled */
	ip_select_ident(net, skb, NULL);
	ip_send_check(outer);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
E
Eric Dumazet 已提交
1679
	struct ip_options *opt = &(IPCB(skb)->opt);
L
Linus Torvalds 已提交
1680

E
Eric Dumazet 已提交
1681
	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1682
	IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
L
Linus Torvalds 已提交
1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

1694 1695
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
L
Linus Torvalds 已提交
1696
{
1697
	const struct iphdr *iph = ip_hdr(skb);
1698
	struct vif_device *vif = &mrt->vif_table[vifi];
L
Linus Torvalds 已提交
1699 1700
	struct net_device *dev;
	struct rtable *rt;
1701
	struct flowi4 fl4;
L
Linus Torvalds 已提交
1702 1703 1704 1705 1706 1707 1708 1709
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
J
Jianjun Kong 已提交
1710
		vif->bytes_out += skb->len;
1711 1712
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
1713
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1714
		goto out_free;
L
Linus Torvalds 已提交
1715 1716 1717
	}
#endif

E
Eric Dumazet 已提交
1718
	if (vif->flags & VIFF_TUNNEL) {
1719
		rt = ip_route_output_ports(net, &fl4, NULL,
1720 1721 1722 1723
					   vif->remote, vif->local,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos), vif->link);
1724
		if (IS_ERR(rt))
L
Linus Torvalds 已提交
1725 1726 1727
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
1728
		rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
1729 1730 1731
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos), vif->link);
1732
		if (IS_ERR(rt))
L
Linus Torvalds 已提交
1733 1734 1735
			goto out_free;
	}

1736
	dev = rt->dst.dev;
L
Linus Torvalds 已提交
1737

1738
	if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
L
Linus Torvalds 已提交
1739
		/* Do not fragment multicasts. Alas, IPv4 does not
E
Eric Dumazet 已提交
1740 1741
		 * allow to send ICMP, so that packets will disappear
		 * to blackhole.
L
Linus Torvalds 已提交
1742 1743
		 */

1744
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
L
Linus Torvalds 已提交
1745 1746 1747 1748
		ip_rt_put(rt);
		goto out_free;
	}

1749
	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
L
Linus Torvalds 已提交
1750 1751

	if (skb_cow(skb, encap)) {
1752
		ip_rt_put(rt);
L
Linus Torvalds 已提交
1753 1754 1755 1756
		goto out_free;
	}

	vif->pkt_out++;
J
Jianjun Kong 已提交
1757
	vif->bytes_out += skb->len;
L
Linus Torvalds 已提交
1758

E
Eric Dumazet 已提交
1759
	skb_dst_drop(skb);
1760
	skb_dst_set(skb, &rt->dst);
1761
	ip_decrease_ttl(ip_hdr(skb));
L
Linus Torvalds 已提交
1762 1763

	/* FIXME: forward and output firewalls used to be called here.
E
Eric Dumazet 已提交
1764 1765
	 * What do we do with netfilter? -- RR
	 */
L
Linus Torvalds 已提交
1766
	if (vif->flags & VIFF_TUNNEL) {
1767
		ip_encap(net, skb, vif->local, vif->remote);
L
Linus Torvalds 已提交
1768
		/* FIXME: extra output firewall step used to be here. --RR */
1769 1770
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
L
Linus Torvalds 已提交
1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
1786
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
L
Linus Torvalds 已提交
1787 1788 1789 1790 1791 1792 1793
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

1794
static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
L
Linus Torvalds 已提交
1795 1796
{
	int ct;
1797 1798 1799

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
L
Linus Torvalds 已提交
1800 1801 1802 1803 1804 1805 1806
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

1807 1808 1809
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc_cache *cache,
			  int local)
L
Linus Torvalds 已提交
1810 1811 1812
{
	int psend = -1;
	int vif, ct;
1813
	int true_vifi = ipmr_find_vif(mrt, skb->dev);
L
Linus Torvalds 已提交
1814 1815 1816 1817 1818

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

1819
	if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830
		struct mfc_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incomming
		 * interface is part of the static tree.
		 */
		cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

L
Linus Torvalds 已提交
1831 1832 1833
	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
1834
	if (mrt->vif_table[vif].dev != skb->dev) {
1835
		if (rt_is_output_route(skb_rtable(skb))) {
L
Linus Torvalds 已提交
1836
			/* It is our own packet, looped back.
E
Eric Dumazet 已提交
1837 1838 1839 1840 1841 1842 1843 1844 1845
			 * Very complicated situation...
			 *
			 * The best workaround until routing daemons will be
			 * fixed is not to redistribute packet, if it was
			 * send through wrong interface. It means, that
			 * multicast applications WILL NOT work for
			 * (S,G), which have default multicast route pointing
			 * to wrong oif. In any case, it is not a good
			 * idea to use multicasting applications on router.
L
Linus Torvalds 已提交
1846 1847 1848 1849 1850 1851
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;

1852
		if (true_vifi >= 0 && mrt->mroute_do_assert &&
L
Linus Torvalds 已提交
1853
		    /* pimsm uses asserts, when switching from RPT to SPT,
E
Eric Dumazet 已提交
1854 1855 1856
		     * so that we cannot check that packet arrived on an oif.
		     * It is bad, but otherwise we would need to move pretty
		     * large chunk of pimd to kernel. Ough... --ANK
L
Linus Torvalds 已提交
1857
		     */
1858
		    (mrt->mroute_do_pim ||
1859
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1860
		    time_after(jiffies,
L
Linus Torvalds 已提交
1861 1862
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
1863
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
L
Linus Torvalds 已提交
1864 1865 1866 1867
		}
		goto dont_forward;
	}

1868
forward:
1869 1870
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;
L
Linus Torvalds 已提交
1871 1872 1873 1874

	/*
	 *	Forward the frame
	 */
1875 1876
	if (cache->mfc_origin == htonl(INADDR_ANY) &&
	    cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889
		if (true_vifi >= 0 &&
		    true_vifi != cache->mfc_parent &&
		    ip_hdr(skb)->ttl >
				cache->mfc_un.res.ttls[cache->mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = cache->mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
E
Eric Dumazet 已提交
1890 1891
	for (ct = cache->mfc_un.res.maxvif - 1;
	     ct >= cache->mfc_un.res.minvif; ct--) {
1892
		/* For (*,G) entry, don't forward to the incoming interface */
1893 1894
		if ((cache->mfc_origin != htonl(INADDR_ANY) ||
		     ct != true_vifi) &&
1895
		    ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
L
Linus Torvalds 已提交
1896 1897
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
E
Eric Dumazet 已提交
1898

L
Linus Torvalds 已提交
1899
				if (skb2)
1900 1901
					ipmr_queue_xmit(net, mrt, skb2, cache,
							psend);
L
Linus Torvalds 已提交
1902
			}
J
Jianjun Kong 已提交
1903
			psend = ct;
L
Linus Torvalds 已提交
1904 1905
		}
	}
1906
last_forward:
L
Linus Torvalds 已提交
1907 1908 1909
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
E
Eric Dumazet 已提交
1910

L
Linus Torvalds 已提交
1911
			if (skb2)
1912
				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
L
Linus Torvalds 已提交
1913
		} else {
1914
			ipmr_queue_xmit(net, mrt, skb, cache, psend);
1915
			return;
L
Linus Torvalds 已提交
1916 1917 1918 1919 1920 1921 1922 1923
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
}

1924
static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
1925
{
1926 1927
	struct rtable *rt = skb_rtable(skb);
	struct iphdr *iph = ip_hdr(skb);
D
David S. Miller 已提交
1928
	struct flowi4 fl4 = {
1929 1930
		.daddr = iph->daddr,
		.saddr = iph->saddr,
1931
		.flowi4_tos = RT_TOS(iph->tos),
D
David S. Miller 已提交
1932 1933 1934
		.flowi4_oif = (rt_is_output_route(rt) ?
			       skb->dev->ifindex : 0),
		.flowi4_iif = (rt_is_output_route(rt) ?
1935
			       LOOPBACK_IFINDEX :
D
David S. Miller 已提交
1936
			       skb->dev->ifindex),
1937
		.flowi4_mark = skb->mark,
1938 1939 1940 1941
	};
	struct mr_table *mrt;
	int err;

D
David S. Miller 已提交
1942
	err = ipmr_fib_lookup(net, &fl4, &mrt);
1943 1944 1945 1946
	if (err)
		return ERR_PTR(err);
	return mrt;
}
L
Linus Torvalds 已提交
1947 1948 1949

/*
 *	Multicast packets for forwarding arrive here
E
Eric Dumazet 已提交
1950
 *	Called with rcu_read_lock();
L
Linus Torvalds 已提交
1951 1952 1953 1954 1955
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
1956
	struct net *net = dev_net(skb->dev);
E
Eric Dumazet 已提交
1957
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1958
	struct mr_table *mrt;
L
Linus Torvalds 已提交
1959 1960

	/* Packet is looped back after forward, it should not be
E
Eric Dumazet 已提交
1961
	 * forwarded second time, but still can be delivered locally.
L
Linus Torvalds 已提交
1962
	 */
E
Eric Dumazet 已提交
1963
	if (IPCB(skb)->flags & IPSKB_FORWARDED)
L
Linus Torvalds 已提交
1964 1965
		goto dont_forward;

1966
	mrt = ipmr_rt_fib_lookup(net, skb);
1967 1968 1969
	if (IS_ERR(mrt)) {
		kfree_skb(skb);
		return PTR_ERR(mrt);
1970
	}
L
Linus Torvalds 已提交
1971
	if (!local) {
E
Eric Dumazet 已提交
1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations sort of
			 * Cisco IOS <= 11.2(8)) do not put router alert
			 * option to IGMP packets destined to routable
			 * groups. It is very bad, because it means
			 * that we can forward NO IGMP messages.
			 */
			struct sock *mroute_sk;

			mroute_sk = rcu_dereference(mrt->mroute_sk);
			if (mroute_sk) {
				nf_reset(skb);
				raw_rcv(mroute_sk, skb);
				return 0;
			}
L
Linus Torvalds 已提交
1990 1991 1992
		    }
	}

1993
	/* already under rcu_read_lock() */
1994
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1995 1996 1997 1998 1999 2000 2001
	if (cache == NULL) {
		int vif = ipmr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
						    vif);
	}
L
Linus Torvalds 已提交
2002 2003 2004 2005

	/*
	 *	No usable cache entry
	 */
J
Jianjun Kong 已提交
2006
	if (cache == NULL) {
L
Linus Torvalds 已提交
2007 2008 2009 2010 2011
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
2012
			if (skb2 == NULL)
L
Linus Torvalds 已提交
2013 2014 2015 2016
				return -ENOBUFS;
			skb = skb2;
		}

2017
		read_lock(&mrt_lock);
2018
		vif = ipmr_find_vif(mrt, skb->dev);
L
Linus Torvalds 已提交
2019
		if (vif >= 0) {
2020
			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
L
Linus Torvalds 已提交
2021 2022
			read_unlock(&mrt_lock);

2023
			return err2;
L
Linus Torvalds 已提交
2024 2025 2026 2027 2028 2029
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

2030
	read_lock(&mrt_lock);
2031
	ip_mr_forward(net, mrt, skb, cache, local);
L
Linus Torvalds 已提交
2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045
	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

I
Ilpo Järvinen 已提交
2046
#ifdef CONFIG_IP_PIMSM
2047
/* called with rcu_read_lock() */
2048 2049
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
L
Linus Torvalds 已提交
2050
{
I
Ilpo Järvinen 已提交
2051 2052
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
L
Linus Torvalds 已提交
2053

I
Ilpo Järvinen 已提交
2054
	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
L
Linus Torvalds 已提交
2055
	/*
E
Eric Dumazet 已提交
2056 2057 2058 2059
	 * Check that:
	 * a. packet is really sent to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c. packet is not truncated
L
Linus Torvalds 已提交
2060
	 */
2061
	if (!ipv4_is_multicast(encap->daddr) ||
L
Linus Torvalds 已提交
2062
	    encap->tot_len == 0 ||
I
Ilpo Järvinen 已提交
2063 2064
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;
L
Linus Torvalds 已提交
2065 2066

	read_lock(&mrt_lock);
2067 2068
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
L
Linus Torvalds 已提交
2069 2070
	read_unlock(&mrt_lock);

2071
	if (reg_dev == NULL)
I
Ilpo Järvinen 已提交
2072
		return 1;
L
Linus Torvalds 已提交
2073

2074
	skb->mac_header = skb->network_header;
2075
	skb_pull(skb, (u8 *)encap - skb->data);
2076
	skb_reset_network_header(skb);
L
Linus Torvalds 已提交
2077
	skb->protocol = htons(ETH_P_IP);
2078
	skb->ip_summed = CHECKSUM_NONE;
2079

2080
	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
2081

L
Linus Torvalds 已提交
2082
	netif_rx(skb);
I
Ilpo Järvinen 已提交
2083

2084
	return NET_RX_SUCCESS;
I
Ilpo Järvinen 已提交
2085 2086 2087 2088 2089 2090 2091 2092
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

E
Eric Dumazet 已提交
2093
int pim_rcv_v1(struct sk_buff *skb)
I
Ilpo Järvinen 已提交
2094 2095
{
	struct igmphdr *pim;
2096
	struct net *net = dev_net(skb->dev);
2097
	struct mr_table *mrt;
I
Ilpo Järvinen 已提交
2098 2099 2100 2101 2102 2103

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

2104
	mrt = ipmr_rt_fib_lookup(net, skb);
2105 2106
	if (IS_ERR(mrt))
		goto drop;
2107
	if (!mrt->mroute_do_pim ||
I
Ilpo Järvinen 已提交
2108 2109 2110
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

2111
	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
I
Ilpo Järvinen 已提交
2112 2113 2114
drop:
		kfree_skb(skb);
	}
L
Linus Torvalds 已提交
2115 2116 2117 2118 2119
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
E
Eric Dumazet 已提交
2120
static int pim_rcv(struct sk_buff *skb)
L
Linus Torvalds 已提交
2121 2122
{
	struct pimreghdr *pim;
2123 2124
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
L
Linus Torvalds 已提交
2125

I
Ilpo Järvinen 已提交
2126
	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
L
Linus Torvalds 已提交
2127 2128
		goto drop;

2129
	pim = (struct pimreghdr *)skb_transport_header(skb);
E
Eric Dumazet 已提交
2130 2131
	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
2132
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
2133
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
L
Linus Torvalds 已提交
2134 2135
		goto drop;

2136
	mrt = ipmr_rt_fib_lookup(net, skb);
2137 2138
	if (IS_ERR(mrt))
		goto drop;
2139
	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
I
Ilpo Järvinen 已提交
2140 2141 2142
drop:
		kfree_skb(skb);
	}
L
Linus Torvalds 已提交
2143 2144 2145 2146
	return 0;
}
#endif

2147 2148
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm)
L
Linus Torvalds 已提交
2149 2150 2151
{
	int ct;
	struct rtnexthop *nhp;
T
Thomas Graf 已提交
2152
	struct nlattr *mp_attr;
2153
	struct rta_mfc_stats mfcs;
L
Linus Torvalds 已提交
2154

2155
	/* If cache is unresolved, don't try to parse IIF and OIF */
2156
	if (c->mfc_parent >= MAXVIFS)
2157 2158
		return -ENOENT;

T
Thomas Graf 已提交
2159 2160 2161
	if (VIF_EXISTS(mrt, c->mfc_parent) &&
	    nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
		return -EMSGSIZE;
L
Linus Torvalds 已提交
2162

T
Thomas Graf 已提交
2163 2164
	if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
		return -EMSGSIZE;
L
Linus Torvalds 已提交
2165 2166

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2167
		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
T
Thomas Graf 已提交
2168 2169 2170 2171 2172
			if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

L
Linus Torvalds 已提交
2173 2174
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2175
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
L
Linus Torvalds 已提交
2176 2177 2178
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
T
Thomas Graf 已提交
2179 2180 2181

	nla_nest_end(skb, mp_attr);

2182 2183 2184 2185 2186 2187
	mfcs.mfcs_packets = c->mfc_un.res.pkt;
	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
		return -EMSGSIZE;

L
Linus Torvalds 已提交
2188 2189 2190 2191
	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}

2192 2193 2194
/*
 * Fill @skb/@rtm with the multicast route for (@saddr, @daddr) from the
 * default table.
 *
 * When no cache entry matches and @nowait is zero, a clone of @skb with a
 * minimal pseudo IP header (version 0, the requested addresses) is pushed
 * onto the unresolved queue via ipmr_cache_unresolved().
 *
 * Returns the result of __ipmr_fill_mroute() or ipmr_cache_unresolved(),
 * or -ENOENT (no default table), -EAGAIN (no entry and @nowait set),
 * -ENODEV (skb->dev missing or not a VIF), -ENOMEM (clone failed).
 */
int ipmr_get_route(struct net *net, struct sk_buff *skb,
		   __be32 saddr, __be32 daddr,
		   struct rtmsg *rtm, int nowait)
{
	struct mfc_cache *cache;
	struct sk_buff *clone;
	struct mr_table *mrt;
	struct iphdr *iph;
	int vif;
	int err;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	rcu_read_lock();

	cache = ipmr_cache_find(mrt, saddr, daddr);
	if (!cache && skb->dev) {
		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0)
			cache = ipmr_cache_find_any(mrt, daddr, vif);
	}

	if (cache) {
		read_lock(&mrt_lock);
		if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
			cache->mfc_flags |= MFC_NOTIFY;
		err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
		goto out_unlock;
	}

	if (nowait) {
		err = -EAGAIN;
		goto out_rcu;
	}

	read_lock(&mrt_lock);

	vif = skb->dev ? ipmr_find_vif(mrt, skb->dev) : -1;
	if (vif < 0) {
		err = -ENODEV;
		goto out_unlock;
	}

	clone = skb_clone(skb, GFP_ATOMIC);
	if (!clone) {
		err = -ENOMEM;
		goto out_unlock;
	}

	/* Prepend a stub IP header carrying only what the resolver needs;
	 * version 0 marks it as not being a real packet.
	 */
	skb_push(clone, sizeof(struct iphdr));
	skb_reset_network_header(clone);
	iph = ip_hdr(clone);
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->saddr = saddr;
	iph->daddr = daddr;
	iph->version = 0;

	err = ipmr_cache_unresolved(mrt, vif, clone);

out_unlock:
	read_unlock(&mrt_lock);
out_rcu:
	rcu_read_unlock();
	return err;
}

2261
/*
 * Build one complete rtnetlink route message (type @cmd) describing cache
 * entry @c of table @mrt in @skb.
 *
 * An unresolved entry (-ENOENT from __ipmr_fill_mroute()) is still
 * emitted, carrying only the table, source and group attributes.
 *
 * Returns 0 on success, -EMSGSIZE when the message does not fit; in that
 * case the partially built message is cancelled.
 */
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 portid, u32 seq, struct mfc_cache *c, int cmd,
			    int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int rc;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len  = 32;
	rtm->rtm_src_len  = 32;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = (c->mfc_flags & MFC_STATIC) ? RTPROT_STATIC
							: RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto cancel;
	if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin))
		goto cancel;
	if (nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
		goto cancel;

	rc = __ipmr_fill_mroute(mrt, skb, c, rtm);
	/* do not break the dump if cache is unresolved */
	if (rc < 0 && rc != -ENOENT)
		goto cancel;

	nlmsg_end(skb, nlh);
	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337
/*
 * Payload size needed for one mroute netlink message.
 *
 * Every message carries the rtmsg header plus RTA_TABLE, RTA_SRC and
 * RTA_DST.  A resolved entry additionally reserves room for RTA_IIF, the
 * RTA_MULTIPATH nest with up to @maxvif next hops, and RTA_MFC_STATS.
 */
static size_t mroute_msgsize(bool unresolved, int maxvif)
{
	size_t len = NLMSG_ALIGN(sizeof(struct rtmsg));

	len += nla_total_size(4);	/* RTA_TABLE */
	len += nla_total_size(4);	/* RTA_SRC */
	len += nla_total_size(4);	/* RTA_DST */

	if (unresolved)
		return len;

	len += nla_total_size(4);	/* RTA_IIF */
	len += nla_total_size(0);	/* RTA_MULTIPATH */
	len += maxvif * NLA_ALIGN(sizeof(struct rtnexthop));
	len += nla_total_size(sizeof(struct rta_mfc_stats));
					/* RTA_MFC_STATS */

	return len;
}

static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

2338
	err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
}

2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr_table *mrt;
	struct mfc_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

2364
	rcu_read_lock();
2365 2366 2367 2368 2369 2370
	ipmr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC_LINES; h++) {
2371
			list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) {
2372 2373 2374
				if (e < s_e)
					goto next_entry;
				if (ipmr_fill_mroute(mrt, skb,
2375
						     NETLINK_CB(cb->skb).portid,
2376
						     cb->nlh->nlmsg_seq,
2377 2378
						     mfc, RTM_NEWROUTE,
						     NLM_F_MULTI) < 0)
2379 2380 2381 2382 2383 2384
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
2385 2386 2387 2388 2389 2390 2391
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
			if (e < s_e)
				goto next_entry2;
			if (ipmr_fill_mroute(mrt, skb,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
2392 2393
					     mfc, RTM_NEWROUTE,
					     NLM_F_MULTI) < 0) {
2394 2395 2396 2397 2398 2399 2400 2401
				spin_unlock_bh(&mfc_unres_lock);
				goto done;
			}
next_entry2:
			e++;
		}
		spin_unlock_bh(&mfc_unres_lock);
		e = s_e = 0;
2402 2403 2404 2405 2406
		s_h = 0;
next_table:
		t++;
	}
done:
2407
	rcu_read_unlock();
2408 2409 2410 2411 2412 2413 2414 2415

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}

2416
#ifdef CONFIG_PROC_FS
L
Linus Torvalds 已提交
2417
/*
 *	The /proc interfaces to multicast routing :
 *	/proc/net/ip_mr_cache & /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
2422
	struct seq_net_private p;
2423
	struct mr_table *mrt;
L
Linus Torvalds 已提交
2424 2425 2426
	int ct;
};

2427 2428
static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
L
Linus Torvalds 已提交
2429 2430
					   loff_t pos)
{
2431
	struct mr_table *mrt = iter->mrt;
2432 2433 2434

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
L
Linus Torvalds 已提交
2435
			continue;
2436
		if (pos-- == 0)
2437
			return &mrt->vif_table[iter->ct];
L
Linus Torvalds 已提交
2438 2439 2440 2441 2442
	}
	return NULL;
}

/*
 * seq_file ->start() for ip_mr_vif.
 *
 * Takes mrt_lock for reading and returns with it held on every path;
 * ipmr_vif_seq_stop() releases it.  The lock is taken before the table
 * lookup because ipmr_vif_seq_stop() unlocks unconditionally and the
 * seq_file core invokes ->stop() even when ->start() returns an error,
 * so the -ENOENT path must not leave the lock untaken.
 *
 * Returns SEQ_START_TOKEN for position 0, the VIF at *pos - 1 otherwise
 * (NULL past the end), or ERR_PTR(-ENOENT) when the default table does
 * not exist.
 */
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	read_lock(&mrt_lock);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

/*
 * seq_file ->next() for ip_mr_vif: advance *pos and return the next
 * existing VIF after iter->ct (the first one when @v is the start
 * token), or NULL when none is left.
 */
static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	for (++iter->ct; iter->ct < mrt->maxvif; ++iter->ct) {
		if (VIF_EXISTS(mrt, iter->ct))
			return &mrt->vif_table[iter->ct];
	}

	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
S
Stephen Hemminger 已提交
2479
	__releases(mrt_lock)
L
Linus Torvalds 已提交
2480 2481 2482 2483 2484 2485
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
2486 2487
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;
2488

L
Linus Torvalds 已提交
2489
	if (v == SEQ_START_TOKEN) {
2490
		seq_puts(seq,
L
Linus Torvalds 已提交
2491 2492 2493 2494 2495 2496 2497
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name =  vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
2498
			   vif - mrt->vif_table,
2499
			   name, vif->bytes_in, vif->pkt_in,
L
Linus Torvalds 已提交
2500 2501 2502 2503 2504 2505
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

2506
static const struct seq_operations ipmr_vif_seq_ops = {
L
Linus Torvalds 已提交
2507 2508 2509 2510 2511 2512 2513 2514
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
2515 2516
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
L
Linus Torvalds 已提交
2517 2518
}

2519
static const struct file_operations ipmr_vif_fops = {
L
Linus Torvalds 已提交
2520 2521 2522 2523
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
2524
	.release = seq_release_net,
L
Linus Torvalds 已提交
2525 2526 2527
};

struct ipmr_mfc_iter {
2528
	struct seq_net_private p;
2529
	struct mr_table *mrt;
2530
	struct list_head *cache;
L
Linus Torvalds 已提交
2531 2532 2533 2534
	int ct;
};


2535 2536
/*
 * Position @it on the @pos-th cache entry (0-based), counting the
 * resolved hash lines first and the unresolved queue afterwards.
 *
 * Locking is handed over to the caller: when an entry from a hash line
 * is returned, rcu_read_lock() is still held; when an entry from the
 * unresolved queue is returned, mfc_unres_lock is still held.  it->cache
 * records which list the entry came from.  When NULL is returned no lock
 * is held and it->cache is NULL.
 */
static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *entry;

	rcu_read_lock();
	it->ct = 0;
	while (it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry_rcu(entry, it->cache, list) {
			if (pos-- == 0)
				return entry;
		}
		it->ct++;
	}
	rcu_read_unlock();

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(entry, it->cache, list) {
		if (pos-- == 0)
			return entry;
	}
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


/*
 * seq_file ->start() for ip_mr_cache.
 *
 * The iterator cursor is reset before the table lookup: the seq_file core
 * invokes ->stop() even when ->start() returns an error, and
 * ipmr_mfc_seq_stop() decides which lock to drop from it->cache.  Leaving
 * a value from an earlier session in place on the -ENOENT path could make
 * it release a lock that was never taken.
 *
 * Returns SEQ_START_TOKEN for position 0, the entry at *pos - 1 otherwise
 * (NULL past the end), or ERR_PTR(-ENOENT) when the default table does
 * not exist.
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	it->cache = NULL;
	it->ct = 0;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;

	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

/*
 * seq_file ->next() for ip_mr_cache: advance *pos and return the entry
 * following @v.
 *
 * The walk goes through the current list, then the remaining non-empty
 * hash lines, then the unresolved queue.  Switching from the hash lines
 * to the unresolved queue drops rcu_read_lock() and takes mfc_unres_lock.
 * At the very end mfc_unres_lock is released, it->cache is cleared and
 * NULL is returned.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *cur = v;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* More entries on the list we are already walking? */
	if (cur->list.next != it->cache)
		return list_entry(cur->list.next, struct mfc_cache, list);

	if (it->cache != &mrt->mfc_unres_queue) {
		BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

		for (++it->ct; it->ct < MFC_LINES; ++it->ct) {
			it->cache = &mrt->mfc_cache_array[it->ct];
			if (!list_empty(it->cache))
				return list_first_entry(it->cache,
							struct mfc_cache, list);
		}

		/* exhausted cache_array, show unresolved */
		rcu_read_unlock();
		it->cache = &mrt->mfc_unres_queue;
		it->ct = 0;

		spin_lock_bh(&mfc_unres_lock);
		if (!list_empty(it->cache))
			return list_first_entry(it->cache, struct mfc_cache,
						list);
	}

	/* End of the unresolved queue: nothing left to show */
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
2625
	struct mr_table *mrt = it->mrt;
L
Linus Torvalds 已提交
2626

2627
	if (it->cache == &mrt->mfc_unres_queue)
L
Linus Torvalds 已提交
2628
		spin_unlock_bh(&mfc_unres_lock);
2629
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
2630
		rcu_read_unlock();
L
Linus Torvalds 已提交
2631 2632 2633 2634 2635 2636 2637
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
2638
		seq_puts(seq,
L
Linus Torvalds 已提交
2639 2640 2641 2642
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
2643
		const struct mr_table *mrt = it->mrt;
2644

2645 2646 2647
		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
2648
			   mfc->mfc_parent);
L
Linus Torvalds 已提交
2649

2650
		if (it->cache != &mrt->mfc_unres_queue) {
2651 2652 2653 2654
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
S
Stephen Hemminger 已提交
2655
			for (n = mfc->mfc_un.res.minvif;
E
Eric Dumazet 已提交
2656
			     n < mfc->mfc_un.res.maxvif; n++) {
2657
				if (VIF_EXISTS(mrt, n) &&
2658 2659
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
2660
					   " %2d:%-3d",
L
Linus Torvalds 已提交
2661 2662
					   n, mfc->mfc_un.res.ttls[n]);
			}
2663 2664 2665 2666 2667
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
L
Linus Torvalds 已提交
2668 2669 2670 2671 2672 2673
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

2674
static const struct seq_operations ipmr_mfc_seq_ops = {
L
Linus Torvalds 已提交
2675 2676 2677 2678 2679 2680 2681 2682
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
2683 2684
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
L
Linus Torvalds 已提交
2685 2686
}

2687
static const struct file_operations ipmr_mfc_fops = {
L
Linus Torvalds 已提交
2688 2689 2690 2691
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
2692
	.release = seq_release_net,
L
Linus Torvalds 已提交
2693
};
2694
#endif
L
Linus Torvalds 已提交
2695 2696

#ifdef CONFIG_IP_PIMSM_V2
2697
static const struct net_protocol pim_protocol = {
L
Linus Torvalds 已提交
2698
	.handler	=	pim_rcv,
T
Tom Goff 已提交
2699
	.netns_ok	=	1,
L
Linus Torvalds 已提交
2700 2701 2702 2703 2704 2705 2706
};
#endif


/*
 *	Setup for IP multicast routing
 */
2707 2708
/*
 * Per-namespace setup: initialise the multicast routing rules and, with
 * CONFIG_PROC_FS, create the ip_mr_vif and ip_mr_cache proc entries.
 *
 * Returns 0 on success, the negative result of ipmr_rules_init(), or
 * -ENOMEM when a proc entry cannot be created (everything set up so far
 * is undone).
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = ipmr_rules_init(net);

	if (err < 0)
		return err;

#ifdef CONFIG_PROC_FS
	if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops))
		goto undo_rules;
	if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops))
		goto undo_vif_entry;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
undo_vif_entry:
	remove_proc_entry("ip_mr_vif", net->proc_net);
undo_rules:
	ipmr_rules_exit(net);
	return -ENOMEM;
#endif
}

/*
 * Per-namespace teardown: remove the proc entries (when built with
 * CONFIG_PROC_FS), then release the multicast routing rules.
 */
static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc_root = net->proc_net;

	remove_proc_entry("ip_mr_cache", proc_root);
	remove_proc_entry("ip_mr_vif", proc_root);
#endif
	ipmr_rules_exit(net);
}

/* Per-network-namespace init/exit hooks, registered by ip_mr_init() */
static struct pernet_operations ipmr_net_ops = {
	.exit = ipmr_net_exit,
	.init = ipmr_net_init,
};
2747

W
Wang Chen 已提交
2748
/*
 * Boot-time initialisation of IPv4 multicast routing: create the
 * mfc_cache slab, register the per-namespace operations and the
 * netdevice notifier, add the PIM protocol handler (CONFIG_IP_PIMSM_V2)
 * and register the RTM_GETROUTE dump handler.
 *
 * Returns 0 on success.  On failure every step already completed is
 * undone and a negative errno is returned (-EAGAIN when the PIM protocol
 * cannot be added).
 */
int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache), 0,
				       SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
	if (mrt_cachep == NULL)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err != 0)
		goto out_destroy_cache;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err != 0)
		goto out_unreg_pernet;

#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto out_unreg_notifier;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
		      NULL, ipmr_rtm_dumproute, NULL);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
out_unreg_notifier:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
out_unreg_pernet:
	unregister_pernet_subsys(&ipmr_net_ops);
out_destroy_cache:
	kmem_cache_destroy(mrt_cachep);
	return err;
}