route.c 77.2 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

27 28
#define pr_fmt(fmt) "IPv6: " fmt

29
#include <linux/capability.h>
L
Linus Torvalds 已提交
30
#include <linux/errno.h>
31
#include <linux/export.h>
L
Linus Torvalds 已提交
32 33 34 35 36 37 38 39
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
40
#include <linux/mroute6.h>
L
Linus Torvalds 已提交
41 42 43 44
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
45
#include <linux/nsproxy.h>
46
#include <linux/slab.h>
47
#include <net/net_namespace.h>
L
Linus Torvalds 已提交
48 49 50 51 52 53 54 55 56 57
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
58
#include <net/netevent.h>
59
#include <net/netlink.h>
60
#include <net/nexthop.h>
L
Linus Torvalds 已提交
61 62 63 64 65 66 67

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

68
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
E
Eric Dumazet 已提交
69
				    const struct in6_addr *dest);
L
Linus Torvalds 已提交
70
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
71
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
72
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
L
Linus Torvalds 已提交
73 74 75 76
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
77
static int		 ip6_dst_gc(struct dst_ops *ops);
L
Linus Torvalds 已提交
78 79 80 81

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
82 83 84 85
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
L
Linus Torvalds 已提交
86

87
#ifdef CONFIG_IPV6_ROUTE_INFO
88
static struct rt6_info *rt6_add_route_info(struct net *net,
89 90
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
91
					   unsigned int pref);
92
static struct rt6_info *rt6_get_route_info(struct net *net,
93 94
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
95 96
#endif

97 98 99 100 101 102
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

103 104 105
	if (!(rt->dst.flags & DST_HOST))
		return NULL;

106
	peer = rt6_get_peer_create(rt);
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		if (inet_metrics_new(peer))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}

127 128 129
static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
130 131 132
{
	struct in6_addr *p = &rt->rt6i_gateway;

D
David S. Miller 已提交
133
	if (!ipv6_addr_any(p))
134
		return (const void *) p;
135 136
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
137 138 139
	return daddr;
}

140 141 142
static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
143
{
144 145 146
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

147
	daddr = choose_neigh_daddr(rt, skb, daddr);
148
	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
149 150 151 152 153
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}

154
static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155
{
156 157 158 159 160 161
	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
	if (!n) {
		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
162
	rt->n = n;
163 164

	return 0;
165 166
}

167
static struct dst_ops ip6_dst_ops_template = {
L
Linus Torvalds 已提交
168
	.family			=	AF_INET6,
169
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
L
Linus Torvalds 已提交
170 171 172
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
173
	.default_advmss		=	ip6_default_advmss,
174
	.mtu			=	ip6_mtu,
175
	.cow_metrics		=	ipv6_cow_metrics,
L
Linus Torvalds 已提交
176 177 178 179 180
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
181
	.redirect		=	rt6_do_redirect,
182
	.local_out		=	__ip6_local_out,
183
	.neigh_lookup		=	ip6_neigh_lookup,
L
Linus Torvalds 已提交
184 185
};

186
static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187
{
188 189 190
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
191 192
}

193 194
/* dst_ops->update_pmtu hook for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

198 199
/* dst_ops->redirect hook for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

203 204 205 206 207 208
/*
 * dst_ops->cow_metrics hook for blackhole dsts: never hands out a writable
 * metrics array, so it always returns NULL.
 */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}

209 210
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
211
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
212 213
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
214
	.mtu			=	ip6_blackhole_mtu,
215
	.default_advmss		=	ip6_default_advmss,
216
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
217
	.redirect		=	ip6_rt_blackhole_redirect,
218
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
219
	.neigh_lookup		=	ip6_neigh_lookup,
220 221
};

222 223 224 225
/*
 * Read-only metrics array whose only non-zero slot is the hop limit (255).
 * Metric slots are indexed by RTAX_* - 1.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 255,
};

226
static const struct rt6_info ip6_null_entry_template = {
227 228 229
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
230
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
231 232 233
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
L
Linus Torvalds 已提交
234 235
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
236
	.rt6i_protocol  = RTPROT_KERNEL,
L
Linus Torvalds 已提交
237 238 239 240
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

T
Thomas Graf 已提交
241 242
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

243 244 245
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

246
static const struct rt6_info ip6_prohibit_entry_template = {
247 248 249
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
250
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
251 252 253
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
T
Thomas Graf 已提交
254 255
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
256
	.rt6i_protocol  = RTPROT_KERNEL,
T
Thomas Graf 已提交
257 258 259 260
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

261
static const struct rt6_info ip6_blk_hole_entry_template = {
262 263 264
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
265
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
266 267 268
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
T
Thomas Graf 已提交
269 270
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
271
	.rt6i_protocol  = RTPROT_KERNEL,
T
Thomas Graf 已提交
272 273 274 275 276 277
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif

L
Linus Torvalds 已提交
278
/* allocate dst with ip6_dst_ops */
279
static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280
					     struct net_device *dev,
281 282
					     int flags,
					     struct fib6_table *table)
L
Linus Torvalds 已提交
283
{
284
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285
					0, DST_OBSOLETE_FORCE_CHK, flags);
286

287
	if (rt) {
288 289 290
		struct dst_entry *dst = &rt->dst;

		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291
		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292
		rt->rt6i_genid = rt_genid(net);
293 294
		INIT_LIST_HEAD(&rt->rt6i_siblings);
		rt->rt6i_nsiblings = 0;
295
	}
296
	return rt;
L
Linus Torvalds 已提交
297 298 299 300 301 302 303
}

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

304 305 306
	if (rt->n)
		neigh_release(rt->n);

307 308 309
	if (!(rt->dst.flags & DST_HOST))
		dst_destroy_metrics_generic(dst);

310
	if (idev) {
L
Linus Torvalds 已提交
311 312
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
313
	}
314 315 316 317

	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
		dst_release(dst->from);

318 319
	if (rt6_has_peer(rt)) {
		struct inet_peer *peer = rt6_peer_ptr(rt);
320 321 322 323
		inet_putpeer(peer);
	}
}

324 325 326 327 328 329 330
/* File-wide peer generation counter, starting at 0. */
static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);

/* Read the current value of the peer generation counter. */
static u32 rt6_peer_genid(void)
{
	return atomic_read(&__rt6_peer_genid);
}

331 332
/*
 * Bind an inet_peer for the route's destination address to @rt.
 *
 * Nothing happens when the route has no peer base.  @create is passed
 * straight through to inet_getpeer_v6().  When rt6_set_peer() reports
 * failure, the reference just obtained is dropped again; on success the
 * current peer generation id is recorded in the route.
 */
void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer_base *base;
	struct inet_peer *peer;

	base = inetpeer_base_ptr(rt->_rt6i_peer);
	if (!base)
		return;

	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
	if (peer) {
		if (!rt6_set_peer(rt, peer))
			inet_putpeer(peer);
		else
			rt->rt6i_peer_genid = rt6_peer_genid();
	}
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
354
	struct net_device *loopback_dev =
355
		dev_net(dev)->loopback_dev;
L
Linus Torvalds 已提交
356

357 358 359 360 361 362 363 364 365 366 367 368 369
	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
		if (rt->n && rt->n->dev == dev) {
			rt->n->dev = loopback_dev;
			dev_hold(loopback_dev);
			dev_put(dev);
L
Linus Torvalds 已提交
370 371 372 373
		}
	}
}

374
static bool rt6_check_expired(const struct rt6_info *rt)
L
Linus Torvalds 已提交
375
{
376 377
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
378
			return true;
379
	} else if (rt->dst.from) {
380
		return rt6_check_expired((struct rt6_info *) rt->dst.from);
381
	}
382
	return false;
L
Linus Torvalds 已提交
383 384
}

385
static bool rt6_need_strict(const struct in6_addr *daddr)
T
Thomas Graf 已提交
386
{
E
Eric Dumazet 已提交
387 388
	return ipv6_addr_type(daddr) &
		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
T
Thomas Graf 已提交
389 390
}

391 392 393 394 395 396 397 398 399
/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int val = fl6->flowi6_proto;

400 401 402 403
	val ^= (__force u32)fl6->daddr.s6_addr32[0];
	val ^= (__force u32)fl6->daddr.s6_addr32[1];
	val ^= (__force u32)fl6->daddr.s6_addr32[2];
	val ^= (__force u32)fl6->daddr.s6_addr32[3];
404

405 406 407 408
	val ^= (__force u32)fl6->saddr.s6_addr32[0];
	val ^= (__force u32)fl6->saddr.s6_addr32[1];
	val ^= (__force u32)fl6->saddr.s6_addr32[2];
	val ^= (__force u32)fl6->saddr.s6_addr32[3];
409 410 411 412 413 414

	/* Work only if this not encapsulated */
	switch (fl6->flowi6_proto) {
	case IPPROTO_UDP:
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
415 416
		val ^= (__force u16)fl6->fl6_sport;
		val ^= (__force u16)fl6->fl6_dport;
417 418 419
		break;

	case IPPROTO_ICMPV6:
420 421
		val ^= (__force u16)fl6->fl6_icmp_type;
		val ^= (__force u16)fl6->fl6_icmp_code;
422 423 424
		break;
	}
	/* RFC6438 recommands to use flowlabel */
425
	val ^= (__force u32)fl6->flowlabel;
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453

	/* Perhaps, we need to tune, this function? */
	val = val ^ (val >> 7) ^ (val >> 12);
	return val % candidate_count;
}

/*
 * Choose one route out of @match and its siblings for the flow @fl6.
 *
 * The flow hash yields an index in [0, nsiblings]; index 0 means @match
 * itself (the sibling list does not contain it), index k > 0 means the
 * k-th entry of the sibling list.  If the list is shorter than the index,
 * @match is returned.
 */
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6)
{
	struct rt6_info *sibling, *next_sibling;
	int remaining = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);

	/* Index 0: keep the route we were given. */
	if (!remaining)
		return match;

	list_for_each_entry_safe(sibling, next_sibling,
			&match->rt6i_siblings, rt6i_siblings) {
		if (--remaining == 0)
			return sibling;
	}

	return match;
}

L
Linus Torvalds 已提交
454
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

458 459
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
460
						    const struct in6_addr *saddr,
L
Linus Torvalds 已提交
461
						    int oif,
462
						    int flags)
L
Linus Torvalds 已提交
463 464 465 466
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

467 468 469
	if (!oif && ipv6_addr_any(saddr))
		goto out;

470
	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
471
		struct net_device *dev = sprt->dst.dev;
472 473

		if (oif) {
L
Linus Torvalds 已提交
474 475 476
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
477
				if (!sprt->rt6i_idev ||
L
Linus Torvalds 已提交
478
				    sprt->rt6i_idev->dev->ifindex != oif) {
479
					if (flags & RT6_LOOKUP_F_IFACE && oif)
L
Linus Torvalds 已提交
480
						continue;
481
					if (local && (!oif ||
L
Linus Torvalds 已提交
482 483 484 485 486
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
487 488 489 490
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
L
Linus Torvalds 已提交
491
		}
492
	}
L
Linus Torvalds 已提交
493

494
	if (oif) {
L
Linus Torvalds 已提交
495 496 497
		if (local)
			return local;

498
		if (flags & RT6_LOOKUP_F_IFACE)
499
			return net->ipv6.ip6_null_entry;
L
Linus Torvalds 已提交
500
	}
501
out:
L
Linus Torvalds 已提交
502 503 504
	return rt;
}

505 506 507
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation for the
 * route's bound neighbour when that neighbour is not NUD_VALID and was
 * last updated more than rtr_probe_interval ago.
 *
 * Does nothing for a NULL route, a route without a bound neighbour, or a
 * neighbour that is already NUD_VALID.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	neigh = rt ? rt->n : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	/*
	 * neigh->updated is written below, so the lock has to be taken for
	 * writing; the read side would let two CPUs pass the rate-limit
	 * check together and both send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Solicit the neighbour's address via its solicited-node group. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

L
Linus Torvalds 已提交
542
/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
D
Dave Jones 已提交
545
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
546
{
547
	struct net_device *dev = rt->dst.dev;
548
	if (!oif || dev->ifindex == oif)
549
		return 2;
550 551 552 553
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
554
}
L
Linus Torvalds 已提交
555

D
Dave Jones 已提交
556
static inline int rt6_check_neigh(struct rt6_info *rt)
L
Linus Torvalds 已提交
557
{
558
	struct neighbour *neigh;
559
	int m;
560

561
	neigh = rt->n;
562 563 564 565
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
566 567
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
568
			m = 2;
569 570 571 572 573
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
574
			m = 1;
575
		read_unlock_bh(&neigh->lock);
576 577
	} else
		m = 0;
578
	return m;
L
Linus Torvalds 已提交
579 580
}

581 582
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
L
Linus Torvalds 已提交
583
{
584
	int m, n;
585

586
	m = rt6_check_dev(rt, oif);
587
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
588
		return -1;
589 590 591
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
592
	n = rt6_check_neigh(rt);
593
	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
594 595 596 597
		return -1;
	return m;
}

598 599
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
600
{
601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}

/*
 * Find the best route with metric @metric in @fn, scanning in round-robin
 * order: first from @rr_head to the end of the equal-metric run, then
 * from fn->leaf up to (but not including) @rr_head.
 *
 * Returns the best candidate according to find_match(), or NULL.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}
L
Linus Torvalds 已提交
640

641 642 643
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
644
	struct net *net;
L
Linus Torvalds 已提交
645

646 647 648
	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;
L
Linus Torvalds 已提交
649

650
	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
L
Linus Torvalds 已提交
651

652
	if (!match &&
653
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
654
		struct rt6_info *next = rt0->dst.rt6_next;
655

656
		/* no entries matched; do round-robin */
657 658 659 660 661
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
L
Linus Torvalds 已提交
662 663
	}

664
	net = dev_net(rt0->dst.dev);
E
Eric Dumazet 已提交
665
	return match ? match : net->ipv6.ip6_null_entry;
L
Linus Torvalds 已提交
666 667
}

668 669
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @opt/@len describe the option, @gwaddr is the gateway to use for the
 * route.  After validating the option, the matching route is looked up
 * and then
 *  - deleted when it exists and the lifetime is zero,
 *  - created when it does not exist and the lifetime is non-zero,
 *  - otherwise updated with the advertised preference.
 * A surviving route gets its expiry cleared for an infinite lifetime or
 * set to now + lifetime seconds.
 *
 * Returns 0, or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length
	 *
	 * NOTE(review): RFC 4191 section 2.3 requires Length 3 for a Prefix
	 * Length above 64 and Length 2 or 3 for a non-zero Prefix Length;
	 * the bounds below are one unit more permissive - confirm this is
	 * intended.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	/* A full-size option carries the whole prefix; otherwise mask a copy. */
	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif

741
/*
 * BACKTRACK - retry a failed lookup further up the fib6 tree.
 *
 * Expands in place inside a lookup function and relies on that function's
 * local variables "rt" and "fn" and on its labels "out" and "restart".
 *
 * When rt is the namespace's null entry, the macro climbs from fn towards
 * the root.  Reaching a node flagged RTN_TL_ROOT jumps to "out".  At each
 * step it moves either into the parent's source-address subtree (looked up
 * with @saddr, when the parent has a subtree that is not the node we came
 * from) or to the parent itself, and jumps to "restart" as soon as the
 * node reached carries route information (RTN_RTINFO).
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
T
Thomas Graf 已提交
758

759 760
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
761
					     struct flowi6 *fl6, int flags)
L
Linus Torvalds 已提交
762 763 764 765
{
	struct fib6_node *fn;
	struct rt6_info *rt;

T
Thomas Graf 已提交
766
	read_lock_bh(&table->tb6_lock);
767
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
T
Thomas Graf 已提交
768 769
restart:
	rt = fn->leaf;
770
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
771 772
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6);
773
	BACKTRACK(net, &fl6->saddr);
T
Thomas Graf 已提交
774
out:
775
	dst_use(&rt->dst, jiffies);
T
Thomas Graf 已提交
776 777 778 779 780
	read_unlock_bh(&table->tb6_lock);
	return rt;

}

F
Florian Westphal 已提交
781 782 783 784 785 786 787
/*
 * Look up a route for @fl6 through the policy rules, using
 * ip6_pol_route_lookup() as the per-table lookup.  Exported (GPL only).
 */
struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

788 789
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
T
Thomas Graf 已提交
790
{
791 792 793
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
T
Thomas Graf 已提交
794 795
	};
	struct dst_entry *dst;
796
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
T
Thomas Graf 已提交
797

798
	if (saddr) {
799
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
800 801 802
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

803
	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
T
Thomas Graf 已提交
804 805 806 807 808
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

L
Linus Torvalds 已提交
809 810 811
	return NULL;
}

812 813
EXPORT_SYMBOL(rt6_lookup);

T
Thomas Graf 已提交
814
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */

820
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
L
Linus Torvalds 已提交
821 822
{
	int err;
T
Thomas Graf 已提交
823
	struct fib6_table *table;
L
Linus Torvalds 已提交
824

T
Thomas Graf 已提交
825 826
	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
827
	err = fib6_add(&table->tb6_root, rt, info);
T
Thomas Graf 已提交
828
	write_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
829 830 831 832

	return err;
}

833 834
int ip6_ins_rt(struct rt6_info *rt)
{
835
	struct nl_info info = {
836
		.nl_net = dev_net(rt->dst.dev),
837
	};
838
	return __ip6_ins_rt(rt, &info);
839 840
}

841
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
E
Eric Dumazet 已提交
842
				      const struct in6_addr *daddr,
843
				      const struct in6_addr *saddr)
L
Linus Torvalds 已提交
844 845 846 847 848 849 850
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

E
Eric Dumazet 已提交
851
	rt = ip6_rt_copy(ort, daddr);
L
Linus Torvalds 已提交
852 853

	if (rt) {
854 855
		int attempts = !in_softirq();

856
		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
857
			if (ort->rt6i_dst.plen != 128 &&
E
Eric Dumazet 已提交
858
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
859
				rt->rt6i_flags |= RTF_ANYCAST;
A
Alexey Dobriyan 已提交
860
			rt->rt6i_gateway = *daddr;
861
		}
L
Linus Torvalds 已提交
862 863 864 865 866

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
A
Alexey Dobriyan 已提交
867
			rt->rt6i_src.addr = *saddr;
L
Linus Torvalds 已提交
868 869 870 871
			rt->rt6i_src.plen = 128;
		}
#endif

872
	retry:
873
		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
874
			struct net *net = dev_net(rt->dst.dev);
875 876 877 878 879 880 881 882 883
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

884
				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
885 886 887 888 889 890 891 892

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

893
			net_warn_ratelimited("Neighbour table overflow\n");
894
			dst_free(&rt->dst);
895 896
			return NULL;
		}
897
	}
L
Linus Torvalds 已提交
898

899 900
	return rt;
}
L
Linus Torvalds 已提交
901

E
Eric Dumazet 已提交
902 903
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
					const struct in6_addr *daddr)
904
{
E
Eric Dumazet 已提交
905 906
	struct rt6_info *rt = ip6_rt_copy(ort, daddr);

907 908
	if (rt) {
		rt->rt6i_flags |= RTF_CACHE;
909
		rt->n = neigh_clone(ort->n);
910 911 912 913
	}
	return rt;
}

914
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
915
				      struct flowi6 *fl6, int flags)
L
Linus Torvalds 已提交
916 917
{
	struct fib6_node *fn;
918
	struct rt6_info *rt, *nrt;
T
Thomas Graf 已提交
919
	int strict = 0;
L
Linus Torvalds 已提交
920
	int attempts = 3;
921
	int err;
922
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
L
Linus Torvalds 已提交
923

924
	strict |= flags & RT6_LOOKUP_F_IFACE;
L
Linus Torvalds 已提交
925 926

relookup:
T
Thomas Graf 已提交
927
	read_lock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
928

929
restart_2:
930
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
L
Linus Torvalds 已提交
931 932

restart:
933
	rt = rt6_select(fn, oif, strict | reachable);
934 935
	if (rt->rt6i_nsiblings && oif == 0)
		rt = rt6_multipath_select(rt, fl6);
936
	BACKTRACK(net, &fl6->saddr);
937
	if (rt == net->ipv6.ip6_null_entry ||
938
	    rt->rt6i_flags & RTF_CACHE)
939
		goto out;
L
Linus Torvalds 已提交
940

941
	dst_hold(&rt->dst);
T
Thomas Graf 已提交
942
	read_unlock_bh(&table->tb6_lock);
943

944
	if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
945
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
946
	else if (!(rt->dst.flags & DST_HOST))
947
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
948 949
	else
		goto out2;
950

A
Amerigo Wang 已提交
951
	ip6_rt_put(rt);
952
	rt = nrt ? : net->ipv6.ip6_null_entry;
L
Linus Torvalds 已提交
953

954
	dst_hold(&rt->dst);
955
	if (nrt) {
956
		err = ip6_ins_rt(nrt);
957
		if (!err)
L
Linus Torvalds 已提交
958 959 960
			goto out2;
	}

961 962 963 964
	if (--attempts <= 0)
		goto out2;

	/*
T
Thomas Graf 已提交
965
	 * Race condition! In the gap, when table->tb6_lock was
966 967
	 * released someone could insert this route.  Relookup.
	 */
A
Amerigo Wang 已提交
968
	ip6_rt_put(rt);
969 970 971
	goto relookup;

out:
972 973 974 975
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
976
	dst_hold(&rt->dst);
T
Thomas Graf 已提交
977
	read_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
978
out2:
979 980
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;
T
Thomas Graf 已提交
981 982

	return rt;
L
Linus Torvalds 已提交
983 984
}

985
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
986
					    struct flowi6 *fl6, int flags)
987
{
988
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
989 990
}

991 992 993 994 995 996 997 998 999 1000
/*
 * Policy-rule lookup for a packet received on @dev.
 *
 * Interface-strict matching (RT6_LOOKUP_F_IFACE) is added for destinations
 * that rt6_need_strict() flags, except when @dev is an ARPHRD_PIMREG
 * device.
 */
static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	int lookup_flags = flags;

	if (rt6_need_strict(&fl6->daddr)) {
		if (dev->type != ARPHRD_PIMREG)
			lookup_flags |= RT6_LOOKUP_F_IFACE;
	}

	return fib6_rule_lookup(net, fl6, lookup_flags, ip6_pol_route_input);
}

T
Thomas Graf 已提交
1001 1002
void ip6_route_input(struct sk_buff *skb)
{
1003
	const struct ipv6hdr *iph = ipv6_hdr(skb);
1004
	struct net *net = dev_net(skb->dev);
1005
	int flags = RT6_LOOKUP_F_HAS_SADDR;
1006 1007 1008 1009
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
1010
		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
1011 1012
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
T
Thomas Graf 已提交
1013
	};
1014

1015
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
T
Thomas Graf 已提交
1016 1017
}

1018
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1019
					     struct flowi6 *fl6, int flags)
L
Linus Torvalds 已提交
1020
{
1021
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
T
Thomas Graf 已提交
1022 1023
}

1024
struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1025
				    struct flowi6 *fl6)
T
Thomas Graf 已提交
1026 1027 1028
{
	int flags = 0;

1029
	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1030

1031
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1032
		flags |= RT6_LOOKUP_F_IFACE;
T
Thomas Graf 已提交
1033

1034
	if (!ipv6_addr_any(&fl6->saddr))
1035
		flags |= RT6_LOOKUP_F_HAS_SADDR;
1036 1037
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1038

1039
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
L
Linus Torvalds 已提交
1040 1041
}

1042
EXPORT_SYMBOL(ip6_route_output);
L
Linus Torvalds 已提交
1043

1044
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1045
{
1046
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1047 1048
	struct dst_entry *new = NULL;

1049
	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1050
	if (rt) {
1051
		new = &rt->dst;
1052

1053 1054 1055
		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
		rt6_init_peer(rt, net->ipv6.peers);

1056
		new->__use = 1;
1057 1058
		new->input = dst_discard;
		new->output = dst_discard;
1059

E
Eric Dumazet 已提交
1060 1061 1062 1063
		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
1064 1065 1066 1067
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

A
Alexey Dobriyan 已提交
1068
		rt->rt6i_gateway = ort->rt6i_gateway;
1069 1070
		rt->rt6i_flags = ort->rt6i_flags;
		rt6_clean_expires(rt);
1071 1072 1073 1074 1075 1076 1077 1078 1079 1080
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		dst_free(new);
	}

1081 1082
	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
1083 1084
}

L
Linus Torvalds 已提交
1085 1086 1087 1088 1089 1090 1091 1092 1093 1094
/*
 *	Destination cache support functions
 */

/*
 * Validate a cached dst entry.
 *
 * Returns @dst when it is still usable, NULL when the caller must redo
 * the route lookup.  The entry is rejected when its generation id no
 * longer matches the namespace's, or when it is detached from a fib node
 * or that node's serial number differs from @cookie.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */
	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
		return NULL;

	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
		/* refresh the peer binding when the peer generation moved on */
		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
			if (!rt6_has_peer(rt))
				rt6_bind_peer(rt, 0);
			rt->rt6i_peer_genid = rt6_peer_genid();
		}
		return dst;
	}
	return NULL;
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
1118 1119 1120 1121 1122 1123
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
L
Linus Torvalds 已提交
1124
			dst_release(dst);
1125 1126
			dst = NULL;
		}
L
Linus Torvalds 已提交
1127
	}
1128
	return dst;
L
Linus Torvalds 已提交
1129 1130 1131 1132 1133 1134
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

1135
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
L
Linus Torvalds 已提交
1136

E
Eric Dumazet 已提交
1137
	rt = (struct rt6_info *) skb_dst(skb);
L
Linus Torvalds 已提交
1138
	if (rt) {
1139 1140 1141
		if (rt->rt6i_flags & RTF_CACHE)
			rt6_update_expires(rt, 0);
		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
L
Linus Torvalds 已提交
1142 1143 1144 1145
			rt->rt6i_node->fn_sernum = -1;
	}
}

1146 1147
/*
 * Record a new path MTU on a route.
 *
 * @dst: route to update
 * @sk:  not used by this function
 * @skb: not used by this function
 * @mtu: reported MTU in host byte order
 *
 * The route is always confirmed.  The MTU is only lowered, and only on
 * host routes (prefix length 128).  A reported MTU below IPV6_MIN_MTU is
 * clamped to IPV6_MIN_MTU and RTAX_FEATURE_ALLFRAG is set in the route's
 * feature metric.  The route's expiry is then refreshed from the
 * ip6_rt_mtu_expires sysctl.
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	dst_confirm(dst);
	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		struct net *net = dev_net(dst->dev);

		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			u32 features = dst_metric(dst, RTAX_FEATURES);
			mtu = IPV6_MIN_MTU;
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(dst, RTAX_FEATURES, features);
		}
		dst_metric_set(dst, RTAX_MTU, mtu);
		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
	}
}

1167 1168
/*
 * Update the path MTU for the flow described by the IPv6 header found at
 * skb->data.
 *
 * @skb:  buffer whose data pointer is at the IPv6 header of the flow
 * @net:  network namespace for the lookup
 * @mtu:  new MTU in network byte order
 * @oif:  outgoing interface index for the lookup
 * @mark: flow mark for the lookup
 *
 * Performs an output route lookup for the flow and, if the result carries
 * no error, applies the MTU to it.  The looked-up dst is released before
 * returning.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.flowi6_flags = 0;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);

/*
 * Socket flavour of ip6_update_pmtu(): the namespace, bound device index
 * and mark are taken from @sk.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct net *net = sock_net(sk);
	int oif = sk->sk_bound_dev_if;
	u32 mark = sk->sk_mark;

	ip6_update_pmtu(skb, net, mtu, oif, mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);

1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211
/*
 * Process a redirect for the flow described by the IPv6 header found at
 * skb->data.
 *
 * @skb:  buffer whose data pointer is at the IPv6 header of the flow
 * @net:  network namespace for the lookup
 * @oif:  outgoing interface index for the lookup
 * @mark: flow mark for the lookup
 *
 * Performs an output route lookup for the flow and, if the result carries
 * no error, hands it to rt6_do_redirect().  The looked-up dst is released
 * before returning.
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.flowi6_flags = 0;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);

void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);

1223
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
L
Linus Torvalds 已提交
1224
{
1225 1226 1227 1228
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

L
Linus Torvalds 已提交
1229 1230
	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

1231 1232
	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
L
Linus Torvalds 已提交
1233 1234

	/*
1235 1236 1237
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
L
Linus Torvalds 已提交
1238 1239 1240 1241 1242 1243 1244
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}

1245
static unsigned int ip6_mtu(const struct dst_entry *dst)
1246 1247
{
	struct inet6_dev *idev;
1248 1249 1250 1251 1252 1253
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = IPV6_MIN_MTU;
1254 1255 1256 1257 1258 1259 1260 1261 1262 1263

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

1264 1265
/*
 * Singly linked list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc(); walked by icmp6_dst_gc() and
 * icmp6_clean_all().  All accesses take icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1266

1267
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
L
Linus Torvalds 已提交
1268
				  struct neighbour *neigh,
1269
				  struct flowi6 *fl6)
L
Linus Torvalds 已提交
1270
{
1271
	struct dst_entry *dst;
L
Linus Torvalds 已提交
1272 1273
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
1274
	struct net *net = dev_net(dev);
L
Linus Torvalds 已提交
1275

1276
	if (unlikely(!idev))
E
Eric Dumazet 已提交
1277
		return ERR_PTR(-ENODEV);
L
Linus Torvalds 已提交
1278

1279
	rt = ip6_dst_alloc(net, dev, 0, NULL);
1280
	if (unlikely(!rt)) {
L
Linus Torvalds 已提交
1281
		in6_dev_put(idev);
1282
		dst = ERR_PTR(-ENOMEM);
L
Linus Torvalds 已提交
1283 1284 1285 1286 1287
		goto out;
	}

	if (neigh)
		neigh_hold(neigh);
1288
	else {
1289
		neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1290
		if (IS_ERR(neigh)) {
1291
			in6_dev_put(idev);
1292 1293 1294
			dst_free(&rt->dst);
			return ERR_CAST(neigh);
		}
1295
	}
L
Linus Torvalds 已提交
1296

1297 1298
	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
1299
	rt->n = neigh;
1300
	atomic_set(&rt->dst.__refcnt, 1);
1301
	rt->rt6i_dst.addr = fl6->daddr;
1302 1303
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
1304
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
L
Linus Torvalds 已提交
1305

1306
	spin_lock_bh(&icmp6_dst_lock);
1307 1308
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
1309
	spin_unlock_bh(&icmp6_dst_lock);
L
Linus Torvalds 已提交
1310

1311
	fib6_force_start_gc(net);
L
Linus Torvalds 已提交
1312

1313 1314
	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

L
Linus Torvalds 已提交
1315
out:
1316
	return dst;
L
Linus Torvalds 已提交
1317 1318
}

1319
int icmp6_dst_gc(void)
L
Linus Torvalds 已提交
1320
{
1321
	struct dst_entry *dst, **pprev;
1322
	int more = 0;
L
Linus Torvalds 已提交
1323

1324 1325
	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
1326

L
Linus Torvalds 已提交
1327 1328 1329 1330 1331 1332
	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
1333
			++more;
L
Linus Torvalds 已提交
1334 1335 1336
		}
	}

1337
	spin_unlock_bh(&icmp6_dst_lock);
1338

1339
	return more;
L
Linus Torvalds 已提交
1340 1341
}

1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360
/*
 * Walk icmp6_dst_gc_list under icmp6_dst_lock and call @func(rt, @arg) on
 * every entry; entries for which @func returns non-zero are unlinked and
 * freed.
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry **link;
	struct dst_entry *cur;

	spin_lock_bh(&icmp6_dst_lock);
	link = &icmp6_dst_gc_list;
	for (cur = *link; cur != NULL; cur = *link) {
		if (!func((struct rt6_info *) cur, arg)) {
			/* keep this entry, advance to its successor */
			link = &cur->next;
			continue;
		}
		*link = cur->next;
		dst_free(cur);
	}
	spin_unlock_bh(&icmp6_dst_lock);
}

1361
static int ip6_dst_gc(struct dst_ops *ops)
L
Linus Torvalds 已提交
1362 1363
{
	unsigned long now = jiffies;
1364
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1365 1366 1367 1368 1369
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1370
	int entries;
1371

1372
	entries = dst_entries_get_fast(ops);
1373
	if (time_after(rt_last_gc + rt_min_interval, now) &&
1374
	    entries <= rt_max_size)
L
Linus Torvalds 已提交
1375 1376
		goto out;

1377 1378 1379
	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
	net->ipv6.ip6_rt_last_gc = now;
1380 1381
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
1382
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
L
Linus Torvalds 已提交
1383
out:
1384
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1385
	return entries > rt_max_size;
L
Linus Torvalds 已提交
1386 1387 1388 1389 1390 1391 1392 1393
}

/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */

1394
int ip6_dst_hoplimit(struct dst_entry *dst)
L
Linus Torvalds 已提交
1395
{
1396
	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1397
	if (hoplimit == 0) {
1398
		struct net_device *dev = dst->dev;
1399 1400 1401 1402 1403
		struct inet6_dev *idev;

		rcu_read_lock();
		idev = __in6_dev_get(dev);
		if (idev)
1404
			hoplimit = idev->cnf.hop_limit;
1405
		else
1406
			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1407
		rcu_read_unlock();
L
Linus Torvalds 已提交
1408 1409 1410
	}
	return hoplimit;
}
1411
EXPORT_SYMBOL(ip6_dst_hoplimit);
L
Linus Torvalds 已提交
1412 1413 1414 1415 1416

/*
 *
 */

1417
int ip6_route_add(struct fib6_config *cfg)
L
Linus Torvalds 已提交
1418 1419
{
	int err;
1420
	struct net *net = cfg->fc_nlinfo.nl_net;
L
Linus Torvalds 已提交
1421 1422 1423
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
T
Thomas Graf 已提交
1424
	struct fib6_table *table;
L
Linus Torvalds 已提交
1425 1426
	int addr_type;

1427
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
L
Linus Torvalds 已提交
1428 1429
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
1430
	if (cfg->fc_src_len)
L
Linus Torvalds 已提交
1431 1432
		return -EINVAL;
#endif
1433
	if (cfg->fc_ifindex) {
L
Linus Torvalds 已提交
1434
		err = -ENODEV;
1435
		dev = dev_get_by_index(net, cfg->fc_ifindex);
L
Linus Torvalds 已提交
1436 1437 1438 1439 1440 1441 1442
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

1443 1444
	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;
L
Linus Torvalds 已提交
1445

1446
	err = -ENOBUFS;
1447 1448
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1449
		table = fib6_get_table(net, cfg->fc_table);
1450
		if (!table) {
1451
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1452 1453 1454 1455 1456
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}
1457 1458

	if (!table)
T
Thomas Graf 已提交
1459 1460
		goto out;

1461
	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
L
Linus Torvalds 已提交
1462

1463
	if (!rt) {
L
Linus Torvalds 已提交
1464 1465 1466 1467
		err = -ENOMEM;
		goto out;
	}

1468 1469 1470 1471 1472
	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);
L
Linus Torvalds 已提交
1473

1474 1475 1476 1477 1478
	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);
L
Linus Torvalds 已提交
1479 1480

	if (addr_type & IPV6_ADDR_MULTICAST)
1481
		rt->dst.input = ip6_mc_input;
1482 1483
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
L
Linus Torvalds 已提交
1484
	else
1485
		rt->dst.input = ip6_forward;
L
Linus Torvalds 已提交
1486

1487
	rt->dst.output = ip6_output;
L
Linus Torvalds 已提交
1488

1489 1490
	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
L
Linus Torvalds 已提交
1491
	if (rt->rt6i_dst.plen == 128)
1492
	       rt->dst.flags |= DST_HOST;
L
Linus Torvalds 已提交
1493

1494 1495 1496 1497 1498 1499 1500 1501
	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
		if (!metrics) {
			err = -ENOMEM;
			goto out;
		}
		dst_init_metrics(&rt->dst, metrics, 0);
	}
L
Linus Torvalds 已提交
1502
#ifdef CONFIG_IPV6_SUBTREES
1503 1504
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
L
Linus Torvalds 已提交
1505 1506
#endif

1507
	rt->rt6i_metric = cfg->fc_metric;
L
Linus Torvalds 已提交
1508 1509 1510 1511

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
1512
	if ((cfg->fc_flags & RTF_REJECT) ||
1513 1514 1515
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
L
Linus Torvalds 已提交
1516
		/* hold loopback dev/idev if we haven't done so. */
1517
		if (dev != net->loopback_dev) {
L
Linus Torvalds 已提交
1518 1519 1520 1521
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
1522
			dev = net->loopback_dev;
L
Linus Torvalds 已提交
1523 1524 1525 1526 1527 1528 1529
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
1530 1531
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
L
Linus Torvalds 已提交
1532
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1533 1534 1535 1536 1537 1538 1539
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			break;
1540 1541 1542
		case RTN_THROW:
			rt->dst.error = -EAGAIN;
			break;
1543 1544 1545 1546
		default:
			rt->dst.error = -ENETUNREACH;
			break;
		}
L
Linus Torvalds 已提交
1547 1548 1549
		goto install_route;
	}

1550
	if (cfg->fc_flags & RTF_GATEWAY) {
1551
		const struct in6_addr *gw_addr;
L
Linus Torvalds 已提交
1552 1553
		int gwa_type;

1554
		gw_addr = &cfg->fc_gateway;
A
Alexey Dobriyan 已提交
1555
		rt->rt6i_gateway = *gw_addr;
L
Linus Torvalds 已提交
1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
1569
			if (!(gwa_type & IPV6_ADDR_UNICAST))
L
Linus Torvalds 已提交
1570 1571
				goto out;

1572
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
L
Linus Torvalds 已提交
1573 1574

			err = -EHOSTUNREACH;
1575
			if (!grt)
L
Linus Torvalds 已提交
1576 1577
				goto out;
			if (dev) {
1578
				if (dev != grt->dst.dev) {
A
Amerigo Wang 已提交
1579
					ip6_rt_put(grt);
L
Linus Torvalds 已提交
1580 1581 1582
					goto out;
				}
			} else {
1583
				dev = grt->dst.dev;
L
Linus Torvalds 已提交
1584 1585 1586 1587
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
1588
			if (!(grt->rt6i_flags & RTF_GATEWAY))
L
Linus Torvalds 已提交
1589
				err = 0;
A
Amerigo Wang 已提交
1590
			ip6_rt_put(grt);
L
Linus Torvalds 已提交
1591 1592 1593 1594 1595

			if (err)
				goto out;
		}
		err = -EINVAL;
1596
		if (!dev || (dev->flags & IFF_LOOPBACK))
L
Linus Torvalds 已提交
1597 1598 1599 1600
			goto out;
	}

	err = -ENODEV;
1601
	if (!dev)
L
Linus Torvalds 已提交
1602 1603
		goto out;

1604 1605 1606 1607 1608
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
A
Alexey Dobriyan 已提交
1609
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1610 1611 1612 1613
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

1614
	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1615
		err = rt6_bind_neighbour(rt, dev);
1616
		if (err)
L
Linus Torvalds 已提交
1617 1618 1619
			goto out;
	}

1620
	rt->rt6i_flags = cfg->fc_flags;
L
Linus Torvalds 已提交
1621 1622

install_route:
1623 1624 1625 1626 1627
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1628
			int type = nla_type(nla);
1629 1630 1631

			if (type) {
				if (type > RTAX_MAX) {
L
Linus Torvalds 已提交
1632 1633 1634
					err = -EINVAL;
					goto out;
				}
1635

1636
				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
L
Linus Torvalds 已提交
1637 1638 1639 1640
			}
		}
	}

1641
	rt->dst.dev = dev;
L
Linus Torvalds 已提交
1642
	rt->rt6i_idev = idev;
T
Thomas Graf 已提交
1643
	rt->rt6i_table = table;
1644

1645
	cfg->fc_nlinfo.nl_net = dev_net(dev);
1646

1647
	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
L
Linus Torvalds 已提交
1648 1649 1650 1651 1652 1653 1654

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
1655
		dst_free(&rt->dst);
L
Linus Torvalds 已提交
1656 1657 1658
	return err;
}

1659
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
L
Linus Torvalds 已提交
1660 1661
{
	int err;
T
Thomas Graf 已提交
1662
	struct fib6_table *table;
1663
	struct net *net = dev_net(rt->dst.dev);
L
Linus Torvalds 已提交
1664

1665 1666 1667 1668
	if (rt == net->ipv6.ip6_null_entry) {
		err = -ENOENT;
		goto out;
	}
1669

T
Thomas Graf 已提交
1670 1671
	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
1672
	err = fib6_del(rt, info);
T
Thomas Graf 已提交
1673
	write_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
1674

1675
out:
A
Amerigo Wang 已提交
1676
	ip6_rt_put(rt);
L
Linus Torvalds 已提交
1677 1678 1679
	return err;
}

1680 1681
int ip6_del_rt(struct rt6_info *rt)
{
1682
	struct nl_info info = {
1683
		.nl_net = dev_net(rt->dst.dev),
1684
	};
1685
	return __ip6_del_rt(rt, &info);
1686 1687
}

1688
static int ip6_route_del(struct fib6_config *cfg)
L
Linus Torvalds 已提交
1689
{
T
Thomas Graf 已提交
1690
	struct fib6_table *table;
L
Linus Torvalds 已提交
1691 1692 1693 1694
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

1695
	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1696
	if (!table)
T
Thomas Graf 已提交
1697 1698 1699
		return err;

	read_lock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
1700

T
Thomas Graf 已提交
1701
	fn = fib6_locate(&table->tb6_root,
1702 1703
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);
1704

L
Linus Torvalds 已提交
1705
	if (fn) {
1706
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1707
			if (cfg->fc_ifindex &&
1708 1709
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
L
Linus Torvalds 已提交
1710
				continue;
1711 1712
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
L
Linus Torvalds 已提交
1713
				continue;
1714
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
L
Linus Torvalds 已提交
1715
				continue;
1716
			dst_hold(&rt->dst);
T
Thomas Graf 已提交
1717
			read_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
1718

1719
			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
L
Linus Torvalds 已提交
1720 1721
		}
	}
T
Thomas Graf 已提交
1722
	read_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
1723 1724 1725 1726

	return err;
}

1727
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1728
{
1729
	struct net *net = dev_net(skb->dev);
1730
	struct netevent_redirect netevent;
1731 1732 1733
	struct rt6_info *rt, *nrt = NULL;
	const struct in6_addr *target;
	struct ndisc_options ndopts;
1734 1735
	const struct in6_addr *dest;
	struct neighbour *old_neigh;
1736 1737 1738
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct icmp6hdr *icmph;
1739 1740
	int optlen, on_link;
	u8 *lladdr;
1741 1742 1743 1744 1745

	optlen = skb->tail - skb->transport_header;
	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);

	if (optlen < 0) {
1746
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1747 1748 1749 1750 1751 1752 1753 1754
		return;
	}

	icmph = icmp6_hdr(skb);
	target = (const struct in6_addr *) (icmph + 1);
	dest = target + 1;

	if (ipv6_addr_is_multicast(dest)) {
1755
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1756 1757 1758
		return;
	}

1759
	on_link = 0;
1760 1761 1762 1763
	if (ipv6_addr_equal(dest, target)) {
		on_link = 1;
	} else if (ipv6_addr_type(target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1764
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}
1783 1784

	lladdr = NULL;
1785 1786 1787 1788 1789 1790 1791 1792 1793
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

1794 1795 1796
	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1797
		return;
1798
	}
1799

1800 1801 1802 1803 1804
	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);
1805

1806 1807 1808
	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
	if (!neigh)
		return;
1809

1810 1811 1812
	/* Duplicate redirect: silently ignore. */
	old_neigh = rt->n;
	if (neigh == old_neigh)
1813
		goto out;
L
Linus Torvalds 已提交
1814 1815 1816 1817 1818

	/*
	 *	We have finally decided to accept it.
	 */

1819
	neigh_update(neigh, lladdr, NUD_STALE,
L
Linus Torvalds 已提交
1820 1821 1822 1823 1824 1825
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

E
Eric Dumazet 已提交
1826
	nrt = ip6_rt_copy(rt, dest);
1827
	if (!nrt)
L
Linus Torvalds 已提交
1828 1829 1830 1831 1832 1833
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

A
Alexey Dobriyan 已提交
1834
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1835
	nrt->n = neigh_clone(neigh);
L
Linus Torvalds 已提交
1836

1837
	if (ip6_ins_rt(nrt))
L
Linus Torvalds 已提交
1838 1839
		goto out;

1840
	netevent.old = &rt->dst;
1841
	netevent.old_neigh = old_neigh;
1842
	netevent.new = &nrt->dst;
1843 1844
	netevent.new_neigh = neigh;
	netevent.daddr = dest;
1845 1846
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

1847
	if (rt->rt6i_flags & RTF_CACHE) {
1848
		rt = (struct rt6_info *) dst_clone(&rt->dst);
1849
		ip6_del_rt(rt);
L
Linus Torvalds 已提交
1850 1851 1852
	}

out:
1853
	neigh_release(neigh);
1854 1855
}

L
Linus Torvalds 已提交
1856 1857 1858 1859
/*
 *	Misc support functions
 */

1860
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
E
Eric Dumazet 已提交
1861
				    const struct in6_addr *dest)
L
Linus Torvalds 已提交
1862
{
1863
	struct net *net = dev_net(ort->dst.dev);
1864 1865
	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
					    ort->rt6i_table);
L
Linus Torvalds 已提交
1866 1867

	if (rt) {
1868 1869
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;
1870
		rt->dst.flags |= DST_HOST;
1871

A
Alexey Dobriyan 已提交
1872
		rt->rt6i_dst.addr = *dest;
1873
		rt->rt6i_dst.plen = 128;
1874
		dst_copy_metrics(&rt->dst, &ort->dst);
1875
		rt->dst.error = ort->dst.error;
L
Linus Torvalds 已提交
1876 1877 1878
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
1879
		rt->dst.lastuse = jiffies;
L
Linus Torvalds 已提交
1880

A
Alexey Dobriyan 已提交
1881
		rt->rt6i_gateway = ort->rt6i_gateway;
1882 1883 1884 1885 1886 1887
		rt->rt6i_flags = ort->rt6i_flags;
		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
		    (RTF_DEFAULT | RTF_ADDRCONF))
			rt6_set_from(rt, ort);
		else
			rt6_clean_expires(rt);
L
Linus Torvalds 已提交
1888 1889 1890 1891 1892
		rt->rt6i_metric = 0;

#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
1893
		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
T
Thomas Graf 已提交
1894
		rt->rt6i_table = ort->rt6i_table;
L
Linus Torvalds 已提交
1895 1896 1897 1898
	}
	return rt;
}

1899
#ifdef CONFIG_IPV6_ROUTE_INFO
1900
static struct rt6_info *rt6_get_route_info(struct net *net,
1901 1902
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
1903 1904 1905
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
T
Thomas Graf 已提交
1906 1907
	struct fib6_table *table;

1908
	table = fib6_get_table(net, RT6_TABLE_INFO);
1909
	if (!table)
T
Thomas Graf 已提交
1910
		return NULL;
1911

1912
	read_lock_bh(&table->tb6_lock);
T
Thomas Graf 已提交
1913
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1914 1915 1916
	if (!fn)
		goto out;

1917
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1918
		if (rt->dst.dev->ifindex != ifindex)
1919 1920 1921 1922 1923
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
1924
		dst_hold(&rt->dst);
1925 1926 1927
		break;
	}
out:
1928
	read_unlock_bh(&table->tb6_lock);
1929 1930 1931
	return rt;
}

1932
static struct rt6_info *rt6_add_route_info(struct net *net,
1933 1934
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
1935
					   unsigned int pref)
1936
{
1937 1938
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
1939
		.fc_metric	= IP6_RT_PRIO_USER,
1940 1941 1942 1943
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
1944
		.fc_nlinfo.portid = 0,
1945 1946
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
1947 1948
	};

A
Alexey Dobriyan 已提交
1949 1950
	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;
1951

1952 1953
	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
1954
		cfg.fc_flags |= RTF_DEFAULT;
1955

1956
	ip6_route_add(&cfg);
1957

1958
	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1959 1960 1961
}
#endif

1962
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1963
{
L
Linus Torvalds 已提交
1964
	struct rt6_info *rt;
T
Thomas Graf 已提交
1965
	struct fib6_table *table;
L
Linus Torvalds 已提交
1966

1967
	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1968
	if (!table)
T
Thomas Graf 已提交
1969
		return NULL;
L
Linus Torvalds 已提交
1970

1971
	read_lock_bh(&table->tb6_lock);
1972
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1973
		if (dev == rt->dst.dev &&
1974
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
L
Linus Torvalds 已提交
1975 1976 1977 1978
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
1979
		dst_hold(&rt->dst);
1980
	read_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
1981 1982 1983
	return rt;
}

1984
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1985 1986
				     struct net_device *dev,
				     unsigned int pref)
L
Linus Torvalds 已提交
1987
{
1988 1989
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
1990
		.fc_metric	= IP6_RT_PRIO_USER,
1991 1992 1993
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1994
		.fc_nlinfo.portid = 0,
1995
		.fc_nlinfo.nlh = NULL,
1996
		.fc_nlinfo.nl_net = dev_net(dev),
1997
	};
L
Linus Torvalds 已提交
1998

A
Alexey Dobriyan 已提交
1999
	cfg.fc_gateway = *gwaddr;
L
Linus Torvalds 已提交
2000

2001
	ip6_route_add(&cfg);
L
Linus Torvalds 已提交
2002 2003 2004 2005

	return rt6_get_dflt_router(gwaddr, dev);
}

2006
void rt6_purge_dflt_routers(struct net *net)
L
Linus Torvalds 已提交
2007 2008
{
	struct rt6_info *rt;
T
Thomas Graf 已提交
2009 2010 2011
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
2012
	table = fib6_get_table(net, RT6_TABLE_DFLT);
2013
	if (!table)
T
Thomas Graf 已提交
2014
		return;
L
Linus Torvalds 已提交
2015 2016

restart:
T
Thomas Graf 已提交
2017
	read_lock_bh(&table->tb6_lock);
2018
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
L
Linus Torvalds 已提交
2019
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
2020
			dst_hold(&rt->dst);
T
Thomas Graf 已提交
2021
			read_unlock_bh(&table->tb6_lock);
2022
			ip6_del_rt(rt);
L
Linus Torvalds 已提交
2023 2024 2025
			goto restart;
		}
	}
T
Thomas Graf 已提交
2026
	read_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
2027 2028
}

2029 2030
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

2043
	cfg->fc_nlinfo.nl_net = net;
2044

A
Alexey Dobriyan 已提交
2045 2046 2047
	cfg->fc_dst = rtmsg->rtmsg_dst;
	cfg->fc_src = rtmsg->rtmsg_src;
	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2048 2049
}

2050
/*
 * Handle the SIOCADDRT / SIOCDELRT route ioctls.
 *
 * Returns -EINVAL for any other command, -EPERM without CAP_NET_ADMIN,
 * -EFAULT if the user buffer cannot be copied, otherwise the result of
 * ip6_route_add() / ip6_route_del(), which run under the RTNL lock.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct in6_rtmsg rtmsg;
	struct fib6_config cfg;
	int err;

	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
		return -EINVAL;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (copy_from_user(&rtmsg, arg, sizeof(struct in6_rtmsg)))
		return -EFAULT;

	rtmsg_to_fib6_config(net, &rtmsg, &cfg);

	rtnl_lock();
	if (cmd == SIOCADDRT)
		err = ip6_route_add(&cfg);	/* Add a route */
	else
		err = ip6_route_del(&cfg);	/* Delete a route */
	rtnl_unlock();

	return err;
}

/*
 *	Drop the packet on the floor
 */

2091
/*
 * Account for an unroutable packet, send an ICMPv6 destination-unreachable
 * with the given @code, and free the skb.  Always returns 0.
 *
 * @ipstats_mib_noroutes selects the counter: IPSTATS_MIB_INNOROUTES or
 * IPSTATS_MIB_OUTNOROUTES.  Any other value bumps no counter.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	struct dst_entry *dst = skb_dst(skb);
	int counter = ipstats_mib_noroutes;
	bool account = false;

	if (ipstats_mib_noroutes == IPSTATS_MIB_INNOROUTES) {
		/* An incoming packet for the unspecified address is counted
		 * as an address error instead of a missing route.
		 */
		if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) == IPV6_ADDR_ANY)
			counter = IPSTATS_MIB_INADDRERRORS;
		account = true;
	} else if (ipstats_mib_noroutes == IPSTATS_MIB_OUTNOROUTES) {
		account = true;
	}

	if (account)
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), counter);

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}

2114 2115
static int ip6_pkt_discard(struct sk_buff *skb)
{
2116
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2117 2118
}

2119
static int ip6_pkt_discard_out(struct sk_buff *skb)
L
Linus Torvalds 已提交
2120
{
E
Eric Dumazet 已提交
2121
	skb->dev = skb_dst(skb)->dev;
2122
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
L
Linus Torvalds 已提交
2123 2124
}

2125 2126
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

2127 2128
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
2129
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2130 2131 2132 2133
}

static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
E
Eric Dumazet 已提交
2134
	skb->dev = skb_dst(skb)->dev;
2135
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2136 2137
}

2138 2139
#endif

L
Linus Torvalds 已提交
2140 2141 2142 2143 2144 2145
/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
2146
				    bool anycast)
L
Linus Torvalds 已提交
2147
{
2148
	struct net *net = dev_net(idev->dev);
2149
	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2150
	int err;
L
Linus Torvalds 已提交
2151

2152
	if (!rt) {
2153
		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
L
Linus Torvalds 已提交
2154
		return ERR_PTR(-ENOMEM);
2155
	}
L
Linus Torvalds 已提交
2156 2157 2158

	in6_dev_hold(idev);

2159
	rt->dst.flags |= DST_HOST;
2160 2161
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
L
Linus Torvalds 已提交
2162 2163 2164
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2165 2166 2167
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
L
Linus Torvalds 已提交
2168
		rt->rt6i_flags |= RTF_LOCAL;
2169
	err = rt6_bind_neighbour(rt, rt->dst.dev);
2170
	if (err) {
2171
		dst_free(&rt->dst);
2172
		return ERR_PTR(err);
L
Linus Torvalds 已提交
2173 2174
	}

A
Alexey Dobriyan 已提交
2175
	rt->rt6i_dst.addr = *addr;
L
Linus Torvalds 已提交
2176
	rt->rt6i_dst.plen = 128;
2177
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
L
Linus Torvalds 已提交
2178

2179
	atomic_set(&rt->dst.__refcnt, 1);
L
Linus Torvalds 已提交
2180 2181 2182 2183

	return rt;
}

2184 2185
int ip6_route_get_saddr(struct net *net,
			struct rt6_info *rt,
2186
			const struct in6_addr *daddr,
2187 2188 2189 2190 2191 2192
			unsigned int prefs,
			struct in6_addr *saddr)
{
	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
	int err = 0;
	if (rt->rt6i_prefsrc.plen)
A
Alexey Dobriyan 已提交
2193
		*saddr = rt->rt6i_prefsrc.addr;
2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212
	else
		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
					 daddr, prefs, saddr);
	return err;
}

/*
 * Remove a deleted IP address from the prefsrc entries of routes.
 *
 * Walker argument for fib6_remove_prefsrc(), filled in by
 * rt6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device match; NULL matches any */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;	/* address compared against each route's prefsrc */
};

static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;

2213
	if (((void *)rt->dst.dev == dev || !dev) &&
2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232
	    rt != net->ipv6.ip6_null_entry &&
	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
		/* remove prefsrc entry */
		rt->rt6i_prefsrc.plen = 0;
	}
	return 0;
}

/*
 * Run fib6_remove_prefsrc() over all routes of the namespace owning @ifp's
 * device, matching on that device and on @ifp's address.
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net_device *dev = ifp->idev->dev;
	struct arg_dev_net_ip adni = {
		.dev = dev,
		.net = dev_net(dev),
		.addr = &ifp->addr,
	};

	fib6_clean_all(adni.net, fib6_remove_prefsrc, 0, &adni);
}

2233 2234 2235 2236 2237
/* Walker argument for fib6_ifdown(), filled in by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* only routes on this device match; NULL matches any */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
};

L
Linus Torvalds 已提交
2238 2239
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
S
stephen hemminger 已提交
2240 2241
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;
2242

2243
	if ((rt->dst.dev == dev || !dev) &&
2244
	    rt != adn->net->ipv6.ip6_null_entry)
L
Linus Torvalds 已提交
2245
		return -1;
2246

L
Linus Torvalds 已提交
2247 2248 2249
	return 0;
}

2250
void rt6_ifdown(struct net *net, struct net_device *dev)
L
Linus Torvalds 已提交
2251
{
2252 2253 2254 2255 2256 2257
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2258
	icmp6_clean_all(fib6_ifdown, &adn);
L
Linus Torvalds 已提交
2259 2260
}

2261
/* Walker argument for rt6_mtu_change_route(), filled in by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
2278
	if (!idev)
L
Linus Torvalds 已提交
2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discouvery.
	 */
2295
	if (rt->dst.dev == arg->dev &&
2296 2297 2298 2299
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	     (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2300
		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2301
	}
L
Linus Torvalds 已提交
2302 2303 2304
	return 0;
}

2305
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
L
Linus Torvalds 已提交
2306
{
T
Thomas Graf 已提交
2307 2308 2309 2310
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};
L
Linus Torvalds 已提交
2311

2312
	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
L
Linus Torvalds 已提交
2313 2314
}

2315
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2316
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2317
	[RTA_OIF]               = { .type = NLA_U32 },
2318
	[RTA_IIF]		= { .type = NLA_U32 },
2319 2320
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
2321
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2322 2323 2324 2325
};

static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
L
Linus Torvalds 已提交
2326
{
2327 2328 2329
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;
L
Linus Torvalds 已提交
2330

2331 2332 2333
	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;
L
Linus Torvalds 已提交
2334

2335 2336 2337 2338 2339 2340 2341 2342 2343
	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
2344
	cfg->fc_type = rtm->rtm_type;
2345

2346 2347
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
2348 2349
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
2350 2351
		cfg->fc_flags |= RTF_REJECT;

2352 2353 2354
	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

2355
	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2356
	cfg->fc_nlinfo.nlh = nlh;
2357
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2358 2359 2360 2361

	if (tb[RTA_GATEWAY]) {
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
L
Linus Torvalds 已提交
2362
	}
2363 2364 2365 2366 2367 2368 2369 2370

	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
L
Linus Torvalds 已提交
2371
	}
2372 2373 2374 2375 2376 2377 2378 2379

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
L
Linus Torvalds 已提交
2380
	}
2381

2382 2383 2384
	if (tb[RTA_PREFSRC])
		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);

2385 2386 2387 2388 2389 2390 2391 2392 2393
	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
L
Linus Torvalds 已提交
2394
	}
2395 2396 2397 2398

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

2399 2400 2401 2402 2403
	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

2404 2405 2406
	err = 0;
errout:
	return err;
L
Linus Torvalds 已提交
2407 2408
}

2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451
static int ip6_route_multipath(struct fib6_config *cfg, int add)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 0, last_err = 0;

beginning:
	rtnh = (struct rtnexthop *)cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
		if (err) {
			last_err = err;
			/* If we are trying to remove a route, do not stop the
			 * loop when ip6_route_del() fails (because next hop is
			 * already gone), we should try to remove all next hops.
			 */
			if (add) {
				/* If add fails, we should try to delete all
				 * next hops that have been already added.
				 */
				add = 0;
				goto beginning;
			}
		}
2452 2453 2454 2455 2456 2457
		/* Because each route is added like a single route we remove
		 * this flag after the first nexthop (if there is a collision,
		 * we have already fail to add the first nexthop:
		 * fib6_add_rt2node() has reject it).
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2458 2459 2460 2461 2462 2463
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}

2464
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
L
Linus Torvalds 已提交
2465
{
2466 2467
	struct fib6_config cfg;
	int err;
L
Linus Torvalds 已提交
2468

2469 2470 2471 2472
	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

2473 2474 2475 2476
	if (cfg.fc_mp)
		return ip6_route_multipath(&cfg, 0);
	else
		return ip6_route_del(&cfg);
L
Linus Torvalds 已提交
2477 2478
}

2479
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
L
Linus Torvalds 已提交
2480
{
2481 2482
	struct fib6_config cfg;
	int err;
L
Linus Torvalds 已提交
2483

2484 2485 2486 2487
	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

2488 2489 2490 2491
	if (cfg.fc_mp)
		return ip6_route_multipath(&cfg, 1);
	else
		return ip6_route_add(&cfg);
L
Linus Torvalds 已提交
2492 2493
}

2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504
static inline size_t rt6_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
2505
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2506 2507 2508
	       + nla_total_size(sizeof(struct rta_cacheinfo));
}

2509 2510
/*
 * Append one route message describing @rt to @skb.
 *
 * @dst / @src: when non-NULL they are reported as /128 RTA_DST / RTA_SRC
 *		instead of the route's own prefixes.
 * @iif:	non-zero for an input lookup; reported as RTA_IIF, or
 *		handed to ip6mr_get_route() for multicast destinations.
 * @prefix:	non-zero if the caller wants prefix routes only.
 *
 * Returns 1 when @prefix is set and @rt is not a prefix route (nothing is
 * written), -EMSGSIZE when the message does not fit (the partial message
 * is cancelled), 0 when ip6mr_get_route() returns 0 with @nowait clear,
 * otherwise the value of nlmsg_end().
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct neighbour *neigh;
	struct rtmsg *rtm;
	long expires;
	u32 table;

	/* user wants prefix routes only: skipping a non-prefix route
	 * still counts as success.
	 */
	if (prefix && !(rt->rt6i_flags & RTF_PREFIX_RT))
		return 1;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;

	table = rt->rt6i_table ? rt->rt6i_table->tb6_id : RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;

	/* Route type: reject routes are told apart by their dst error. */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	} else if ((rt->rt6i_flags & RTF_LOCAL) ||
		   (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))) {
		rtm->rtm_type = RTN_LOCAL;
	} else {
		rtm->rtm_type = RTN_UNICAST;
	}

	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;

	/* Protocol: route flags override the stored protocol. */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC) {
		rtm->rtm_protocol = RTPROT_REDIRECT;
	} else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len) {
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
	}
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len) {
		if (nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
			goto nla_put_failure;
	}
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);

			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				}
				if (err == -EMSGSIZE)
					goto nla_put_failure;
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr picked;

		if (ip6_route_get_saddr(net, rt, dst, 0, &picked) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &picked))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr prefsrc = rt->rt6i_prefsrc.addr;

		if (nla_put(skb, RTA_PREFSRC, 16, &prefsrc))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* The gateway is the key of the route's bound neighbour, if any. */
	neigh = rt->n;
	if (neigh && nla_put(skb, RTA_GATEWAY, 16, &neigh->primary_key) < 0)
		goto nla_put_failure;

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* Remaining lifetime in jiffies, 0 for routes that do not expire. */
	if (rt->rt6i_flags & RTF_EXPIRES)
		expires = rt->dst.expires - jiffies;
	else
		expires = 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

2656
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
L
Linus Torvalds 已提交
2657 2658 2659 2660
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

2661 2662
	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
L
Linus Torvalds 已提交
2663 2664 2665 2666
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

2667 2668
	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2669
		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2670
		     prefix, 0, NLM_F_MULTI);
L
Linus Torvalds 已提交
2671 2672
}

2673
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
L
Linus Torvalds 已提交
2674
{
2675
	struct net *net = sock_net(in_skb->sk);
2676 2677
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
L
Linus Torvalds 已提交
2678
	struct sk_buff *skb;
2679
	struct rtmsg *rtm;
2680
	struct flowi6 fl6;
2681
	int err, iif = 0, oif = 0;
L
Linus Torvalds 已提交
2682

2683 2684 2685
	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;
L
Linus Torvalds 已提交
2686

2687
	err = -EINVAL;
2688
	memset(&fl6, 0, sizeof(fl6));
L
Linus Torvalds 已提交
2689

2690 2691 2692 2693
	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

A
Alexey Dobriyan 已提交
2694
		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2695 2696 2697 2698 2699 2700
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

A
Alexey Dobriyan 已提交
2701
		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2702 2703 2704 2705 2706 2707
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
2708
		oif = nla_get_u32(tb[RTA_OIF]);
L
Linus Torvalds 已提交
2709 2710 2711

	if (iif) {
		struct net_device *dev;
2712 2713
		int flags = 0;

2714
		dev = __dev_get_by_index(net, iif);
L
Linus Torvalds 已提交
2715 2716
		if (!dev) {
			err = -ENODEV;
2717
			goto errout;
L
Linus Torvalds 已提交
2718
		}
2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
L
Linus Torvalds 已提交
2731 2732
	}

2733
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2734
	if (!skb) {
A
Amerigo Wang 已提交
2735
		ip6_rt_put(rt);
2736 2737 2738
		err = -ENOBUFS;
		goto errout;
	}
L
Linus Torvalds 已提交
2739

2740 2741 2742
	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
2743
	skb_reset_mac_header(skb);
2744
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
L
Linus Torvalds 已提交
2745

2746
	skb_dst_set(skb, &rt->dst);
L
Linus Torvalds 已提交
2747

2748
	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2749
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2750
			    nlh->nlmsg_seq, 0, 0, 0);
L
Linus Torvalds 已提交
2751
	if (err < 0) {
2752 2753
		kfree_skb(skb);
		goto errout;
L
Linus Torvalds 已提交
2754 2755
	}

2756
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2757
errout:
L
Linus Torvalds 已提交
2758 2759 2760
	return err;
}

2761
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
L
Linus Torvalds 已提交
2762 2763
{
	struct sk_buff *skb;
2764
	struct net *net = info->nl_net;
2765 2766 2767 2768
	u32 seq;
	int err;

	err = -ENOBUFS;
2769
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2770

2771
	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2772
	if (!skb)
2773 2774
		goto errout;

2775
	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2776
				event, info->portid, seq, 0, 0, 0);
2777 2778 2779 2780 2781 2782
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
2783
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2784 2785
		    info->nlh, gfp_any());
	return;
2786 2787
errout:
	if (err < 0)
2788
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
L
Linus Torvalds 已提交
2789 2790
}

2791 2792 2793 2794
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *data)
{
	struct net_device *dev = (struct net_device *)data;
2795
	struct net *net = dev_net(dev);
2796 2797

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2798
		net->ipv6.ip6_null_entry->dst.dev = dev;
2799 2800
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2801
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2802
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2803
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2804 2805 2806 2807 2808 2809 2810
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}

	return NOTIFY_OK;
}

L
Linus Torvalds 已提交
2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827
/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

/*
 * Buffer/offset bookkeeping for a /proc reader.
 * NOTE(review): nothing in the visible part of this file uses this struct
 * (the /proc handlers below take a seq_file); check for remaining users.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};

static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
2828
	struct seq_file *m = p_arg;
2829
	struct neighbour *n;
L
Linus Torvalds 已提交
2830

2831
	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
L
Linus Torvalds 已提交
2832 2833

#ifdef CONFIG_IPV6_SUBTREES
2834
	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
L
Linus Torvalds 已提交
2835
#else
2836
	seq_puts(m, "00000000000000000000000000000000 00 ");
L
Linus Torvalds 已提交
2837
#endif
2838
	n = rt->n;
2839 2840
	if (n) {
		seq_printf(m, "%pi6", n->primary_key);
L
Linus Torvalds 已提交
2841
	} else {
2842
		seq_puts(m, "00000000000000000000000000000000");
L
Linus Torvalds 已提交
2843
	}
2844
	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2845 2846
		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
		   rt->dst.__use, rt->rt6i_flags,
2847
		   rt->dst.dev ? rt->dst.dev->name : "");
L
Linus Torvalds 已提交
2848 2849 2850
	return 0;
}

2851
static int ipv6_route_show(struct seq_file *m, void *v)
L
Linus Torvalds 已提交
2852
{
2853
	struct net *net = (struct net *)m->private;
2854
	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2855 2856
	return 0;
}
L
Linus Torvalds 已提交
2857

2858 2859
static int ipv6_route_open(struct inode *inode, struct file *file)
{
2860
	return single_open_net(inode, file, ipv6_route_show);
2861 2862
}

2863 2864 2865 2866 2867
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
2868
	.release	= single_release_net,
2869 2870
};

L
Linus Torvalds 已提交
2871 2872
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
2873
	struct net *net = (struct net *)seq->private;
L
Linus Torvalds 已提交
2874
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2875 2876 2877 2878 2879
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
2880
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2881
		   net->ipv6.rt6_stats->fib_discarded_routes);
L
Linus Torvalds 已提交
2882 2883 2884 2885 2886 2887

	return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
2888
	return single_open_net(inode, file, rt6_stats_seq_show);
2889 2890
}

2891
static const struct file_operations rt6_stats_seq_fops = {
L
Linus Torvalds 已提交
2892 2893 2894 2895
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
2896
	.release = single_release_net,
L
Linus Torvalds 已提交
2897 2898 2899 2900 2901 2902
};
#endif	/* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

static
2903
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
L
Linus Torvalds 已提交
2904 2905
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
2906 2907 2908
	struct net *net;
	int delay;
	if (!write)
L
Linus Torvalds 已提交
2909
		return -EINVAL;
2910 2911 2912 2913 2914 2915

	net = (struct net *)ctl->extra1;
	delay = net->ipv6.sysctl.flush_delay;
	proc_dointvec(ctl, write, buffer, lenp, ppos);
	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
	return 0;
L
Linus Torvalds 已提交
2916 2917
}

2918
ctl_table ipv6_route_table_template[] = {
2919
	{
L
Linus Torvalds 已提交
2920
		.procname	=	"flush",
2921
		.data		=	&init_net.ipv6.sysctl.flush_delay,
L
Linus Torvalds 已提交
2922
		.maxlen		=	sizeof(int),
2923
		.mode		=	0200,
A
Alexey Dobriyan 已提交
2924
		.proc_handler	=	ipv6_sysctl_rtcache_flush
L
Linus Torvalds 已提交
2925 2926 2927
	},
	{
		.procname	=	"gc_thresh",
2928
		.data		=	&ip6_dst_ops_template.gc_thresh,
L
Linus Torvalds 已提交
2929 2930
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2931
		.proc_handler	=	proc_dointvec,
L
Linus Torvalds 已提交
2932 2933 2934
	},
	{
		.procname	=	"max_size",
2935
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
L
Linus Torvalds 已提交
2936 2937
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2938
		.proc_handler	=	proc_dointvec,
L
Linus Torvalds 已提交
2939 2940 2941
	},
	{
		.procname	=	"gc_min_interval",
2942
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
L
Linus Torvalds 已提交
2943 2944
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2945
		.proc_handler	=	proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2946 2947 2948
	},
	{
		.procname	=	"gc_timeout",
2949
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
L
Linus Torvalds 已提交
2950 2951
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2952
		.proc_handler	=	proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2953 2954 2955
	},
	{
		.procname	=	"gc_interval",
2956
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
L
Linus Torvalds 已提交
2957 2958
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2959
		.proc_handler	=	proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2960 2961 2962
	},
	{
		.procname	=	"gc_elasticity",
2963
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
L
Linus Torvalds 已提交
2964 2965
		.maxlen		=	sizeof(int),
		.mode		=	0644,
2966
		.proc_handler	=	proc_dointvec,
L
Linus Torvalds 已提交
2967 2968 2969
	},
	{
		.procname	=	"mtu_expires",
2970
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
L
Linus Torvalds 已提交
2971 2972
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2973
		.proc_handler	=	proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2974 2975 2976
	},
	{
		.procname	=	"min_adv_mss",
2977
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
L
Linus Torvalds 已提交
2978 2979
		.maxlen		=	sizeof(int),
		.mode		=	0644,
2980
		.proc_handler	=	proc_dointvec,
L
Linus Torvalds 已提交
2981 2982 2983
	},
	{
		.procname	=	"gc_min_interval_ms",
2984
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
L
Linus Torvalds 已提交
2985 2986
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2987
		.proc_handler	=	proc_dointvec_ms_jiffies,
L
Linus Torvalds 已提交
2988
	},
2989
	{ }
L
Linus Torvalds 已提交
2990 2991
};

2992
/*
 * Create the route sysctl table for @net: a copy of
 * ipv6_route_table_template whose .data pointers refer to @net's own
 * variables.  Entry 0 also gets @net in extra1 for the flush handler.
 *
 * Returns the new table, or NULL if the copy could not be allocated.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);
	if (!table)
		return NULL;

	/* Indices follow the entry order of ipv6_route_table_template. */
	table[0].data = &net->ipv6.sysctl.flush_delay;
	table[0].extra1 = net;
	table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
	table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
	table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
	table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
	table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
	table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
	table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
	table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

	return table;
}
L
Linus Torvalds 已提交
3016 3017
#endif

3018
/*
 * Per-namespace setup of the IPv6 routing state: a private copy of the
 * dst ops, private copies of the special route entries (null, and with
 * multiple tables also prohibit and blackhole), and the default sysctl
 * values.
 *
 * Returns 0 on success or -ENOMEM on any failure; whatever was set up
 * before the failing step is released again.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	struct rt6_info *entry;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		return -ENOMEM;

	entry = kmemdup(&ip6_null_entry_template,
			sizeof(*net->ipv6.ip6_null_entry),
			GFP_KERNEL);
	net->ipv6.ip6_null_entry = entry;
	if (!entry)
		goto out_ip6_dst_entries;
	/* Each special entry is its own path and uses this netns' ops. */
	entry->dst.path = (struct dst_entry *)entry;
	entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&entry->dst, ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	entry = kmemdup(&ip6_prohibit_entry_template,
			sizeof(*net->ipv6.ip6_prohibit_entry),
			GFP_KERNEL);
	net->ipv6.ip6_prohibit_entry = entry;
	if (!entry)
		goto out_ip6_null_entry;
	entry->dst.path = (struct dst_entry *)entry;
	entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&entry->dst, ip6_template_metrics, true);

	entry = kmemdup(&ip6_blk_hole_entry_template,
			sizeof(*net->ipv6.ip6_blk_hole_entry),
			GFP_KERNEL);
	net->ipv6.ip6_blk_hole_entry = entry;
	if (!entry)
		goto out_ip6_prohibit_entry;
	entry->dst.path = (struct dst_entry *)entry;
	entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&entry->dst, ip6_template_metrics, true);
#endif

	/* Default sysctl values. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	return 0;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
	return -ENOMEM;
}

3090
/*
 * Per-namespace teardown matching ip6_route_net_init(): frees the special
 * route entries cloned for @net (prohibit and blackhole only exist with
 * CONFIG_IPV6_MULTIPLE_TABLES) and destroys the entry counter of the
 * namespace's dst ops.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}

3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116
/*
 * Late per-namespace init: when CONFIG_PROC_FS is enabled, create the
 * "ipv6_route" and "rt6_stats" proc entries for @net.
 *
 * The results of proc_net_fops_create() are not checked, so this always
 * returns 0.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	return 0;
}

/*
 * Late per-namespace teardown: when CONFIG_PROC_FS is enabled, remove the
 * "ipv6_route" and "rt6_stats" proc entries that
 * ip6_route_net_init_late() created for @net.
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
}

3117 3118 3119 3120 3121
/*
 * Per-namespace hooks for the core routing state (dst ops, special route
 * entries, sysctl defaults).  Registered by ip6_route_init().
 */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137
/*
 * Allocate and initialise the inet_peer base for @net and publish it in
 * net->ipv6.peers.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *base;

	base = kmalloc(sizeof(*base), GFP_KERNEL);
	if (base == NULL)
		return -ENOMEM;

	inet_peer_base_init(base);
	net->ipv6.peers = base;

	return 0;
}

/*
 * Release the inet_peer base of @net: the pointer in net->ipv6.peers is
 * cleared first, then the tree is invalidated and the base is freed.
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

3142
static struct pernet_operations ipv6_inetpeer_ops = {
3143 3144 3145 3146
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};

3147 3148 3149 3150 3151
/*
 * Per-namespace hooks for the late stage (proc entries).  ip6_route_init()
 * registers these after fib6_rules_init() has succeeded.
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

3152 3153 3154 3155 3156
/*
 * Netdevice notifier that dispatches to ip6_route_dev_notify().
 * Registered as the last step of ip6_route_init().
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};

3157
/*
 * Initialise the IPv6 routing layer.
 *
 * Steps, in order:
 *   1. create the "ip6_dst_cache" slab cache for struct rt6_info;
 *   2. initialise the entry counter of ip6_dst_blackhole_ops;
 *   3. register the inetpeer and core route pernet subsystems;
 *   4. share the slab cache with ip6_dst_blackhole_ops and attach the
 *      loopback device to init_net's special route entries;
 *   5. initialise the FIB, xfrm6 and the fib6 rules;
 *   6. register the late pernet subsystem, the rtnetlink route handlers
 *      and the netdevice notifier.
 *
 * On failure everything set up so far is unwound through the out_* labels
 * in reverse order.
 *
 * Returns 0 on success.  Otherwise -ENOMEM if the slab cache cannot be
 * created, -ENOBUFS if an rtnetlink handler cannot be registered, or the
 * error code returned by the step that failed.
 *
 * NOTE(review): if register_netdevice_notifier() fails, the rtnetlink
 * handlers registered just before it are not unregistered by the unwind
 * path, and the in6_dev_get() results stored below are not released
 * there either - confirm whether that is acceptable.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/*
	 * The loopback device was registered before this code runs, so the
	 * loopback reference in init_net's special rt6_info entries was
	 * never taken; set it up by hand here.
	 */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto out_xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto out_fib6_rules_init;

	/* Any rtnetlink registration failure is reported as -ENOBUFS. */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

out:
	return ret;

	/* Error unwinding, in reverse order of the setup above. */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
out_fib6_rules_init:
	fib6_rules_cleanup();
out_xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}

void ip6_route_cleanup(void)
{
3243
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3244
	unregister_pernet_subsys(&ip6_route_net_late_ops);
T
Thomas Graf 已提交
3245
	fib6_rules_cleanup();
L
Linus Torvalds 已提交
3246 3247
	xfrm6_fini();
	fib6_gc_cleanup();
3248
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3249
	unregister_pernet_subsys(&ip6_route_net_ops);
3250
	dst_entries_destroy(&ip6_dst_blackhole_ops);
3251
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
L
Linus Torvalds 已提交
3252
}