route.c 76.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

27 28
#define pr_fmt(fmt) "IPv6: " fmt

29
#include <linux/capability.h>
L
Linus Torvalds 已提交
30
#include <linux/errno.h>
31
#include <linux/export.h>
L
Linus Torvalds 已提交
32 33 34 35 36 37 38 39
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
40
#include <linux/mroute6.h>
L
Linus Torvalds 已提交
41 42 43 44
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
45
#include <linux/nsproxy.h>
46
#include <linux/slab.h>
47
#include <net/net_namespace.h>
L
Linus Torvalds 已提交
48 49 50 51 52 53 54 55 56 57
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
58
#include <net/netevent.h>
59
#include <net/netlink.h>
60
#include <net/nexthop.h>
L
Linus Torvalds 已提交
61 62 63 64 65 66 67

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

68
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
E
Eric Dumazet 已提交
69
				    const struct in6_addr *dest);
L
Linus Torvalds 已提交
70
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
71
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
72
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
L
Linus Torvalds 已提交
73 74 75 76
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
77
static int		 ip6_dst_gc(struct dst_ops *ops);
L
Linus Torvalds 已提交
78 79 80 81

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
82 83 84 85
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
L
Linus Torvalds 已提交
86

87
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Helpers used by rt6_route_rcv() to create and to look up routes that
 * were learned from a Route Information option.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
		const struct in6_addr *prefix, int prefixlen,
		const struct in6_addr *gwaddr, int ifindex,
		unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
		const struct in6_addr *prefix, int prefixlen,
		const struct in6_addr *gwaddr, int ifindex);
#endif

97 98 99 100 101 102
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

103 104 105
	if (!(rt->dst.flags & DST_HOST))
		return NULL;

106
	peer = rt6_get_peer_create(rt);
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		if (inet_metrics_new(peer))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}

127 128 129
static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
130 131 132
{
	struct in6_addr *p = &rt->rt6i_gateway;

D
David S. Miller 已提交
133
	if (!ipv6_addr_any(p))
134
		return (const void *) p;
135 136
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
137 138 139
	return daddr;
}

140 141 142
static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
143
{
144 145 146
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

147
	daddr = choose_neigh_daddr(rt, skb, daddr);
148
	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
149 150 151 152 153
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}

154
static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155
{
156 157 158 159 160 161
	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
	if (!n) {
		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
162
	rt->n = n;
163 164

	return 0;
165 166
}

167
static struct dst_ops ip6_dst_ops_template = {
L
Linus Torvalds 已提交
168
	.family			=	AF_INET6,
169
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
L
Linus Torvalds 已提交
170 171 172
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
173
	.default_advmss		=	ip6_default_advmss,
174
	.mtu			=	ip6_mtu,
175
	.cow_metrics		=	ipv6_cow_metrics,
L
Linus Torvalds 已提交
176 177 178 179 180
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
181
	.redirect		=	rt6_do_redirect,
182
	.local_out		=	__ip6_local_out,
183
	.neigh_lookup		=	ip6_neigh_lookup,
L
Linus Torvalds 已提交
184 185
};

186
static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187
{
188 189 190
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
191 192
}

193 194
/* .update_pmtu hook for blackhole dsts: PMTU updates are ignored. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst,
					 struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
	/* intentionally empty */
}

198 199
/* .redirect hook for blackhole dsts: redirects are ignored. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}

203 204 205 206 207 208
/*
 * .cow_metrics hook for blackhole dsts: never provides a writable
 * metrics array, so it always returns NULL.
 */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	u32 *none = NULL;

	return none;
}

209 210
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
211
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
212 213
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
214
	.mtu			=	ip6_blackhole_mtu,
215
	.default_advmss		=	ip6_default_advmss,
216
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
217
	.redirect		=	ip6_rt_blackhole_redirect,
218
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
219
	.neigh_lookup		=	ip6_neigh_lookup,
220 221
};

222
static const u32 ip6_template_metrics[RTAX_MAX] = {
L
Li RongQing 已提交
223
	[RTAX_HOPLIMIT - 1] = 0,
224 225
};

226
static const struct rt6_info ip6_null_entry_template = {
227 228 229
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
230
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
231 232 233
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
L
Linus Torvalds 已提交
234 235
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
236
	.rt6i_protocol  = RTPROT_KERNEL,
L
Linus Torvalds 已提交
237 238 239 240
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

T
Thomas Graf 已提交
241 242
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" entry: a reject route whose dst reports
 * -EACCES and hands packets to ip6_pkt_prohibit()/ip6_pkt_prohibit_out().
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EACCES,
		.input = ip6_pkt_prohibit,
		.output = ip6_pkt_prohibit_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" entry: a reject route whose dst reports
 * -EINVAL and uses dst_discard() for both input and output.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EINVAL,
		.input = dst_discard,
		.output = dst_discard,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#endif

L
Linus Torvalds 已提交
278
/* allocate dst with ip6_dst_ops */
279
static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280
					     struct net_device *dev,
281 282
					     int flags,
					     struct fib6_table *table)
L
Linus Torvalds 已提交
283
{
284
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285
					0, DST_OBSOLETE_FORCE_CHK, flags);
286

287
	if (rt) {
288 289 290
		struct dst_entry *dst = &rt->dst;

		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291
		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292
		rt->rt6i_genid = rt_genid(net);
293 294
		INIT_LIST_HEAD(&rt->rt6i_siblings);
		rt->rt6i_nsiblings = 0;
295
	}
296
	return rt;
L
Linus Torvalds 已提交
297 298 299 300 301 302 303
}

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

304 305 306
	if (rt->n)
		neigh_release(rt->n);

307 308 309
	if (!(rt->dst.flags & DST_HOST))
		dst_destroy_metrics_generic(dst);

310
	if (idev) {
L
Linus Torvalds 已提交
311 312
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
313
	}
314 315 316 317

	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
		dst_release(dst->from);

318 319
	if (rt6_has_peer(rt)) {
		struct inet_peer *peer = rt6_peer_ptr(rt);
320 321 322 323 324 325
		inet_putpeer(peer);
	}
}

/*
 * Look up (and, when @create is set, ask inet_getpeer_v6() to create) the
 * inet_peer for the route's destination address and attach it to @rt.
 * Nothing happens when the route has no peer base.  If rt6_set_peer()
 * reports failure the looked-up peer is put back.
 */
void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer_base *base = inetpeer_base_ptr(rt->_rt6i_peer);
	struct inet_peer *peer;

	if (!base)
		return;

	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
	if (peer && !rt6_set_peer(rt, peer))
		inet_putpeer(peer);
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
345
	struct net_device *loopback_dev =
346
		dev_net(dev)->loopback_dev;
L
Linus Torvalds 已提交
347

348 349 350 351 352 353 354 355 356 357 358 359 360
	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
		if (rt->n && rt->n->dev == dev) {
			rt->n->dev = loopback_dev;
			dev_hold(loopback_dev);
			dev_put(dev);
L
Linus Torvalds 已提交
361 362 363 364
		}
	}
}

365
static bool rt6_check_expired(const struct rt6_info *rt)
L
Linus Torvalds 已提交
366
{
367 368
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
369
			return true;
370
	} else if (rt->dst.from) {
371
		return rt6_check_expired((struct rt6_info *) rt->dst.from);
372
	}
373
	return false;
L
Linus Torvalds 已提交
374 375
}

376
static bool rt6_need_strict(const struct in6_addr *daddr)
T
Thomas Graf 已提交
377
{
E
Eric Dumazet 已提交
378 379
	return ipv6_addr_type(daddr) &
		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
T
Thomas Graf 已提交
380 381
}

382 383 384 385 386 387 388 389 390
/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int val = fl6->flowi6_proto;

391 392 393 394
	val ^= (__force u32)fl6->daddr.s6_addr32[0];
	val ^= (__force u32)fl6->daddr.s6_addr32[1];
	val ^= (__force u32)fl6->daddr.s6_addr32[2];
	val ^= (__force u32)fl6->daddr.s6_addr32[3];
395

396 397 398 399
	val ^= (__force u32)fl6->saddr.s6_addr32[0];
	val ^= (__force u32)fl6->saddr.s6_addr32[1];
	val ^= (__force u32)fl6->saddr.s6_addr32[2];
	val ^= (__force u32)fl6->saddr.s6_addr32[3];
400 401 402 403 404 405

	/* Work only if this not encapsulated */
	switch (fl6->flowi6_proto) {
	case IPPROTO_UDP:
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
406 407
		val ^= (__force u16)fl6->fl6_sport;
		val ^= (__force u16)fl6->fl6_dport;
408 409 410
		break;

	case IPPROTO_ICMPV6:
411 412
		val ^= (__force u16)fl6->fl6_icmp_type;
		val ^= (__force u16)fl6->fl6_icmp_code;
413 414 415
		break;
	}
	/* RFC6438 recommands to use flowlabel */
416
	val ^= (__force u32)fl6->flowlabel;
417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444

	/* Perhaps, we need to tune, this function? */
	val = val ^ (val >> 7) ^ (val >> 12);
	return val % candidate_count;
}

/*
 * Choose one route among @match and its siblings using the flow hash.
 *
 * The hash is taken over rt6i_nsiblings + 1 candidates; index 0 means
 * @match itself (the sibling list does not contain it), index k > 0 means
 * the k-th entry on the sibling list.  @match is returned when the list
 * ends before that entry is reached.
 */
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6)
{
	struct rt6_info *sibling;
	int remaining;

	remaining = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	if (!remaining)
		return match;

	list_for_each_entry(sibling, &match->rt6i_siblings, rt6i_siblings) {
		if (--remaining == 0)
			return sibling;
	}

	return match;
}

L
Linus Torvalds 已提交
445
/*
T
Thomas Graf 已提交
446
 *	Route lookup. Any table->tb6_lock is implied.
L
Linus Torvalds 已提交
447 448
 */

449 450
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
451
						    const struct in6_addr *saddr,
L
Linus Torvalds 已提交
452
						    int oif,
453
						    int flags)
L
Linus Torvalds 已提交
454 455 456 457
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

458 459 460
	if (!oif && ipv6_addr_any(saddr))
		goto out;

461
	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
462
		struct net_device *dev = sprt->dst.dev;
463 464

		if (oif) {
L
Linus Torvalds 已提交
465 466 467
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
468
				if (!sprt->rt6i_idev ||
L
Linus Torvalds 已提交
469
				    sprt->rt6i_idev->dev->ifindex != oif) {
470
					if (flags & RT6_LOOKUP_F_IFACE && oif)
L
Linus Torvalds 已提交
471
						continue;
472
					if (local && (!oif ||
L
Linus Torvalds 已提交
473 474 475 476 477
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
478 479 480 481
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
L
Linus Torvalds 已提交
482
		}
483
	}
L
Linus Torvalds 已提交
484

485
	if (oif) {
L
Linus Torvalds 已提交
486 487 488
		if (local)
			return local;

489
		if (flags & RT6_LOOKUP_F_IFACE)
490
			return net->ipv6.ip6_null_entry;
L
Linus Torvalds 已提交
491
	}
492
out:
L
Linus Torvalds 已提交
493 494 495
	return rt;
}

496 497 498
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - send a Neighbour Solicitation towards the router behind @rt.
 *
 * Nothing is done when @rt is NULL, has no neighbour, or the neighbour is
 * in a NUD_VALID state.  Otherwise, if more than the interface's
 * rtr_probe_interval has passed since neigh->updated, the timestamp is
 * refreshed and a solicitation for the neighbour's primary key is sent to
 * its solicited-node multicast address.
 *
 * neigh->updated is modified here, so neigh->lock is taken for writing;
 * holding it only for reading would let two CPUs both pass the rate-limit
 * test and update the timestamp concurrently.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	neigh = rt ? rt->n : NULL;
	/* Lockless fast path: valid neighbours never need a probe. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* Drop the lock before transmitting. */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
}
#else
/* Router reachability probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

L
Linus Torvalds 已提交
533
/*
534
 * Default Router Selection (RFC 2461 6.3.6)
L
Linus Torvalds 已提交
535
 */
D
Dave Jones 已提交
536
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
537
{
538
	struct net_device *dev = rt->dst.dev;
539
	if (!oif || dev->ifindex == oif)
540
		return 2;
541 542 543 544
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
545
}
L
Linus Torvalds 已提交
546

D
Dave Jones 已提交
547
static inline int rt6_check_neigh(struct rt6_info *rt)
L
Linus Torvalds 已提交
548
{
549
	struct neighbour *neigh;
550
	int m;
551

552
	neigh = rt->n;
553 554 555 556
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
557 558
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
559
			m = 2;
560 561 562 563 564
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
565
			m = 1;
566
		read_unlock_bh(&neigh->lock);
567 568
	} else
		m = 0;
569
	return m;
L
Linus Torvalds 已提交
570 571
}

572 573
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
L
Linus Torvalds 已提交
574
{
575
	int m, n;
576

577
	m = rt6_check_dev(rt, oif);
578
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
579
		return -1;
580 581 582
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
583
	n = rt6_check_neigh(rt);
584
	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
585 586 587 588
		return -1;
	return m;
}

589 590
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
591
{
592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}

/*
 * Find the best route among the entries of @fn that carry @metric.
 *
 * The scan starts at @rr_head and runs to the end of the run of routes
 * with that metric, then wraps around to fn->leaf and continues up to
 * (but excluding) @rr_head.  Returns NULL when nothing qualifies.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *best = NULL;
	struct rt6_info *cur;
	int best_score = -1;

	/* From the round-robin head to the end of the metric run ... */
	cur = rr_head;
	while (cur && cur->rt6i_metric == metric) {
		best = find_match(cur, oif, strict, &best_score, best);
		cur = cur->dst.rt6_next;
	}

	/* ... then from the first leaf back up to the head. */
	cur = fn->leaf;
	while (cur && cur != rr_head && cur->rt6i_metric == metric) {
		best = find_match(cur, oif, strict, &best_score, best);
		cur = cur->dst.rt6_next;
	}

	return best;
}
L
Linus Torvalds 已提交
631

632 633 634
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
635
	struct net *net;
L
Linus Torvalds 已提交
636

637 638 639
	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;
L
Linus Torvalds 已提交
640

641
	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
L
Linus Torvalds 已提交
642

643
	if (!match &&
644
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
645
		struct rt6_info *next = rt0->dst.rt6_next;
646

647
		/* no entries matched; do round-robin */
648 649 650 651 652
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
L
Linus Torvalds 已提交
653 654
	}

655
	net = dev_net(rt0->dst.dev);
E
Eric Dumazet 已提交
656
	return match ? match : net->ipv6.ip6_null_entry;
L
Linus Torvalds 已提交
657 658
}

659 660
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process one Route Information option (RFC 4191).
 * @dev:    device the option arrived on
 * @opt:    start of the option
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then adds, refreshes or deletes the matching
 * route-info route: a zero lifetime deletes an existing route, a non-zero
 * lifetime creates a missing one, and an existing route gets its
 * preference bits and expiry updated.
 *
 * Returns 0 on success (including when route creation fails) and -EINVAL
 * for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length.
	 *
	 * RFC 4191 section 2.3: Length is 1, 2 or 3 (units of 8 octets) and
	 * the Prefix field is 0, 8 or 16 octets accordingly.  A Prefix
	 * Length above 64 therefore requires Length 3, and any non-zero
	 * Prefix Length requires Length 2 or 3.  Accepting shorter options
	 * would make ipv6_addr_prefix() below read past the option.
	 */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* Copy only prefix_len bits; the rest of prefix_buf is zeroed. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif

732
/*
 * BACKTRACK - climb the fib6 tree when the lookup produced the null entry.
 *
 * Expands inside a function that provides the variables `rt` and `fn` and
 * the labels `out` and `restart`.  While `rt` is the null entry, move
 * from `fn` to its parent, or into the parent's subtree (looked up by
 * @saddr) when that subtree exists and is not where we came from.  Jumps
 * to `restart` at the first node flagged RTN_RTINFO and to `out` once the
 * node flagged RTN_TL_ROOT is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *__up;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			__up = fn->parent;				\
			if (FIB6_SUBTREE(__up) &&			\
			    FIB6_SUBTREE(__up) != fn)			\
				fn = fib6_lookup(FIB6_SUBTREE(__up),	\
						 NULL, (saddr));	\
			else						\
				fn = __up;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
T
Thomas Graf 已提交
749

750 751
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
752
					     struct flowi6 *fl6, int flags)
L
Linus Torvalds 已提交
753 754 755 756
{
	struct fib6_node *fn;
	struct rt6_info *rt;

T
Thomas Graf 已提交
757
	read_lock_bh(&table->tb6_lock);
758
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
T
Thomas Graf 已提交
759 760
restart:
	rt = fn->leaf;
761
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
762 763
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6);
764
	BACKTRACK(net, &fl6->saddr);
T
Thomas Graf 已提交
765
out:
766
	dst_use(&rt->dst, jiffies);
T
Thomas Graf 已提交
767 768 769 770 771
	read_unlock_bh(&table->tb6_lock);
	return rt;

}

F
Florian Westphal 已提交
772 773 774 775 776 777 778
/*
 * Exported lookup entry point: run the policy-rule lookup for @fl6 with
 * ip6_pol_route_lookup() as the per-table callback.
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   int flags)
{
	struct dst_entry *dst;

	dst = fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

779 780
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
T
Thomas Graf 已提交
781
{
782 783 784
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
T
Thomas Graf 已提交
785 786
	};
	struct dst_entry *dst;
787
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
T
Thomas Graf 已提交
788

789
	if (saddr) {
790
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
791 792 793
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

794
	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
T
Thomas Graf 已提交
795 796 797 798 799
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

L
Linus Torvalds 已提交
800 801 802
	return NULL;
}

803 804
EXPORT_SYMBOL(rt6_lookup);

T
Thomas Graf 已提交
805
/* ip6_ins_rt is called with FREE table->tb6_lock.
L
Linus Torvalds 已提交
806 807 808 809 810
   It takes new route entry, the addition fails by any reason the
   route is freed. In any case, if caller does not hold it, it may
   be destroyed.
 */

811
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
L
Linus Torvalds 已提交
812 813
{
	int err;
T
Thomas Graf 已提交
814
	struct fib6_table *table;
L
Linus Torvalds 已提交
815

T
Thomas Graf 已提交
816 817
	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
818
	err = fib6_add(&table->tb6_root, rt, info);
T
Thomas Graf 已提交
819
	write_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
820 821 822 823

	return err;
}

824 825
int ip6_ins_rt(struct rt6_info *rt)
{
826
	struct nl_info info = {
827
		.nl_net = dev_net(rt->dst.dev),
828
	};
829
	return __ip6_ins_rt(rt, &info);
830 831
}

832
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
E
Eric Dumazet 已提交
833
				      const struct in6_addr *daddr,
834
				      const struct in6_addr *saddr)
L
Linus Torvalds 已提交
835 836 837 838 839 840 841
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

E
Eric Dumazet 已提交
842
	rt = ip6_rt_copy(ort, daddr);
L
Linus Torvalds 已提交
843 844

	if (rt) {
845 846
		int attempts = !in_softirq();

847
		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
848
			if (ort->rt6i_dst.plen != 128 &&
E
Eric Dumazet 已提交
849
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
850
				rt->rt6i_flags |= RTF_ANYCAST;
A
Alexey Dobriyan 已提交
851
			rt->rt6i_gateway = *daddr;
852
		}
L
Linus Torvalds 已提交
853 854 855 856 857

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
A
Alexey Dobriyan 已提交
858
			rt->rt6i_src.addr = *saddr;
L
Linus Torvalds 已提交
859 860 861 862
			rt->rt6i_src.plen = 128;
		}
#endif

863
	retry:
864
		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
865
			struct net *net = dev_net(rt->dst.dev);
866 867 868 869 870 871 872 873 874
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

875
				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
876 877 878 879 880 881 882 883

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

884
			net_warn_ratelimited("Neighbour table overflow\n");
885
			dst_free(&rt->dst);
886 887
			return NULL;
		}
888
	}
L
Linus Torvalds 已提交
889

890 891
	return rt;
}
L
Linus Torvalds 已提交
892

E
Eric Dumazet 已提交
893 894
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
					const struct in6_addr *daddr)
895
{
E
Eric Dumazet 已提交
896 897
	struct rt6_info *rt = ip6_rt_copy(ort, daddr);

898 899
	if (rt) {
		rt->rt6i_flags |= RTF_CACHE;
900
		rt->n = neigh_clone(ort->n);
901 902 903 904
	}
	return rt;
}

905
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
906
				      struct flowi6 *fl6, int flags)
L
Linus Torvalds 已提交
907 908
{
	struct fib6_node *fn;
909
	struct rt6_info *rt, *nrt;
T
Thomas Graf 已提交
910
	int strict = 0;
L
Linus Torvalds 已提交
911
	int attempts = 3;
912
	int err;
913
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
L
Linus Torvalds 已提交
914

915
	strict |= flags & RT6_LOOKUP_F_IFACE;
L
Linus Torvalds 已提交
916 917

relookup:
T
Thomas Graf 已提交
918
	read_lock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
919

920
restart_2:
921
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
L
Linus Torvalds 已提交
922 923

restart:
924
	rt = rt6_select(fn, oif, strict | reachable);
925 926
	if (rt->rt6i_nsiblings && oif == 0)
		rt = rt6_multipath_select(rt, fl6);
927
	BACKTRACK(net, &fl6->saddr);
928
	if (rt == net->ipv6.ip6_null_entry ||
929
	    rt->rt6i_flags & RTF_CACHE)
930
		goto out;
L
Linus Torvalds 已提交
931

932
	dst_hold(&rt->dst);
T
Thomas Graf 已提交
933
	read_unlock_bh(&table->tb6_lock);
934

935
	if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
936
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
937
	else if (!(rt->dst.flags & DST_HOST))
938
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
939 940
	else
		goto out2;
941

A
Amerigo Wang 已提交
942
	ip6_rt_put(rt);
943
	rt = nrt ? : net->ipv6.ip6_null_entry;
L
Linus Torvalds 已提交
944

945
	dst_hold(&rt->dst);
946
	if (nrt) {
947
		err = ip6_ins_rt(nrt);
948
		if (!err)
L
Linus Torvalds 已提交
949 950 951
			goto out2;
	}

952 953 954 955
	if (--attempts <= 0)
		goto out2;

	/*
T
Thomas Graf 已提交
956
	 * Race condition! In the gap, when table->tb6_lock was
957 958
	 * released someone could insert this route.  Relookup.
	 */
A
Amerigo Wang 已提交
959
	ip6_rt_put(rt);
960 961 962
	goto relookup;

out:
963 964 965 966
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
967
	dst_hold(&rt->dst);
T
Thomas Graf 已提交
968
	read_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
969
out2:
970 971
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;
T
Thomas Graf 已提交
972 973

	return rt;
L
Linus Torvalds 已提交
974 975
}

976
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
977
					    struct flowi6 *fl6, int flags)
978
{
979
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
980 981
}

982 983 984 985 986 987 988 989 990 991
static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}

T
Thomas Graf 已提交
992 993
void ip6_route_input(struct sk_buff *skb)
{
994
	const struct ipv6hdr *iph = ipv6_hdr(skb);
995
	struct net *net = dev_net(skb->dev);
996
	int flags = RT6_LOOKUP_F_HAS_SADDR;
997 998 999 1000
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
1001
		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
1002 1003
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
T
Thomas Graf 已提交
1004
	};
1005

1006
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
T
Thomas Graf 已提交
1007 1008
}

1009
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1010
					     struct flowi6 *fl6, int flags)
L
Linus Torvalds 已提交
1011
{
1012
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
T
Thomas Graf 已提交
1013 1014
}

1015
struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1016
				    struct flowi6 *fl6)
T
Thomas Graf 已提交
1017 1018 1019
{
	int flags = 0;

1020
	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1021

1022
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1023
		flags |= RT6_LOOKUP_F_IFACE;
T
Thomas Graf 已提交
1024

1025
	if (!ipv6_addr_any(&fl6->saddr))
1026
		flags |= RT6_LOOKUP_F_HAS_SADDR;
1027 1028
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1029

1030
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
L
Linus Torvalds 已提交
1031 1032
}

1033
EXPORT_SYMBOL(ip6_route_output);
L
Linus Torvalds 已提交
1034

1035
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1036
{
1037
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1038 1039
	struct dst_entry *new = NULL;

1040
	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1041
	if (rt) {
1042
		new = &rt->dst;
1043

1044 1045 1046
		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
		rt6_init_peer(rt, net->ipv6.peers);

1047
		new->__use = 1;
1048 1049
		new->input = dst_discard;
		new->output = dst_discard;
1050

E
Eric Dumazet 已提交
1051 1052 1053 1054
		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
1055 1056 1057 1058
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

A
Alexey Dobriyan 已提交
1059
		rt->rt6i_gateway = ort->rt6i_gateway;
1060 1061
		rt->rt6i_flags = ort->rt6i_flags;
		rt6_clean_expires(rt);
1062 1063 1064 1065 1066 1067 1068 1069 1070 1071
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		dst_free(new);
	}

1072 1073
	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
1074 1075
}

L
Linus Torvalds 已提交
1076 1077 1078 1079 1080 1081 1082 1083 1084 1085
/*
 *	Destination cache support functions
 */

/*
 * Validate a cached dst.  Returns @dst when it is still usable, NULL when
 * the caller must look the route up again: either the route's generation
 * id no longer matches its namespace, or the route is not attached to a
 * fib node whose serial number equals @cookie.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */
	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
		return NULL;

	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
		return dst;

	return NULL;
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
1104 1105 1106 1107 1108 1109
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
L
Linus Torvalds 已提交
1110
			dst_release(dst);
1111 1112
			dst = NULL;
		}
L
Linus Torvalds 已提交
1113
	}
1114
	return dst;
L
Linus Torvalds 已提交
1115 1116 1117 1118 1119 1120
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

1121
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
L
Linus Torvalds 已提交
1122

E
Eric Dumazet 已提交
1123
	rt = (struct rt6_info *) skb_dst(skb);
L
Linus Torvalds 已提交
1124
	if (rt) {
1125 1126 1127
		if (rt->rt6i_flags & RTF_CACHE)
			rt6_update_expires(rt, 0);
		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
L
Linus Torvalds 已提交
1128 1129 1130 1131
			rt->rt6i_node->fn_sernum = -1;
	}
}

1132 1133
/*
 * Record a new, smaller path MTU on @dst.
 *
 * The dst is always confirmed.  The MTU is only updated for host routes
 * (prefix length 128) and only when @mtu is below the current one.  Values
 * below IPV6_MIN_MTU are clamped to IPV6_MIN_MTU and RTAX_FEATURE_ALLFRAG
 * is set in the route's feature metric.  The route is marked RTF_MODIFIED
 * and its expiry is set from the ip6_rt_mtu_expires sysctl.
 * @sk and @skb are not used here.
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	dst_confirm(dst);
	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		struct net *net = dev_net(dst->dev);

		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			u32 features = dst_metric(dst, RTAX_FEATURES);
			mtu = IPV6_MIN_MTU;
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(dst, RTAX_FEATURES, features);
		}
		dst_metric_set(dst, RTAX_MTU, mtu);
		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
	}
}

1153 1154
/*
 * Update the path MTU for the flow described by the IPv6 header found at
 * skb->data.
 *
 * @mtu is in network byte order.  @oif and @mark select the outgoing
 * interface and mark used for the route lookup.  The route is looked up
 * with ip6_route_output(); the MTU update is applied only when the lookup
 * result carries no error.  The looked-up dst is released before returning.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.flowi6_flags = 0;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);

/*
 * Socket flavour of ip6_update_pmtu(): namespace, bound device and mark
 * are all taken from @sk.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct net *net = sock_net(sk);
	int oif = sk->sk_bound_dev_if;
	u32 mark = sk->sk_mark;

	ip6_update_pmtu(skb, net, mtu, oif, mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);

1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197
/*
 * Process a redirect for the flow described by the IPv6 header found at
 * skb->data.
 *
 * The route is looked up with ip6_route_output() using @oif and @mark;
 * rt6_do_redirect() is called only when the lookup result carries no
 * error.  The looked-up dst is released before returning.
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.flowi6_flags = 0;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);

/*
 * Socket flavour of ip6_redirect(): namespace, bound device and mark are
 * all taken from @sk.
 */
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct net *net = sock_net(sk);

	ip6_redirect(skb, net, sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);

1209
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
L
Linus Torvalds 已提交
1210
{
1211 1212 1213 1214
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

L
Linus Torvalds 已提交
1215 1216
	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

1217 1218
	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
L
Linus Torvalds 已提交
1219 1220

	/*
1221 1222 1223
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
L
Linus Torvalds 已提交
1224 1225 1226 1227 1228 1229 1230
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}

1231
static unsigned int ip6_mtu(const struct dst_entry *dst)
1232 1233
{
	struct inet6_dev *idev;
1234 1235 1236 1237 1238 1239
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = IPV6_MIN_MTU;
1240 1241 1242 1243 1244 1245 1246 1247 1248 1249

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

1250 1251
/* Singly linked list (through dst->next) of the dsts made by icmp6_dst_alloc(). */
static struct dst_entry *icmp6_dst_gc_list;
/* Taken (BH-disabling) around every access to icmp6_dst_gc_list in this file. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
1252

1253
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
L
Linus Torvalds 已提交
1254
				  struct neighbour *neigh,
1255
				  struct flowi6 *fl6)
L
Linus Torvalds 已提交
1256
{
1257
	struct dst_entry *dst;
L
Linus Torvalds 已提交
1258 1259
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
1260
	struct net *net = dev_net(dev);
L
Linus Torvalds 已提交
1261

1262
	if (unlikely(!idev))
E
Eric Dumazet 已提交
1263
		return ERR_PTR(-ENODEV);
L
Linus Torvalds 已提交
1264

1265
	rt = ip6_dst_alloc(net, dev, 0, NULL);
1266
	if (unlikely(!rt)) {
L
Linus Torvalds 已提交
1267
		in6_dev_put(idev);
1268
		dst = ERR_PTR(-ENOMEM);
L
Linus Torvalds 已提交
1269 1270 1271 1272 1273
		goto out;
	}

	if (neigh)
		neigh_hold(neigh);
1274
	else {
1275
		neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1276
		if (IS_ERR(neigh)) {
1277
			in6_dev_put(idev);
1278 1279 1280
			dst_free(&rt->dst);
			return ERR_CAST(neigh);
		}
1281
	}
L
Linus Torvalds 已提交
1282

1283 1284
	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
1285
	rt->n = neigh;
1286
	atomic_set(&rt->dst.__refcnt, 1);
1287
	rt->rt6i_dst.addr = fl6->daddr;
1288 1289
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
L
Li RongQing 已提交
1290
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
L
Linus Torvalds 已提交
1291

1292
	spin_lock_bh(&icmp6_dst_lock);
1293 1294
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
1295
	spin_unlock_bh(&icmp6_dst_lock);
L
Linus Torvalds 已提交
1296

1297
	fib6_force_start_gc(net);
L
Linus Torvalds 已提交
1298

1299 1300
	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

L
Linus Torvalds 已提交
1301
out:
1302
	return dst;
L
Linus Torvalds 已提交
1303 1304
}

1305
int icmp6_dst_gc(void)
L
Linus Torvalds 已提交
1306
{
1307
	struct dst_entry *dst, **pprev;
1308
	int more = 0;
L
Linus Torvalds 已提交
1309

1310 1311
	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
1312

L
Linus Torvalds 已提交
1313 1314 1315 1316 1317 1318
	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
1319
			++more;
L
Linus Torvalds 已提交
1320 1321 1322
		}
	}

1323
	spin_unlock_bh(&icmp6_dst_lock);
1324

1325
	return more;
L
Linus Torvalds 已提交
1326 1327
}

1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346
/*
 * Run @func(rt, @arg) over every entry of icmp6_dst_gc_list, under
 * icmp6_dst_lock.  Entries for which @func returns non-zero are unlinked
 * and freed.
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry **link, *cur;

	spin_lock_bh(&icmp6_dst_lock);
	for (link = &icmp6_dst_gc_list; (cur = *link) != NULL; ) {
		if (!func((struct rt6_info *) cur, arg)) {
			/* keep this entry, advance to its successor's slot */
			link = &cur->next;
			continue;
		}
		*link = cur->next;
		dst_free(cur);
	}
	spin_unlock_bh(&icmp6_dst_lock);
}

1347
static int ip6_dst_gc(struct dst_ops *ops)
L
Linus Torvalds 已提交
1348 1349
{
	unsigned long now = jiffies;
1350
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1351 1352 1353 1354 1355
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1356
	int entries;
1357

1358
	entries = dst_entries_get_fast(ops);
1359
	if (time_after(rt_last_gc + rt_min_interval, now) &&
1360
	    entries <= rt_max_size)
L
Linus Torvalds 已提交
1361 1362
		goto out;

1363 1364 1365
	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
	net->ipv6.ip6_rt_last_gc = now;
1366 1367
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
1368
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
L
Linus Torvalds 已提交
1369
out:
1370
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1371
	return entries > rt_max_size;
L
Linus Torvalds 已提交
1372 1373
}

1374
int ip6_dst_hoplimit(struct dst_entry *dst)
L
Linus Torvalds 已提交
1375
{
1376
	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1377
	if (hoplimit == 0) {
1378
		struct net_device *dev = dst->dev;
1379 1380 1381 1382 1383
		struct inet6_dev *idev;

		rcu_read_lock();
		idev = __in6_dev_get(dev);
		if (idev)
1384
			hoplimit = idev->cnf.hop_limit;
1385
		else
1386
			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1387
		rcu_read_unlock();
L
Linus Torvalds 已提交
1388 1389 1390
	}
	return hoplimit;
}
1391
EXPORT_SYMBOL(ip6_dst_hoplimit);
L
Linus Torvalds 已提交
1392 1393 1394 1395 1396

/*
 *
 */

1397
int ip6_route_add(struct fib6_config *cfg)
L
Linus Torvalds 已提交
1398 1399
{
	int err;
1400
	struct net *net = cfg->fc_nlinfo.nl_net;
L
Linus Torvalds 已提交
1401 1402 1403
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
T
Thomas Graf 已提交
1404
	struct fib6_table *table;
L
Linus Torvalds 已提交
1405 1406
	int addr_type;

1407
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
L
Linus Torvalds 已提交
1408 1409
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
1410
	if (cfg->fc_src_len)
L
Linus Torvalds 已提交
1411 1412
		return -EINVAL;
#endif
1413
	if (cfg->fc_ifindex) {
L
Linus Torvalds 已提交
1414
		err = -ENODEV;
1415
		dev = dev_get_by_index(net, cfg->fc_ifindex);
L
Linus Torvalds 已提交
1416 1417 1418 1419 1420 1421 1422
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

1423 1424
	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;
L
Linus Torvalds 已提交
1425

1426
	err = -ENOBUFS;
1427 1428
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1429
		table = fib6_get_table(net, cfg->fc_table);
1430
		if (!table) {
1431
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1432 1433 1434 1435 1436
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}
1437 1438

	if (!table)
T
Thomas Graf 已提交
1439 1440
		goto out;

1441
	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
L
Linus Torvalds 已提交
1442

1443
	if (!rt) {
L
Linus Torvalds 已提交
1444 1445 1446 1447
		err = -ENOMEM;
		goto out;
	}

1448 1449 1450 1451 1452
	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);
L
Linus Torvalds 已提交
1453

1454 1455 1456 1457 1458
	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);
L
Linus Torvalds 已提交
1459 1460

	if (addr_type & IPV6_ADDR_MULTICAST)
1461
		rt->dst.input = ip6_mc_input;
1462 1463
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
L
Linus Torvalds 已提交
1464
	else
1465
		rt->dst.input = ip6_forward;
L
Linus Torvalds 已提交
1466

1467
	rt->dst.output = ip6_output;
L
Linus Torvalds 已提交
1468

1469 1470
	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
L
Linus Torvalds 已提交
1471
	if (rt->rt6i_dst.plen == 128)
1472
	       rt->dst.flags |= DST_HOST;
L
Linus Torvalds 已提交
1473

1474 1475 1476 1477 1478 1479 1480 1481
	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
		if (!metrics) {
			err = -ENOMEM;
			goto out;
		}
		dst_init_metrics(&rt->dst, metrics, 0);
	}
L
Linus Torvalds 已提交
1482
#ifdef CONFIG_IPV6_SUBTREES
1483 1484
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
L
Linus Torvalds 已提交
1485 1486
#endif

1487
	rt->rt6i_metric = cfg->fc_metric;
L
Linus Torvalds 已提交
1488 1489 1490 1491

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
1492
	if ((cfg->fc_flags & RTF_REJECT) ||
1493 1494 1495
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
L
Linus Torvalds 已提交
1496
		/* hold loopback dev/idev if we haven't done so. */
1497
		if (dev != net->loopback_dev) {
L
Linus Torvalds 已提交
1498 1499 1500 1501
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
1502
			dev = net->loopback_dev;
L
Linus Torvalds 已提交
1503 1504 1505 1506 1507 1508 1509
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
1510 1511
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
L
Linus Torvalds 已提交
1512
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1513 1514 1515 1516 1517 1518 1519
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			break;
1520 1521 1522
		case RTN_THROW:
			rt->dst.error = -EAGAIN;
			break;
1523 1524 1525 1526
		default:
			rt->dst.error = -ENETUNREACH;
			break;
		}
L
Linus Torvalds 已提交
1527 1528 1529
		goto install_route;
	}

1530
	if (cfg->fc_flags & RTF_GATEWAY) {
1531
		const struct in6_addr *gw_addr;
L
Linus Torvalds 已提交
1532 1533
		int gwa_type;

1534
		gw_addr = &cfg->fc_gateway;
A
Alexey Dobriyan 已提交
1535
		rt->rt6i_gateway = *gw_addr;
L
Linus Torvalds 已提交
1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
1549
			if (!(gwa_type & IPV6_ADDR_UNICAST))
L
Linus Torvalds 已提交
1550 1551
				goto out;

1552
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
L
Linus Torvalds 已提交
1553 1554

			err = -EHOSTUNREACH;
1555
			if (!grt)
L
Linus Torvalds 已提交
1556 1557
				goto out;
			if (dev) {
1558
				if (dev != grt->dst.dev) {
A
Amerigo Wang 已提交
1559
					ip6_rt_put(grt);
L
Linus Torvalds 已提交
1560 1561 1562
					goto out;
				}
			} else {
1563
				dev = grt->dst.dev;
L
Linus Torvalds 已提交
1564 1565 1566 1567
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
1568
			if (!(grt->rt6i_flags & RTF_GATEWAY))
L
Linus Torvalds 已提交
1569
				err = 0;
A
Amerigo Wang 已提交
1570
			ip6_rt_put(grt);
L
Linus Torvalds 已提交
1571 1572 1573 1574 1575

			if (err)
				goto out;
		}
		err = -EINVAL;
1576
		if (!dev || (dev->flags & IFF_LOOPBACK))
L
Linus Torvalds 已提交
1577 1578 1579 1580
			goto out;
	}

	err = -ENODEV;
1581
	if (!dev)
L
Linus Torvalds 已提交
1582 1583
		goto out;

1584 1585 1586 1587 1588
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
A
Alexey Dobriyan 已提交
1589
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1590 1591 1592 1593
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

1594
	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1595
		err = rt6_bind_neighbour(rt, dev);
1596
		if (err)
L
Linus Torvalds 已提交
1597 1598 1599
			goto out;
	}

1600
	rt->rt6i_flags = cfg->fc_flags;
L
Linus Torvalds 已提交
1601 1602

install_route:
1603 1604 1605 1606 1607
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1608
			int type = nla_type(nla);
1609 1610 1611

			if (type) {
				if (type > RTAX_MAX) {
L
Linus Torvalds 已提交
1612 1613 1614
					err = -EINVAL;
					goto out;
				}
1615

1616
				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
L
Linus Torvalds 已提交
1617 1618 1619 1620
			}
		}
	}

1621
	rt->dst.dev = dev;
L
Linus Torvalds 已提交
1622
	rt->rt6i_idev = idev;
T
Thomas Graf 已提交
1623
	rt->rt6i_table = table;
1624

1625
	cfg->fc_nlinfo.nl_net = dev_net(dev);
1626

1627
	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
L
Linus Torvalds 已提交
1628 1629 1630 1631 1632 1633 1634

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
1635
		dst_free(&rt->dst);
L
Linus Torvalds 已提交
1636 1637 1638
	return err;
}

1639
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
L
Linus Torvalds 已提交
1640 1641
{
	int err;
T
Thomas Graf 已提交
1642
	struct fib6_table *table;
1643
	struct net *net = dev_net(rt->dst.dev);
L
Linus Torvalds 已提交
1644

1645 1646 1647 1648
	if (rt == net->ipv6.ip6_null_entry) {
		err = -ENOENT;
		goto out;
	}
1649

T
Thomas Graf 已提交
1650 1651
	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
1652
	err = fib6_del(rt, info);
T
Thomas Graf 已提交
1653
	write_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
1654

1655
out:
A
Amerigo Wang 已提交
1656
	ip6_rt_put(rt);
L
Linus Torvalds 已提交
1657 1658 1659
	return err;
}

1660 1661
int ip6_del_rt(struct rt6_info *rt)
{
1662
	struct nl_info info = {
1663
		.nl_net = dev_net(rt->dst.dev),
1664
	};
1665
	return __ip6_del_rt(rt, &info);
1666 1667
}

1668
static int ip6_route_del(struct fib6_config *cfg)
L
Linus Torvalds 已提交
1669
{
T
Thomas Graf 已提交
1670
	struct fib6_table *table;
L
Linus Torvalds 已提交
1671 1672 1673 1674
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

1675
	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1676
	if (!table)
T
Thomas Graf 已提交
1677 1678 1679
		return err;

	read_lock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
1680

T
Thomas Graf 已提交
1681
	fn = fib6_locate(&table->tb6_root,
1682 1683
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);
1684

L
Linus Torvalds 已提交
1685
	if (fn) {
1686
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1687
			if (cfg->fc_ifindex &&
1688 1689
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
L
Linus Torvalds 已提交
1690
				continue;
1691 1692
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
L
Linus Torvalds 已提交
1693
				continue;
1694
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
L
Linus Torvalds 已提交
1695
				continue;
1696
			dst_hold(&rt->dst);
T
Thomas Graf 已提交
1697
			read_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
1698

1699
			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
L
Linus Torvalds 已提交
1700 1701
		}
	}
T
Thomas Graf 已提交
1702
	read_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
1703 1704 1705 1706

	return err;
}

1707
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1708
{
1709
	struct net *net = dev_net(skb->dev);
1710
	struct netevent_redirect netevent;
1711 1712 1713
	struct rt6_info *rt, *nrt = NULL;
	const struct in6_addr *target;
	struct ndisc_options ndopts;
1714 1715
	const struct in6_addr *dest;
	struct neighbour *old_neigh;
1716 1717 1718
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct icmp6hdr *icmph;
1719 1720
	int optlen, on_link;
	u8 *lladdr;
1721 1722 1723 1724 1725

	optlen = skb->tail - skb->transport_header;
	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);

	if (optlen < 0) {
1726
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1727 1728 1729 1730 1731 1732 1733 1734
		return;
	}

	icmph = icmp6_hdr(skb);
	target = (const struct in6_addr *) (icmph + 1);
	dest = target + 1;

	if (ipv6_addr_is_multicast(dest)) {
1735
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1736 1737 1738
		return;
	}

1739
	on_link = 0;
1740 1741 1742 1743
	if (ipv6_addr_equal(dest, target)) {
		on_link = 1;
	} else if (ipv6_addr_type(target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1744
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}
1763 1764

	lladdr = NULL;
1765 1766 1767 1768 1769 1770 1771 1772 1773
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

1774 1775 1776
	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1777
		return;
1778
	}
1779

1780 1781 1782 1783 1784
	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);
1785

1786 1787 1788
	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
	if (!neigh)
		return;
1789

1790 1791 1792
	/* Duplicate redirect: silently ignore. */
	old_neigh = rt->n;
	if (neigh == old_neigh)
1793
		goto out;
L
Linus Torvalds 已提交
1794 1795 1796 1797 1798

	/*
	 *	We have finally decided to accept it.
	 */

1799
	neigh_update(neigh, lladdr, NUD_STALE,
L
Linus Torvalds 已提交
1800 1801 1802 1803 1804 1805
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

E
Eric Dumazet 已提交
1806
	nrt = ip6_rt_copy(rt, dest);
1807
	if (!nrt)
L
Linus Torvalds 已提交
1808 1809 1810 1811 1812 1813
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

A
Alexey Dobriyan 已提交
1814
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1815
	nrt->n = neigh_clone(neigh);
L
Linus Torvalds 已提交
1816

1817
	if (ip6_ins_rt(nrt))
L
Linus Torvalds 已提交
1818 1819
		goto out;

1820
	netevent.old = &rt->dst;
1821
	netevent.old_neigh = old_neigh;
1822
	netevent.new = &nrt->dst;
1823 1824
	netevent.new_neigh = neigh;
	netevent.daddr = dest;
1825 1826
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

1827
	if (rt->rt6i_flags & RTF_CACHE) {
1828
		rt = (struct rt6_info *) dst_clone(&rt->dst);
1829
		ip6_del_rt(rt);
L
Linus Torvalds 已提交
1830 1831 1832
	}

out:
1833
	neigh_release(neigh);
1834 1835
}

L
Linus Torvalds 已提交
1836 1837 1838 1839
/*
 *	Misc support functions
 */

1840
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
E
Eric Dumazet 已提交
1841
				    const struct in6_addr *dest)
L
Linus Torvalds 已提交
1842
{
1843
	struct net *net = dev_net(ort->dst.dev);
1844 1845
	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
					    ort->rt6i_table);
L
Linus Torvalds 已提交
1846 1847

	if (rt) {
1848 1849
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;
1850
		rt->dst.flags |= DST_HOST;
1851

A
Alexey Dobriyan 已提交
1852
		rt->rt6i_dst.addr = *dest;
1853
		rt->rt6i_dst.plen = 128;
1854
		dst_copy_metrics(&rt->dst, &ort->dst);
1855
		rt->dst.error = ort->dst.error;
L
Linus Torvalds 已提交
1856 1857 1858
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
1859
		rt->dst.lastuse = jiffies;
L
Linus Torvalds 已提交
1860

A
Alexey Dobriyan 已提交
1861
		rt->rt6i_gateway = ort->rt6i_gateway;
1862 1863 1864 1865 1866 1867
		rt->rt6i_flags = ort->rt6i_flags;
		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
		    (RTF_DEFAULT | RTF_ADDRCONF))
			rt6_set_from(rt, ort);
		else
			rt6_clean_expires(rt);
L
Linus Torvalds 已提交
1868 1869 1870 1871 1872
		rt->rt6i_metric = 0;

#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
1873
		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
T
Thomas Graf 已提交
1874
		rt->rt6i_table = ort->rt6i_table;
L
Linus Torvalds 已提交
1875 1876 1877 1878
	}
	return rt;
}

1879
#ifdef CONFIG_IPV6_ROUTE_INFO
1880
static struct rt6_info *rt6_get_route_info(struct net *net,
1881 1882
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
1883 1884 1885
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
T
Thomas Graf 已提交
1886 1887
	struct fib6_table *table;

1888
	table = fib6_get_table(net, RT6_TABLE_INFO);
1889
	if (!table)
T
Thomas Graf 已提交
1890
		return NULL;
1891

1892
	read_lock_bh(&table->tb6_lock);
T
Thomas Graf 已提交
1893
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1894 1895 1896
	if (!fn)
		goto out;

1897
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1898
		if (rt->dst.dev->ifindex != ifindex)
1899 1900 1901 1902 1903
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
1904
		dst_hold(&rt->dst);
1905 1906 1907
		break;
	}
out:
1908
	read_unlock_bh(&table->tb6_lock);
1909 1910 1911
	return rt;
}

1912
static struct rt6_info *rt6_add_route_info(struct net *net,
1913 1914
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
1915
					   unsigned int pref)
1916
{
1917 1918
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
1919
		.fc_metric	= IP6_RT_PRIO_USER,
1920 1921 1922 1923
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
1924
		.fc_nlinfo.portid = 0,
1925 1926
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
1927 1928
	};

A
Alexey Dobriyan 已提交
1929 1930
	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;
1931

1932 1933
	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
1934
		cfg.fc_flags |= RTF_DEFAULT;
1935

1936
	ip6_route_add(&cfg);
1937

1938
	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1939 1940 1941
}
#endif

1942
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1943
{
L
Linus Torvalds 已提交
1944
	struct rt6_info *rt;
T
Thomas Graf 已提交
1945
	struct fib6_table *table;
L
Linus Torvalds 已提交
1946

1947
	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1948
	if (!table)
T
Thomas Graf 已提交
1949
		return NULL;
L
Linus Torvalds 已提交
1950

1951
	read_lock_bh(&table->tb6_lock);
1952
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1953
		if (dev == rt->dst.dev &&
1954
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
L
Linus Torvalds 已提交
1955 1956 1957 1958
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
1959
		dst_hold(&rt->dst);
1960
	read_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
1961 1962 1963
	return rt;
}

1964
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1965 1966
				     struct net_device *dev,
				     unsigned int pref)
L
Linus Torvalds 已提交
1967
{
1968 1969
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
1970
		.fc_metric	= IP6_RT_PRIO_USER,
1971 1972 1973
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1974
		.fc_nlinfo.portid = 0,
1975
		.fc_nlinfo.nlh = NULL,
1976
		.fc_nlinfo.nl_net = dev_net(dev),
1977
	};
L
Linus Torvalds 已提交
1978

A
Alexey Dobriyan 已提交
1979
	cfg.fc_gateway = *gwaddr;
L
Linus Torvalds 已提交
1980

1981
	ip6_route_add(&cfg);
L
Linus Torvalds 已提交
1982 1983 1984 1985

	return rt6_get_dflt_router(gwaddr, dev);
}

1986
void rt6_purge_dflt_routers(struct net *net)
L
Linus Torvalds 已提交
1987 1988
{
	struct rt6_info *rt;
T
Thomas Graf 已提交
1989 1990 1991
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
1992
	table = fib6_get_table(net, RT6_TABLE_DFLT);
1993
	if (!table)
T
Thomas Graf 已提交
1994
		return;
L
Linus Torvalds 已提交
1995 1996

restart:
T
Thomas Graf 已提交
1997
	read_lock_bh(&table->tb6_lock);
1998
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
L
Linus Torvalds 已提交
1999
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
2000
			dst_hold(&rt->dst);
T
Thomas Graf 已提交
2001
			read_unlock_bh(&table->tb6_lock);
2002
			ip6_del_rt(rt);
L
Linus Torvalds 已提交
2003 2004 2005
			goto restart;
		}
	}
T
Thomas Graf 已提交
2006
	read_unlock_bh(&table->tb6_lock);
L
Linus Torvalds 已提交
2007 2008
}

2009 2010
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

2023
	cfg->fc_nlinfo.nl_net = net;
2024

A
Alexey Dobriyan 已提交
2025 2026 2027
	cfg->fc_dst = rtmsg->rtmsg_dst;
	cfg->fc_src = rtmsg->rtmsg_src;
	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2028 2029
}

2030
/*
 *	ioctl entry point for SIOCADDRT / SIOCDELRT.
 *
 *	Returns -EINVAL for any other command, -EPERM without CAP_NET_ADMIN,
 *	-EFAULT if the user buffer cannot be copied, otherwise the result of
 *	ip6_route_add() / ip6_route_del(), which run under the RTNL lock.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct in6_rtmsg rtmsg;
	struct fib6_config cfg;
	int err;

	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
		return -EINVAL;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (copy_from_user(&rtmsg, arg, sizeof(struct in6_rtmsg)))
		return -EFAULT;

	rtmsg_to_fib6_config(net, &rtmsg, &cfg);

	rtnl_lock();
	if (cmd == SIOCADDRT)
		err = ip6_route_add(&cfg);	/* Add a route */
	else
		err = ip6_route_del(&cfg);	/* Delete a route */
	rtnl_unlock();

	return err;
}

/*
 *	Drop the packet on the floor
 */

2071
/*
 *	Account the drop, send an ICMPv6 destination-unreachable with the
 *	given code, and free the skb.  Always returns 0.
 *
 *	For IPSTATS_MIB_INNOROUTES an unspecified destination address is
 *	counted as IPSTATS_MIB_INADDRERRORS instead.  Counter ids other than
 *	the two NOROUTES ones are not accounted at all.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	struct dst_entry *dst = skb_dst(skb);

	if (ipstats_mib_noroutes == IPSTATS_MIB_INNOROUTES &&
	    ipv6_addr_type(&ipv6_hdr(skb)->daddr) == IPV6_ADDR_ANY) {
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      IPSTATS_MIB_INADDRERRORS);
	} else if (ipstats_mib_noroutes == IPSTATS_MIB_INNOROUTES ||
		   ipstats_mib_noroutes == IPSTATS_MIB_OUTNOROUTES) {
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
	}

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}

2094 2095
static int ip6_pkt_discard(struct sk_buff *skb)
{
2096
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2097 2098
}

2099
static int ip6_pkt_discard_out(struct sk_buff *skb)
L
Linus Torvalds 已提交
2100
{
E
Eric Dumazet 已提交
2101
	skb->dev = skb_dst(skb)->dev;
2102
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
L
Linus Torvalds 已提交
2103 2104
}

2105 2106
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

2107 2108
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
2109
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2110 2111 2112 2113
}

static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
E
Eric Dumazet 已提交
2114
	skb->dev = skb_dst(skb)->dev;
2115
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2116 2117
}

2118 2119
#endif

L
Linus Torvalds 已提交
2120 2121 2122 2123 2124 2125
/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
2126
				    bool anycast)
L
Linus Torvalds 已提交
2127
{
2128
	struct net *net = dev_net(idev->dev);
2129
	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2130
	int err;
L
Linus Torvalds 已提交
2131

2132
	if (!rt) {
2133
		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
L
Linus Torvalds 已提交
2134
		return ERR_PTR(-ENOMEM);
2135
	}
L
Linus Torvalds 已提交
2136 2137 2138

	in6_dev_hold(idev);

2139
	rt->dst.flags |= DST_HOST;
2140 2141
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
L
Linus Torvalds 已提交
2142 2143 2144
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2145 2146 2147
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
L
Linus Torvalds 已提交
2148
		rt->rt6i_flags |= RTF_LOCAL;
2149
	err = rt6_bind_neighbour(rt, rt->dst.dev);
2150
	if (err) {
2151
		dst_free(&rt->dst);
2152
		return ERR_PTR(err);
L
Linus Torvalds 已提交
2153 2154
	}

A
Alexey Dobriyan 已提交
2155
	rt->rt6i_dst.addr = *addr;
L
Linus Torvalds 已提交
2156
	rt->rt6i_dst.plen = 128;
2157
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
L
Linus Torvalds 已提交
2158

2159
	atomic_set(&rt->dst.__refcnt, 1);
L
Linus Torvalds 已提交
2160 2161 2162 2163

	return rt;
}

2164 2165
int ip6_route_get_saddr(struct net *net,
			struct rt6_info *rt,
2166
			const struct in6_addr *daddr,
2167 2168 2169 2170 2171 2172
			unsigned int prefs,
			struct in6_addr *saddr)
{
	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
	int err = 0;
	if (rt->rt6i_prefsrc.plen)
A
Alexey Dobriyan 已提交
2173
		*saddr = rt->rt6i_prefsrc.addr;
2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192
	else
		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
					 daddr, prefs, saddr);
	return err;
}

/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;	/* address compared against each route's prefsrc */
};

static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;

2193
	if (((void *)rt->dst.dev == dev || !dev) &&
2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212
	    rt != net->ipv6.ip6_null_entry &&
	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
		/* remove prefsrc entry */
		rt->rt6i_prefsrc.plen = 0;
	}
	return 0;
}

/*
 * Walk all routes in the address's namespace and drop @ifp's address
 * from any route on its device that uses it as preferred source.
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct arg_dev_net_ip adni;

	adni.dev = ifp->idev->dev;
	adni.net = dev_net(adni.dev);
	adni.addr = &ifp->addr;

	fib6_clean_all(adni.net, fib6_remove_prefsrc, 0, &adni);
}

2213 2214 2215 2216 2217
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry is kept */
};

L
Linus Torvalds 已提交
2218 2219
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
S
stephen hemminger 已提交
2220 2221
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;
2222

2223
	if ((rt->dst.dev == dev || !dev) &&
2224
	    rt != adn->net->ipv6.ip6_null_entry)
L
Linus Torvalds 已提交
2225
		return -1;
2226

L
Linus Torvalds 已提交
2227 2228 2229
	return 0;
}

2230
void rt6_ifdown(struct net *net, struct net_device *dev)
L
Linus Torvalds 已提交
2231
{
2232 2233 2234 2235 2236 2237
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2238
	icmp6_clean_all(fib6_ifdown, &adn);
L
Linus Torvalds 已提交
2239 2240
}

2241
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
2258
	if (!idev)
L
Linus Torvalds 已提交
2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discouvery.
	 */
2275
	if (rt->dst.dev == arg->dev &&
2276 2277 2278 2279
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	     (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2280
		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2281
	}
L
Linus Torvalds 已提交
2282 2283 2284
	return 0;
}

2285
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
L
Linus Torvalds 已提交
2286
{
T
Thomas Graf 已提交
2287 2288 2289 2290
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};
L
Linus Torvalds 已提交
2291

2292
	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
L
Linus Torvalds 已提交
2293 2294
}

2295
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2296
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2297
	[RTA_OIF]               = { .type = NLA_U32 },
2298
	[RTA_IIF]		= { .type = NLA_U32 },
2299 2300
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
2301
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2302 2303 2304 2305
};

static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
L
Linus Torvalds 已提交
2306
{
2307 2308 2309
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;
L
Linus Torvalds 已提交
2310

2311 2312 2313
	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;
L
Linus Torvalds 已提交
2314

2315 2316 2317 2318 2319 2320 2321 2322 2323
	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
2324
	cfg->fc_type = rtm->rtm_type;
2325

2326 2327
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
2328 2329
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
2330 2331
		cfg->fc_flags |= RTF_REJECT;

2332 2333 2334
	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

2335
	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2336
	cfg->fc_nlinfo.nlh = nlh;
2337
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2338 2339 2340 2341

	if (tb[RTA_GATEWAY]) {
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
L
Linus Torvalds 已提交
2342
	}
2343 2344 2345 2346 2347 2348 2349 2350

	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
L
Linus Torvalds 已提交
2351
	}
2352 2353 2354 2355 2356 2357 2358 2359

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
L
Linus Torvalds 已提交
2360
	}
2361

2362 2363 2364
	if (tb[RTA_PREFSRC])
		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);

2365 2366 2367 2368 2369 2370 2371 2372 2373
	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
L
Linus Torvalds 已提交
2374
	}
2375 2376 2377 2378

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

2379 2380 2381 2382 2383
	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

2384 2385 2386
	err = 0;
errout:
	return err;
L
Linus Torvalds 已提交
2387 2388
}

2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431
static int ip6_route_multipath(struct fib6_config *cfg, int add)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 0, last_err = 0;

beginning:
	rtnh = (struct rtnexthop *)cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
		if (err) {
			last_err = err;
			/* If we are trying to remove a route, do not stop the
			 * loop when ip6_route_del() fails (because next hop is
			 * already gone), we should try to remove all next hops.
			 */
			if (add) {
				/* If add fails, we should try to delete all
				 * next hops that have been already added.
				 */
				add = 0;
				goto beginning;
			}
		}
2432 2433 2434 2435 2436 2437
		/* Because each route is added like a single route we remove
		 * this flag after the first nexthop (if there is a collision,
		 * we have already fail to add the first nexthop:
		 * fib6_add_rt2node() has reject it).
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2438 2439 2440 2441 2442 2443
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}

2444
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
L
Linus Torvalds 已提交
2445
{
2446 2447
	struct fib6_config cfg;
	int err;
L
Linus Torvalds 已提交
2448

2449 2450 2451 2452
	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

2453 2454 2455 2456
	if (cfg.fc_mp)
		return ip6_route_multipath(&cfg, 0);
	else
		return ip6_route_del(&cfg);
L
Linus Torvalds 已提交
2457 2458
}

2459
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
L
Linus Torvalds 已提交
2460
{
2461 2462
	struct fib6_config cfg;
	int err;
L
Linus Torvalds 已提交
2463

2464 2465 2466 2467
	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

2468 2469 2470 2471
	if (cfg.fc_mp)
		return ip6_route_multipath(&cfg, 1);
	else
		return ip6_route_add(&cfg);
L
Linus Torvalds 已提交
2472 2473
}

2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484
static inline size_t rt6_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
2485
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2486 2487 2488
	       + nla_total_size(sizeof(struct rta_cacheinfo));
}

2489 2490
/*
 * Serialise route @rt into @skb as a netlink message of type @type.
 *
 * @dst / @src, when non-NULL, are reported as /128 RTA_DST / RTA_SRC in
 * place of the route's own prefixes.  With @prefix set, routes without
 * RTF_PREFIX_RT are skipped and 1 is returned.
 *
 * Returns the nlmsg_end() result on success, 1 for a skipped route, 0
 * when ip6mr_get_route() returned 0 with @nowait clear, and -EMSGSIZE
 * (after cancelling the message) when the skb ran out of room.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct neighbour *n;
	struct rtmsg *rtm;
	long expires;
	u32 table;

	/* user wants prefix routes only: anything else is a no-op success */
	if (prefix && !(rt->rt6i_flags & RTF_PREFIX_RT))
		return 1;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;

	table = rt->rt6i_table ? rt->rt6i_table->tb6_id : RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_REJECT) {
		/* the reject flavour is recovered from the dst error code */
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	} else if ((rt->rt6i_flags & RTF_LOCAL) ||
		   (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))) {
		rtm->rtm_type = RTN_LOCAL;
	} else {
		rtm->rtm_type = RTN_UNICAST;
	}

	rtm->rtm_scope = RT_SCOPE_UNIVERSE;

	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (!(rt->rt6i_flags & RTF_ADDRCONF))
		rtm->rtm_protocol = rt->rt6i_protocol;
	else if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
		rtm->rtm_protocol = RTPROT_RA;
	else
		rtm->rtm_protocol = RTPROT_KERNEL;

	rtm->rtm_flags = (rt->rt6i_flags & RTF_CACHE) ? RTM_F_CLONED : 0;

	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len &&
		   nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr)) {
		goto nla_put_failure;
	}
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr)) {
		goto nla_put_failure;
	}
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);

			if (err == 0 && !nowait)
				return 0;
			/* with nowait only -EMSGSIZE aborts the fill */
			if (err < 0 && (!nowait || err == -EMSGSIZE))
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;

		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr prefsrc = rt->rt6i_prefsrc.addr;

		if (nla_put(skb, RTA_PREFSRC, 16, &prefsrc))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* the gateway is taken from the bound neighbour's key, if any */
	n = rt->n;
	if (n && nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
		goto nla_put_failure;

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_EXPIRES)
		expires = rt->dst.expires - jiffies;
	else
		expires = 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

2636
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
L
Linus Torvalds 已提交
2637 2638 2639 2640
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

2641 2642
	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
L
Linus Torvalds 已提交
2643 2644 2645 2646
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

2647 2648
	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2649
		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2650
		     prefix, 0, NLM_F_MULTI);
L
Linus Torvalds 已提交
2651 2652
}

2653
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
L
Linus Torvalds 已提交
2654
{
2655
	struct net *net = sock_net(in_skb->sk);
2656 2657
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
L
Linus Torvalds 已提交
2658
	struct sk_buff *skb;
2659
	struct rtmsg *rtm;
2660
	struct flowi6 fl6;
2661
	int err, iif = 0, oif = 0;
L
Linus Torvalds 已提交
2662

2663 2664 2665
	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;
L
Linus Torvalds 已提交
2666

2667
	err = -EINVAL;
2668
	memset(&fl6, 0, sizeof(fl6));
L
Linus Torvalds 已提交
2669

2670 2671 2672 2673
	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

A
Alexey Dobriyan 已提交
2674
		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2675 2676 2677 2678 2679 2680
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

A
Alexey Dobriyan 已提交
2681
		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2682 2683 2684 2685 2686 2687
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
2688
		oif = nla_get_u32(tb[RTA_OIF]);
L
Linus Torvalds 已提交
2689 2690 2691

	if (iif) {
		struct net_device *dev;
2692 2693
		int flags = 0;

2694
		dev = __dev_get_by_index(net, iif);
L
Linus Torvalds 已提交
2695 2696
		if (!dev) {
			err = -ENODEV;
2697
			goto errout;
L
Linus Torvalds 已提交
2698
		}
2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
L
Linus Torvalds 已提交
2711 2712
	}

2713
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2714
	if (!skb) {
A
Amerigo Wang 已提交
2715
		ip6_rt_put(rt);
2716 2717 2718
		err = -ENOBUFS;
		goto errout;
	}
L
Linus Torvalds 已提交
2719

2720 2721 2722
	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
2723
	skb_reset_mac_header(skb);
2724
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
L
Linus Torvalds 已提交
2725

2726
	skb_dst_set(skb, &rt->dst);
L
Linus Torvalds 已提交
2727

2728
	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2729
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2730
			    nlh->nlmsg_seq, 0, 0, 0);
L
Linus Torvalds 已提交
2731
	if (err < 0) {
2732 2733
		kfree_skb(skb);
		goto errout;
L
Linus Torvalds 已提交
2734 2735
	}

2736
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2737
errout:
L
Linus Torvalds 已提交
2738 2739 2740
	return err;
}

2741
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
L
Linus Torvalds 已提交
2742 2743
{
	struct sk_buff *skb;
2744
	struct net *net = info->nl_net;
2745 2746 2747 2748
	u32 seq;
	int err;

	err = -ENOBUFS;
2749
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2750

2751
	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2752
	if (!skb)
2753 2754
		goto errout;

2755
	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2756
				event, info->portid, seq, 0, 0, 0);
2757 2758 2759 2760 2761 2762
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
2763
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2764 2765
		    info->nlh, gfp_any());
	return;
2766 2767
errout:
	if (err < 0)
2768
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
L
Linus Torvalds 已提交
2769 2770
}

2771 2772 2773 2774
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *data)
{
	struct net_device *dev = (struct net_device *)data;
2775
	struct net *net = dev_net(dev);
2776 2777

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2778
		net->ipv6.ip6_null_entry->dst.dev = dev;
2779 2780
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2781
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2782
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2783
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2784 2785 2786 2787 2788 2789 2790
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}

	return NOTIFY_OK;
}

L
Linus Torvalds 已提交
2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807
/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

/*
 * Buffer/offset bookkeeping for /proc output.
 * NOTE(review): no code in this part of the file references this
 * structure (the seq_file based handlers below do not use it) - confirm
 * whether it is still needed.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};

static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
2808
	struct seq_file *m = p_arg;
2809
	struct neighbour *n;
L
Linus Torvalds 已提交
2810

2811
	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
L
Linus Torvalds 已提交
2812 2813

#ifdef CONFIG_IPV6_SUBTREES
2814
	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
L
Linus Torvalds 已提交
2815
#else
2816
	seq_puts(m, "00000000000000000000000000000000 00 ");
L
Linus Torvalds 已提交
2817
#endif
2818
	n = rt->n;
2819 2820
	if (n) {
		seq_printf(m, "%pi6", n->primary_key);
L
Linus Torvalds 已提交
2821
	} else {
2822
		seq_puts(m, "00000000000000000000000000000000");
L
Linus Torvalds 已提交
2823
	}
2824
	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2825 2826
		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
		   rt->dst.__use, rt->rt6i_flags,
2827
		   rt->dst.dev ? rt->dst.dev->name : "");
L
Linus Torvalds 已提交
2828 2829 2830
	return 0;
}

2831
static int ipv6_route_show(struct seq_file *m, void *v)
L
Linus Torvalds 已提交
2832
{
2833
	struct net *net = (struct net *)m->private;
2834
	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2835 2836
	return 0;
}
L
Linus Torvalds 已提交
2837

2838 2839
static int ipv6_route_open(struct inode *inode, struct file *file)
{
2840
	return single_open_net(inode, file, ipv6_route_show);
2841 2842
}

2843 2844 2845 2846 2847
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
2848
	.release	= single_release_net,
2849 2850
};

L
Linus Torvalds 已提交
2851 2852
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
2853
	struct net *net = (struct net *)seq->private;
L
Linus Torvalds 已提交
2854
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2855 2856 2857 2858 2859
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
2860
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2861
		   net->ipv6.rt6_stats->fib_discarded_routes);
L
Linus Torvalds 已提交
2862 2863 2864 2865 2866 2867

	return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
2868
	return single_open_net(inode, file, rt6_stats_seq_show);
2869 2870
}

2871
static const struct file_operations rt6_stats_seq_fops = {
L
Linus Torvalds 已提交
2872 2873 2874 2875
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
2876
	.release = single_release_net,
L
Linus Torvalds 已提交
2877 2878 2879 2880 2881 2882
};
#endif	/* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

static
2883
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
L
Linus Torvalds 已提交
2884 2885
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
2886 2887 2888
	struct net *net;
	int delay;
	if (!write)
L
Linus Torvalds 已提交
2889
		return -EINVAL;
2890 2891 2892 2893 2894 2895

	net = (struct net *)ctl->extra1;
	delay = net->ipv6.sysctl.flush_delay;
	proc_dointvec(ctl, write, buffer, lenp, ppos);
	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
	return 0;
L
Linus Torvalds 已提交
2896 2897
}

2898
ctl_table ipv6_route_table_template[] = {
2899
	{
L
Linus Torvalds 已提交
2900
		.procname	=	"flush",
2901
		.data		=	&init_net.ipv6.sysctl.flush_delay,
L
Linus Torvalds 已提交
2902
		.maxlen		=	sizeof(int),
2903
		.mode		=	0200,
A
Alexey Dobriyan 已提交
2904
		.proc_handler	=	ipv6_sysctl_rtcache_flush
L
Linus Torvalds 已提交
2905 2906 2907
	},
	{
		.procname	=	"gc_thresh",
2908
		.data		=	&ip6_dst_ops_template.gc_thresh,
L
Linus Torvalds 已提交
2909 2910
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2911
		.proc_handler	=	proc_dointvec,
L
Linus Torvalds 已提交
2912 2913 2914
	},
	{
		.procname	=	"max_size",
2915
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
L
Linus Torvalds 已提交
2916 2917
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2918
		.proc_handler	=	proc_dointvec,
L
Linus Torvalds 已提交
2919 2920 2921
	},
	{
		.procname	=	"gc_min_interval",
2922
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
L
Linus Torvalds 已提交
2923 2924
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2925
		.proc_handler	=	proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2926 2927 2928
	},
	{
		.procname	=	"gc_timeout",
2929
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
L
Linus Torvalds 已提交
2930 2931
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2932
		.proc_handler	=	proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2933 2934 2935
	},
	{
		.procname	=	"gc_interval",
2936
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
L
Linus Torvalds 已提交
2937 2938
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2939
		.proc_handler	=	proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2940 2941 2942
	},
	{
		.procname	=	"gc_elasticity",
2943
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
L
Linus Torvalds 已提交
2944 2945
		.maxlen		=	sizeof(int),
		.mode		=	0644,
2946
		.proc_handler	=	proc_dointvec,
L
Linus Torvalds 已提交
2947 2948 2949
	},
	{
		.procname	=	"mtu_expires",
2950
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
L
Linus Torvalds 已提交
2951 2952
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2953
		.proc_handler	=	proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2954 2955 2956
	},
	{
		.procname	=	"min_adv_mss",
2957
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
L
Linus Torvalds 已提交
2958 2959
		.maxlen		=	sizeof(int),
		.mode		=	0644,
2960
		.proc_handler	=	proc_dointvec,
L
Linus Torvalds 已提交
2961 2962 2963
	},
	{
		.procname	=	"gc_min_interval_ms",
2964
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
L
Linus Torvalds 已提交
2965 2966
		.maxlen		=	sizeof(int),
		.mode		=	0644,
A
Alexey Dobriyan 已提交
2967
		.proc_handler	=	proc_dointvec_ms_jiffies,
L
Linus Torvalds 已提交
2968
	},
2969
	{ }
L
Linus Torvalds 已提交
2970 2971
};

2972
/*
 * Duplicate the sysctl template for @net and retarget each entry's data
 * pointer at the namespace's own variables.  Indices follow the order of
 * ipv6_route_table_template.  Returns NULL if the copy cannot be
 * allocated.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);
	if (!table)
		return table;

	/* "flush" also needs the namespace itself, passed through extra1 */
	table[0].data = &net->ipv6.sysctl.flush_delay;
	table[0].extra1 = net;

	table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
	table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
	table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
	table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
	table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
	table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
	table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
	table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

	return table;
}
L
Linus Torvalds 已提交
2996 2997
#endif

2998
/*
 * Per-namespace setup: copy the dst ops template, create the namespace's
 * private copies of the special route entries (null, and with multiple
 * tables also prohibit and blackhole) and load the sysctl defaults.
 *
 * Returns 0 on success or -ENOMEM, with everything allocated so far
 * released again.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		return -ENOMEM;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto err_dst_entries;
	/* each special entry is its own path and uses this netns' dst ops */
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto err_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto err_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* sysctl defaults */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	return 0;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
err_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
err_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
err_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
	return -ENOMEM;
}

3070
/*
 * Per-namespace teardown: free the special route entries held in @net
 * (null entry always; prohibit and blackhole entries only with
 * CONFIG_IPV6_MULTIPLE_TABLES) and destroy the dst entry counter of the
 * namespace's dst_ops.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}

3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096
/*
 * Late per-namespace init: with CONFIG_PROC_FS, create the "ipv6_route" and
 * "rt6_stats" proc entries for @net.  The results of proc_net_fops_create()
 * are not checked, so this always returns 0.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	return 0;
}

/*
 * Late per-namespace exit: with CONFIG_PROC_FS, remove the "ipv6_route" and
 * "rt6_stats" proc entries of @net.
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
}

3097 3098 3099 3100 3101
/* Per-namespace init/exit hooks for the core IPv6 routing state. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117
/*
 * Allocate and initialise the inet_peer_base of a namespace and publish it
 * in net->ipv6.peers.
 *
 * Returns 0 on success or -ENOMEM when the allocation fails.
 */
static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *base;

	base = kmalloc(sizeof(*base), GFP_KERNEL);
	if (base == NULL)
		return -ENOMEM;

	inet_peer_base_init(base);
	net->ipv6.peers = base;

	return 0;
}

/*
 * Tear down the inet_peer_base of a namespace: clear net->ipv6.peers first,
 * then invalidate the peer tree and free the base itself.
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *base = net->ipv6.peers;

	net->ipv6.peers = NULL;

	inetpeer_invalidate_tree(base);
	kfree(base);
}

3122
static struct pernet_operations ipv6_inetpeer_ops = {
3123 3124 3125 3126
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};

3127 3128 3129 3130 3131
/* Per-namespace init/exit hooks that create and remove the proc entries. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

3132 3133 3134 3135 3136
/* Netdevice notifier block; events are handled by ip6_route_dev_notify(). */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};

3137
/*
 * Boot-time initialisation of the IPv6 routing layer.
 *
 * Creates the dst slab cache, initialises the blackhole dst_ops counter,
 * registers the per-namespace subsystems, attaches the loopback device to
 * the special route entries of init_net, brings up fib6, xfrm6 and the fib6
 * rules, registers the rtnetlink route handlers and finally the netdevice
 * notifier.
 *
 * Returns 0 on success.  On failure the steps already completed are undone
 * in reverse order and a negative errno is returned (-ENOMEM when the cache
 * cannot be created, -ENOBUFS when an rtnetlink handler cannot be
 * registered, otherwise the error of the failing call).
 *
 * NOTE(review): the error path does not undo the __rtnl_register() calls
 * when register_netdevice_notifier() fails - confirm this is intended.
 */
int __init ip6_route_init(void)
{
	int err;

	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		return -ENOMEM;

	err = dst_entries_init(&ip6_dst_blackhole_ops);
	if (err)
		goto err_kmem_cache;

	err = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (err)
		goto err_dst_entries;

	err = register_pernet_subsys(&ip6_route_net_ops);
	if (err)
		goto err_inetpeer;

	/* The blackhole ops allocate from the same slab cache. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/*
	 * The loopback device is registered before this code runs, so the
	 * loopback reference in rt6_info has not been taken for init_net;
	 * set it up by hand here.
	 */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif

	err = fib6_init();
	if (err)
		goto err_route_net;

	err = xfrm6_init();
	if (err)
		goto err_fib6;

	err = fib6_rules_init();
	if (err)
		goto err_xfrm6;

	err = register_pernet_subsys(&ip6_route_net_late_ops);
	if (err)
		goto err_fib6_rules;

	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) {
		err = -ENOBUFS;
		goto err_route_net_late;
	}

	err = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (err)
		goto err_route_net_late;

	return 0;

	/* Error unwinding, in reverse order of the setup above. */
err_route_net_late:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
err_fib6_rules:
	fib6_rules_cleanup();
err_xfrm6:
	xfrm6_fini();
err_fib6:
	fib6_gc_cleanup();
err_route_net:
	unregister_pernet_subsys(&ip6_route_net_ops);
err_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
err_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
err_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	return err;
}

void ip6_route_cleanup(void)
{
3223
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3224
	unregister_pernet_subsys(&ip6_route_net_late_ops);
T
Thomas Graf 已提交
3225
	fib6_rules_cleanup();
L
Linus Torvalds 已提交
3226 3227
	xfrm6_fini();
	fib6_gc_cleanup();
3228
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3229
	unregister_pernet_subsys(&ip6_route_net_ops);
3230
	dst_entries_destroy(&ip6_dst_blackhole_ops);
3231
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
L
Linus Torvalds 已提交
3232
}