ndisc.c 48.3 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-or-later
L
Linus Torvalds 已提交
2 3
/*
 *	Neighbour Discovery for IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Mike Shaver		<shaver@ingenia.com>
 */

/*
 *	Changes:
 *
 *	Alexey I. Froloff		:	RFC6106 (DNSSL) support
 *	Pierre Ynard			:	export userland ND options
 *						through netlink (RDNSS support)
 *	Lars Fenneberg			:	fixed MTU setting on receipt
 *						of an RA.
 *	Janos Farkas			:	kmalloc failure checks
 *	Alexey Kuznetsov		:	state machine reworked
 *						and moved to net/core.
 *	Pekka Savola			:	RFC2461 validation
 *	YOSHIFUJI Hideaki @USAGI	:	Verify ND options properly
 */

26
#define pr_fmt(fmt) "ICMPv6: " fmt
L
Linus Torvalds 已提交
27 28 29 30 31 32 33 34 35 36 37 38

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/sched.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/route.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
39
#include <linux/slab.h>
L
Linus Torvalds 已提交
40 41 42 43
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

44
#include <linux/if_addr.h>
45
#include <linux/if_ether.h>
L
Linus Torvalds 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/jhash.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>

61 62 63
#include <net/netlink.h>
#include <linux/rtnetlink.h>

L
Linus Torvalds 已提交
64 65
#include <net/flow.h>
#include <net/ip6_checksum.h>
66
#include <net/inet_common.h>
L
Linus Torvalds 已提交
67 68 69 70 71
#include <linux/proc_fs.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

72 73
static u32 ndisc_hash(const void *pkey,
		      const struct net_device *dev,
74
		      __u32 *hash_rnd);
75
static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey);
76 77
static bool ndisc_allow_add(const struct net_device *dev,
			    struct netlink_ext_ack *extack);
L
Linus Torvalds 已提交
78 79 80 81 82 83 84
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
static int pndisc_constructor(struct pneigh_entry *n);
static void pndisc_destructor(struct pneigh_entry *n);
static void pndisc_redo(struct sk_buff *skb);

S
Stephen Hemminger 已提交
85
static const struct neigh_ops ndisc_generic_ops = {
L
Linus Torvalds 已提交
86 87 88 89 90 91 92
	.family =		AF_INET6,
	.solicit =		ndisc_solicit,
	.error_report =		ndisc_error_report,
	.output =		neigh_resolve_output,
	.connected_output =	neigh_connected_output,
};

S
Stephen Hemminger 已提交
93
static const struct neigh_ops ndisc_hh_ops = {
L
Linus Torvalds 已提交
94 95 96 97 98 99 100 101
	.family =		AF_INET6,
	.solicit =		ndisc_solicit,
	.error_report =		ndisc_error_report,
	.output =		neigh_resolve_output,
	.connected_output =	neigh_resolve_output,
};


S
Stephen Hemminger 已提交
102
static const struct neigh_ops ndisc_direct_ops = {
L
Linus Torvalds 已提交
103
	.family =		AF_INET6,
104 105
	.output =		neigh_direct_output,
	.connected_output =	neigh_direct_output,
L
Linus Torvalds 已提交
106 107 108 109 110
};

struct neigh_table nd_tbl = {
	.family =	AF_INET6,
	.key_len =	sizeof(struct in6_addr),
111
	.protocol =	cpu_to_be16(ETH_P_IPV6),
L
Linus Torvalds 已提交
112
	.hash =		ndisc_hash,
113
	.key_eq =	ndisc_key_eq,
L
Linus Torvalds 已提交
114 115 116 117
	.constructor =	ndisc_constructor,
	.pconstructor =	pndisc_constructor,
	.pdestructor =	pndisc_destructor,
	.proxy_redo =	pndisc_redo,
118
	.allow_add  =   ndisc_allow_add,
L
Linus Torvalds 已提交
119 120
	.id =		"ndisc_cache",
	.parms = {
121 122
		.tbl			= &nd_tbl,
		.reachable_time		= ND_REACHABLE_TIME,
J
Jiri Pirko 已提交
123 124 125 126 127 128 129
		.data = {
			[NEIGH_VAR_MCAST_PROBES] = 3,
			[NEIGH_VAR_UCAST_PROBES] = 3,
			[NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER,
			[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
130
			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
J
Jiri Pirko 已提交
131 132 133 134
			[NEIGH_VAR_PROXY_QLEN] = 64,
			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
			[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
		},
L
Linus Torvalds 已提交
135 136 137 138 139 140
	},
	.gc_interval =	  30 * HZ,
	.gc_thresh1 =	 128,
	.gc_thresh2 =	 512,
	.gc_thresh3 =	1024,
};
141
EXPORT_SYMBOL_GPL(nd_tbl);
L
Linus Torvalds 已提交
142

A
Alexander Aring 已提交
143 144
void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
			      int data_len, int pad)
L
Linus Torvalds 已提交
145
{
146
	int space = __ndisc_opt_addr_space(data_len, pad);
147
	u8 *opt = skb_put(skb, space);
L
Linus Torvalds 已提交
148 149 150 151 152 153 154 155 156 157 158

	opt[0] = type;
	opt[1] = space>>3;

	memset(opt + 2, 0, pad);
	opt   += pad;
	space -= pad;

	memcpy(opt+2, data, data_len);
	data_len += 2;
	opt += data_len;
159 160
	space -= data_len;
	if (space > 0)
L
Linus Torvalds 已提交
161 162
		memset(opt, 0, space);
}
A
Alexander Aring 已提交
163
EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option);
L
Linus Torvalds 已提交
164

165
/*
 * Append a link-layer address option for skb->dev, using the device's
 * address length and the padding that ndisc_addr_option_pad() returns
 * for the device type, then let the device's ndisc ops add anything
 * further for @icmp6_type.
 */
static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type,
					  void *data, u8 icmp6_type)
{
	__ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len,
				 ndisc_addr_option_pad(skb->dev->type));
	ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type);
}

/*
 * Append a target link-layer address option carrying @ha to a redirect
 * message, then hand @ops_data to the device's ndisc ops redirect hook.
 */
static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb,
						   void *ha,
						   const u8 *ops_data)
{
	ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT);
	ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data);
}

L
Linus Torvalds 已提交
181 182 183 184 185 186 187 188 189
/*
 * Find the next option after @cur that has the same type as @cur.
 *
 * @cur: option to start from
 * @end: last option that may be returned (inclusive bound: a match at
 *       exactly @end is accepted)
 *
 * Returns the matching option, or NULL when either argument is NULL,
 * @cur is already at or past @end, or no further option of that type
 * exists up to @end.
 */
static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
					    struct nd_opt_hdr *end)
{
	int type;

	if (!cur || !end || cur >= end)
		return NULL;
	type = cur->nd_opt_type;
	do {
		/* nd_opt_len counts units of 8 bytes. */
		cur = ((void *)cur) + (cur->nd_opt_len << 3);
	} while (cur < end && cur->nd_opt_type != type);
	return cur <= end && cur->nd_opt_type == type ? cur : NULL;
}

194 195
static inline int ndisc_is_useropt(const struct net_device *dev,
				   struct nd_opt_hdr *opt)
196
{
197
	return opt->nd_opt_type == ND_OPT_RDNSS ||
198
		opt->nd_opt_type == ND_OPT_DNSSL ||
199
		opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
200
		opt->nd_opt_type == ND_OPT_PREF64 ||
201
		ndisc_ops_is_useropt(dev, opt->nd_opt_type);
202 203
}

204 205
static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
					     struct nd_opt_hdr *cur,
206 207 208 209 210 211
					     struct nd_opt_hdr *end)
{
	if (!cur || !end || cur >= end)
		return NULL;
	do {
		cur = ((void *)cur) + (cur->nd_opt_len << 3);
212 213
	} while (cur < end && !ndisc_is_useropt(dev, cur));
	return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL;
214 215
}

216 217
/*
 * Walk the ND options in [@opt, @opt + @opt_len) and record them in
 * @ndopts.
 *
 * @dev:     receiving device, passed to the ndisc ops hooks
 * @opt:     first byte of the option area
 * @opt_len: length of the option area in bytes
 * @ndopts:  result structure; zeroed before parsing
 *
 * Returns @ndopts on success.  Returns NULL when an argument is invalid,
 * when fewer bytes remain than an option header needs, or when an option
 * has length zero or claims more bytes than remain.
 */
struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
					  u8 *opt, int opt_len,
					  struct ndisc_options *ndopts)
{
	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;

	if (!nd_opt || opt_len < 0 || !ndopts)
		return NULL;
	memset(ndopts, 0, sizeof(*ndopts));
	while (opt_len) {
		int l;

		/* opt_len is known to be positive here. */
		if (opt_len < sizeof(struct nd_opt_hdr))
			return NULL;
		/* nd_opt_len counts units of 8 bytes. */
		l = nd_opt->nd_opt_len << 3;
		if (opt_len < l || l == 0)
			return NULL;
		/* A non-zero return means the device ops consumed the option. */
		if (ndisc_ops_parse_options(dev, nd_opt, ndopts))
			goto next_opt;
		switch (nd_opt->nd_opt_type) {
		case ND_OPT_SOURCE_LL_ADDR:
		case ND_OPT_TARGET_LL_ADDR:
		case ND_OPT_MTU:
		case ND_OPT_NONCE:
		case ND_OPT_REDIRECT_HDR:
			/* Only the first occurrence of these types is kept. */
			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
				ND_PRINTK(2, warn,
					  "%s: duplicated ND6 option found: type=%d\n",
					  __func__, nd_opt->nd_opt_type);
			} else {
				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
			}
			break;
		case ND_OPT_PREFIX_INFO:
			/* Track both the first and the last prefix option. */
			ndopts->nd_opts_pi_end = nd_opt;
			if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
			break;
#ifdef CONFIG_IPV6_ROUTE_INFO
		case ND_OPT_ROUTE_INFO:
			ndopts->nd_opts_ri_end = nd_opt;
			if (!ndopts->nd_opts_ri)
				ndopts->nd_opts_ri = nd_opt;
			break;
#endif
		default:
			if (ndisc_is_useropt(dev, nd_opt)) {
				ndopts->nd_useropts_end = nd_opt;
				if (!ndopts->nd_useropts)
					ndopts->nd_useropts = nd_opt;
			} else {
				/*
				 * Unknown options must be silently ignored,
				 * to accommodate future extension to the
				 * protocol.
				 */
				ND_PRINTK(2, notice,
					  "%s: ignored unsupported option; type=%d, len=%d\n",
					  __func__,
					  nd_opt->nd_opt_type,
					  nd_opt->nd_opt_len);
			}
		}
next_opt:
		opt_len -= l;
		nd_opt = ((void *)nd_opt) + l;
	}
	return ndopts;
}

285
int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
L
Linus Torvalds 已提交
286 287 288 289 290 291 292 293 294 295 296
{
	switch (dev->type) {
	case ARPHRD_ETHER:
	case ARPHRD_IEEE802:	/* Not sure. Check it later. --ANK */
	case ARPHRD_FDDI:
		ipv6_eth_mc_map(addr, buf);
		return 0;
	case ARPHRD_ARCNET:
		ipv6_arcnet_mc_map(addr, buf);
		return 0;
	case ARPHRD_INFINIBAND:
297
		ipv6_ib_mc_map(addr, dev->broadcast, buf);
L
Linus Torvalds 已提交
298
		return 0;
299 300
	case ARPHRD_IPGRE:
		return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
L
Linus Torvalds 已提交
301 302 303 304 305 306 307 308
	default:
		if (dir) {
			memcpy(buf, dev->broadcast, dev->addr_len);
			return 0;
		}
	}
	return -EINVAL;
}
309 310
EXPORT_SYMBOL(ndisc_mc_map);

311 312
static u32 ndisc_hash(const void *pkey,
		      const struct net_device *dev,
313
		      __u32 *hash_rnd)
L
Linus Torvalds 已提交
314
{
315
	return ndisc_hashfn(pkey, dev, hash_rnd);
L
Linus Torvalds 已提交
316 317
}

318 319 320 321 322
/* nd_tbl key comparison callback: defers to neigh_key_eq128(). */
static bool ndisc_key_eq(const struct neighbour *n, const void *pkey)
{
	bool same_key = neigh_key_eq128(n, pkey);

	return same_key;
}

L
Linus Torvalds 已提交
323 324
static int ndisc_constructor(struct neighbour *neigh)
{
325
	struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key;
L
Linus Torvalds 已提交
326 327 328
	struct net_device *dev = neigh->dev;
	struct inet6_dev *in6_dev;
	struct neigh_parms *parms;
329
	bool is_multicast = ipv6_addr_is_multicast(addr);
L
Linus Torvalds 已提交
330 331

	in6_dev = in6_dev_get(dev);
332
	if (!in6_dev) {
L
Linus Torvalds 已提交
333 334 335 336 337 338 339 340
		return -EINVAL;
	}

	parms = in6_dev->nd_parms;
	__neigh_parms_put(neigh->parms);
	neigh->parms = neigh_parms_clone(parms);

	neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
341
	if (!dev->header_ops) {
L
Linus Torvalds 已提交
342 343
		neigh->nud_state = NUD_NOARP;
		neigh->ops = &ndisc_direct_ops;
344
		neigh->output = neigh_direct_output;
L
Linus Torvalds 已提交
345 346 347 348 349 350 351 352 353 354 355 356 357
	} else {
		if (is_multicast) {
			neigh->nud_state = NUD_NOARP;
			ndisc_mc_map(addr, neigh->ha, dev, 1);
		} else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
			neigh->nud_state = NUD_NOARP;
			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
			if (dev->flags&IFF_LOOPBACK)
				neigh->type = RTN_LOCAL;
		} else if (dev->flags&IFF_POINTOPOINT) {
			neigh->nud_state = NUD_NOARP;
			memcpy(neigh->ha, dev->broadcast, dev->addr_len);
		}
358
		if (dev->header_ops->cache)
L
Linus Torvalds 已提交
359 360 361 362 363 364 365 366 367 368 369 370 371 372
			neigh->ops = &ndisc_hh_ops;
		else
			neigh->ops = &ndisc_generic_ops;
		if (neigh->nud_state&NUD_VALID)
			neigh->output = neigh->ops->connected_output;
		else
			neigh->output = neigh->ops->output;
	}
	in6_dev_put(in6_dev);
	return 0;
}

static int pndisc_constructor(struct pneigh_entry *n)
{
373
	struct in6_addr *addr = (struct in6_addr *)&n->key;
L
Linus Torvalds 已提交
374 375 376
	struct in6_addr maddr;
	struct net_device *dev = n->dev;

377
	if (!dev || !__in6_dev_get(dev))
L
Linus Torvalds 已提交
378 379 380 381 382 383 384 385
		return -EINVAL;
	addrconf_addr_solict_mult(addr, &maddr);
	ipv6_dev_mc_inc(dev, &maddr);
	return 0;
}

static void pndisc_destructor(struct pneigh_entry *n)
{
386
	struct in6_addr *addr = (struct in6_addr *)&n->key;
L
Linus Torvalds 已提交
387 388 389
	struct in6_addr maddr;
	struct net_device *dev = n->dev;

390
	if (!dev || !__in6_dev_get(dev))
L
Linus Torvalds 已提交
391 392 393 394 395
		return;
	addrconf_addr_solict_mult(addr, &maddr);
	ipv6_dev_mc_dec(dev, &maddr);
}

396 397 398 399 400 401 402 403 404 405 406 407 408 409
/*
 * nd_tbl allow_add callback; the caller holds rtnl.
 *
 * A neighbour may be added only when @dev has an inet6_dev and IPv6 is
 * not disabled on it.  On refusal an explanatory message is stored in
 * @extack.
 */
static bool ndisc_allow_add(const struct net_device *dev,
			    struct netlink_ext_ack *extack)
{
	struct inet6_dev *idev = __in6_dev_get(dev);
	bool ipv6_usable = idev && !idev->cnf.disable_ipv6;

	if (!ipv6_usable)
		NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device");

	return ipv6_usable;
}

410 411 412 413 414 415 416 417
static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
				       int len)
{
	int hlen = LL_RESERVED_SPACE(dev);
	int tlen = dev->needed_tailroom;
	struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
	struct sk_buff *skb;

418
	skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
419
	if (!skb) {
420 421
		ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
			  __func__);
422 423 424
		return NULL;
	}

425 426 427
	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

428
	skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
429
	skb_reset_transport_header(skb);
430

431 432 433 434 435
	/* Manually assign socket ownership as we avoid calling
	 * sock_alloc_send_pskb() to bypass wmem buffer limits
	 */
	skb_set_owner_w(skb, sk);

436 437 438
	return skb;
}

439
static void ip6_nd_hdr(struct sk_buff *skb,
440 441
		       const struct in6_addr *saddr,
		       const struct in6_addr *daddr,
442
		       int hop_limit, int len)
443 444
{
	struct ipv6hdr *hdr;
445 446 447 448 449 450 451
	struct inet6_dev *idev;
	unsigned tclass;

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);
	tclass = idev ? idev->cnf.ndisc_tclass : 0;
	rcu_read_unlock();
452

453
	skb_push(skb, sizeof(*hdr));
454 455 456
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

457
	ip6_flow_hdr(hdr, tclass, 0);
458 459

	hdr->payload_len = htons(len);
460 461
	hdr->nexthdr = IPPROTO_ICMPV6;
	hdr->hop_limit = hop_limit;
462 463 464 465 466

	hdr->saddr = *saddr;
	hdr->daddr = *daddr;
}

467
static void ndisc_send_skb(struct sk_buff *skb,
468
			   const struct in6_addr *daddr,
469
			   const struct in6_addr *saddr)
470
{
471
	struct dst_entry *dst = skb_dst(skb);
472
	struct net *net = dev_net(skb->dev);
473
	struct sock *sk = net->ipv6.ndisc_sk;
474 475
	struct inet6_dev *idev;
	int err;
476
	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
477 478 479 480
	u8 type;

	type = icmp6h->icmp6_type;

481 482
	if (!dst) {
		struct flowi6 fl6;
483
		int oif = skb->dev->ifindex;
484

D
David Ahern 已提交
485
		icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
486 487 488 489 490 491 492 493
		dst = icmp6_dst_alloc(skb->dev, &fl6);
		if (IS_ERR(dst)) {
			kfree_skb(skb);
			return;
		}

		skb_dst_set(skb, dst);
	}
494

495 496 497 498 499 500 501
	icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len,
					      IPPROTO_ICMPV6,
					      csum_partial(icmp6h,
							   skb->len, 0));

	ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);

E
Eric Dumazet 已提交
502 503
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
504
	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
505

506 507
	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
		      net, sk, skb, NULL, dst->dev,
508
		      dst_output);
L
Linus Torvalds 已提交
509
	if (!err) {
510
		ICMP6MSGOUT_INC_STATS(net, idev, type);
511
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
L
Linus Torvalds 已提交
512 513
	}

E
Eric Dumazet 已提交
514
	rcu_read_unlock();
515
}
L
Linus Torvalds 已提交
516

517
/*
 * Build and send a Neighbour Advertisement.
 *
 * @dev:            outgoing device
 * @daddr:          IPv6 destination address
 * @solicited_addr: address being advertised (the NA target)
 * @router:         value of the router flag
 * @solicited:      value of the solicited flag
 * @override:       value of the override flag; cleared when
 *                  @solicited_addr is an optimistic address on @dev
 * @inc_opt:        include a target link-layer address option; also
 *                  turned on by the force_tllao setting, and turned off
 *                  when the device has no link-layer address
 *
 * The source address is @solicited_addr when it is configured on @dev;
 * otherwise one is selected with ipv6_dev_get_saddr().  Nothing is sent
 * if no source address can be found or the skb allocation fails.
 */
void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
		   const struct in6_addr *solicited_addr,
		   bool router, bool solicited, bool override, bool inc_opt)
{
	struct sk_buff *skb;
	struct in6_addr tmpaddr;
	struct inet6_ifaddr *ifp;
	const struct in6_addr *src_addr;
	struct nd_msg *msg;
	int optlen = 0;

	/* for anycast or proxy, solicited_addr != src_addr */
	ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
	if (ifp) {
		src_addr = solicited_addr;
		if (ifp->flags & IFA_F_OPTIMISTIC)
			override = false;
		inc_opt |= ifp->idev->cnf.force_tllao;
		in6_ifa_put(ifp);
	} else {
		if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
				       inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
				       &tmpaddr))
			return;
		src_addr = &tmpaddr;
	}

	if (!dev->addr_len)
		inc_opt = false;
	if (inc_opt)
		optlen += ndisc_opt_addr_space(dev,
					       NDISC_NEIGHBOUR_ADVERTISEMENT);

	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
	if (!skb)
		return;

	msg = skb_put(skb, sizeof(*msg));
	*msg = (struct nd_msg) {
		.icmph = {
			.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
			.icmp6_router = router,
			.icmp6_solicited = solicited,
			.icmp6_override = override,
		},
		.target = *solicited_addr,
	};

	if (inc_opt)
		ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
				       dev->dev_addr,
				       NDISC_NEIGHBOUR_ADVERTISEMENT);

	ndisc_send_skb(skb, daddr, src_addr);
}

573 574 575 576 577 578 579 580 581 582 583
/*
 * Send an unsolicited Neighbour Advertisement to the link-local
 * all-nodes address for every address configured on @dev, skipping
 * addresses that are tentative and not optimistic.  Does nothing when
 * the device has no inet6_dev.
 */
static void ndisc_send_unsol_na(struct net_device *dev)
{
	struct inet6_dev *idev;
	struct inet6_ifaddr *ifa;

	idev = in6_dev_get(dev);
	if (!idev)
		return;

	read_lock_bh(&idev->lock);
	list_for_each_entry(ifa, &idev->addr_list, if_list) {
		/* skip tentative addresses until dad completes */
		if (ifa->flags & IFA_F_TENTATIVE &&
		    !(ifa->flags & IFA_F_OPTIMISTIC))
			continue;

		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
			      /*router=*/ !!idev->cnf.forwarding,
			      /*solicited=*/ false, /*override=*/ true,
			      /*inc_opt=*/ true);
	}
	read_unlock_bh(&idev->lock);

	in6_dev_put(idev);
}

599
/*
 * Build and send a Neighbour Solicitation.
 *
 * @dev:     outgoing device
 * @solicit: target address being solicited
 * @daddr:   IPv6 destination address
 * @saddr:   IPv6 source address, or NULL to use a link-local address of
 *           @dev that is neither tentative nor optimistic
 * @nonce:   when non-zero, a nonce option carrying 6 bytes of this value
 *           is appended
 *
 * A source link-layer address option is included unless the device has
 * no link-layer address or the source address is unspecified.  Nothing
 * is sent if no source address is available or the skb allocation fails.
 */
void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
		   const struct in6_addr *daddr, const struct in6_addr *saddr,
		   u64 nonce)
{
	struct sk_buff *skb;
	struct in6_addr addr_buf;
	int inc_opt = dev->addr_len;
	int optlen = 0;
	struct nd_msg *msg;

	if (!saddr) {
		if (ipv6_get_lladdr(dev, &addr_buf,
				   (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
			return;
		saddr = &addr_buf;
	}

	if (ipv6_addr_any(saddr))
		inc_opt = false;
	if (inc_opt)
		optlen += ndisc_opt_addr_space(dev,
					       NDISC_NEIGHBOUR_SOLICITATION);
	/* The nonce option occupies one 8-byte unit. */
	if (nonce != 0)
		optlen += 8;

	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
	if (!skb)
		return;

	msg = skb_put(skb, sizeof(*msg));
	*msg = (struct nd_msg) {
		.icmph = {
			.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
		},
		.target = *solicit,
	};

	if (inc_opt)
		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
				       dev->dev_addr,
				       NDISC_NEIGHBOUR_SOLICITATION);
	if (nonce != 0) {
		u8 *opt = skb_put(skb, 8);

		/* type, length in 8-byte units, then 6 nonce bytes */
		opt[0] = ND_OPT_NONCE;
		opt[1] = 8 >> 3;
		memcpy(opt + 2, &nonce, 6);
	}

	ndisc_send_skb(skb, daddr, saddr);
}

651 652
void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
		   const struct in6_addr *daddr)
L
Linus Torvalds 已提交
653
{
654
	struct sk_buff *skb;
655
	struct rs_msg *msg;
656
	int send_sllao = dev->addr_len;
657
	int optlen = 0;
658 659 660 661 662 663 664 665

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * According to section 2.2 of RFC 4429, we must not
	 * send router solicitations with a sllao from
	 * optimistic addresses, but we may send the solicitation
	 * if we don't include the sllao.  So here we check
	 * if our address is optimistic, and if so, we
J
Joe Perches 已提交
666
	 * suppress the inclusion of the sllao.
667 668
	 */
	if (send_sllao) {
669
		struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
670
							   dev, 1);
671 672
		if (ifp) {
			if (ifp->flags & IFA_F_OPTIMISTIC)  {
673
				send_sllao = 0;
674
			}
675
			in6_ifa_put(ifp);
676 677 678 679 680
		} else {
			send_sllao = 0;
		}
	}
#endif
681
	if (send_sllao)
682
		optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION);
683 684

	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
685 686 687
	if (!skb)
		return;

688
	msg = skb_put(skb, sizeof(*msg));
689 690 691 692 693 694 695 696
	*msg = (struct rs_msg) {
		.icmph = {
			.icmp6_type = NDISC_ROUTER_SOLICITATION,
		},
	};

	if (send_sllao)
		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
697 698
				       dev->dev_addr,
				       NDISC_ROUTER_SOLICITATION);
699

700
	ndisc_send_skb(skb, daddr, saddr);
L
Linus Torvalds 已提交
701
}
702

L
Linus Torvalds 已提交
703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723

/*
 * neigh_ops error_report callback: signal link failure for the packet's
 * route via dst_link_failure(), then free the packet.  @neigh is not
 * used.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	/*
	 *	"The sender MUST return an ICMP
	 *	 destination unreachable"
	 */
	dst_link_failure(skb);
	kfree_skb(skb);
}

/* Called with locked neigh: either read or both */

static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
	struct in6_addr *saddr = NULL;
	struct in6_addr mcaddr;
	struct net_device *dev = neigh->dev;
	struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
	int probes = atomic_read(&neigh->probes);

724
	if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
725
					   dev, false, 1,
726
					   IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
727
		saddr = &ipv6_hdr(skb)->saddr;
728 729
	probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
	if (probes < 0) {
L
Linus Torvalds 已提交
730
		if (!(neigh->nud_state & NUD_VALID)) {
731 732 733
			ND_PRINTK(1, dbg,
				  "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
				  __func__, target);
L
Linus Torvalds 已提交
734
		}
735
		ndisc_send_ns(dev, target, target, saddr, 0);
J
Jiri Pirko 已提交
736
	} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
L
Linus Torvalds 已提交
737 738 739
		neigh_app_ns(neigh);
	} else {
		addrconf_addr_solict_mult(target, &mcaddr);
740
		ndisc_send_ns(dev, target, &mcaddr, saddr, 0);
L
Linus Torvalds 已提交
741 742 743
	}
}

744 745
static int pndisc_is_router(const void *pkey,
			    struct net_device *dev)
746 747
{
	struct pneigh_entry *n;
748
	int ret = -1;
749 750

	read_lock_bh(&nd_tbl.lock);
751 752 753
	n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
	if (n)
		ret = !!(n->flags & NTF_ROUTER);
754 755
	read_unlock_bh(&nd_tbl.lock);

756
	return ret;
757 758
}

759 760 761 762
/*
 * Update a neighbour entry and notify the device's ndisc ops.
 *
 * @lladdr, @new and @flags are passed on to neigh_update(); @flags,
 * @icmp6_type and @ndopts are passed on to ndisc_ops_update().
 */
void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
		  const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
		  struct ndisc_options *ndopts)
{
	neigh_update(neigh, lladdr, new, flags, 0);
	/* report ndisc ops about neighbour update */
	ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
}

L
Linus Torvalds 已提交
768 769
static void ndisc_recv_ns(struct sk_buff *skb)
{
770
	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
771 772
	const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
	const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
L
Linus Torvalds 已提交
773
	u8 *lladdr = NULL;
774
	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
775
				    offsetof(struct nd_msg, opt));
L
Linus Torvalds 已提交
776 777 778 779 780 781
	struct ndisc_options ndopts;
	struct net_device *dev = skb->dev;
	struct inet6_ifaddr *ifp;
	struct inet6_dev *idev = NULL;
	struct neighbour *neigh;
	int dad = ipv6_addr_any(saddr);
782
	bool inc;
783
	int is_router = -1;
784
	u64 nonce = 0;
L
Linus Torvalds 已提交
785

786 787 788 789 790
	if (skb->len < sizeof(struct nd_msg)) {
		ND_PRINTK(2, warn, "NS: packet too short\n");
		return;
	}

L
Linus Torvalds 已提交
791
	if (ipv6_addr_is_multicast(&msg->target)) {
792
		ND_PRINTK(2, warn, "NS: multicast target address\n");
L
Linus Torvalds 已提交
793 794 795 796 797 798 799
		return;
	}

	/*
	 * RFC2461 7.1.1:
	 * DAD has to be destined for solicited node multicast address.
	 */
800
	if (dad && !ipv6_addr_is_solict_mult(daddr)) {
801
		ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n");
L
Linus Torvalds 已提交
802 803 804
		return;
	}

805
	if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
806
		ND_PRINTK(2, warn, "NS: invalid ND options\n");
L
Linus Torvalds 已提交
807 808 809 810 811 812
		return;
	}

	if (ndopts.nd_opts_src_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
		if (!lladdr) {
813 814
			ND_PRINTK(2, warn,
				  "NS: invalid link-layer address length\n");
L
Linus Torvalds 已提交
815 816 817 818
			return;
		}

		/* RFC2461 7.1.1:
819 820
		 *	If the IP source address is the unspecified address,
		 *	there MUST NOT be source link-layer address option
L
Linus Torvalds 已提交
821 822 823
		 *	in the message.
		 */
		if (dad) {
824 825
			ND_PRINTK(2, warn,
				  "NS: bad DAD packet (link-layer address option)\n");
L
Linus Torvalds 已提交
826 827 828
			return;
		}
	}
829
	if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1)
830
		memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6);
L
Linus Torvalds 已提交
831 832 833

	inc = ipv6_addr_is_multicast(daddr);

834
	ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
835
	if (ifp) {
D
David Ahern 已提交
836
have_ifp:
837 838
		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
			if (dad) {
839 840 841 842 843 844 845 846 847
				if (nonce != 0 && ifp->dad_nonce == nonce) {
					u8 *np = (u8 *)&nonce;
					/* Matching nonce if looped back */
					ND_PRINTK(2, notice,
						  "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n",
						  ifp->idev->dev->name,
						  &ifp->addr, np);
					goto out;
				}
848 849 850 851 852
				/*
				 * We are colliding with another node
				 * who is doing DAD
				 * so fail our DAD process
				 */
853
				addrconf_dad_failure(skb, ifp);
854
				return;
855 856 857 858 859 860 861 862
			} else {
				/*
				 * This is not a dad solicitation.
				 * If we are an optimistic node,
				 * we should respond.
				 * Otherwise, we should ignore it.
				 */
				if (!(ifp->flags & IFA_F_OPTIMISTIC))
L
Linus Torvalds 已提交
863 864 865 866 867 868
					goto out;
			}
		}

		idev = ifp->idev;
	} else {
869 870
		struct net *net = dev_net(dev);

D
David Ahern 已提交
871 872 873 874 875 876 877 878 879 880 881 882
		/* perhaps an address on the master device */
		if (netif_is_l3_slave(dev)) {
			struct net_device *mdev;

			mdev = netdev_master_upper_dev_get_rcu(dev);
			if (mdev) {
				ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1);
				if (ifp)
					goto have_ifp;
			}
		}

L
Linus Torvalds 已提交
883 884 885 886 887 888
		idev = in6_dev_get(dev);
		if (!idev) {
			/* XXX: count this drop? */
			return;
		}

889
		if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
890
		    (idev->cnf.forwarding &&
891
		     (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) &&
892
		     (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
893
			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
L
Linus Torvalds 已提交
894
			    skb->pkt_type != PACKET_HOST &&
895
			    inc &&
J
Jiri Pirko 已提交
896
			    NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) {
L
Linus Torvalds 已提交
897 898
				/*
				 * for anycast or proxy,
899 900
				 * sender should delay its response
				 * by a random time between 0 and
L
Linus Torvalds 已提交
901 902 903 904 905 906 907 908 909 910 911 912
				 * MAX_ANYCAST_DELAY_TIME seconds.
				 * (RFC2461) -- yoshfuji
				 */
				struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
				if (n)
					pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
				goto out;
			}
		} else
			goto out;
	}

913
	if (is_router < 0)
914
		is_router = idev->cnf.forwarding;
915

L
Linus Torvalds 已提交
916
	if (dad) {
917
		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
918
			      !!is_router, false, (ifp != NULL), true);
L
Linus Torvalds 已提交
919 920 921 922 923 924 925 926
		goto out;
	}

	if (inc)
		NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
	else
		NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);

927
	/*
L
Linus Torvalds 已提交
928 929 930 931 932 933
	 *	update / create cache entry
	 *	for the source address
	 */
	neigh = __neigh_lookup(&nd_tbl, saddr, dev,
			       !inc || lladdr || !dev->addr_len);
	if (neigh)
934
		ndisc_update(dev, neigh, lladdr, NUD_STALE,
L
Linus Torvalds 已提交
935
			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
936 937
			     NEIGH_UPDATE_F_OVERRIDE,
			     NDISC_NEIGHBOUR_SOLICITATION, &ndopts);
938
	if (neigh || !dev->header_ops) {
939
		ndisc_send_na(dev, saddr, &msg->target, !!is_router,
940
			      true, (ifp != NULL && inc), inc);
L
Linus Torvalds 已提交
941 942 943 944 945 946 947 948 949 950 951 952 953
		if (neigh)
			neigh_release(neigh);
	}

out:
	if (ifp)
		in6_ifa_put(ifp);
	else
		in6_dev_put(idev);
}

static void ndisc_recv_na(struct sk_buff *skb)
{
954
	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
955
	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
956
	const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
L
Linus Torvalds 已提交
957
	u8 *lladdr = NULL;
958
	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
959
				    offsetof(struct nd_msg, opt));
L
Linus Torvalds 已提交
960 961
	struct ndisc_options ndopts;
	struct net_device *dev = skb->dev;
962
	struct inet6_dev *idev = __in6_dev_get(dev);
L
Linus Torvalds 已提交
963 964 965 966
	struct inet6_ifaddr *ifp;
	struct neighbour *neigh;

	if (skb->len < sizeof(struct nd_msg)) {
967
		ND_PRINTK(2, warn, "NA: packet too short\n");
L
Linus Torvalds 已提交
968 969 970 971
		return;
	}

	if (ipv6_addr_is_multicast(&msg->target)) {
972
		ND_PRINTK(2, warn, "NA: target address is multicast\n");
L
Linus Torvalds 已提交
973 974 975 976 977
		return;
	}

	if (ipv6_addr_is_multicast(daddr) &&
	    msg->icmph.icmp6_solicited) {
978
		ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n");
L
Linus Torvalds 已提交
979 980
		return;
	}
981

982 983 984 985 986 987 988 989
	/* For some 802.11 wireless deployments (and possibly other networks),
	 * there will be a NA proxy and unsolicitd packets are attacks
	 * and thus should not be accepted.
	 */
	if (!msg->icmph.icmp6_solicited && idev &&
	    idev->cnf.drop_unsolicited_na)
		return;

990
	if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
991
		ND_PRINTK(2, warn, "NS: invalid ND option\n");
L
Linus Torvalds 已提交
992 993 994 995 996
		return;
	}
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
		if (!lladdr) {
997 998
			ND_PRINTK(2, warn,
				  "NA: invalid link-layer address length\n");
L
Linus Torvalds 已提交
999 1000 1001
			return;
		}
	}
1002
	ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
1003
	if (ifp) {
1004 1005
		if (skb->pkt_type != PACKET_LOOPBACK
		    && (ifp->flags & IFA_F_TENTATIVE)) {
1006
				addrconf_dad_failure(skb, ifp);
1007
				return;
L
Linus Torvalds 已提交
1008 1009 1010 1011 1012
		}
		/* What should we make now? The advertisement
		   is invalid, but ndisc specs say nothing
		   about it. It could be misconfiguration, or
		   an smart proxy agent tries to help us :-)
1013 1014 1015 1016

		   We should not print the error if NA has been
		   received from loopback - it is just our own
		   unsolicited advertisement.
L
Linus Torvalds 已提交
1017
		 */
1018
		if (skb->pkt_type != PACKET_LOOPBACK)
1019
			ND_PRINTK(1, warn,
1020 1021
				  "NA: %pM advertised our address %pI6c on %s!\n",
				  eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name);
L
Linus Torvalds 已提交
1022 1023 1024 1025 1026 1027 1028
		in6_ifa_put(ifp);
		return;
	}
	neigh = neigh_lookup(&nd_tbl, &msg->target, dev);

	if (neigh) {
		u8 old_flags = neigh->flags;
1029
		struct net *net = dev_net(dev);
L
Linus Torvalds 已提交
1030 1031 1032 1033

		if (neigh->nud_state & NUD_FAILED)
			goto out;

1034 1035 1036 1037 1038 1039
		/*
		 * Don't update the neighbor cache entry on a proxy NA from
		 * ourselves because either the proxied node is off link or it
		 * has already sent a NA to us.
		 */
		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
1040 1041
		    net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
		    pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
1042
			/* XXX: idev->cnf.proxy_ndp */
1043
			goto out;
1044
		}
1045

1046
		ndisc_update(dev, neigh, lladdr,
L
Linus Torvalds 已提交
1047 1048 1049 1050
			     msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
			     (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1051 1052
			     (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0),
			     NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts);
L
Linus Torvalds 已提交
1053 1054 1055 1056 1057

		if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
			/*
			 * Change: router to host
			 */
1058
			rt6_clean_tohost(dev_net(dev),  saddr);
L
Linus Torvalds 已提交
1059 1060 1061 1062 1063 1064 1065 1066 1067
		}

out:
		neigh_release(neigh);
	}
}

static void ndisc_recv_rs(struct sk_buff *skb)
{
1068
	struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
L
Linus Torvalds 已提交
1069 1070 1071
	unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
	struct neighbour *neigh;
	struct inet6_dev *idev;
1072
	const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
L
Linus Torvalds 已提交
1073 1074 1075 1076 1077 1078
	struct ndisc_options ndopts;
	u8 *lladdr = NULL;

	if (skb->len < sizeof(*rs_msg))
		return;

E
Eric Dumazet 已提交
1079
	idev = __in6_dev_get(skb->dev);
L
Linus Torvalds 已提交
1080
	if (!idev) {
1081
		ND_PRINTK(1, err, "RS: can't find in6 device\n");
L
Linus Torvalds 已提交
1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096
		return;
	}

	/* Don't accept RS if we're not in router mode */
	if (!idev->cnf.forwarding)
		goto out;

	/*
	 * Don't update NCE if src = ::;
	 * this implies that the source node has no ip address assigned yet.
	 */
	if (ipv6_addr_any(saddr))
		goto out;

	/* Parse ND options */
1097
	if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) {
1098
		ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n");
L
Linus Torvalds 已提交
1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110
		goto out;
	}

	if (ndopts.nd_opts_src_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
					     skb->dev);
		if (!lladdr)
			goto out;
	}

	neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
	if (neigh) {
1111
		ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
L
Linus Torvalds 已提交
1112 1113
			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
			     NEIGH_UPDATE_F_OVERRIDE|
1114 1115
			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER,
			     NDISC_ROUTER_SOLICITATION, &ndopts);
L
Linus Torvalds 已提交
1116 1117 1118
		neigh_release(neigh);
	}
out:
E
Eric Dumazet 已提交
1119
	return;
L
Linus Torvalds 已提交
1120 1121
}

1122 1123 1124 1125 1126 1127
static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
{
	struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	struct nduseroptmsg *ndmsg;
1128
	struct net *net = dev_net(ra->dev);
1129 1130 1131 1132 1133 1134
	int err;
	int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
				    + (opt->nd_opt_len << 3));
	size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));

	skb = nlmsg_new(msg_size, GFP_ATOMIC);
1135
	if (!skb) {
1136 1137 1138 1139 1140
		err = -ENOBUFS;
		goto errout;
	}

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1141
	if (!nlh) {
1142 1143 1144 1145 1146
		goto nla_put_failure;
	}

	ndmsg = nlmsg_data(nlh);
	ndmsg->nduseropt_family = AF_INET6;
1147
	ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1148 1149 1150 1151 1152 1153
	ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
	ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
	ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;

	memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);

1154
	if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr))
D
David S. Miller 已提交
1155
		goto nla_put_failure;
1156 1157
	nlmsg_end(skb, nlh);

1158
	rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
1159 1160 1161 1162 1163 1164
	return;

nla_put_failure:
	nlmsg_free(skb);
	err = -EMSGSIZE;
errout:
1165
	rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1166 1167
}

L
Linus Torvalds 已提交
1168 1169
static void ndisc_router_discovery(struct sk_buff *skb)
{
1170
	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
L
Linus Torvalds 已提交
1171 1172
	struct neighbour *neigh = NULL;
	struct inet6_dev *in6_dev;
1173
	struct fib6_info *rt = NULL;
1174
	struct net *net;
L
Linus Torvalds 已提交
1175 1176 1177
	int lifetime;
	struct ndisc_options ndopts;
	int optlen;
1178
	unsigned int pref = 0;
1179
	__u32 old_if_flags;
1180
	bool send_ifinfo_notify = false;
L
Linus Torvalds 已提交
1181

1182
	__u8 *opt = (__u8 *)(ra_msg + 1);
L
Linus Torvalds 已提交
1183

1184 1185
	optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
		sizeof(struct ra_msg);
L
Linus Torvalds 已提交
1186

1187 1188 1189
	ND_PRINTK(2, info,
		  "RA: %s, dev: %s\n",
		  __func__, skb->dev->name);
1190
	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1191
		ND_PRINTK(2, warn, "RA: source address is not link-local\n");
L
Linus Torvalds 已提交
1192 1193 1194
		return;
	}
	if (optlen < 0) {
1195
		ND_PRINTK(2, warn, "RA: packet too short\n");
L
Linus Torvalds 已提交
1196 1197 1198
		return;
	}

1199
#ifdef CONFIG_IPV6_NDISC_NODETYPE
1200
	if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
1201
		ND_PRINTK(2, warn, "RA: from host or unauthorized router\n");
1202 1203
		return;
	}
1204
#endif
1205

L
Linus Torvalds 已提交
1206 1207 1208 1209
	/*
	 *	set the RA_RECV flag in the interface
	 */

E
Eric Dumazet 已提交
1210
	in6_dev = __in6_dev_get(skb->dev);
1211
	if (!in6_dev) {
1212 1213
		ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n",
			  skb->dev->name);
L
Linus Torvalds 已提交
1214 1215 1216
		return;
	}

1217
	if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) {
1218
		ND_PRINTK(2, warn, "RA: invalid ND options\n");
L
Linus Torvalds 已提交
1219 1220 1221
		return;
	}

1222 1223 1224 1225
	if (!ipv6_accept_ra(in6_dev)) {
		ND_PRINTK(2, info,
			  "RA: %s, did not accept ra for dev: %s\n",
			  __func__, skb->dev->name);
1226
		goto skip_linkparms;
1227
	}
1228

1229
#ifdef CONFIG_IPV6_NDISC_NODETYPE
1230
	/* skip link-specific parameters from interior routers */
1231 1232 1233 1234
	if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
		ND_PRINTK(2, info,
			  "RA: %s, nodetype is NODEFAULT, dev: %s\n",
			  __func__, skb->dev->name);
1235
		goto skip_linkparms;
1236
	}
1237
#endif
1238

L
Linus Torvalds 已提交
1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250
	if (in6_dev->if_flags & IF_RS_SENT) {
		/*
		 *	flag that an RA was received after an RS was sent
		 *	out on this interface.
		 */
		in6_dev->if_flags |= IF_RA_RCVD;
	}

	/*
	 * Remember the managed/otherconf flags from most recently
	 * received RA message (RFC 2462) -- yoshfuji
	 */
1251
	old_if_flags = in6_dev->if_flags;
L
Linus Torvalds 已提交
1252 1253 1254 1255 1256 1257 1258
	in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
				IF_RA_OTHERCONF)) |
				(ra_msg->icmph.icmp6_addrconf_managed ?
					IF_RA_MANAGED : 0) |
				(ra_msg->icmph.icmp6_addrconf_other ?
					IF_RA_OTHERCONF : 0);

1259
	if (old_if_flags != in6_dev->if_flags)
1260
		send_ifinfo_notify = true;
1261

1262 1263 1264 1265
	if (!in6_dev->cnf.accept_ra_defrtr) {
		ND_PRINTK(2, info,
			  "RA: %s, defrtr is false for dev: %s\n",
			  __func__, skb->dev->name);
1266
		goto skip_defrtr;
1267
	}
1268

1269 1270 1271
	/* Do not accept RA with source-addr found on local machine unless
	 * accept_ra_from_local is set to true.
	 */
1272
	net = dev_net(in6_dev->dev);
L
Li RongQing 已提交
1273
	if (!in6_dev->cnf.accept_ra_from_local &&
1274
	    ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
1275
		ND_PRINTK(2, info,
1276 1277
			  "RA from local address detected on dev: %s: default router ignored\n",
			  skb->dev->name);
1278
		goto skip_defrtr;
1279
	}
1280

L
Linus Torvalds 已提交
1281 1282
	lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);

1283 1284 1285
#ifdef CONFIG_IPV6_ROUTER_PREF
	pref = ra_msg->icmph.icmp6_router_pref;
	/* 10b is handled as if it were 00b (medium) */
1286
	if (pref == ICMPV6_ROUTER_PREF_INVALID ||
1287
	    !in6_dev->cnf.accept_ra_rtr_pref)
1288 1289
		pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
1290
	/* routes added from RAs do not use nexthop objects */
1291
	rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
1292
	if (rt) {
1293 1294
		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
					 rt->fib6_nh->fib_nh_dev, NULL,
1295
					  &ipv6_hdr(skb)->saddr);
1296
		if (!neigh) {
1297 1298 1299
			ND_PRINTK(0, err,
				  "RA: %s got default router without neighbour\n",
				  __func__);
1300
			fib6_info_release(rt);
1301 1302 1303
			return;
		}
	}
L
Linus Torvalds 已提交
1304
	if (rt && lifetime == 0) {
1305
		ip6_del_rt(net, rt);
L
Linus Torvalds 已提交
1306 1307 1308
		rt = NULL;
	}

1309 1310
	ND_PRINTK(3, info, "RA: rt: %p  lifetime: %d, for dev: %s\n",
		  rt, lifetime, skb->dev->name);
1311
	if (!rt && lifetime) {
1312
		ND_PRINTK(3, info, "RA: adding default router\n");
L
Linus Torvalds 已提交
1313

1314 1315
		rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
					 skb->dev, pref);
1316
		if (!rt) {
1317 1318 1319
			ND_PRINTK(0, err,
				  "RA: %s failed to add default route\n",
				  __func__);
L
Linus Torvalds 已提交
1320 1321 1322
			return;
		}

1323 1324
		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
					 rt->fib6_nh->fib_nh_dev, NULL,
1325
					  &ipv6_hdr(skb)->saddr);
1326
		if (!neigh) {
1327 1328 1329
			ND_PRINTK(0, err,
				  "RA: %s got default router without neighbour\n",
				  __func__);
1330
			fib6_info_release(rt);
L
Linus Torvalds 已提交
1331 1332 1333
			return;
		}
		neigh->flags |= NTF_ROUTER;
1334
	} else if (rt) {
1335
		rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
L
Linus Torvalds 已提交
1336 1337 1338
	}

	if (rt)
1339
		fib6_set_expires(rt, jiffies + (HZ * lifetime));
1340 1341 1342
	if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
	    ra_msg->icmph.icmp6_hop_limit) {
		if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
1343
			in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1344 1345
			fib6_metric_set(rt, RTAX_HOPLIMIT,
					ra_msg->icmph.icmp6_hop_limit);
1346
		} else {
1347
			ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n");
1348
		}
L
Linus Torvalds 已提交
1349 1350
	}

1351 1352
skip_defrtr:

L
Linus Torvalds 已提交
1353 1354 1355 1356 1357 1358 1359 1360 1361
	/*
	 *	Update Reachable Time and Retrans Timer
	 */

	if (in6_dev->nd_parms) {
		unsigned long rtime = ntohl(ra_msg->retrans_timer);

		if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
			rtime = (rtime*HZ)/1000;
1362 1363
			if (rtime < HZ/100)
				rtime = HZ/100;
J
Jiri Pirko 已提交
1364
			NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime);
L
Linus Torvalds 已提交
1365
			in6_dev->tstamp = jiffies;
1366
			send_ifinfo_notify = true;
L
Linus Torvalds 已提交
1367 1368 1369 1370 1371 1372 1373 1374 1375
		}

		rtime = ntohl(ra_msg->reachable_time);
		if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
			rtime = (rtime*HZ)/1000;

			if (rtime < HZ/10)
				rtime = HZ/10;

J
Jiri Pirko 已提交
1376 1377 1378 1379 1380
			if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) {
				NEIGH_VAR_SET(in6_dev->nd_parms,
					      BASE_REACHABLE_TIME, rtime);
				NEIGH_VAR_SET(in6_dev->nd_parms,
					      GC_STALETIME, 3 * rtime);
L
Linus Torvalds 已提交
1381 1382
				in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
				in6_dev->tstamp = jiffies;
1383
				send_ifinfo_notify = true;
L
Linus Torvalds 已提交
1384 1385 1386 1387
			}
		}
	}

1388 1389 1390 1391 1392 1393
	/*
	 *	Send a notify if RA changed managed/otherconf flags or timer settings
	 */
	if (send_ifinfo_notify)
		inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);

1394 1395
skip_linkparms:

L
Linus Torvalds 已提交
1396 1397 1398 1399 1400
	/*
	 *	Process options.
	 */

	if (!neigh)
1401
		neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
L
Linus Torvalds 已提交
1402 1403 1404 1405 1406 1407 1408
				       skb->dev, 1);
	if (neigh) {
		u8 *lladdr = NULL;
		if (ndopts.nd_opts_src_lladdr) {
			lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
						     skb->dev);
			if (!lladdr) {
1409 1410
				ND_PRINTK(2, warn,
					  "RA: invalid link-layer address length\n");
L
Linus Torvalds 已提交
1411 1412 1413
				goto out;
			}
		}
1414
		ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
L
Linus Torvalds 已提交
1415 1416 1417
			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
			     NEIGH_UPDATE_F_OVERRIDE|
			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1418 1419
			     NEIGH_UPDATE_F_ISROUTER,
			     NDISC_ROUTER_ADVERTISEMENT, &ndopts);
L
Linus Torvalds 已提交
1420 1421
	}

1422 1423 1424 1425
	if (!ipv6_accept_ra(in6_dev)) {
		ND_PRINTK(2, info,
			  "RA: %s, accept_ra is false for dev: %s\n",
			  __func__, skb->dev->name);
1426
		goto out;
1427
	}
1428

1429
#ifdef CONFIG_IPV6_ROUTE_INFO
L
Li RongQing 已提交
1430 1431
	if (!in6_dev->cnf.accept_ra_from_local &&
	    ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
1432
			  in6_dev->dev, 0)) {
1433
		ND_PRINTK(2, info,
1434 1435
			  "RA from local address detected on dev: %s: router info ignored.\n",
			  skb->dev->name);
1436
		goto skip_routeinfo;
1437
	}
1438

1439
	if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
1440 1441 1442 1443
		struct nd_opt_hdr *p;
		for (p = ndopts.nd_opts_ri;
		     p;
		     p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
1444 1445 1446 1447 1448 1449
			struct route_info *ri = (struct route_info *)p;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
			if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
			    ri->prefix_len == 0)
				continue;
#endif
1450 1451 1452
			if (ri->prefix_len == 0 &&
			    !in6_dev->cnf.accept_ra_defrtr)
				continue;
1453 1454
			if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
				continue;
1455
			if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
1456
				continue;
1457
			rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
1458
				      &ipv6_hdr(skb)->saddr);
1459 1460
		}
	}
1461 1462

skip_routeinfo:
1463 1464
#endif

1465
#ifdef CONFIG_IPV6_NDISC_NODETYPE
1466
	/* skip link-specific ndopts from interior routers */
1467 1468 1469 1470
	if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
		ND_PRINTK(2, info,
			  "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n",
			  __func__, skb->dev->name);
1471
		goto out;
1472
	}
1473
#endif
1474

1475
	if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
L
Linus Torvalds 已提交
1476 1477 1478 1479
		struct nd_opt_hdr *p;
		for (p = ndopts.nd_opts_pi;
		     p;
		     p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1480 1481 1482
			addrconf_prefix_rcv(skb->dev, (u8 *)p,
					    (p->nd_opt_len) << 3,
					    ndopts.nd_opts_src_lladdr != NULL);
L
Linus Torvalds 已提交
1483 1484 1485
		}
	}

1486
	if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) {
A
Al Viro 已提交
1487
		__be32 n;
L
Linus Torvalds 已提交
1488 1489
		u32 mtu;

1490
		memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
A
Al Viro 已提交
1491
		mtu = ntohl(n);
L
Linus Torvalds 已提交
1492 1493

		if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1494
			ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
L
Linus Torvalds 已提交
1495 1496
		} else if (in6_dev->cnf.mtu6 != mtu) {
			in6_dev->cnf.mtu6 = mtu;
1497
			fib6_metric_set(rt, RTAX_MTU, mtu);
L
Linus Torvalds 已提交
1498 1499 1500
			rt6_mtu_change(skb->dev, mtu);
		}
	}
1501

1502
	if (ndopts.nd_useropts) {
1503 1504 1505
		struct nd_opt_hdr *p;
		for (p = ndopts.nd_useropts;
		     p;
1506 1507
		     p = ndisc_next_useropt(skb->dev, p,
					    ndopts.nd_useropts_end)) {
1508
			ndisc_ra_useropt(skb, p);
1509 1510 1511
		}
	}

L
Linus Torvalds 已提交
1512
	if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1513
		ND_PRINTK(2, warn, "RA: invalid RA options\n");
L
Linus Torvalds 已提交
1514 1515
	}
out:
1516
	fib6_info_release(rt);
1517
	if (neigh)
L
Linus Torvalds 已提交
1518 1519 1520 1521 1522
		neigh_release(neigh);
}

static void ndisc_redirect_rcv(struct sk_buff *skb)
{
1523 1524 1525
	u8 *hdr;
	struct ndisc_options ndopts;
	struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
1526
	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
1527 1528
				    offsetof(struct rd_msg, opt));

1529
#ifdef CONFIG_IPV6_NDISC_NODETYPE
1530 1531 1532
	switch (skb->ndisc_nodetype) {
	case NDISC_NODETYPE_HOST:
	case NDISC_NODETYPE_NODEFAULT:
1533 1534
		ND_PRINTK(2, warn,
			  "Redirect: from host or unauthorized router\n");
1535 1536
		return;
	}
1537
#endif
1538

1539
	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1540 1541
		ND_PRINTK(2, warn,
			  "Redirect: source address is not link-local\n");
L
Linus Torvalds 已提交
1542 1543 1544
		return;
	}

1545
	if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts))
1546 1547
		return;

1548
	if (!ndopts.nd_opts_rh) {
1549
		ip6_redirect_no_header(skb, dev_net(skb->dev),
1550
					skb->dev->ifindex);
1551
		return;
1552
	}
1553 1554 1555 1556 1557 1558

	hdr = (u8 *)ndopts.nd_opts_rh;
	hdr += 8;
	if (!pskb_pull(skb, hdr - skb_transport_header(skb)))
		return;

1559
	icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
L
Linus Torvalds 已提交
1560 1561
}

1562 1563 1564
static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
					   struct sk_buff *orig_skb,
					   int rd_len)
1565
{
1566 1567
	u8 *opt = skb_put(skb, rd_len);

1568 1569 1570 1571 1572
	memset(opt, 0, 8);
	*(opt++) = ND_OPT_REDIRECT_HDR;
	*(opt++) = (rd_len >> 3);
	opt += 6;

1573 1574
	skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt,
		      rd_len - 8);
1575 1576
}

1577
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
L
Linus Torvalds 已提交
1578
{
1579
	struct net_device *dev = skb->dev;
1580
	struct net *net = dev_net(dev);
1581
	struct sock *sk = net->ipv6.ndisc_sk;
1582
	int optlen = 0;
1583
	struct inet_peer *peer;
L
Linus Torvalds 已提交
1584
	struct sk_buff *buff;
1585
	struct rd_msg *msg;
L
Linus Torvalds 已提交
1586 1587 1588
	struct in6_addr saddr_buf;
	struct rt6_info *rt;
	struct dst_entry *dst;
1589
	struct flowi6 fl6;
L
Linus Torvalds 已提交
1590
	int rd_len;
1591 1592
	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
	   ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
1593
	bool ret;
L
Linus Torvalds 已提交
1594

1595 1596 1597 1598 1599 1600
	if (netif_is_l3_master(skb->dev)) {
		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
		if (!dev)
			return;
	}

1601
	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1602 1603
		ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
			  dev->name);
1604 1605
		return;
	}
L
Linus Torvalds 已提交
1606

1607
	if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1608
	    ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1609 1610
		ND_PRINTK(2, warn,
			  "Redirect: target address is not link-local unicast\n");
1611 1612 1613
		return;
	}

1614
	icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
1615
			 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
L
Linus Torvalds 已提交
1616

1617
	dst = ip6_route_output(net, NULL, &fl6);
1618 1619
	if (dst->error) {
		dst_release(dst);
L
Linus Torvalds 已提交
1620
		return;
1621
	}
1622
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1623
	if (IS_ERR(dst))
L
Linus Torvalds 已提交
1624 1625 1626 1627 1628
		return;

	rt = (struct rt6_info *) dst;

	if (rt->rt6i_flags & RTF_GATEWAY) {
1629 1630
		ND_PRINTK(2, warn,
			  "Redirect: destination is not a neighbour\n");
I
Ilpo Järvinen 已提交
1631
		goto release;
L
Linus Torvalds 已提交
1632
	}
1633
	peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1);
1634 1635 1636 1637
	ret = inet_peer_xrlim_allow(peer, 1*HZ);
	if (peer)
		inet_putpeer(peer);
	if (!ret)
I
Ilpo Järvinen 已提交
1638
		goto release;
L
Linus Torvalds 已提交
1639 1640

	if (dev->addr_len) {
1641 1642
		struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target);
		if (!neigh) {
1643 1644
			ND_PRINTK(2, warn,
				  "Redirect: no neigh for target address\n");
1645 1646 1647
			goto release;
		}

L
Linus Torvalds 已提交
1648 1649 1650 1651 1652
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID) {
			memcpy(ha_buf, neigh->ha, dev->addr_len);
			read_unlock_bh(&neigh->lock);
			ha = ha_buf;
1653 1654 1655
			optlen += ndisc_redirect_opt_addr_space(dev, neigh,
								ops_data_buf,
								&ops_data);
L
Linus Torvalds 已提交
1656 1657
		} else
			read_unlock_bh(&neigh->lock);
1658 1659

		neigh_release(neigh);
L
Linus Torvalds 已提交
1660 1661 1662
	}

	rd_len = min_t(unsigned int,
1663 1664
		       IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen,
		       skb->len + 8);
L
Linus Torvalds 已提交
1665
	rd_len &= ~0x7;
1666
	optlen += rd_len;
L
Linus Torvalds 已提交
1667

1668
	buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
1669
	if (!buff)
I
Ilpo Järvinen 已提交
1670
		goto release;
L
Linus Torvalds 已提交
1671

1672
	msg = skb_put(buff, sizeof(*msg));
1673 1674 1675 1676 1677 1678 1679
	*msg = (struct rd_msg) {
		.icmph = {
			.icmp6_type = NDISC_REDIRECT,
		},
		.target = *target,
		.dest = ipv6_hdr(skb)->daddr,
	};
L
Linus Torvalds 已提交
1680 1681 1682 1683 1684 1685

	/*
	 *	include target_address option
	 */

	if (ha)
1686
		ndisc_fill_redirect_addr_option(buff, ha, ops_data);
L
Linus Torvalds 已提交
1687 1688 1689 1690 1691

	/*
	 *	build redirect option and copy skb over to the new packet.
	 */

1692
	if (rd_len)
1693
		ndisc_fill_redirect_hdr_option(buff, skb, rd_len);
L
Linus Torvalds 已提交
1694

E
Eric Dumazet 已提交
1695
	skb_dst_set(buff, dst);
1696
	ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf);
I
Ilpo Järvinen 已提交
1697 1698 1699 1700
	return;

release:
	dst_release(dst);
L
Linus Torvalds 已提交
1701 1702 1703 1704
}

/*
 *	Run neighbour solicitation processing on @skb and then free it.
 */
static void pndisc_redo(struct sk_buff *skb)
{
	ndisc_recv_ns(skb);

	/* This function owns the skb, so release it once processed. */
	kfree_skb(skb);
}

1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722
/*
 *	Decide whether an ndisc packet must be dropped before processing.
 *
 *	Returns true when the receiving device has no inet6 state, or when
 *	the packet was fragmented and suppress_frag_ndisc is enabled on the
 *	device (a rate-limited warning is logged in that case); false
 *	otherwise.
 */
static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
{
	struct inet6_dev *in6 = __in6_dev_get(skb->dev);
	bool fragmented;

	if (!in6)
		return true;

	fragmented = IP6CB(skb)->flags & IP6SKB_FRAGMENTED;
	if (!fragmented || !in6->cnf.suppress_frag_ndisc)
		return false;

	net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
	return true;
}

L
Linus Torvalds 已提交
1723 1724 1725 1726
int ndisc_rcv(struct sk_buff *skb)
{
	struct nd_msg *msg;

1727 1728 1729
	if (ndisc_suppress_frag_ndisc(skb))
		return 0;

1730
	if (skb_linearize(skb))
L
Linus Torvalds 已提交
1731 1732
		return 0;

1733
	msg = (struct nd_msg *)skb_transport_header(skb);
L
Linus Torvalds 已提交
1734

1735
	__skb_push(skb, skb->data - skb_transport_header(skb));
L
Linus Torvalds 已提交
1736

1737
	if (ipv6_hdr(skb)->hop_limit != 255) {
1738 1739
		ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n",
			  ipv6_hdr(skb)->hop_limit);
L
Linus Torvalds 已提交
1740 1741 1742 1743
		return 0;
	}

	if (msg->icmph.icmp6_code != 0) {
1744 1745
		ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n",
			  msg->icmph.icmp6_code);
L
Linus Torvalds 已提交
1746 1747 1748 1749 1750
		return 0;
	}

	switch (msg->icmph.icmp6_type) {
	case NDISC_NEIGHBOUR_SOLICITATION:
1751
		memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
L
Linus Torvalds 已提交
1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769
		ndisc_recv_ns(skb);
		break;

	case NDISC_NEIGHBOUR_ADVERTISEMENT:
		ndisc_recv_na(skb);
		break;

	case NDISC_ROUTER_SOLICITATION:
		ndisc_recv_rs(skb);
		break;

	case NDISC_ROUTER_ADVERTISEMENT:
		ndisc_router_discovery(skb);
		break;

	case NDISC_REDIRECT:
		ndisc_redirect_rcv(skb);
		break;
1770
	}
L
Linus Torvalds 已提交
1771 1772 1773 1774 1775 1776

	return 0;
}

static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
1777
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1778
	struct netdev_notifier_change_info *change_info;
1779
	struct net *net = dev_net(dev);
1780
	struct inet6_dev *idev;
L
Linus Torvalds 已提交
1781 1782 1783 1784

	switch (event) {
	case NETDEV_CHANGEADDR:
		neigh_changeaddr(&nd_tbl, dev);
1785
		fib6_run_gc(0, net, false);
J
Joe Perches 已提交
1786
		fallthrough;
1787
	case NETDEV_UP:
1788 1789 1790
		idev = in6_dev_get(dev);
		if (!idev)
			break;
1791 1792
		if (idev->cnf.ndisc_notify ||
		    net->ipv6.devconf_all->ndisc_notify)
1793 1794
			ndisc_send_unsol_na(dev);
		in6_dev_put(idev);
L
Linus Torvalds 已提交
1795
		break;
1796 1797 1798 1799
	case NETDEV_CHANGE:
		change_info = ptr;
		if (change_info->flags_changed & IFF_NOARP)
			neigh_changeaddr(&nd_tbl, dev);
1800 1801
		if (!netif_carrier_ok(dev))
			neigh_carrier_down(&nd_tbl, dev);
1802
		break;
L
Linus Torvalds 已提交
1803 1804
	case NETDEV_DOWN:
		neigh_ifdown(&nd_tbl, dev);
1805
		fib6_run_gc(0, net, false);
L
Linus Torvalds 已提交
1806
		break;
1807 1808 1809
	case NETDEV_NOTIFY_PEERS:
		ndisc_send_unsol_na(dev);
		break;
L
Linus Torvalds 已提交
1810 1811 1812 1813 1814 1815 1816 1817 1818
	default:
		break;
	}

	return NOTIFY_DONE;
}

static struct notifier_block ndisc_netdev_notifier = {
	.notifier_call = ndisc_netdev_event,
1819
	.priority = ADDRCONF_NOTIFY_PRIORITY - 5,
L
Linus Torvalds 已提交
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829
};

#ifdef CONFIG_SYSCTL
static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
					 const char *func, const char *dev_name)
{
	static char warncomm[TASK_COMM_LEN];
	static int warned;
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
1830
		pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n",
L
Linus Torvalds 已提交
1831 1832 1833 1834 1835 1836 1837
			warncomm, func,
			dev_name, ctl->procname,
			dev_name, ctl->procname);
		warned++;
	}
}

1838
/*
 *	sysctl handler for the ND retransmit / reachable time entries.
 *
 *	Dispatches to the matching neigh_proc_* helper based on the entry
 *	name, warning about the deprecated non-"_ms" names.  After a
 *	successful write on a per-device entry, the randomized reachable
 *	time is refreshed if the base reachable time was the entry written,
 *	and an RTM_NEWLINK notification is sent.
 *
 *	Returns the helper's result, or -1 for an unrecognised entry name.
 */
int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net_device *dev = ctl->extra1;
	const char *name = ctl->procname;
	bool old_retrans = !strcmp(name, "retrans_time");
	bool old_reach = !strcmp(name, "base_reachable_time");
	struct inet6_dev *idev;
	int ret;

	if (old_retrans || old_reach)
		ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");

	if (old_retrans)
		ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos);
	else if (old_reach)
		ret = neigh_proc_dointvec_jiffies(ctl, write,
						  buffer, lenp, ppos);
	else if (!strcmp(name, "retrans_time_ms") ||
		 !strcmp(name, "base_reachable_time_ms"))
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write,
						     buffer, lenp, ppos);
	else
		ret = -1;

	/* Only a successful write to a per-device entry needs follow-up. */
	if (!write || ret != 0 || !dev)
		return ret;

	idev = in6_dev_get(dev);
	if (!idev)
		return ret;

	if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
		idev->nd_parms->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
	idev->tstamp = jiffies;
	inet6_ifinfo_notify(RTM_NEWLINK, idev);
	in6_dev_put(idev);

	return ret;
}


#endif

1876
/*
 *	Per-namespace init: create the raw ICMPv6 control socket used by
 *	ndisc and store it in net->ipv6.ndisc_sk.
 *
 *	Returns 0 on success or the negative error from socket creation.
 */
static int __net_init ndisc_net_init(struct net *net)
{
	struct ipv6_pinfo *np;
	struct sock *ctl_sk;
	int rc;

	rc = inet_ctl_sock_create(&ctl_sk, PF_INET6,
				  SOCK_RAW, IPPROTO_ICMPV6, net);
	if (rc < 0) {
		ND_PRINTK(0, err,
			  "NDISC: Failed to initialize the control socket (err %d)\n",
			  rc);
		return rc;
	}

	net->ipv6.ndisc_sk = ctl_sk;

	np = inet6_sk(ctl_sk);
	np->hop_limit = 255;
	/* Do not loopback ndisc messages */
	np->mc_loop = 0;

	return 0;
}

1901
/* Per-namespace exit: destroy the ndisc control socket. */
static void __net_exit ndisc_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
}

/* Per-network-namespace setup and teardown hooks for ndisc. */
static struct pernet_operations ndisc_net_ops = {
	.init = ndisc_net_init,
	.exit = ndisc_net_exit,
};

/*
 *	Module-level initialisation of neighbour discovery.
 *
 *	Registers the per-namespace operations, initialises the ND
 *	neighbour table and, with CONFIG_SYSCTL, registers the default
 *	neigh sysctl entries.  If sysctl registration fails, everything
 *	set up here is torn down again before returning.
 *
 *	Returns 0 on success or a negative error code.
 */
int __init ndisc_init(void)
{
	int err;

	err = register_pernet_subsys(&ndisc_net_ops);
	if (err)
		return err;
	/*
	 * Initialize the neighbour table
	 */
	neigh_table_init(NEIGH_ND_TABLE, &nd_tbl);

#ifdef CONFIG_SYSCTL
	err = neigh_sysctl_register(NULL, &nd_tbl.parms,
				    ndisc_ifinfo_sysctl_change);
	if (err) {
		/* Undo in reverse order, as ndisc_cleanup() does; the
		 * neighbour table must not stay registered on failure.
		 */
		neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl);
		unregister_pernet_subsys(&ndisc_net_ops);
	}
#endif
	return err;
}

1939 1940 1941 1942 1943 1944
/*
 *	Late init step: register the ndisc netdevice notifier.
 *	Returns the result of register_netdevice_notifier().
 */
int __init ndisc_late_init(void)
{
	int err = register_netdevice_notifier(&ndisc_netdev_notifier);

	return err;
}

void ndisc_late_cleanup(void)
L
Linus Torvalds 已提交
1945
{
1946
	unregister_netdevice_notifier(&ndisc_netdev_notifier);
1947 1948 1949 1950
}

void ndisc_cleanup(void)
{
L
Linus Torvalds 已提交
1951 1952 1953
#ifdef CONFIG_SYSCTL
	neigh_sysctl_unregister(&nd_tbl.parms);
#endif
1954
	neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl);
1955
	unregister_pernet_subsys(&ndisc_net_ops);
L
Linus Torvalds 已提交
1956
}