icmp.c 25.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
/*
 *	Internet Control Message Protocol (ICMPv6)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on net/ipv4/icmp.c
 *
 *	RFC 1885
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to an icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure to send parameter problem for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
 */

32 33
#define pr_fmt(fmt) "IPv6: " fmt

L
Linus Torvalds 已提交
34 35 36 37 38 39 40 41 42 43
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
44
#include <linux/netfilter.h>
45
#include <linux/slab.h>
L
Linus Torvalds 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58 59

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
60
#include <net/ping.h>
L
Linus Torvalds 已提交
61 62 63 64 65 66 67
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
68
#include <net/xfrm.h>
69
#include <net/inet_common.h>
70
#include <net/dsfield.h>
D
David Ahern 已提交
71
#include <net/l3mdev.h>
L
Linus Torvalds 已提交
72

73
#include <linux/uaccess.h>
L
Linus Torvalds 已提交
74 75 76 77 78 79 80 81

/*
 *	The ICMP socket(s). This is the most convenient way to flow control
 *	our ICMP output as well as maintain a clean interface throughout
 *	all layers. All Socketless IP sends will soon be gone.
 *
 *	On SMP we have one ICMP socket per-cpu.
 */
82 83 84 85
/* Pick this namespace's ICMPv6 socket for the CPU we are running on. */
static inline struct sock *icmpv6_sk(struct net *net)
{
	struct sock **percpu_socks = net->ipv6.icmp_sk;

	return percpu_socks[smp_processor_id()];
}
L
Linus Torvalds 已提交
86

87 88 89
/*
 * Error handler registered for IPPROTO_ICMPV6 (see icmpv6_protocol).
 *
 * @skb:    packet whose data holds the offending (inner) packet
 * @opt:    unused here
 * @type:   ICMPv6 type of the received message
 * @code:   ICMPv6 code of the received message (unused here)
 * @offset: offset from skb->data to the inner ICMPv6 header
 * @info:   type specific 32-bit field, network byte order
 */
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	struct net *net = dev_net(skb->dev);
	struct icmp6hdr *inner;

	switch (type) {
	case ICMPV6_PKT_TOOBIG:
		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
		break;
	case NDISC_REDIRECT:
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));
		break;
	default:
		break;
	}

	/* Only error messages are forwarded to the ping code. */
	if (type & ICMPV6_INFOMSG_MASK)
		return;

	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	inner = (struct icmp6hdr *)(skb->data + offset);
	if (inner->icmp6_type == ICMPV6_ECHO_REQUEST)
		ping_err(skb, offset, ntohl(info));
}

105
static int icmpv6_rcv(struct sk_buff *skb);
L
Linus Torvalds 已提交
106

107
static const struct inet6_protocol icmpv6_protocol = {
L
Linus Torvalds 已提交
108
	.handler	=	icmpv6_rcv,
109
	.err_handler	=	icmpv6_err,
110
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
L
Linus Torvalds 已提交
111 112
};

113
/* Called with BH disabled */
114
static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
L
Linus Torvalds 已提交
115
{
116 117 118
	struct sock *sk;

	sk = icmpv6_sk(net);
119
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
L
Linus Torvalds 已提交
120 121 122 123
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
124
		return NULL;
L
Linus Torvalds 已提交
125
	}
126
	return sk;
L
Linus Torvalds 已提交
127 128
}

129
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
L
Linus Torvalds 已提交
130
{
131
	spin_unlock(&sk->sk_lock.slock);
L
Linus Torvalds 已提交
132 133 134 135 136 137 138 139 140 141 142 143 144
}

/*
 * Figure out, may we reply to this packet with icmp error.
 *
 * We do not reply, if:
 *	- it was icmp error message.
 *	- it is truncated, so that it is known, that protocol is ICMPV6
 *	  (i.e. in the middle of some exthdr)
 *
 *	--ANK (980726)
 */

145
static bool is_ineligible(const struct sk_buff *skb)
L
Linus Torvalds 已提交
146
{
147
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
L
Linus Torvalds 已提交
148
	int len = skb->len - ptr;
149
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
150
	__be16 frag_off;
L
Linus Torvalds 已提交
151 152

	if (len < 0)
153
		return true;
L
Linus Torvalds 已提交
154

155
	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
L
Linus Torvalds 已提交
156
	if (ptr < 0)
157
		return false;
L
Linus Torvalds 已提交
158 159 160 161 162
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);
163
		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
164
			return true;
L
Linus Torvalds 已提交
165
	}
166
	return false;
L
Linus Torvalds 已提交
167 168
}

169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
/*
 * Returns true for message types that are never rate limited:
 * informational messages, and Packet Too Big (limiting it would
 * break pmtu discovery).
 */
static bool icmpv6_mask_allow(int type)
{
	return (type & ICMPV6_INFOMSG_MASK) || type == ICMPV6_PKT_TOOBIG;
}

/*
 * Global rate limit check. Exempt types pass without consulting
 * icmp_global_allow(); everything else is decided by it.
 */
static bool icmpv6_global_allow(int type)
{
	return icmpv6_mask_allow(type) || icmp_global_allow();
}

193 194
/*
 * Check the ICMP output rate limit
L
Linus Torvalds 已提交
195
 */
196 197
/*
 * Per-destination rate limit check.
 *
 * @sk:   ICMPv6 socket used for the output route lookup
 * @type: ICMPv6 type about to be sent
 * @fl6:  flow describing the message; fl6->daddr selects the peer
 *
 * Returns true when the message may be sent.
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct inet_peer *peer;
	struct dst_entry *dst;
	struct rt6_info *rt;
	bool allowed = false;
	int timeout;

	if (icmpv6_mask_allow(type))
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
		goto release;
	}

	/* Traffic leaving through a loopback device is not limited. */
	if (dst->dev && (dst->dev->flags & IFF_LOOPBACK)) {
		allowed = true;
		goto release;
	}

	rt = (struct rt6_info *)dst;
	timeout = net->ipv6.sysctl.icmpv6_time;

	/* Give more bandwidth to wider prefixes. */
	if (rt->rt6i_dst.plen < 128)
		timeout >>= ((128 - rt->rt6i_dst.plen) >> 5);

	peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
	allowed = inet_peer_xrlim_allow(peer, timeout);
	if (peer)
		inet_putpeer(peer);

release:
	dst_release(dst);
	return allowed;
}

/*
 *	an inline helper for the "simple" if statement below
 *	checks if parameter problem report is caused by an
238
 *	unrecognized IPv6 option that has the Option Type
L
Linus Torvalds 已提交
239 240 241
 *	highest-order two bits set to 10
 */

242
/*
 * @offset is relative to the network header of @skb.  Returns true if
 * the byte there cannot be read, or if its two highest-order bits
 * are 10.
 */
static bool opt_unrec(struct sk_buff *skb, __u32 offset)
{
	u8 buf, *optp;

	offset += skb_network_offset(skb);
	optp = skb_header_pointer(skb, offset, sizeof(buf), &buf);

	return !optp || (*optp & 0xC0) == 0x80;
}

253 254
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
L
Linus Torvalds 已提交
255 256 257 258
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

259
	skb = skb_peek(&sk->sk_write_queue);
260
	if (!skb)
261
		return;
L
Linus Torvalds 已提交
262

263
	icmp6h = icmp6_hdr(skb);
L
Linus Torvalds 已提交
264 265 266 267
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
268
		skb->csum = csum_partial(icmp6h,
L
Linus Torvalds 已提交
269
					sizeof(struct icmp6hdr), skb->csum);
270 271 272
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
L
Linus Torvalds 已提交
273 274
						      skb->csum);
	} else {
275
		__wsum tmp_csum = 0;
L
Linus Torvalds 已提交
276 277 278 279 280

		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

281
		tmp_csum = csum_partial(icmp6h,
L
Linus Torvalds 已提交
282
					sizeof(struct icmp6hdr), tmp_csum);
283 284 285
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
286
						      tmp_csum);
L
Linus Torvalds 已提交
287 288 289 290 291 292 293
	}
	ip6_push_pending_frames(sk);
}

struct icmpv6_msg {
	struct sk_buff	*skb;
	int		offset;
294
	uint8_t		type;
L
Linus Torvalds 已提交
295 296 297 298 299 300
};

static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
301
	__wsum csum = 0;
L
Linus Torvalds 已提交
302 303 304 305

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len, csum);
	skb->csum = csum_block_add(skb->csum, csum, odd);
306 307
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
L
Linus Torvalds 已提交
308 309 310
	return 0;
}

A
Amerigo Wang 已提交
311
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/*
 * If the packet carries a Home Address option, exchange the address
 * stored in that option with the IPv6 header's source address.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr saved;
	int off;

	if (!opt->dsthao)
		return;

	off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
	if (unlikely(off < 0))
		return;

	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + off);
	saved = iph->saddr;
	iph->saddr = hao->addr;
	hao->addr = saved;
}
#else
/* Without Mobile IPv6 support there is nothing to swap. */
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif

S
stephen hemminger 已提交
335 336 337 338
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
339 340
{
	struct dst_entry *dst, *dst2;
341
	struct flowi6 fl2;
342 343
	int err;

344
	err = ip6_dst_lookup(net, sk, &dst, fl6);
345 346 347 348 349 350 351
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
352
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
353
		net_dbg_ratelimited("icmp6_send: acast source\n");
354 355 356 357 358 359 360
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

361
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
362
	if (!IS_ERR(dst)) {
363 364
		if (dst != dst2)
			return dst;
365 366 367 368 369
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
370 371
	}

372
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
373 374 375
	if (err)
		goto relookup_failed;

376
	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
377 378 379
	if (err)
		goto relookup_failed;

380
	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
381
	if (!IS_ERR(dst2)) {
382 383
		dst_release(dst);
		dst = dst2;
384 385 386 387 388 389 390
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
391 392 393 394 395 396 397 398
	}

relookup_failed:
	if (dst)
		return dst;
	return ERR_PTR(err);
}

399 400 401 402 403 404
static int icmp6_iif(const struct sk_buff *skb)
{
	int iif = skb->dev->ifindex;

	/* for local traffic to local address, skb dev is the loopback
	 * device. Check if there is a dst attached to the skb and if so
405 406
	 * get the real device index. Same is needed for replies to a link
	 * local address on a device enslaved to an L3 master device
407
	 */
408
	if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
409 410 411 412 413 414 415 416 417
		const struct rt6_info *rt6 = skb_rt6_info(skb);

		if (rt6)
			iif = rt6->rt6i_idev->dev->ifindex;
	}

	return iif;
}

L
Linus Torvalds 已提交
418 419 420
/*
 *	Send an ICMP message in response to a packet in error
 */
421 422
/*
 * icmp6_send - build and transmit an ICMPv6 message about @skb
 * @skb:         the packet that triggered the message; part of it is
 *               quoted as payload
 * @type:        ICMPv6 type to send
 * @code:        ICMPv6 code to send
 * @info:        host-order value stored in the 32-bit field after the
 *               checksum (pointer / MTU, depending on @type)
 * @force_saddr: if non-NULL, source address to use for the message
 *
 * Nothing is sent when the offending packet is not eligible for an
 * error (see the checks below), when a rate limit applies, when the
 * per-cpu ICMPv6 socket is already locked, or when no route is found.
 */
static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		       const struct in6_addr *force_saddr)
{
	struct net *net = dev_net(skb->dev);
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);

	/* The whole IPv6 header must lie inside the linear buffer. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	/* Reply from the address the packet was sent to, if it is ours. */
	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		dst = skb_dst(skb);
		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/*
	 *	Never answer to an ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
		goto out_bh_enable;

	mip6_addr_swap(skb);

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	sk->sk_mark = mark;
	np = inet6_sk(sk);

	if (!icmpv6_xrlim_allow(sk, type, &fl6))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	/* No interface chosen yet: fall back to the socket defaults. */
	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	ipcm6_init_sk(&ipc6, np);
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	/*
	 * Validate the quoted length BEFORE clamping it: min_t() below
	 * compares as unsigned int, so a negative value would be treated
	 * as huge and silently clamped to a positive length.
	 */
	len = skb->len - msg.offset;
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}
	/* Keep the whole message within the IPv6 minimum MTU. */
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, (struct rt6_info *)dst,
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}
	rcu_read_unlock();
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
591 592 593 594 595

/* Slightly more convenient version of icmp6_send.
 */
/*
 * Send a Parameter Problem message with @code for @skb, using @pos as
 * the info field, then free @skb.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	const u8 type = ICMPV6_PARAMPROB;

	icmp6_send(skb, type, code, pos, NULL);
	kfree_skb(skb);
}
599

600 601 602 603 604 605
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 */
606 607
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
608
{
609
	struct in6_addr temp_saddr;
610 611
	struct rt6_info *rt;
	struct sk_buff *skb2;
612
	u32 info = 0;
613 614 615 616

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

617 618 619 620 621
	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
622 623 624 625 626 627 628 629

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

D
David Ahern 已提交
630 631
	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
			skb, 0);
632 633 634 635

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

636
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
637 638 639 640 641 642 643 644 645 646 647 648 649 650

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
651 652
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
653
			   info, &temp_saddr);
654 655
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
656
			   info, &temp_saddr);
657 658 659 660 661 662 663 664 665
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);

L
Linus Torvalds 已提交
666 667
static void icmpv6_echo_reply(struct sk_buff *skb)
{
668
	struct net *net = dev_net(skb->dev);
669
	struct sock *sk;
L
Linus Torvalds 已提交
670
	struct inet6_dev *idev;
671
	struct ipv6_pinfo *np;
672
	const struct in6_addr *saddr = NULL;
673
	struct icmp6hdr *icmph = icmp6_hdr(skb);
L
Linus Torvalds 已提交
674
	struct icmp6hdr tmp_hdr;
675
	struct flowi6 fl6;
L
Linus Torvalds 已提交
676 677
	struct icmpv6_msg msg;
	struct dst_entry *dst;
W
Wei Wang 已提交
678
	struct ipcm6_cookie ipc6;
679
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
L
Linus Torvalds 已提交
680

681
	saddr = &ipv6_hdr(skb)->daddr;
L
Linus Torvalds 已提交
682

683
	if (!ipv6_unicast_destination(skb) &&
684
	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
685
	      ipv6_anycast_destination(skb_dst(skb), saddr)))
L
Linus Torvalds 已提交
686 687 688 689 690
		saddr = NULL;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

691 692
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
A
Alexey Dobriyan 已提交
693
	fl6.daddr = ipv6_hdr(skb)->saddr;
L
Linus Torvalds 已提交
694
	if (saddr)
A
Alexey Dobriyan 已提交
695
		fl6.saddr = *saddr;
696
	fl6.flowi6_oif = icmp6_iif(skb);
697
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
698
	fl6.flowi6_mark = mark;
699
	fl6.flowi6_uid = sock_net_uid(net, NULL);
700
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
L
Linus Torvalds 已提交
701

702
	local_bh_disable();
703
	sk = icmpv6_xmit_lock(net);
704
	if (!sk)
705
		goto out_bh_enable;
706
	sk->sk_mark = mark;
707
	np = inet6_sk(sk);
708

709 710
	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
711 712
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;
L
Linus Torvalds 已提交
713

714
	if (ip6_dst_lookup(net, sk, &dst, &fl6))
L
Linus Torvalds 已提交
715
		goto out;
716
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
717
	if (IS_ERR(dst))
718
		goto out;
L
Linus Torvalds 已提交
719

E
Eric Dumazet 已提交
720
	idev = __in6_dev_get(skb->dev);
L
Linus Torvalds 已提交
721 722 723

	msg.skb = skb;
	msg.offset = 0;
724
	msg.type = ICMPV6_ECHO_REPLY;
L
Linus Torvalds 已提交
725

726
	ipcm6_init_sk(&ipc6, np);
W
Wei Wang 已提交
727 728 729
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));

730 731 732
	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
733
			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
E
Eric Dumazet 已提交
734
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
L
Linus Torvalds 已提交
735
		ip6_flush_pending_frames(sk);
E
Eric Dumazet 已提交
736
	} else {
737 738
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
L
Linus Torvalds 已提交
739 740
	}
	dst_release(dst);
741
out:
742
	icmpv6_xmit_unlock(sk);
743 744
out_bh_enable:
	local_bh_enable();
L
Linus Torvalds 已提交
745 746
}

747
/*
 * Hand a received ICMPv6 error to the protocol of the packet it quotes.
 *
 * @skb:  buffer whose data starts at the quoted IPv6 header
 * @type: ICMPv6 type of the received message
 * @code: ICMPv6 code of the received message
 * @info: 32-bit info field of the received message
 *
 * The protocol's err_handler (if any) and the raw sockets are notified.
 * ICMP6_MIB_INERRORS is bumped when the quoted packet is too short or
 * its extension headers cannot be skipped.
 */
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	struct net *net = dev_net(skb->dev);
	const struct inet6_protocol *ipprot;
	int inner_offset = sizeof(struct ipv6hdr);
	__be16 frag_off;
	u8 nexthdr;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			goto out;
	}

	/* Check the header including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset + 8))
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
790

L
Linus Torvalds 已提交
791 792 793 794
/*
 *	Handle icmp messages
 */

795
static int icmpv6_rcv(struct sk_buff *skb)
L
Linus Torvalds 已提交
796 797 798
{
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
799
	const struct in6_addr *saddr, *daddr;
L
Linus Torvalds 已提交
800
	struct icmp6hdr *hdr;
801
	u8 type;
802
	bool success = false;
L
Linus Torvalds 已提交
803

804
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
805
		struct sec_path *sp = skb_sec_path(skb);
806 807
		int nh;

808
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
809 810 811
				 XFRM_STATE_ICMP))
			goto drop_no_count;

812
		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
813 814 815 816 817 818 819 820 821 822 823
			goto drop_no_count;

		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

E
Eric Dumazet 已提交
824
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
L
Linus Torvalds 已提交
825

826 827
	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;
L
Linus Torvalds 已提交
828

T
Tom Herbert 已提交
829
	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
830 831
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
T
Tom Herbert 已提交
832
		goto csum_error;
L
Linus Torvalds 已提交
833 834
	}

835 836
	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;
L
Linus Torvalds 已提交
837

838
	hdr = icmp6_hdr(skb);
L
Linus Torvalds 已提交
839 840 841

	type = hdr->icmp6_type;

842
	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
L
Linus Torvalds 已提交
843 844 845 846 847 848 849

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
850
		success = ping_rcv(skb);
L
Linus Torvalds 已提交
851 852 853 854 855 856 857 858 859 860
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
861
		hdr = icmp6_hdr(skb);
L
Linus Torvalds 已提交
862

863 864
		/* to notify */
		/* fall through */
L
Linus Torvalds 已提交
865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

902 903
		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);
904

905 906 907
		/*
		 * error of unknown type.
		 * must pass to upper level
L
Linus Torvalds 已提交
908 909 910
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
911 912
	}

913 914 915 916 917 918 919 920
	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (success)
		consume_skb(skb);
	else
		kfree_skb(skb);

L
Linus Torvalds 已提交
921 922
	return 0;

923
csum_error:
E
Eric Dumazet 已提交
924
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
L
Linus Torvalds 已提交
925
discard_it:
E
Eric Dumazet 已提交
926
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
927
drop_no_count:
L
Linus Torvalds 已提交
928 929 930 931
	kfree_skb(skb);
	return 0;
}

932
void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
933 934 935 936 937
		      u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr,
		      int oif)
{
938
	memset(fl6, 0, sizeof(*fl6));
A
Alexey Dobriyan 已提交
939 940
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
941
	fl6->flowi6_proto	= IPPROTO_ICMPV6;
942 943
	fl6->fl6_icmp_type	= type;
	fl6->fl6_icmp_code	= 0;
944 945
	fl6->flowi6_oif		= oif;
	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
946 947
}

948
/*
 * Per-namespace init: allocate the socket array and create one raw
 * ICMPv6 control socket for every possible CPU.  Returns 0 or a
 * negative errno; on failure everything created so far is undone.
 */
static int __net_init icmpv6_sk_init(struct net *net)
{
	struct sock *sk;
	int cpu, undo, err;

	net->ipv6.icmp_sk = kcalloc(nr_cpu_ids, sizeof(struct sock *),
				    GFP_KERNEL);
	if (!net->ipv6.icmp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			goto fail;
		}

		net->ipv6.icmp_sk[cpu] = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}
	return 0;

fail:
	/* Tear down every slot below the CPU that failed. */
	for (undo = 0; undo < cpu; undo++)
		inet_ctl_sock_destroy(net->ipv6.icmp_sk[undo]);
	kfree(net->ipv6.icmp_sk);
	return err;
}

983
/* Per-namespace exit: destroy every per-cpu socket and free the array. */
static void __net_exit icmpv6_sk_exit(struct net *net)
{
	int cpu;

	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(net->ipv6.icmp_sk[cpu]);

	kfree(net->ipv6.icmp_sk);
}

993
static struct pernet_operations icmpv6_sk_ops = {
994 995
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008
};

/*
 * Register the per-namespace sockets, the ICMPv6 protocol handler and
 * the ICMPv6 sender, in that order.  Returns 0 on success or a
 * negative errno after undoing the steps that had succeeded.
 */
int __init icmpv6_init(void)
{
	int err = register_pernet_subsys(&icmpv6_sk_ops);

	if (err < 0)
		return err;

	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
		err = -EAGAIN;
		goto fail;
	}

	err = inet6_register_icmp_sender(icmp6_send);
	if (!err)
		return 0;

	/* Sender registration failed: remove the protocol again. */
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}

1023
void icmpv6_cleanup(void)
1024
{
1025
	inet6_unregister_icmp_sender(icmp6_send);
1026
	unregister_pernet_subsys(&icmpv6_sk_ops);
L
Linus Torvalds 已提交
1027 1028 1029
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}

1030

1031
/*
 * Errno and "fatal" flag for each Destination Unreachable code; the
 * code is the array index (see icmpv6_err_convert()).
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};

1065
/*
 * Translate an ICMPv6 error into an errno.
 *
 * @type: ICMPv6 type
 * @code: ICMPv6 code (only used for Destination Unreachable)
 * @err:  receives the positive errno value; EPROTO for anything that
 *        has no specific mapping
 *
 * Returns 1 if the error is considered fatal, 0 otherwise.
 */
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	switch (type) {
	case ICMPV6_DEST_UNREACH:
		if (code < ARRAY_SIZE(tab_unreach)) {
			*err = tab_unreach[code].err;
			return tab_unreach[code].fatal;
		}
		/* Codes outside the table are fatal protocol errors. */
		*err = EPROTO;
		return 1;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		return 0;

	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		return 1;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		return 0;

	default:
		*err = EPROTO;
		return 0;
	}
}
EXPORT_SYMBOL(icmpv6_err_convert);

L
Linus Torvalds 已提交
1098
#ifdef CONFIG_SYSCTL
S
stephen hemminger 已提交
1099
static struct ctl_table ipv6_icmp_table_template[] = {
L
Linus Torvalds 已提交
1100 1101
	{
		.procname	= "ratelimit",
1102
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
L
Linus Torvalds 已提交
1103 1104
		.maxlen		= sizeof(int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1105
		.proc_handler	= proc_dointvec_ms_jiffies,
L
Linus Torvalds 已提交
1106
	},
1107
	{ },
L
Linus Torvalds 已提交
1108
};
1109

1110
/*
 * Duplicate the sysctl template for @net and point its "ratelimit"
 * entry at that namespace's icmpv6_time.  Returns the new table, or
 * NULL if the allocation failed.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table = kmemdup(ipv6_icmp_table_template,
					  sizeof(ipv6_icmp_table_template),
					  GFP_KERNEL);

	if (!table)
		return NULL;

	table[0].data = &net->ipv6.sysctl.icmpv6_time;
	return table;
}
L
Linus Torvalds 已提交
1123
#endif