/*
 *	Internet Control Message Protocol (ICMPv6)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on net/ipv4/icmp.c
 *
 *	RFC 1885
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to a icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure to sent parameter problem for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/netfilter.h>
#include <linux/slab.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/dsfield.h>
#include <net/l3mdev.h>

#include <asm/uaccess.h>

/*
 *	The ICMP socket(s). This is the most convenient way to flow control
 *	our ICMP output as well as maintain a clean interface throughout
 *	all layers. All Socketless IP sends will soon be gone.
 *
 *	On SMP we have one ICMP socket per-cpu.
 */
82 83 84 85
static inline struct sock *icmpv6_sk(struct net *net)
{
	return net->ipv6.icmp_sk[smp_processor_id()];
}
L
Linus Torvalds 已提交
86

87 88 89
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
90 91
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 93 94 95 96
	struct net *net = dev_net(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, 0, 0);
	else if (type == NDISC_REDIRECT)
97
		ip6_redirect(skb, net, skb->dev->ifindex, 0);
98 99 100

	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
101
			ping_err(skb, offset, ntohl(info));
102 103
}

104
static int icmpv6_rcv(struct sk_buff *skb);
L
Linus Torvalds 已提交
105

106
static const struct inet6_protocol icmpv6_protocol = {
L
Linus Torvalds 已提交
107
	.handler	=	icmpv6_rcv,
108
	.err_handler	=	icmpv6_err,
109
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
L
Linus Torvalds 已提交
110 111
};

112
static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
L
Linus Torvalds 已提交
113
{
114 115
	struct sock *sk;

L
Linus Torvalds 已提交
116 117
	local_bh_disable();

118
	sk = icmpv6_sk(net);
119
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
L
Linus Torvalds 已提交
120 121 122 123 124
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		local_bh_enable();
125
		return NULL;
L
Linus Torvalds 已提交
126
	}
127
	return sk;
L
Linus Torvalds 已提交
128 129
}

130
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
L
Linus Torvalds 已提交
131
{
132
	spin_unlock_bh(&sk->sk_lock.slock);
L
Linus Torvalds 已提交
133 134 135 136 137 138 139 140 141 142 143 144 145
}

/*
 * Figure out, may we reply to this packet with icmp error.
 *
 * We do not reply, if:
 *	- it was icmp error message.
 *	- it is truncated, so that it is known, that protocol is ICMPV6
 *	  (i.e. in the middle of some exthdr)
 *
 *	--ANK (980726)
 */

146
static bool is_ineligible(const struct sk_buff *skb)
L
Linus Torvalds 已提交
147
{
148
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
L
Linus Torvalds 已提交
149
	int len = skb->len - ptr;
150
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
151
	__be16 frag_off;
L
Linus Torvalds 已提交
152 153

	if (len < 0)
154
		return true;
L
Linus Torvalds 已提交
155

156
	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
L
Linus Torvalds 已提交
157
	if (ptr < 0)
158
		return false;
L
Linus Torvalds 已提交
159 160 161 162 163
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);
164
		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
165
			return true;
L
Linus Torvalds 已提交
166
	}
167
	return false;
L
Linus Torvalds 已提交
168 169
}

170 171
/*
 * Check the ICMP output rate limit
L
Linus Torvalds 已提交
172
 */
173 174
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
L
Linus Torvalds 已提交
175
{
176
	struct net *net = sock_net(sk);
177
	struct dst_entry *dst;
178
	bool res = false;
L
Linus Torvalds 已提交
179 180 181

	/* Informational messages are not limited. */
	if (type & ICMPV6_INFOMSG_MASK)
182
		return true;
L
Linus Torvalds 已提交
183 184 185

	/* Do not limit pmtu discovery, it would break it. */
	if (type == ICMPV6_PKT_TOOBIG)
186
		return true;
L
Linus Torvalds 已提交
187

188
	/*
L
Linus Torvalds 已提交
189 190 191 192
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
193
	dst = ip6_route_output(net, sk, fl6);
L
Linus Torvalds 已提交
194
	if (dst->error) {
195
		IP6_INC_STATS(net, ip6_dst_idev(dst),
196
			      IPSTATS_MIB_OUTNOROUTES);
L
Linus Torvalds 已提交
197
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
198
		res = true;
L
Linus Torvalds 已提交
199 200
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
201
		int tmo = net->ipv6.sysctl.icmpv6_time;
L
Linus Torvalds 已提交
202 203 204 205 206

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

207 208 209 210
		if (icmp_global_allow()) {
			struct inet_peer *peer;

			peer = inet_getpeer_v6(net->ipv6.peers,
211
					       &fl6->daddr, 1);
212 213 214 215
			res = inet_peer_xrlim_allow(peer, tmo);
			if (peer)
				inet_putpeer(peer);
		}
L
Linus Torvalds 已提交
216 217 218 219 220 221 222 223
	}
	dst_release(dst);
	return res;
}

/*
 *	an inline helper for the "simple" if statement below
 *	checks if parameter problem report is caused by an
224
 *	unrecognized IPv6 option that has the Option Type
L
Linus Torvalds 已提交
225 226 227
 *	highest-order two bits set to 10
 */

228
static bool opt_unrec(struct sk_buff *skb, __u32 offset)
L
Linus Torvalds 已提交
229 230 231
{
	u8 _optval, *op;

232
	offset += skb_network_offset(skb);
L
Linus Torvalds 已提交
233
	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
234
	if (!op)
235
		return true;
L
Linus Torvalds 已提交
236 237 238
	return (*op & 0xC0) == 0x80;
}

239 240
int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
			       struct icmp6hdr *thdr, int len)
L
Linus Torvalds 已提交
241 242 243 244 245
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;
	int err = 0;

246
	skb = skb_peek(&sk->sk_write_queue);
247
	if (!skb)
L
Linus Torvalds 已提交
248 249
		goto out;

250
	icmp6h = icmp6_hdr(skb);
L
Linus Torvalds 已提交
251 252 253 254
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
255
		skb->csum = csum_partial(icmp6h,
L
Linus Torvalds 已提交
256
					sizeof(struct icmp6hdr), skb->csum);
257 258 259
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
L
Linus Torvalds 已提交
260 261
						      skb->csum);
	} else {
262
		__wsum tmp_csum = 0;
L
Linus Torvalds 已提交
263 264 265 266 267

		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

268
		tmp_csum = csum_partial(icmp6h,
L
Linus Torvalds 已提交
269
					sizeof(struct icmp6hdr), tmp_csum);
270 271 272
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
273
						      tmp_csum);
L
Linus Torvalds 已提交
274 275 276 277 278 279 280 281 282
	}
	ip6_push_pending_frames(sk);
out:
	return err;
}

struct icmpv6_msg {
	struct sk_buff	*skb;
	int		offset;
283
	uint8_t		type;
L
Linus Torvalds 已提交
284 285 286 287 288 289
};

static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
290
	__wsum csum = 0;
L
Linus Torvalds 已提交
291 292 293 294

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len, csum);
	skb->csum = csum_block_add(skb->csum, csum, odd);
295 296
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
L
Linus Torvalds 已提交
297 298 299
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
/*
 * mip6_addr_swap - exchange the IPv6 source address with the Home Address
 * @skb: packet to modify in place
 *
 * When the packet carries a Home Address destination option
 * (IP6CB(skb)->dsthao is set and the TLV is found), the source address
 * in the IPv6 header and the address in the option are swapped.
 * Compiles to an empty stub without CONFIG_IPV6_MIP6.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif

S
stephen hemminger 已提交
324 325 326 327
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
328 329
{
	struct dst_entry *dst, *dst2;
330
	struct flowi6 fl2;
331 332
	int err;

333
	err = ip6_dst_lookup(net, sk, &dst, fl6);
334 335 336 337 338 339 340
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
341
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
342
		net_dbg_ratelimited("icmp6_send: acast source\n");
343 344 345 346 347 348 349
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

350
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
351
	if (!IS_ERR(dst)) {
352 353
		if (dst != dst2)
			return dst;
354 355 356 357 358
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
359 360
	}

361
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
362 363 364
	if (err)
		goto relookup_failed;

365
	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
366 367 368
	if (err)
		goto relookup_failed;

369
	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
370
	if (!IS_ERR(dst2)) {
371 372
		dst_release(dst);
		dst = dst2;
373 374 375 376 377 378 379
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
380 381 382 383 384 385 386 387
	}

relookup_failed:
	if (dst)
		return dst;
	return ERR_PTR(err);
}

L
Linus Torvalds 已提交
388 389 390
/*
 *	Send an ICMP message in response to a packet in error
 */
391 392
static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		       const struct in6_addr *force_saddr)
L
Linus Torvalds 已提交
393
{
394
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
395
	struct inet6_dev *idev = NULL;
396
	struct ipv6hdr *hdr = ipv6_hdr(skb);
397 398
	struct sock *sk;
	struct ipv6_pinfo *np;
399
	const struct in6_addr *saddr = NULL;
L
Linus Torvalds 已提交
400 401
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
402
	struct flowi6 fl6;
L
Linus Torvalds 已提交
403
	struct icmpv6_msg msg;
404
	struct sockcm_cookie sockc_unused = {0};
W
Wei Wang 已提交
405
	struct ipcm6_cookie ipc6;
L
Linus Torvalds 已提交
406 407 408 409
	int iif = 0;
	int addr_type = 0;
	int len;
	int err = 0;
410
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
L
Linus Torvalds 已提交
411

412
	if ((u8 *)hdr < skb->head ||
413
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
L
Linus Torvalds 已提交
414 415 416
		return;

	/*
417
	 *	Make sure we respect the rules
L
Linus Torvalds 已提交
418
	 *	i.e. RFC 1885 2.4(e)
419
	 *	Rule (e.1) is enforced by not using icmp6_send
L
Linus Torvalds 已提交
420 421 422 423
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

424
	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
425
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
L
Linus Torvalds 已提交
426 427 428 429 430 431
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

Z
zhuyj 已提交
432
	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
L
Linus Torvalds 已提交
433
		if (type != ICMPV6_PKT_TOOBIG &&
434 435
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
L
Linus Torvalds 已提交
436 437 438 439 440 441 442 443 444 445 446 447
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

448
	if (__ipv6_addr_needs_scope_id(addr_type))
L
Linus Torvalds 已提交
449
		iif = skb->dev->ifindex;
450 451 452 453
	else {
		dst = skb_dst(skb);
		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
	}
L
Linus Torvalds 已提交
454 455

	/*
456 457 458 459
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
L
Linus Torvalds 已提交
460 461
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
462 463
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
L
Linus Torvalds 已提交
464 465 466
		return;
	}

467
	/*
L
Linus Torvalds 已提交
468 469 470
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
471 472
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
L
Linus Torvalds 已提交
473 474 475
		return;
	}

476 477
	mip6_addr_swap(skb);

478 479
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
A
Alexey Dobriyan 已提交
480
	fl6.daddr = hdr->saddr;
481 482
	if (force_saddr)
		saddr = force_saddr;
L
Linus Torvalds 已提交
483
	if (saddr)
A
Alexey Dobriyan 已提交
484
		fl6.saddr = *saddr;
485
	fl6.flowi6_mark = mark;
486
	fl6.flowi6_oif = iif;
487 488
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
489
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
L
Linus Torvalds 已提交
490

491
	sk = icmpv6_xmit_lock(net);
492
	if (!sk)
493
		return;
494
	sk->sk_mark = mark;
495
	np = inet6_sk(sk);
496

497
	if (!icmpv6_xrlim_allow(sk, type, &fl6))
L
Linus Torvalds 已提交
498 499 500 501 502 503 504
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

505 506
	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
507 508
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;
L
Linus Torvalds 已提交
509

510 511 512
	ipc6.tclass = np->tclass;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

513
	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
514
	if (IS_ERR(dst))
L
Linus Torvalds 已提交
515
		goto out;
516

W
Wei Wang 已提交
517 518 519
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.dontfrag = np->dontfrag;
	ipc6.opt = NULL;
L
Linus Torvalds 已提交
520 521

	msg.skb = skb;
522
	msg.offset = skb_network_offset(skb);
523
	msg.type = type;
L
Linus Torvalds 已提交
524 525

	len = skb->len - msg.offset;
526
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
L
Linus Torvalds 已提交
527
	if (len < 0) {
528 529
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
L
Linus Torvalds 已提交
530 531 532
		goto out_dst_release;
	}

E
Eric Dumazet 已提交
533 534
	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);
L
Linus Torvalds 已提交
535 536 537

	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
			      len + sizeof(struct icmp6hdr),
W
Wei Wang 已提交
538 539 540
			      sizeof(struct icmp6hdr),
			      &ipc6, &fl6, (struct rt6_info *)dst,
			      MSG_DONTWAIT, &sockc_unused);
L
Linus Torvalds 已提交
541
	if (err) {
542
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
L
Linus Torvalds 已提交
543
		ip6_flush_pending_frames(sk);
E
Eric Dumazet 已提交
544 545 546
	} else {
		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
						 len + sizeof(struct icmp6hdr));
L
Linus Torvalds 已提交
547
	}
E
Eric Dumazet 已提交
548
	rcu_read_unlock();
L
Linus Torvalds 已提交
549 550 551
out_dst_release:
	dst_release(dst);
out:
552
	icmpv6_xmit_unlock(sk);
L
Linus Torvalds 已提交
553
}
554 555 556 557 558

/* Slightly more convenient version of icmp6_send.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
559
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
560 561
	kfree_skb(skb);
}
562

563 564 565 566 567 568
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 */
569 570
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
571
{
572
	struct in6_addr temp_saddr;
573 574
	struct rt6_info *rt;
	struct sk_buff *skb2;
575
	u32 info = 0;
576 577 578 579

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

580 581 582 583 584
	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
585 586 587 588 589 590 591 592 593 594 595 596 597

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

598
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
599 600 601 602 603 604 605 606 607 608 609 610 611 612

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
613 614
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
615
			   info, &temp_saddr);
616 617
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
618
			   info, &temp_saddr);
619 620 621 622 623 624 625 626 627
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);

L
Linus Torvalds 已提交
628 629
static void icmpv6_echo_reply(struct sk_buff *skb)
{
630
	struct net *net = dev_net(skb->dev);
631
	struct sock *sk;
L
Linus Torvalds 已提交
632
	struct inet6_dev *idev;
633
	struct ipv6_pinfo *np;
634
	const struct in6_addr *saddr = NULL;
635
	struct icmp6hdr *icmph = icmp6_hdr(skb);
L
Linus Torvalds 已提交
636
	struct icmp6hdr tmp_hdr;
637
	struct flowi6 fl6;
L
Linus Torvalds 已提交
638 639
	struct icmpv6_msg msg;
	struct dst_entry *dst;
W
Wei Wang 已提交
640
	struct ipcm6_cookie ipc6;
L
Linus Torvalds 已提交
641
	int err = 0;
642
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
643
	struct sockcm_cookie sockc_unused = {0};
L
Linus Torvalds 已提交
644

645
	saddr = &ipv6_hdr(skb)->daddr;
L
Linus Torvalds 已提交
646

647
	if (!ipv6_unicast_destination(skb) &&
648
	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
649
	      ipv6_anycast_destination(skb_dst(skb), saddr)))
L
Linus Torvalds 已提交
650 651 652 653 654
		saddr = NULL;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

655 656
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
A
Alexey Dobriyan 已提交
657
	fl6.daddr = ipv6_hdr(skb)->saddr;
L
Linus Torvalds 已提交
658
	if (saddr)
A
Alexey Dobriyan 已提交
659
		fl6.saddr = *saddr;
660
	fl6.flowi6_oif = skb->dev->ifindex;
661
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
662
	fl6.flowi6_mark = mark;
663
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
L
Linus Torvalds 已提交
664

665
	sk = icmpv6_xmit_lock(net);
666
	if (!sk)
667
		return;
668
	sk->sk_mark = mark;
669
	np = inet6_sk(sk);
670

671 672
	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
673 674
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;
L
Linus Torvalds 已提交
675

676
	err = ip6_dst_lookup(net, sk, &dst, &fl6);
L
Linus Torvalds 已提交
677 678
	if (err)
		goto out;
679
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
680
	if (IS_ERR(dst))
681
		goto out;
L
Linus Torvalds 已提交
682

E
Eric Dumazet 已提交
683
	idev = __in6_dev_get(skb->dev);
L
Linus Torvalds 已提交
684 685 686

	msg.skb = skb;
	msg.offset = 0;
687
	msg.type = ICMPV6_ECHO_REPLY;
L
Linus Torvalds 已提交
688

W
Wei Wang 已提交
689 690 691 692 693
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.dontfrag = np->dontfrag;
	ipc6.opt = NULL;

L
Linus Torvalds 已提交
694
	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
W
Wei Wang 已提交
695
				sizeof(struct icmp6hdr), &ipc6, &fl6,
696
				(struct rt6_info *)dst, MSG_DONTWAIT,
W
Wei Wang 已提交
697
				&sockc_unused);
L
Linus Torvalds 已提交
698 699

	if (err) {
E
Eric Dumazet 已提交
700
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
L
Linus Torvalds 已提交
701
		ip6_flush_pending_frames(sk);
E
Eric Dumazet 已提交
702 703 704
	} else {
		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
						 skb->len + sizeof(struct icmp6hdr));
L
Linus Torvalds 已提交
705 706
	}
	dst_release(dst);
707
out:
708
	icmpv6_xmit_unlock(sk);
L
Linus Torvalds 已提交
709 710
}

711
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
L
Linus Torvalds 已提交
712
{
713
	const struct inet6_protocol *ipprot;
L
Linus Torvalds 已提交
714
	int inner_offset;
715
	__be16 frag_off;
716
	u8 nexthdr;
717
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
718 719

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
720
		goto out;
L
Linus Torvalds 已提交
721 722 723 724

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
725 726
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
727
		if (inner_offset < 0)
728
			goto out;
L
Linus Torvalds 已提交
729 730 731 732 733 734
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
735
		goto out;
L
Linus Torvalds 已提交
736 737 738 739 740 741 742 743

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

744
	ipprot = rcu_dereference(inet6_protos[nexthdr]);
L
Linus Torvalds 已提交
745 746 747
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);

748
	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
749 750 751
	return;

out:
E
Eric Dumazet 已提交
752
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
L
Linus Torvalds 已提交
753
}
754

L
Linus Torvalds 已提交
755 756 757 758
/*
 *	Handle icmp messages
 */

759
static int icmpv6_rcv(struct sk_buff *skb)
L
Linus Torvalds 已提交
760 761 762
{
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
763
	const struct in6_addr *saddr, *daddr;
L
Linus Torvalds 已提交
764
	struct icmp6hdr *hdr;
765
	u8 type;
766
	bool success = false;
L
Linus Torvalds 已提交
767

768
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
769
		struct sec_path *sp = skb_sec_path(skb);
770 771
		int nh;

772
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
773 774 775
				 XFRM_STATE_ICMP))
			goto drop_no_count;

776
		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
777 778 779 780 781 782 783 784 785 786 787
			goto drop_no_count;

		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

E
Eric Dumazet 已提交
788
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
L
Linus Torvalds 已提交
789

790 791
	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;
L
Linus Torvalds 已提交
792

T
Tom Herbert 已提交
793
	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
794 795
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
T
Tom Herbert 已提交
796
		goto csum_error;
L
Linus Torvalds 已提交
797 798
	}

799 800
	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;
L
Linus Torvalds 已提交
801

802
	hdr = icmp6_hdr(skb);
L
Linus Torvalds 已提交
803 804 805

	type = hdr->icmp6_type;

806
	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
L
Linus Torvalds 已提交
807 808 809 810 811 812 813

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
814
		success = ping_rcv(skb);
L
Linus Torvalds 已提交
815 816 817 818 819 820 821 822 823 824
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
825
		hdr = icmp6_hdr(skb);
L
Linus Torvalds 已提交
826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867

		/*
		 *	Drop through to notify
		 */

	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

868 869
		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);
870

871 872 873
		/*
		 * error of unknown type.
		 * must pass to upper level
L
Linus Torvalds 已提交
874 875 876
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
877 878
	}

879 880 881 882 883 884 885 886
	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (success)
		consume_skb(skb);
	else
		kfree_skb(skb);

L
Linus Torvalds 已提交
887 888
	return 0;

889
csum_error:
E
Eric Dumazet 已提交
890
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
L
Linus Torvalds 已提交
891
discard_it:
E
Eric Dumazet 已提交
892
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
893
drop_no_count:
L
Linus Torvalds 已提交
894 895 896 897
	kfree_skb(skb);
	return 0;
}

898
void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
899 900 901 902 903
		      u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr,
		      int oif)
{
904
	memset(fl6, 0, sizeof(*fl6));
A
Alexey Dobriyan 已提交
905 906
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
907
	fl6->flowi6_proto	= IPPROTO_ICMPV6;
908 909
	fl6->fl6_icmp_type	= type;
	fl6->fl6_icmp_code	= 0;
910 911
	fl6->flowi6_oif		= oif;
	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
912 913
}

914
static int __net_init icmpv6_sk_init(struct net *net)
L
Linus Torvalds 已提交
915 916 917 918
{
	struct sock *sk;
	int err, i, j;

919 920
	net->ipv6.icmp_sk =
		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
921
	if (!net->ipv6.icmp_sk)
922 923
		return -ENOMEM;

924
	for_each_possible_cpu(i) {
925 926
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, net);
L
Linus Torvalds 已提交
927
		if (err < 0) {
928
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
L
Linus Torvalds 已提交
929 930 931 932
			       err);
			goto fail;
		}

933
		net->ipv6.icmp_sk[i] = sk;
934

L
Linus Torvalds 已提交
935 936 937
		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
E
Eric Dumazet 已提交
938
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
L
Linus Torvalds 已提交
939 940 941 942
	}
	return 0;

 fail:
943
	for (j = 0; j < i; j++)
944
		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
945
	kfree(net->ipv6.icmp_sk);
L
Linus Torvalds 已提交
946 947 948
	return err;
}

949
static void __net_exit icmpv6_sk_exit(struct net *net)
L
Linus Torvalds 已提交
950 951 952
{
	int i;

953
	for_each_possible_cpu(i) {
954
		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
L
Linus Torvalds 已提交
955
	}
956 957 958
	kfree(net->ipv6.icmp_sk);
}

959
static struct pernet_operations icmpv6_sk_ops = {
960 961
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
962 963 964 965 966 967 968 969 970 971 972 973 974
};

/*
 * icmpv6_init - register the ICMPv6 protocol
 *
 * Registers, in order, the per-namespace socket operations, the
 * IPPROTO_ICMPV6 protocol handler and icmp6_send() as the ICMPv6 sender.
 * A failure at any step undoes the earlier ones.
 *
 * Returns 0 on success, the error from register_pernet_subsys() or
 * inet6_register_icmp_sender(), or -EAGAIN when the protocol handler
 * cannot be added.
 */
int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}

989
void icmpv6_cleanup(void)
990
{
991
	inet6_unregister_icmp_sender(icmp6_send);
992
	unregister_pernet_subsys(&icmpv6_sk_ops);
L
Linus Torvalds 已提交
993 994 995
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}


/*
 * errno and "fatal" flag for each ICMPV6_DEST_UNREACH code, indexed by
 * the code value; consulted by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};

1031
int icmpv6_err_convert(u8 type, u8 code, int *err)
L
Linus Torvalds 已提交
1032 1033 1034 1035 1036 1037 1038 1039
{
	int fatal = 0;

	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		fatal = 1;
1040
		if (code < ARRAY_SIZE(tab_unreach)) {
L
Linus Torvalds 已提交
1041 1042 1043 1044 1045 1046 1047 1048
			*err  = tab_unreach[code].err;
			fatal = tab_unreach[code].fatal;
		}
		break;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		break;
1049

L
Linus Torvalds 已提交
1050 1051 1052 1053 1054 1055 1056 1057
	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		fatal = 1;
		break;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		break;
1058
	}
L
Linus Torvalds 已提交
1059 1060 1061

	return fatal;
}
1062 1063
EXPORT_SYMBOL(icmpv6_err_convert);

#ifdef CONFIG_SYSCTL
/*
 * Template for the per-namespace ICMPv6 sysctl table. The "ratelimit"
 * entry is handled by proc_dointvec_ms_jiffies; its .data pointer is
 * redirected to the owning namespace by ipv6_icmp_sysctl_init().
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ },
};

/*
 * ipv6_icmp_sysctl_init - build the ICMPv6 sysctl table for a namespace
 * @net: network namespace the table is for
 *
 * Duplicates the template and points the "ratelimit" entry at @net's
 * icmpv6_time.
 *
 * Returns the new table, or NULL when the allocation fails.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table)
		table[0].data = &net->ipv6.sysctl.icmpv6_time;

	return table;
}
#endif