icmp.c 22.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
/*
 *	Internet Control Message Protocol (ICMPv6)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on net/ipv4/icmp.c
 *
 *	RFC 1885
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to a icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure a parameter problem is sent
 *					for fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
 */

32 33
#define pr_fmt(fmt) "IPv6: " fmt

L
Linus Torvalds 已提交
34 35 36 37 38 39 40 41 42 43
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
44
#include <linux/netfilter.h>
45
#include <linux/slab.h>
L
Linus Torvalds 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58 59

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
60
#include <net/ping.h>
L
Linus Torvalds 已提交
61 62 63 64 65 66 67
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
68
#include <net/xfrm.h>
69
#include <net/inet_common.h>
L
Linus Torvalds 已提交
70 71 72 73 74 75 76 77 78 79

#include <asm/uaccess.h>

/*
 *	The ICMP socket(s). This is the most convenient way to flow control
 *	our ICMP output as well as maintain a clean interface throughout
 *	all layers. All Socketless IP sends will soon be gone.
 *
 *	On SMP we have one ICMP socket per-cpu.
 */
80 81 82 83
/* Return the ICMPv6 socket stored in @net for the CPU we are running on. */
static inline struct sock *icmpv6_sk(struct net *net)
{
	const int cpu = smp_processor_id();

	return net->ipv6.icmp_sk[cpu];
}
L
Linus Torvalds 已提交
84

85 86 87
/*
 * Error handler installed in icmpv6_protocol.
 *
 * @skb:    buffer carrying the received ICMPv6 error
 * @opt:    not used by this handler
 * @type:   type of the received ICMPv6 message
 * @code:   not used by this handler
 * @offset: offset from skb->data to the embedded ICMPv6 header
 * @info:   handed on to ip6_update_pmtu() and ping_err()
 */
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *inner = (struct icmp6hdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);

	switch (type) {
	case ICMPV6_PKT_TOOBIG:
		ip6_update_pmtu(skb, net, info, 0, 0);
		break;
	case NDISC_REDIRECT:
		ip6_redirect(skb, net, skb->dev->ifindex, 0);
		break;
	default:
		break;
	}

	/* Errors that quote one of our echo requests go to the ping layer. */
	if (!(type & ICMPV6_INFOMSG_MASK) &&
	    inner->icmp6_type == ICMPV6_ECHO_REQUEST)
		ping_err(skb, offset, info);
}

102
/* Forward declaration: icmpv6_protocol below names the handler before it is defined. */
static int icmpv6_rcv(struct sk_buff *skb);
L
Linus Torvalds 已提交
103

104
static const struct inet6_protocol icmpv6_protocol = {
L
Linus Torvalds 已提交
105
	.handler	=	icmpv6_rcv,
106
	.err_handler	=	icmpv6_err,
107
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
L
Linus Torvalds 已提交
108 109
};

110
static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
L
Linus Torvalds 已提交
111
{
112 113
	struct sock *sk;

L
Linus Torvalds 已提交
114 115
	local_bh_disable();

116
	sk = icmpv6_sk(net);
117
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
L
Linus Torvalds 已提交
118 119 120 121 122
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		local_bh_enable();
123
		return NULL;
L
Linus Torvalds 已提交
124
	}
125
	return sk;
L
Linus Torvalds 已提交
126 127
}

128
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
L
Linus Torvalds 已提交
129
{
130
	spin_unlock_bh(&sk->sk_lock.slock);
L
Linus Torvalds 已提交
131 132 133 134 135 136 137 138 139 140 141 142 143
}

/*
 * Decide whether we may reply to this packet with an ICMP error.
 *
 * We do not reply if:
 *	- the packet was itself an ICMP error message;
 *	- the packet is truncated such that the protocol is known to be
 *	  ICMPV6 but its type cannot be read
 *	  (i.e. in the middle of some exthdr).
 *
 *	--ANK (980726)
 */

144
static bool is_ineligible(const struct sk_buff *skb)
L
Linus Torvalds 已提交
145
{
146
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
L
Linus Torvalds 已提交
147
	int len = skb->len - ptr;
148
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
149
	__be16 frag_off;
L
Linus Torvalds 已提交
150 151

	if (len < 0)
152
		return true;
L
Linus Torvalds 已提交
153

154
	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
L
Linus Torvalds 已提交
155
	if (ptr < 0)
156
		return false;
L
Linus Torvalds 已提交
157 158 159 160 161 162 163
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);
		if (tp == NULL ||
		    !(*tp & ICMPV6_INFOMSG_MASK))
164
			return true;
L
Linus Torvalds 已提交
165
	}
166
	return false;
L
Linus Torvalds 已提交
167 168
}

169 170
/*
 * Check the ICMP output rate limit
 */
172
/*
 * @sk:   socket used for the route lookup; its netns supplies the limits
 * @type: ICMPv6 type about to be sent
 * @fl6:  flow describing the outgoing message
 *
 * Returns true when the message may be sent, false when it must be dropped.
 */
static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
				      struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool allowed = false;

	/* Informational messages are not limited, and limiting
	 * Packet Too Big would break pmtu discovery.
	 */
	if ((type & ICMPV6_INFOMSG_MASK) || type == ICMPV6_PKT_TOOBIG)
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags & IFF_LOOPBACK)) {
		allowed = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int timeout = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			timeout >>= (128 - rt->rt6i_dst.plen) >> 5;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
		allowed = inet_peer_xrlim_allow(peer, timeout);
		if (peer)
			inet_putpeer(peer);
	}

	dst_release(dst);
	return allowed;
}

/*
 *	an inline helper for the "simple" if statement below
 *	checks if parameter problem report is caused by an
 *	unrecognized IPv6 option that has the Option Type
 *	highest-order two bits set to 10
 */

223
/*
 * @skb:    packet being examined
 * @offset: offset of the option byte, relative to the network header
 *
 * Returns true when the byte cannot be read or when its two highest-order
 * bits are 10.
 */
static bool opt_unrec(struct sk_buff *skb, __u32 offset)
{
	u8 optbuf;
	const u8 *optp;

	offset += skb_network_offset(skb);
	optp = skb_header_pointer(skb, offset, sizeof(optbuf), &optbuf);

	return !optp || (*optp & 0xC0) == 0x80;
}

234 235
int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
			       struct icmp6hdr *thdr, int len)
L
Linus Torvalds 已提交
236 237 238 239 240 241 242 243
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;
	int err = 0;

	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
		goto out;

244
	icmp6h = icmp6_hdr(skb);
L
Linus Torvalds 已提交
245 246 247 248
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
249
		skb->csum = csum_partial(icmp6h,
L
Linus Torvalds 已提交
250
					sizeof(struct icmp6hdr), skb->csum);
251 252 253
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
L
Linus Torvalds 已提交
254 255
						      skb->csum);
	} else {
256
		__wsum tmp_csum = 0;
L
Linus Torvalds 已提交
257 258 259 260 261

		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

262
		tmp_csum = csum_partial(icmp6h,
L
Linus Torvalds 已提交
263
					sizeof(struct icmp6hdr), tmp_csum);
264 265 266
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
267
						      tmp_csum);
L
Linus Torvalds 已提交
268 269 270 271 272 273 274 275 276
	}
	ip6_push_pending_frames(sk);
out:
	return err;
}

struct icmpv6_msg {
	struct sk_buff	*skb;
	int		offset;
277
	uint8_t		type;
L
Linus Torvalds 已提交
278 279 280 281 282 283
};

static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
284
	__wsum csum = 0;
L
Linus Torvalds 已提交
285 286 287 288

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len, csum);
	skb->csum = csum_block_add(skb->csum, csum, odd);
289 290
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
L
Linus Torvalds 已提交
291 292 293
	return 0;
}

A
Amerigo Wang 已提交
294
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/*
 * If @skb carries a Home Address option, exchange the address stored in
 * that option with the IPv6 source address. Does nothing otherwise.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (!opt->dsthao)
		return;

	off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
	if (unlikely(off < 0))
		return;

	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + off);

	tmp = iph->saddr;
	iph->saddr = hao->addr;
	hao->addr = tmp;
}
#else
/* Without CONFIG_IPV6_MIP6 there is nothing to swap. */
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif

S
stephen hemminger 已提交
318 319 320 321
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
322 323
{
	struct dst_entry *dst, *dst2;
324
	struct flowi6 fl2;
325 326
	int err;

327
	err = ip6_dst_lookup(sk, &dst, fl6);
328 329 330 331 332 333 334 335
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
336
		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
337 338 339 340 341 342 343
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

344
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
345
	if (!IS_ERR(dst)) {
346 347
		if (dst != dst2)
			return dst;
348 349 350 351 352
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
353 354
	}

355
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
356 357 358 359 360 361 362
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

363
	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
364
	if (!IS_ERR(dst2)) {
365 366
		dst_release(dst);
		dst = dst2;
367 368 369 370 371 372 373
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
374 375 376 377 378 379 380 381
	}

relookup_failed:
	if (dst)
		return dst;
	return ERR_PTR(err);
}

L
Linus Torvalds 已提交
382 383 384
/*
 *	Send an ICMP message in response to a packet in error
 */
385
/*
 * @skb:  the offending packet; part of it is quoted in the reply
 * @type: ICMPv6 type to send
 * @code: ICMPv6 code to send
 * @info: value stored (in network byte order) in the header's pointer field
 *
 * Nothing is sent when the packet fails the eligibility rules below, when
 * this CPU's ICMPv6 socket is busy, when the rate limiter refuses, or when
 * no route is found.
 */
static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
{
	struct net *net = dev_net(skb->dev);
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	int iif = 0;
	int addr_type = 0;
	int len;
	int hlimit;
	int err = 0;

	/* The whole IPv6 header must lie inside the buffer. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type))
		iif = skb->dev->ifindex;

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
		return;
	}

	/*
	 *	Never answer to an ICMP packet.
	 */
	if (is_ineligible(skb)) {
		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
		return;
	}

	mip6_addr_swap(skb);

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	sk = icmpv6_xmit_lock(net);
	if (sk == NULL)
		return;
	np = inet6_sk(sk);

	if (!icmpv6_xrlim_allow(sk, type, &fl6))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out;

	if (ipv6_addr_is_multicast(&fl6.daddr))
		hlimit = np->mcast_hops;
	else
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	len = skb->len - msg.offset;
	/*
	 * Reject a negative length before clamping: min_t() below compares
	 * as unsigned int, so a negative value would be turned into the
	 * maximum and could never be detected afterwards.
	 */
	if (len < 0) {
		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
		goto out_dst_release;
	}
	/* Keep the whole reply within the IPv6 minimum MTU. */
	len = min_t(unsigned int, len,
		    IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);

	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
			      len + sizeof(struct icmp6hdr),
			      sizeof(struct icmp6hdr), hlimit,
			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
			      MSG_DONTWAIT, np->dontfrag);
	if (err) {
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
						 len + sizeof(struct icmp6hdr));
	}
	rcu_read_unlock();
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
}
533 534 535 536 537 538 539 540

/*
 * Slightly more convenient version of icmp6_send: send an
 * ICMPV6_PARAMPROB error with the given @code, passing @pos as the
 * info value, and then free @skb. The caller must not touch @skb
 * afterwards.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
	kfree_skb(skb);
}
541

L
Linus Torvalds 已提交
542 543
static void icmpv6_echo_reply(struct sk_buff *skb)
{
544
	struct net *net = dev_net(skb->dev);
545
	struct sock *sk;
L
Linus Torvalds 已提交
546
	struct inet6_dev *idev;
547
	struct ipv6_pinfo *np;
548
	const struct in6_addr *saddr = NULL;
549
	struct icmp6hdr *icmph = icmp6_hdr(skb);
L
Linus Torvalds 已提交
550
	struct icmp6hdr tmp_hdr;
551
	struct flowi6 fl6;
L
Linus Torvalds 已提交
552 553 554 555 556
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	int err = 0;
	int hlimit;

557
	saddr = &ipv6_hdr(skb)->daddr;
L
Linus Torvalds 已提交
558 559 560 561 562 563 564

	if (!ipv6_unicast_destination(skb))
		saddr = NULL;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

565 566
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
A
Alexey Dobriyan 已提交
567
	fl6.daddr = ipv6_hdr(skb)->saddr;
L
Linus Torvalds 已提交
568
	if (saddr)
A
Alexey Dobriyan 已提交
569
		fl6.saddr = *saddr;
570
	fl6.flowi6_oif = skb->dev->ifindex;
571
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
572
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
L
Linus Torvalds 已提交
573

574 575
	sk = icmpv6_xmit_lock(net);
	if (sk == NULL)
576
		return;
577
	np = inet6_sk(sk);
578

579 580
	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
581 582
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;
L
Linus Torvalds 已提交
583

584
	err = ip6_dst_lookup(sk, &dst, &fl6);
L
Linus Torvalds 已提交
585 586
	if (err)
		goto out;
587
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
588
	if (IS_ERR(dst))
589
		goto out;
L
Linus Torvalds 已提交
590

591
	if (ipv6_addr_is_multicast(&fl6.daddr))
L
Linus Torvalds 已提交
592 593 594 595
		hlimit = np->mcast_hops;
	else
		hlimit = np->hop_limit;
	if (hlimit < 0)
596
		hlimit = ip6_dst_hoplimit(dst);
L
Linus Torvalds 已提交
597

E
Eric Dumazet 已提交
598
	idev = __in6_dev_get(skb->dev);
L
Linus Torvalds 已提交
599 600 601

	msg.skb = skb;
	msg.offset = 0;
602
	msg.type = ICMPV6_ECHO_REPLY;
L
Linus Torvalds 已提交
603 604

	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
605
				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
606
				(struct rt6_info *)dst, MSG_DONTWAIT,
607
				np->dontfrag);
L
Linus Torvalds 已提交
608 609

	if (err) {
E
Eric Dumazet 已提交
610
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
L
Linus Torvalds 已提交
611
		ip6_flush_pending_frames(sk);
E
Eric Dumazet 已提交
612 613 614
	} else {
		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
						 skb->len + sizeof(struct icmp6hdr));
L
Linus Torvalds 已提交
615 616
	}
	dst_release(dst);
617
out:
618
	icmpv6_xmit_unlock(sk);
L
Linus Torvalds 已提交
619 620
}

621
/*
 * Deliver a received ICMPv6 error to the protocol of the packet it quotes.
 *
 * @skb:  buffer whose data starts at the quoted IPv6 header
 * @type: ICMPv6 type of the error
 * @code: ICMPv6 code of the error
 * @info: info field of the error, passed on unchanged
 *
 * Returns silently when the quoted packet is too short to parse.
 */
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	const struct inet6_protocol *ipprot;
	int inner_offset = sizeof(struct ipv6hdr);
	__be16 frag_off;
	u8 nexthdr;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		return;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			return;
	}

	/* Check the header, including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset + 8))
		return;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	rcu_read_lock();
	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
	rcu_read_unlock();

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
}
661

L
Linus Torvalds 已提交
662 663 664 665
/*
 *	Handle icmp messages
 */

666
static int icmpv6_rcv(struct sk_buff *skb)
L
Linus Torvalds 已提交
667 668 669
{
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
670
	const struct in6_addr *saddr, *daddr;
L
Linus Torvalds 已提交
671
	struct icmp6hdr *hdr;
672
	u8 type;
L
Linus Torvalds 已提交
673

674
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
675
		struct sec_path *sp = skb_sec_path(skb);
676 677
		int nh;

678
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
679 680 681
				 XFRM_STATE_ICMP))
			goto drop_no_count;

682
		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
683 684 685 686 687 688 689 690 691 692 693
			goto drop_no_count;

		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

694
	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
L
Linus Torvalds 已提交
695

696 697
	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;
L
Linus Torvalds 已提交
698 699

	/* Perform checksum. */
700
	switch (skb->ip_summed) {
701
	case CHECKSUM_COMPLETE:
702 703 704 705 706
		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
				     skb->csum))
			break;
		/* fall through */
	case CHECKSUM_NONE:
707 708
		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
					     IPPROTO_ICMPV6, 0));
709
		if (__skb_checksum_complete(skb)) {
710 711
			LIMIT_NETDEBUG(KERN_DEBUG
				       "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
712
				       saddr, daddr);
713
			goto csum_error;
L
Linus Torvalds 已提交
714 715 716
		}
	}

717 718
	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;
L
Linus Torvalds 已提交
719

720
	hdr = icmp6_hdr(skb);
L
Linus Torvalds 已提交
721 722 723

	type = hdr->icmp6_type;

724
	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
L
Linus Torvalds 已提交
725 726 727 728 729 730 731

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
732
		ping_rcv(skb);
L
Linus Torvalds 已提交
733 734 735 736 737 738 739 740 741 742
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
743
		hdr = icmp6_hdr(skb);
L
Linus Torvalds 已提交
744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781

		/*
		 *	Drop through to notify
		 */

	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
782
		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
L
Linus Torvalds 已提交
783 784 785 786 787

		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

788 789 790
		/*
		 * error of unknown type.
		 * must pass to upper level
L
Linus Torvalds 已提交
791 792 793
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
794 795
	}

L
Linus Torvalds 已提交
796 797 798
	kfree_skb(skb);
	return 0;

799 800
csum_error:
	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
L
Linus Torvalds 已提交
801
discard_it:
802
	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
803
drop_no_count:
L
Linus Torvalds 已提交
804 805 806 807
	kfree_skb(skb);
	return 0;
}

808
void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
809 810 811 812 813
		      u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr,
		      int oif)
{
814
	memset(fl6, 0, sizeof(*fl6));
A
Alexey Dobriyan 已提交
815 816
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
817
	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
818 819
	fl6->fl6_icmp_type	= type;
	fl6->fl6_icmp_code	= 0;
820 821
	fl6->flowi6_oif		= oif;
	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
822 823
}

824
/*
 * Special lock-class for __icmpv6_sk: icmpv6_sk_init() assigns it to
 * sk_dst_lock of every per-cpu ICMPv6 socket.
 */
static struct lock_class_key icmpv6_socket_sk_dst_lock_key;

829
/*
 * Per-netns init: create one raw ICMPv6 control socket for every possible
 * CPU and store them in net->ipv6.icmp_sk[].
 *
 * Returns 0 on success, -ENOMEM when the array cannot be allocated, or the
 * error from inet_ctl_sock_create(). On failure every socket created so
 * far is destroyed and the array is freed.
 */
static int __net_init icmpv6_sk_init(struct net *net)
{
	struct sock *sk;
	int err, i, j;

	/* kcalloc() zeroes the array and checks the size multiplication. */
	net->ipv6.icmp_sk = kcalloc(nr_cpu_ids, sizeof(struct sock *),
				    GFP_KERNEL);
	if (!net->ipv6.icmp_sk)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			goto fail;
		}

		net->ipv6.icmp_sk[i] = sk;

		/*
		 * Split off their lock-class, because sk->sk_dst_lock
		 * gets used from softirqs, which is safe for
		 * __icmpv6_sk (because those never get directly used
		 * via userspace syscalls), but unsafe for normal sockets.
		 */
		lockdep_set_class(&sk->sk_dst_lock,
				  &icmpv6_socket_sk_dst_lock_key);

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}
	return 0;

 fail:
	/*
	 * Slots below the failing CPU may never have been filled when the
	 * possible-CPU mask has holes; they are still NULL from the zeroed
	 * allocation, so only destroy sockets that were really created.
	 */
	for (j = 0; j < i; j++) {
		if (net->ipv6.icmp_sk[j])
			inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
	}
	kfree(net->ipv6.icmp_sk);
	return err;
}

873
/* Per-netns exit: destroy every per-cpu ICMPv6 socket and free the array. */
static void __net_exit icmpv6_sk_exit(struct net *net)
{
	int cpu;

	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(net->ipv6.icmp_sk[cpu]);

	kfree(net->ipv6.icmp_sk);
}

883
static struct pernet_operations icmpv6_sk_ops = {
884 885 886 887 888 889 890 891 892 893 894 895 896 897 898
       .init = icmpv6_sk_init,
       .exit = icmpv6_sk_exit,
};

/*
 * Register the per-netns socket hooks, the ICMPv6 protocol handlers and
 * icmp6_send() as the ICMP sender, in that order.
 *
 * Returns 0 on success. On failure everything registered so far is undone
 * and a negative error is returned (-EAGAIN when the protocol cannot be
 * added).
 */
int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
		err = -EAGAIN;
		goto fail;
	}

	err = inet6_register_icmp_sender(icmp6_send);
	if (!err)
		return 0;

	/* Sender registration failed: drop the protocol again. */
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}

913
void icmpv6_cleanup(void)
914
{
915
	inet6_unregister_icmp_sender(icmp6_send);
916
	unregister_pernet_subsys(&icmpv6_sk_ops);
L
Linus Torvalds 已提交
917 918 919
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}

920

921
/*
 * errno and "fatal" flag for each Destination Unreachable code, indexed
 * by the ICMPv6 code. Consumed by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};

955
/*
 * Translate an ICMPv6 error into an errno value.
 *
 * @type: ICMPv6 type of the error
 * @code: ICMPv6 code of the error
 * @err:  receives the (positive) errno; EPROTO for anything not listed
 *
 * Returns 1 when the error is considered fatal, 0 otherwise.
 */
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		/* Codes beyond the table are fatal and keep EPROTO. */
		if (code >= ARRAY_SIZE(tab_unreach))
			return 1;
		*err = tab_unreach[code].err;
		return tab_unreach[code].fatal;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		return 0;

	case ICMPV6_PARAMPROB:
		/* *err already holds EPROTO. */
		return 1;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		return 0;
	}

	return 0;
}
986 987
EXPORT_SYMBOL(icmpv6_err_convert);

L
Linus Torvalds 已提交
988
#ifdef CONFIG_SYSCTL
S
stephen hemminger 已提交
989
static struct ctl_table ipv6_icmp_table_template[] = {
L
Linus Torvalds 已提交
990 991
	{
		.procname	= "ratelimit",
992
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
L
Linus Torvalds 已提交
993 994
		.maxlen		= sizeof(int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
995
		.proc_handler	= proc_dointvec_ms_jiffies,
L
Linus Torvalds 已提交
996
	},
997
	{ },
L
Linus Torvalds 已提交
998
};
999

1000
/*
 * Duplicate the sysctl template for @net and point its first entry at
 * that namespace's icmpv6_time value.
 *
 * Returns the new table, or NULL when the copy cannot be allocated.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);
	if (!table)
		return NULL;

	table[0].data = &net->ipv6.sysctl.icmpv6_time;
	return table;
}
L
Linus Torvalds 已提交
1013 1014
#endif