icmp.c 22.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
/*
 *	Internet Control Message Protocol (ICMPv6)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on net/ipv4/icmp.c
 *
 *	RFC 1885
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to an icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure to send parameter problem for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
 */

32 33
#define pr_fmt(fmt) "IPv6: " fmt

L
Linus Torvalds 已提交
34 35 36 37 38 39 40 41 42 43
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
44
#include <linux/netfilter.h>
45
#include <linux/slab.h>
L
Linus Torvalds 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58 59

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
60
#include <net/ping.h>
L
Linus Torvalds 已提交
61 62 63 64 65 66 67
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
68
#include <net/xfrm.h>
69
#include <net/inet_common.h>
L
Linus Torvalds 已提交
70 71 72 73 74 75 76 77 78 79

#include <asm/uaccess.h>

/*
 *	The ICMP socket(s). This is the most convenient way to flow control
 *	our ICMP output as well as maintain a clean interface throughout
 *	all layers. All Socketless IP sends will soon be gone.
 *
 *	On SMP we have one ICMP socket per-cpu.
 */
80 81 82 83
static inline struct sock *icmpv6_sk(struct net *net)
{
	/* Pick this namespace's socket slot for the CPU we are running on. */
	struct sock **socks = net->ipv6.icmp_sk;

	return socks[smp_processor_id()];
}
L
Linus Torvalds 已提交
84

85 86 87
/*
 * Error handler installed in icmpv6_protocol.
 *
 * @skb:    packet being reported on; @offset locates the quoted ICMPv6
 *          header inside skb->data
 * @opt:    unused here
 * @type:   ICMPv6 type of the received message
 * @code:   ICMPv6 code of the received message (unused here)
 * @offset: offset of the inner ICMPv6 header from skb->data
 * @info:   type-specific info word, forwarded to the helpers below
 */
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *inner = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net(skb->dev);

	switch (type) {
	case ICMPV6_PKT_TOOBIG:
		ip6_update_pmtu(skb, net, info, 0, 0);
		break;
	case NDISC_REDIRECT:
		ip6_redirect(skb, net, 0, 0);
		break;
	}

	/* Only error messages quoting an echo request go to the ping code. */
	if (!(type & ICMPV6_INFOMSG_MASK) &&
	    inner->icmp6_type == ICMPV6_ECHO_REQUEST)
		ping_err(skb, offset, info);
}

102
static int icmpv6_rcv(struct sk_buff *skb);
L
Linus Torvalds 已提交
103

104
static const struct inet6_protocol icmpv6_protocol = {
L
Linus Torvalds 已提交
105
	.handler	=	icmpv6_rcv,
106
	.err_handler	=	icmpv6_err,
107
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
L
Linus Torvalds 已提交
108 109
};

110
static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
L
Linus Torvalds 已提交
111
{
112 113
	struct sock *sk;

L
Linus Torvalds 已提交
114 115
	local_bh_disable();

116
	sk = icmpv6_sk(net);
117
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
L
Linus Torvalds 已提交
118 119 120 121 122
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		local_bh_enable();
123
		return NULL;
L
Linus Torvalds 已提交
124
	}
125
	return sk;
L
Linus Torvalds 已提交
126 127
}

128
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
L
Linus Torvalds 已提交
129
{
130
	spin_unlock_bh(&sk->sk_lock.slock);
L
Linus Torvalds 已提交
131 132 133 134 135 136 137 138 139 140 141 142 143
}

/*
 * Figure out, may we reply to this packet with icmp error.
 *
 * We do not reply, if:
 *	- it was icmp error message.
 *	- it is truncated, so that it is known, that protocol is ICMPV6
 *	  (i.e. in the middle of some exthdr)
 *
 *	--ANK (980726)
 */

144
static bool is_ineligible(const struct sk_buff *skb)
L
Linus Torvalds 已提交
145
{
146
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
L
Linus Torvalds 已提交
147
	int len = skb->len - ptr;
148
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
149
	__be16 frag_off;
L
Linus Torvalds 已提交
150 151

	if (len < 0)
152
		return true;
L
Linus Torvalds 已提交
153

154
	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
L
Linus Torvalds 已提交
155
	if (ptr < 0)
156
		return false;
L
Linus Torvalds 已提交
157 158 159 160 161 162 163
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);
		if (tp == NULL ||
		    !(*tp & ICMPV6_INFOMSG_MASK))
164
			return true;
L
Linus Torvalds 已提交
165
	}
166
	return false;
L
Linus Torvalds 已提交
167 168
}

169 170
/*
 * Check the ICMP output rate limit
L
Linus Torvalds 已提交
171
 */
172
static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
173
				      struct flowi6 *fl6)
L
Linus Torvalds 已提交
174 175
{
	struct dst_entry *dst;
176
	struct net *net = sock_net(sk);
177
	bool res = false;
L
Linus Torvalds 已提交
178 179 180

	/* Informational messages are not limited. */
	if (type & ICMPV6_INFOMSG_MASK)
181
		return true;
L
Linus Torvalds 已提交
182 183 184

	/* Do not limit pmtu discovery, it would break it. */
	if (type == ICMPV6_PKT_TOOBIG)
185
		return true;
L
Linus Torvalds 已提交
186

187
	/*
L
Linus Torvalds 已提交
188 189 190 191
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
192
	dst = ip6_route_output(net, sk, fl6);
L
Linus Torvalds 已提交
193
	if (dst->error) {
194
		IP6_INC_STATS(net, ip6_dst_idev(dst),
195
			      IPSTATS_MIB_OUTNOROUTES);
L
Linus Torvalds 已提交
196
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
197
		res = true;
L
Linus Torvalds 已提交
198 199
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
200
		int tmo = net->ipv6.sysctl.icmpv6_time;
201
		struct inet_peer *peer;
L
Linus Torvalds 已提交
202 203 204 205 206

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

207
		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
208
		res = inet_peer_xrlim_allow(peer, tmo);
209 210
		if (peer)
			inet_putpeer(peer);
L
Linus Torvalds 已提交
211 212 213 214 215 216 217 218
	}
	dst_release(dst);
	return res;
}

/*
 *	an inline helper for the "simple" if statement below
 *	checks if parameter problem report is caused by an
219
 *	unrecognized IPv6 option that has the Option Type
L
Linus Torvalds 已提交
220 221 222
 *	highest-order two bits set to 10
 */

223
static bool opt_unrec(struct sk_buff *skb, __u32 offset)
L
Linus Torvalds 已提交
224 225 226
{
	u8 _optval, *op;

227
	offset += skb_network_offset(skb);
L
Linus Torvalds 已提交
228 229
	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
	if (op == NULL)
230
		return true;
L
Linus Torvalds 已提交
231 232 233
	return (*op & 0xC0) == 0x80;
}

234 235
int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
			       struct icmp6hdr *thdr, int len)
L
Linus Torvalds 已提交
236 237 238 239 240 241 242 243
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;
	int err = 0;

	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
		goto out;

244
	icmp6h = icmp6_hdr(skb);
L
Linus Torvalds 已提交
245 246 247 248
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
249
		skb->csum = csum_partial(icmp6h,
L
Linus Torvalds 已提交
250
					sizeof(struct icmp6hdr), skb->csum);
251 252 253
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
L
Linus Torvalds 已提交
254 255
						      skb->csum);
	} else {
256
		__wsum tmp_csum = 0;
L
Linus Torvalds 已提交
257 258 259 260 261

		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

262
		tmp_csum = csum_partial(icmp6h,
L
Linus Torvalds 已提交
263
					sizeof(struct icmp6hdr), tmp_csum);
264 265 266
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
267
						      tmp_csum);
L
Linus Torvalds 已提交
268 269 270 271 272 273 274 275 276
	}
	ip6_push_pending_frames(sk);
out:
	return err;
}

/* Cookie handed to icmpv6_getfrag() through ip6_append_data(). */
struct icmpv6_msg {
	struct sk_buff *skb;	/* packet the payload is copied from */
	int offset;		/* start of the copied region within skb */
	uint8_t type;		/* ICMPv6 type of the message being built */
};

static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
284
	__wsum csum = 0;
L
Linus Torvalds 已提交
285 286 287 288

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len, csum);
	skb->csum = csum_block_add(skb->csum, csum, odd);
289 290
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
L
Linus Torvalds 已提交
291 292 293
	return 0;
}

A
Amerigo Wang 已提交
294
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/*
 * If the packet carries a Home Address destination option, exchange the
 * IPv6 source address with the address stored in that option.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr saved;
	int off;

	if (!opt->dsthao)
		return;

	off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
	if (unlikely(off < 0))
		return;

	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + off);

	saved = iph->saddr;
	iph->saddr = hao->addr;
	hao->addr = saved;
}
#else
/* Without CONFIG_IPV6_MIP6 there is nothing to swap. */
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif

318 319
struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
				      struct sock *sk, struct flowi6 *fl6)
320 321
{
	struct dst_entry *dst, *dst2;
322
	struct flowi6 fl2;
323 324
	int err;

325
	err = ip6_dst_lookup(sk, &dst, fl6);
326 327 328 329 330 331 332 333
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
334
		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
335 336 337 338 339 340 341
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

342
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
343
	if (!IS_ERR(dst)) {
344 345
		if (dst != dst2)
			return dst;
346 347 348 349 350
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
351 352
	}

353
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
354 355 356 357 358 359 360
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

361
	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
362
	if (!IS_ERR(dst2)) {
363 364
		dst_release(dst);
		dst = dst2;
365 366 367 368 369 370 371
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
372 373 374 375 376 377 378 379
	}

relookup_failed:
	if (dst)
		return dst;
	return ERR_PTR(err);
}

L
Linus Torvalds 已提交
380 381 382
/*
 *	Send an ICMP message in response to a packet in error
 */
383
static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
L
Linus Torvalds 已提交
384
{
385
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
386
	struct inet6_dev *idev = NULL;
387
	struct ipv6hdr *hdr = ipv6_hdr(skb);
388 389
	struct sock *sk;
	struct ipv6_pinfo *np;
390
	const struct in6_addr *saddr = NULL;
L
Linus Torvalds 已提交
391 392
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
393
	struct flowi6 fl6;
L
Linus Torvalds 已提交
394 395 396 397
	struct icmpv6_msg msg;
	int iif = 0;
	int addr_type = 0;
	int len;
G
Gerrit Renker 已提交
398
	int hlimit;
L
Linus Torvalds 已提交
399 400
	int err = 0;

401
	if ((u8 *)hdr < skb->head ||
402
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
L
Linus Torvalds 已提交
403 404 405
		return;

	/*
406
	 *	Make sure we respect the rules
L
Linus Torvalds 已提交
407
	 *	i.e. RFC 1885 2.4(e)
408
	 *	Rule (e.1) is enforced by not using icmp6_send
L
Linus Torvalds 已提交
409 410 411 412
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

413
	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
L
Linus Torvalds 已提交
414 415 416 417 418 419 420 421
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
		if (type != ICMPV6_PKT_TOOBIG &&
422 423
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
L
Linus Torvalds 已提交
424 425 426 427 428 429 430 431 432 433 434 435
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

436
	if (__ipv6_addr_needs_scope_id(addr_type))
L
Linus Torvalds 已提交
437 438 439
		iif = skb->dev->ifindex;

	/*
440 441 442 443
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
L
Linus Torvalds 已提交
444 445
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
446
		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
L
Linus Torvalds 已提交
447 448 449
		return;
	}

450
	/*
L
Linus Torvalds 已提交
451 452 453
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
454
		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
L
Linus Torvalds 已提交
455 456 457
		return;
	}

458 459
	mip6_addr_swap(skb);

460 461
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
A
Alexey Dobriyan 已提交
462
	fl6.daddr = hdr->saddr;
L
Linus Torvalds 已提交
463
	if (saddr)
A
Alexey Dobriyan 已提交
464
		fl6.saddr = *saddr;
465
	fl6.flowi6_oif = iif;
466 467
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
468
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
L
Linus Torvalds 已提交
469

470 471
	sk = icmpv6_xmit_lock(net);
	if (sk == NULL)
472
		return;
473
	np = inet6_sk(sk);
474

475
	if (!icmpv6_xrlim_allow(sk, type, &fl6))
L
Linus Torvalds 已提交
476 477 478 479 480 481 482
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

483 484
	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
485 486
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;
L
Linus Torvalds 已提交
487

488
	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
489
	if (IS_ERR(dst))
L
Linus Torvalds 已提交
490
		goto out;
491

492
	if (ipv6_addr_is_multicast(&fl6.daddr))
L
Linus Torvalds 已提交
493 494 495 496
		hlimit = np->mcast_hops;
	else
		hlimit = np->hop_limit;
	if (hlimit < 0)
497
		hlimit = ip6_dst_hoplimit(dst);
L
Linus Torvalds 已提交
498 499

	msg.skb = skb;
500
	msg.offset = skb_network_offset(skb);
501
	msg.type = type;
L
Linus Torvalds 已提交
502 503 504 505

	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
	if (len < 0) {
506
		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
L
Linus Torvalds 已提交
507 508 509
		goto out_dst_release;
	}

E
Eric Dumazet 已提交
510 511
	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);
L
Linus Torvalds 已提交
512 513 514

	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
			      len + sizeof(struct icmp6hdr),
G
Gerrit Renker 已提交
515
			      sizeof(struct icmp6hdr), hlimit,
516
			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
517
			      MSG_DONTWAIT, np->dontfrag);
L
Linus Torvalds 已提交
518
	if (err) {
E
Eric Dumazet 已提交
519
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
L
Linus Torvalds 已提交
520
		ip6_flush_pending_frames(sk);
E
Eric Dumazet 已提交
521 522 523
	} else {
		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
						 len + sizeof(struct icmp6hdr));
L
Linus Torvalds 已提交
524
	}
E
Eric Dumazet 已提交
525
	rcu_read_unlock();
L
Linus Torvalds 已提交
526 527 528
out_dst_release:
	dst_release(dst);
out:
529
	icmpv6_xmit_unlock(sk);
L
Linus Torvalds 已提交
530
}
531 532 533 534 535 536 537 538

/*
 * Convenience wrapper around icmp6_send(): report a Parameter Problem
 * with the given @code, pointing at offset @pos, and then free @skb.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	__u32 pointer = pos;

	icmp6_send(skb, ICMPV6_PARAMPROB, code, pointer);
	kfree_skb(skb);
}
539

L
Linus Torvalds 已提交
540 541
static void icmpv6_echo_reply(struct sk_buff *skb)
{
542
	struct net *net = dev_net(skb->dev);
543
	struct sock *sk;
L
Linus Torvalds 已提交
544
	struct inet6_dev *idev;
545
	struct ipv6_pinfo *np;
546
	const struct in6_addr *saddr = NULL;
547
	struct icmp6hdr *icmph = icmp6_hdr(skb);
L
Linus Torvalds 已提交
548
	struct icmp6hdr tmp_hdr;
549
	struct flowi6 fl6;
L
Linus Torvalds 已提交
550 551 552 553 554
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	int err = 0;
	int hlimit;

555
	saddr = &ipv6_hdr(skb)->daddr;
L
Linus Torvalds 已提交
556 557 558 559 560 561 562

	if (!ipv6_unicast_destination(skb))
		saddr = NULL;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

563 564
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
A
Alexey Dobriyan 已提交
565
	fl6.daddr = ipv6_hdr(skb)->saddr;
L
Linus Torvalds 已提交
566
	if (saddr)
A
Alexey Dobriyan 已提交
567
		fl6.saddr = *saddr;
568
	fl6.flowi6_oif = skb->dev->ifindex;
569
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
570
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
L
Linus Torvalds 已提交
571

572 573
	sk = icmpv6_xmit_lock(net);
	if (sk == NULL)
574
		return;
575
	np = inet6_sk(sk);
576

577 578
	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
579 580
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;
L
Linus Torvalds 已提交
581

582
	err = ip6_dst_lookup(sk, &dst, &fl6);
L
Linus Torvalds 已提交
583 584
	if (err)
		goto out;
585
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
586
	if (IS_ERR(dst))
587
		goto out;
L
Linus Torvalds 已提交
588

589
	if (ipv6_addr_is_multicast(&fl6.daddr))
L
Linus Torvalds 已提交
590 591 592 593
		hlimit = np->mcast_hops;
	else
		hlimit = np->hop_limit;
	if (hlimit < 0)
594
		hlimit = ip6_dst_hoplimit(dst);
L
Linus Torvalds 已提交
595

E
Eric Dumazet 已提交
596
	idev = __in6_dev_get(skb->dev);
L
Linus Torvalds 已提交
597 598 599

	msg.skb = skb;
	msg.offset = 0;
600
	msg.type = ICMPV6_ECHO_REPLY;
L
Linus Torvalds 已提交
601 602

	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
603
				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
604
				(struct rt6_info *)dst, MSG_DONTWAIT,
605
				np->dontfrag);
L
Linus Torvalds 已提交
606 607

	if (err) {
E
Eric Dumazet 已提交
608
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
L
Linus Torvalds 已提交
609
		ip6_flush_pending_frames(sk);
E
Eric Dumazet 已提交
610 611 612
	} else {
		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
						 skb->len + sizeof(struct icmp6hdr));
L
Linus Torvalds 已提交
613 614
	}
	dst_release(dst);
615
out:
616
	icmpv6_xmit_unlock(sk);
L
Linus Torvalds 已提交
617 618
}

619
/*
 * Hand a received ICMPv6 error to the upper-layer protocol of the
 * packet it quotes.
 *
 * @skb:  error message, with skb->data at the quoted IPv6 header
 * @type: ICMPv6 type of the error
 * @code: ICMPv6 code of the error
 * @info: type-specific info word passed through to the handlers
 *
 * Returns silently if the quoted headers cannot be pulled or parsed.
 */
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	const struct inet6_protocol *ipprot;
	int inner_offset = sizeof(struct ipv6hdr);
	__be16 frag_off;
	u8 nexthdr;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		return;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			return;
	}

	/* Check the header including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset + 8))
		return;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	rcu_read_lock();
	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
	rcu_read_unlock();

	/* Raw sockets are notified whether or not a handler was found. */
	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
}
659

L
Linus Torvalds 已提交
660 661 662 663
/*
 *	Handle icmp messages
 */

664
static int icmpv6_rcv(struct sk_buff *skb)
L
Linus Torvalds 已提交
665 666 667
{
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
668
	const struct in6_addr *saddr, *daddr;
L
Linus Torvalds 已提交
669
	struct icmp6hdr *hdr;
670
	u8 type;
L
Linus Torvalds 已提交
671

672
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
673
		struct sec_path *sp = skb_sec_path(skb);
674 675
		int nh;

676
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
677 678 679
				 XFRM_STATE_ICMP))
			goto drop_no_count;

680
		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
681 682 683 684 685 686 687 688 689 690 691
			goto drop_no_count;

		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

692
	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
L
Linus Torvalds 已提交
693

694 695
	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;
L
Linus Torvalds 已提交
696 697

	/* Perform checksum. */
698
	switch (skb->ip_summed) {
699
	case CHECKSUM_COMPLETE:
700 701 702 703 704
		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
				     skb->csum))
			break;
		/* fall through */
	case CHECKSUM_NONE:
705 706
		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
					     IPPROTO_ICMPV6, 0));
707
		if (__skb_checksum_complete(skb)) {
708 709
			LIMIT_NETDEBUG(KERN_DEBUG
				       "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
710
				       saddr, daddr);
711
			goto csum_error;
L
Linus Torvalds 已提交
712 713 714
		}
	}

715 716
	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;
L
Linus Torvalds 已提交
717

718
	hdr = icmp6_hdr(skb);
L
Linus Torvalds 已提交
719 720 721

	type = hdr->icmp6_type;

722
	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
L
Linus Torvalds 已提交
723 724 725 726 727 728 729

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
730
		ping_rcv(skb);
L
Linus Torvalds 已提交
731 732 733 734 735 736 737 738 739 740
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
741
		hdr = icmp6_hdr(skb);
L
Linus Torvalds 已提交
742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779

		/*
		 *	Drop through to notify
		 */

	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
780
		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
L
Linus Torvalds 已提交
781 782 783 784 785

		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

786 787 788
		/*
		 * error of unknown type.
		 * must pass to upper level
L
Linus Torvalds 已提交
789 790 791
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
792 793
	}

L
Linus Torvalds 已提交
794 795 796
	kfree_skb(skb);
	return 0;

797 798
csum_error:
	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
L
Linus Torvalds 已提交
799
discard_it:
800
	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
801
drop_no_count:
L
Linus Torvalds 已提交
802 803 804 805
	kfree_skb(skb);
	return 0;
}

806
void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
807 808 809 810 811
		      u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr,
		      int oif)
{
812
	memset(fl6, 0, sizeof(*fl6));
A
Alexey Dobriyan 已提交
813 814
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
815
	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
816 817
	fl6->fl6_icmp_type	= type;
	fl6->fl6_icmp_code	= 0;
818 819
	fl6->flowi6_oif		= oif;
	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
820 821
}

822
/*
823
 * Special lock-class for __icmpv6_sk:
824 825 826
 */
static struct lock_class_key icmpv6_socket_sk_dst_lock_key;

827
static int __net_init icmpv6_sk_init(struct net *net)
L
Linus Torvalds 已提交
828 829 830 831
{
	struct sock *sk;
	int err, i, j;

832 833 834
	net->ipv6.icmp_sk =
		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
	if (net->ipv6.icmp_sk == NULL)
835 836
		return -ENOMEM;

837
	for_each_possible_cpu(i) {
838 839
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, net);
L
Linus Torvalds 已提交
840
		if (err < 0) {
841
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
L
Linus Torvalds 已提交
842 843 844 845
			       err);
			goto fail;
		}

846
		net->ipv6.icmp_sk[i] = sk;
847

848 849 850
		/*
		 * Split off their lock-class, because sk->sk_dst_lock
		 * gets used from softirqs, which is safe for
851
		 * __icmpv6_sk (because those never get directly used
852 853 854 855
		 * via userspace syscalls), but unsafe for normal sockets.
		 */
		lockdep_set_class(&sk->sk_dst_lock,
				  &icmpv6_socket_sk_dst_lock_key);
L
Linus Torvalds 已提交
856 857 858 859

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
E
Eric Dumazet 已提交
860
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
L
Linus Torvalds 已提交
861 862 863 864
	}
	return 0;

 fail:
865
	for (j = 0; j < i; j++)
866
		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
867
	kfree(net->ipv6.icmp_sk);
L
Linus Torvalds 已提交
868 869 870
	return err;
}

871
/* Per-namespace exit: destroy every per-cpu socket, then free the array. */
static void __net_exit icmpv6_sk_exit(struct net *net)
{
	int cpu;

	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(net->ipv6.icmp_sk[cpu]);

	kfree(net->ipv6.icmp_sk);
}

881
static struct pernet_operations icmpv6_sk_ops = {
882 883 884 885 886 887 888 889 890 891 892 893 894 895 896
       .init = icmpv6_sk_init,
       .exit = icmpv6_sk_exit,
};

/*
 * Register the per-namespace socket hooks, the ICMPv6 protocol handler
 * and the ICMP sender callback, in that order.  On failure everything
 * registered so far is undone and a negative errno is returned
 * (-EAGAIN when the protocol handler cannot be added).
 */
int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
		err = -EAGAIN;
		goto fail;
	}

	err = inet6_register_icmp_sender(icmp6_send);
	if (!err)
		return 0;

	/* Sender registration failed: withdraw the protocol handler. */
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}

911
void icmpv6_cleanup(void)
912
{
913
	inet6_unregister_icmp_sender(icmp6_send);
914
	unregister_pernet_subsys(&icmpv6_sk_ops);
L
Linus Torvalds 已提交
915 916 917
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}

918

919
static const struct icmp6_err {
L
Linus Torvalds 已提交
920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
};

945
int icmpv6_err_convert(u8 type, u8 code, int *err)
L
Linus Torvalds 已提交
946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962
{
	int fatal = 0;

	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		fatal = 1;
		if (code <= ICMPV6_PORT_UNREACH) {
			*err  = tab_unreach[code].err;
			fatal = tab_unreach[code].fatal;
		}
		break;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		break;
963

L
Linus Torvalds 已提交
964 965 966 967 968 969 970 971
	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		fatal = 1;
		break;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		break;
972
	}
L
Linus Torvalds 已提交
973 974 975

	return fatal;
}
976 977
EXPORT_SYMBOL(icmpv6_err_convert);

L
Linus Torvalds 已提交
978
#ifdef CONFIG_SYSCTL
979
struct ctl_table ipv6_icmp_table_template[] = {
L
Linus Torvalds 已提交
980 981
	{
		.procname	= "ratelimit",
982
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
L
Linus Torvalds 已提交
983 984
		.maxlen		= sizeof(int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
985
		.proc_handler	= proc_dointvec_ms_jiffies,
L
Linus Torvalds 已提交
986
	},
987
	{ },
L
Linus Torvalds 已提交
988
};
989

990
/*
 * Duplicate the sysctl template for @net and point its "ratelimit"
 * entry at that namespace's icmpv6_time.  Returns the new table, or
 * NULL if the copy could not be allocated.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table = kmemdup(ipv6_icmp_table_template,
					  sizeof(ipv6_icmp_table_template),
					  GFP_KERNEL);

	if (!table)
		return NULL;

	table[0].data = &net->ipv6.sysctl.icmpv6_time;
	return table;
}
L
Linus Torvalds 已提交
1003 1004
#endif