// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 input
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Ian P. Morris		<I.P.Morris@soton.ac.uk>
 *
 *	Based on linux/net/ipv4/ip_input.c
 */
/* Changes
 *
 *	Mitsuru KANDA @USAGI and
 *	YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs().
 */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/icmpv6.h>
26
#include <linux/mroute6.h>
27
#include <linux/slab.h>
28
#include <linux/indirect_call_wrapper.h>
L
Linus Torvalds 已提交
29 30 31 32 33 34

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>
35
#include <net/udp.h>
L
Linus Torvalds 已提交
36 37 38 39 40 41 42 43 44

#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/xfrm.h>
45
#include <net/inet_ecn.h>
46
#include <net/dst_metadata.h>
L
Linus Torvalds 已提交
47

48
INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
49 50
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
				struct sk_buff *skb)
L
Linus Torvalds 已提交
51
{
52 53
	void (*edemux)(struct sk_buff *skb);

54
	if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
E
Eric Dumazet 已提交
55 56 57
		const struct inet6_protocol *ipprot;

		ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
58
		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
59 60
			INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
					udp_v6_early_demux, skb);
E
Eric Dumazet 已提交
61
	}
62
	if (!skb_valid_dst(skb))
L
Linus Torvalds 已提交
63
		ip6_route_input(skb);
64 65 66 67 68 69 70 71 72 73 74
}

/*
 * Finish receive processing of a single skb and hand it to its dst input
 * handler. Passed as the continuation to the NF_INET_PRE_ROUTING hook by
 * ipv6_rcv().
 *
 * Returns NET_RX_SUCCESS when l3mdev_ip6_rcv() hands back no skb, otherwise
 * whatever dst_input() returns.
 */
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	/* if ingress device is enslaved to an L3 master device pass the
	 * skb to its handler for processing
	 */
	skb = l3mdev_ip6_rcv(skb);
	if (!skb)
		return NET_RX_SUCCESS;

	ip6_rcv_finish_core(net, sk, skb);

	return dst_input(skb);
}

79 80 81 82
static void ip6_sublist_rcv_finish(struct list_head *head)
{
	struct sk_buff *skb, *next;

83 84
	list_for_each_entry_safe(skb, next, head, list) {
		skb_list_del_init(skb);
85
		dst_input(skb);
86
	}
87 88
}

89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
static bool ip6_can_use_hint(const struct sk_buff *skb,
			     const struct sk_buff *hint)
{
	return hint && !skb_dst(skb) &&
	       ipv6_addr_equal(&ipv6_hdr(hint)->daddr, &ipv6_hdr(skb)->daddr);
}

static struct sk_buff *ip6_extract_route_hint(const struct net *net,
					      struct sk_buff *skb)
{
	if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
		return NULL;

	return skb;
}

105 106 107
static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
				struct list_head *head)
{
108
	struct sk_buff *skb, *next, *hint = NULL;
109 110 111 112 113 114 115
	struct dst_entry *curr_dst = NULL;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct dst_entry *dst;

116
		skb_list_del_init(skb);
117 118 119 120 121 122
		/* if ingress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_rcv(skb);
		if (!skb)
			continue;
123 124 125 126 127

		if (ip6_can_use_hint(skb, hint))
			skb_dst_copy(skb, hint);
		else
			ip6_rcv_finish_core(net, sk, skb);
128 129
		dst = skb_dst(skb);
		if (curr_dst != dst) {
130 131
			hint = ip6_extract_route_hint(net, skb);

132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip6_sublist_rcv_finish(&sublist);
			/* start new sublist */
			INIT_LIST_HEAD(&sublist);
			curr_dst = dst;
		}
		list_add_tail(&skb->list, &sublist);
	}
	/* dispatch final sublist */
	ip6_sublist_rcv_finish(&sublist);
}

static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
				    struct net *net)
L
Linus Torvalds 已提交
147
{
148
	const struct ipv6hdr *hdr;
149
	u32 pkt_len;
150
	struct inet6_dev *idev;
L
Linus Torvalds 已提交
151

152 153
	if (skb->pkt_type == PACKET_OTHERHOST) {
		kfree_skb(skb);
154
		return NULL;
155 156 157
	}

	rcu_read_lock();
L
Linus Torvalds 已提交
158

159 160
	idev = __in6_dev_get(skb->dev);

161
	__IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len);
L
Linus Torvalds 已提交
162

163 164
	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
	    !idev || unlikely(idev->cnf.disable_ipv6)) {
E
Eric Dumazet 已提交
165
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
166
		goto drop;
L
Linus Torvalds 已提交
167 168
	}

169 170
	memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));

L
Linus Torvalds 已提交
171 172 173 174 175 176
	/*
	 * Store incoming device index. When the packet will
	 * be queued, we cannot refer to skb->dev anymore.
	 *
	 * BTW, when we send a packet for our own local address on a
	 * non-loopback interface (e.g. ethX), it is being delivered
177
	 * via the loopback interface (lo) here; skb->dev = loopback_dev.
L
Linus Torvalds 已提交
178 179 180 181
	 * It, however, should be considered as if it is being
	 * arrived via the sending interface (ethX), because of the
	 * nature of scoping architecture. --yoshfuji
	 */
182
	IP6CB(skb)->iif = skb_valid_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
L
Linus Torvalds 已提交
183

184
	if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
L
Linus Torvalds 已提交
185 186
		goto err;

187
	hdr = ipv6_hdr(skb);
L
Linus Torvalds 已提交
188 189 190 191

	if (hdr->version != 6)
		goto err;

E
Eric Dumazet 已提交
192 193
	__IP6_ADD_STATS(net, idev,
			IPSTATS_MIB_NOECTPKTS +
194
				(ipv6_get_dsfield(hdr) & INET_ECN_MASK),
E
Eric Dumazet 已提交
195
			max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
196 197
	/*
	 * RFC4291 2.5.3
198 199
	 * The loopback address must not be used as the source address in IPv6
	 * packets that are sent outside of a single node. [..]
200 201 202
	 * A packet received on an interface with a destination address
	 * of loopback must be dropped.
	 */
203 204
	if ((ipv6_addr_loopback(&hdr->saddr) ||
	     ipv6_addr_loopback(&hdr->daddr)) &&
205 206
	    !(dev->flags & IFF_LOOPBACK) &&
	    !netif_is_l3_master(dev))
207 208
		goto err;

209 210 211 212 213 214 215 216 217 218 219 220
	/* RFC4291 Errata ID: 3480
	 * Interface-Local scope spans only a single interface on a
	 * node and is useful only for loopback transmission of
	 * multicast.  Packets with interface-local scope received
	 * from another node must be discarded.
	 */
	if (!(skb->pkt_type == PACKET_LOOPBACK ||
	      dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_is_multicast(&hdr->daddr) &&
	    IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1)
		goto err;

221 222 223 224 225 226 227 228 229 230
	/* If enabled, drop unicast packets that were encapsulated in link-layer
	 * multicast or broadcast to protected against the so-called "hole-196"
	 * attack in 802.11 wireless.
	 */
	if (!ipv6_addr_is_multicast(&hdr->daddr) &&
	    (skb->pkt_type == PACKET_BROADCAST ||
	     skb->pkt_type == PACKET_MULTICAST) &&
	    idev->cnf.drop_unicast_in_l2_multicast)
		goto err;

231 232 233 234 235 236 237 238 239
	/* RFC4291 2.7
	 * Nodes must not originate a packet to a multicast address whose scope
	 * field contains the reserved value 0; if such a packet is received, it
	 * must be silently dropped.
	 */
	if (ipv6_addr_is_multicast(&hdr->daddr) &&
	    IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0)
		goto err;

240 241 242 243 244 245 246 247
	/*
	 * RFC4291 2.7
	 * Multicast addresses must not be used as source addresses in IPv6
	 * packets or appear in any Routing header.
	 */
	if (ipv6_addr_is_multicast(&hdr->saddr))
		goto err;

248
	skb->transport_header = skb->network_header + sizeof(*hdr);
249 250
	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);

L
Linus Torvalds 已提交
251 252 253 254
	pkt_len = ntohs(hdr->payload_len);

	/* pkt_len may be zero if Jumbo payload option is present */
	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
255
		if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
E
Eric Dumazet 已提交
256 257
			__IP6_INC_STATS(net,
					idev, IPSTATS_MIB_INTRUNCATEDPKTS);
258 259
			goto drop;
		}
L
Linus Torvalds 已提交
260
		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
E
Eric Dumazet 已提交
261
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
L
Linus Torvalds 已提交
262 263
			goto drop;
		}
264
		hdr = ipv6_hdr(skb);
L
Linus Torvalds 已提交
265 266 267
	}

	if (hdr->nexthdr == NEXTHDR_HOP) {
268
		if (ipv6_parse_hopopts(skb) < 0) {
E
Eric Dumazet 已提交
269
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
270
			rcu_read_unlock();
271
			return NULL;
L
Linus Torvalds 已提交
272 273 274
		}
	}

275 276
	rcu_read_unlock();

277
	/* Must drop socket now because of tproxy. */
J
Joe Stringer 已提交
278 279
	if (!skb_sk_is_prefetched(skb))
		skb_orphan(skb);
280

281
	return skb;
L
Linus Torvalds 已提交
282
err:
E
Eric Dumazet 已提交
283
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
L
Linus Torvalds 已提交
284
drop:
285
	rcu_read_unlock();
L
Linus Torvalds 已提交
286
	kfree_skb(skb);
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
	return NULL;
}

/*
 * Receive entry point for a single IPv6 packet: validate it with
 * ip6_rcv_core(), then run the NF_INET_PRE_ROUTING hook with
 * ip6_rcv_finish() as the continuation.
 *
 * Returns NET_RX_DROP when ip6_rcv_core() rejects the packet, otherwise the
 * result of NF_HOOK(). @pt and @orig_dev are not used here.
 */
int ipv6_rcv(struct sk_buff *skb, struct net_device *dev,
	     struct packet_type *pt, struct net_device *orig_dev)
{
	struct net *net = dev_net(skb->dev);

	skb = ip6_rcv_core(skb, dev, net);
	if (!skb)
		return NET_RX_DROP;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, net, NULL, skb,
		       dev, NULL, ip6_rcv_finish);
}

/*
 * Run the NF_INET_PRE_ROUTING hook over a sublist of skbs that share @dev
 * and @net, then finish whatever remains on @head via ip6_list_rcv_finish().
 */
static void ip6_sublist_rcv(struct list_head *head, struct net_device *dev,
			    struct net *net)
{
	NF_HOOK_LIST(NFPROTO_IPV6, NF_INET_PRE_ROUTING, net, NULL, head, dev,
		     NULL, ip6_rcv_finish);

	ip6_list_rcv_finish(net, NULL, head);
}

/*
 * Receive a list of IPv6 packets.
 *
 * Every skb on @head is unlinked and validated with ip6_rcv_core(); rejected
 * packets are skipped. Survivors are batched into sublists of consecutive
 * skbs that share both device and network namespace, and each sublist is
 * dispatched through ip6_sublist_rcv(). @pt and @orig_dev are not used here.
 */
void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
		   struct net_device *orig_dev)
{
	struct net_device *curr_dev = NULL;
	struct net *curr_net = NULL;
	struct sk_buff *skb, *next;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct net_device *dev = skb->dev;
		struct net *net = dev_net(dev);

		skb_list_del_init(skb);
		skb = ip6_rcv_core(skb, dev, net);
		if (!skb)
			continue;

		if (curr_dev != dev || curr_net != net) {
			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip6_sublist_rcv(&sublist, curr_dev, curr_net);
			/* start new sublist */
			INIT_LIST_HEAD(&sublist);
			curr_dev = dev;
			curr_net = net;
		}
		list_add_tail(&skb->list, &sublist);
	}
	/* dispatch final sublist */
	if (!list_empty(&sublist))
		ip6_sublist_rcv(&sublist, curr_dev, curr_net);
}

345 346
INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *));

L
Linus Torvalds 已提交
347 348 349
/*
 *	Deliver the packet to the host
 */
350 351
void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr,
			      bool have_final)
L
Linus Torvalds 已提交
352
{
353
	const struct inet6_protocol *ipprot;
354
	struct inet6_dev *idev;
L
Linus Torvalds 已提交
355
	unsigned int nhoff;
356
	bool raw;
L
Linus Torvalds 已提交
357 358 359 360 361

	/*
	 *	Parse extension headers
	 */

362
resubmit:
E
Eric Dumazet 已提交
363
	idev = ip6_dst_idev(skb_dst(skb));
364
	nhoff = IP6CB(skb)->nhoff;
365 366 367 368 369
	if (!have_final) {
		if (!pskb_pull(skb, skb_transport_offset(skb)))
			goto discard;
		nexthdr = skb_network_header(skb)[nhoff];
	}
L
Linus Torvalds 已提交
370

T
Tom Herbert 已提交
371
resubmit_final:
372
	raw = raw6_local_deliver(skb, nexthdr);
373
	ipprot = rcu_dereference(inet6_protos[nexthdr]);
374
	if (ipprot) {
L
Linus Torvalds 已提交
375
		int ret;
376

377 378 379 380 381 382 383 384 385 386
		if (have_final) {
			if (!(ipprot->flags & INET6_PROTO_FINAL)) {
				/* Once we've seen a final protocol don't
				 * allow encapsulation on any non-final
				 * ones. This allows foo in UDP encapsulation
				 * to work.
				 */
				goto discard;
			}
		} else if (ipprot->flags & INET6_PROTO_FINAL) {
387
			const struct ipv6hdr *hdr;
388 389
			int sdif = inet6_sdif(skb);
			struct net_device *dev;
L
Linus Torvalds 已提交
390

391 392 393
			/* Only do this once for first final protocol */
			have_final = true;

394 395 396
			/* Free reference early: we don't need it any more,
			   and it may hold ip_conntrack module loaded
			   indefinitely. */
397
			nf_reset_ct(skb);
398

399
			skb_postpull_rcsum(skb, skb_network_header(skb),
400
					   skb_network_header_len(skb));
401
			hdr = ipv6_hdr(skb);
402 403 404 405 406 407 408 409 410 411

			/* skb->dev passed may be master dev for vrfs. */
			if (sdif) {
				dev = dev_get_by_index_rcu(net, sdif);
				if (!dev)
					goto discard;
			} else {
				dev = skb->dev;
			}

L
Linus Torvalds 已提交
412
			if (ipv6_addr_is_multicast(&hdr->daddr) &&
413 414
			    !ipv6_chk_mcast_addr(dev, &hdr->daddr,
						 &hdr->saddr) &&
415
			    !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
L
Linus Torvalds 已提交
416 417 418
				goto discard;
		}
		if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
419
		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
L
Linus Torvalds 已提交
420
			goto discard;
421

422 423
		ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv,
				      skb);
T
Tom Herbert 已提交
424 425 426 427 428 429 430 431 432 433 434 435 436
		if (ret > 0) {
			if (ipprot->flags & INET6_PROTO_FINAL) {
				/* Not an extension header, most likely UDP
				 * encapsulation. Use return value as nexthdr
				 * protocol not nhoff (which presumably is
				 * not set by handler).
				 */
				nexthdr = ret;
				goto resubmit_final;
			} else {
				goto resubmit;
			}
		} else if (ret == 0) {
E
Eric Dumazet 已提交
437
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
T
Tom Herbert 已提交
438
		}
L
Linus Torvalds 已提交
439
	} else {
440
		if (!raw) {
L
Linus Torvalds 已提交
441
			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
E
Eric Dumazet 已提交
442 443
				__IP6_INC_STATS(net, idev,
						IPSTATS_MIB_INUNKNOWNPROTOS);
444
				icmpv6_send(skb, ICMPV6_PARAMPROB,
445
					    ICMPV6_UNK_NEXTHDR, nhoff);
L
Linus Torvalds 已提交
446
			}
447 448
			kfree_skb(skb);
		} else {
E
Eric Dumazet 已提交
449
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
450 451
			consume_skb(skb);
		}
L
Linus Torvalds 已提交
452
	}
453
	return;
L
Linus Torvalds 已提交
454 455

discard:
E
Eric Dumazet 已提交
456
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
L
Linus Torvalds 已提交
457
	kfree_skb(skb);
458 459 460 461 462 463 464 465
}

/*
 * Continuation for the NF_INET_LOCAL_IN hook: deliver @skb to the host
 * inside an RCU read-side section. @sk is not used. Always returns 0.
 */
static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	rcu_read_lock();
	/* Start with no final protocol seen; nexthdr is read from the packet. */
	ip6_protocol_deliver_rcu(net, skb, 0, false);
	rcu_read_unlock();

	return 0;
}


int ip6_input(struct sk_buff *skb)
{
472 473
	return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
		       dev_net(skb->dev), NULL, skb, skb->dev, NULL,
474
		       ip6_input_finish);
L
Linus Torvalds 已提交
475
}
476
EXPORT_SYMBOL_GPL(ip6_input);
L
Linus Torvalds 已提交
477 478 479

int ip6_mc_input(struct sk_buff *skb)
{
480
	int sdif = inet6_sdif(skb);
481
	const struct ipv6hdr *hdr;
482
	struct net_device *dev;
483
	bool deliver;
L
Linus Torvalds 已提交
484

485
	__IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
486
			 __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST,
487
			 skb->len);
L
Linus Torvalds 已提交
488

489 490 491 492 493 494 495 496 497 498 499 500 501
	/* skb->dev passed may be master dev for vrfs. */
	if (sdif) {
		rcu_read_lock();
		dev = dev_get_by_index_rcu(dev_net(skb->dev), sdif);
		if (!dev) {
			rcu_read_unlock();
			kfree_skb(skb);
			return -ENODEV;
		}
	} else {
		dev = skb->dev;
	}

502
	hdr = ipv6_hdr(skb);
503 504 505
	deliver = ipv6_chk_mcast_addr(dev, &hdr->daddr, NULL);
	if (sdif)
		rcu_read_unlock();
L
Linus Torvalds 已提交
506

507
#ifdef CONFIG_IPV6_MROUTE
L
Linus Torvalds 已提交
508
	/*
509
	 *      IPv6 multicast router mode is now supported ;)
L
Linus Torvalds 已提交
510
	 */
E
Eric Dumazet 已提交
511
	if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) &&
512 513
	    !(ipv6_addr_type(&hdr->daddr) &
	      (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
514 515 516 517 518 519 520 521 522
	    likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
		/*
		 * Okay, we try to forward - split and duplicate
		 * packets.
		 */
		struct sk_buff *skb2;
		struct inet6_skb_parm *opt = IP6CB(skb);

		/* Check for MLD */
523
		if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
524 525
			/* Check if this is a mld message */
			u8 nexthdr = hdr->nexthdr;
526
			__be16 frag_off;
527 528 529 530 531
			int offset;

			/* Check if the value of Router Alert
			 * is for MLD (0x0000).
			 */
532
			if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) {
533
				deliver = false;
534

535 536
				if (!ipv6_ext_hdr(nexthdr)) {
					/* BUG */
537
					goto out;
538 539
				}
				offset = ipv6_skip_exthdr(skb, sizeof(*hdr),
540
							  &nexthdr, &frag_off);
541
				if (offset < 0)
542
					goto out;
543

544 545
				if (ipv6_is_mld(skb, nexthdr, offset))
					deliver = true;
546

547
				goto out;
548 549 550
			}
			/* unknown RA - process it normally */
		}
L
Linus Torvalds 已提交
551

552 553 554 555 556 557
		if (deliver)
			skb2 = skb_clone(skb, GFP_ATOMIC);
		else {
			skb2 = skb;
			skb = NULL;
		}
558

559 560
		if (skb2) {
			ip6_mr_input(skb2);
L
Linus Torvalds 已提交
561 562
		}
	}
563
out:
564 565
#endif
	if (likely(deliver))
L
Linus Torvalds 已提交
566
		ip6_input(skb);
567 568 569
	else {
		/* discard */
		kfree_skb(skb);
L
Linus Torvalds 已提交
570 571 572 573
	}

	return 0;
}