/*
 *	IPv6 input
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Ian P. Morris		<I.P.Morris@soton.ac.uk>
 *
 *	Based in linux/net/ipv4/ip_input.c
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */
/* Changes
 *
 *	Mitsuru KANDA @USAGI and
 *	YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs().
 */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/icmpv6.h>
#include <linux/mroute6.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/xfrm.h>
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>

static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
				struct sk_buff *skb)
L
Linus Torvalds 已提交
52
{
53 54
	void (*edemux)(struct sk_buff *skb);

55
	if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
E
Eric Dumazet 已提交
56 57 58
		const struct inet6_protocol *ipprot;

		ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
59 60
		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
			edemux(skb);
E
Eric Dumazet 已提交
61
	}
62
	if (!skb_valid_dst(skb))
L
Linus Torvalds 已提交
63
		ip6_route_input(skb);
64 65 66 67 68 69 70 71 72 73 74
}

/*
 * Finish reception of a single packet: give an L3 master device the chance
 * to take the skb, attach a route, then pass the skb to its dst input
 * handler.
 *
 * Returns NET_RX_SUCCESS when l3mdev_ip6_rcv() returned no skb, otherwise
 * the result of dst_input().
 */
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *rx;

	/* If the ingress device is enslaved to an L3 master device, pass
	 * the skb to its handler for processing.
	 */
	rx = l3mdev_ip6_rcv(skb);
	if (rx) {
		ip6_rcv_finish_core(net, sk, rx);
		return dst_input(rx);
	}

	return NET_RX_SUCCESS;
}

static void ip6_sublist_rcv_finish(struct list_head *head)
{
	struct sk_buff *skb, *next;

	list_for_each_entry_safe(skb, next, head, list)
		dst_input(skb);
}

/*
 * Finish reception for a list of skbs.
 *
 * Every skb is taken off @head, offered to the L3 master device handler,
 * given a route, and then batched into sublists of consecutive skbs that
 * share the same dst.  Each sublist is dispatched through
 * ip6_sublist_rcv_finish() as soon as an skb with a different dst shows up,
 * and once more at the end for the last batch.
 */
static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
				struct list_head *head)
{
	struct dst_entry *curr_dst = NULL;
	struct sk_buff *skb, *next;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct dst_entry *dst;

		/* Unlink and clear ->next: list_del() alone leaves poisoned
		 * list pointers in the skb, and the handlers below may consume
		 * or requeue it.
		 * NOTE(review): relies on skb->list overlaying skb->next;
		 * skb_list_del_init() is the equivalent helper where present.
		 */
		list_del(&skb->list);
		skb->next = NULL;

		/* if ingress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_rcv(skb);
		if (!skb)
			continue;

		ip6_rcv_finish_core(net, sk, skb);
		dst = skb_dst(skb);
		if (curr_dst != dst) {
			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip6_sublist_rcv_finish(&sublist);
			/* start new sublist */
			INIT_LIST_HEAD(&sublist);
			curr_dst = dst;
		}
		list_add_tail(&skb->list, &sublist);
	}
	/* dispatch final sublist */
	ip6_sublist_rcv_finish(&sublist);
}

static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
				    struct net *net)
L
Linus Torvalds 已提交
123
{
124
	const struct ipv6hdr *hdr;
125
	u32 pkt_len;
126
	struct inet6_dev *idev;
L
Linus Torvalds 已提交
127

128 129
	if (skb->pkt_type == PACKET_OTHERHOST) {
		kfree_skb(skb);
130
		return NULL;
131 132 133
	}

	rcu_read_lock();
L
Linus Torvalds 已提交
134

135 136
	idev = __in6_dev_get(skb->dev);

137
	__IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len);
L
Linus Torvalds 已提交
138

139 140
	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
	    !idev || unlikely(idev->cnf.disable_ipv6)) {
E
Eric Dumazet 已提交
141
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
142
		goto drop;
L
Linus Torvalds 已提交
143 144
	}

145 146
	memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));

L
Linus Torvalds 已提交
147 148 149 150 151 152
	/*
	 * Store incoming device index. When the packet will
	 * be queued, we cannot refer to skb->dev anymore.
	 *
	 * BTW, when we send a packet for our own local address on a
	 * non-loopback interface (e.g. ethX), it is being delivered
153
	 * via the loopback interface (lo) here; skb->dev = loopback_dev.
L
Linus Torvalds 已提交
154 155 156 157
	 * It, however, should be considered as if it is being
	 * arrived via the sending interface (ethX), because of the
	 * nature of scoping architecture. --yoshfuji
	 */
158
	IP6CB(skb)->iif = skb_valid_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
L
Linus Torvalds 已提交
159

160
	if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
L
Linus Torvalds 已提交
161 162
		goto err;

163
	hdr = ipv6_hdr(skb);
L
Linus Torvalds 已提交
164 165 166 167

	if (hdr->version != 6)
		goto err;

E
Eric Dumazet 已提交
168 169
	__IP6_ADD_STATS(net, idev,
			IPSTATS_MIB_NOECTPKTS +
170
				(ipv6_get_dsfield(hdr) & INET_ECN_MASK),
E
Eric Dumazet 已提交
171
			max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
172 173
	/*
	 * RFC4291 2.5.3
174 175
	 * The loopback address must not be used as the source address in IPv6
	 * packets that are sent outside of a single node. [..]
176 177 178
	 * A packet received on an interface with a destination address
	 * of loopback must be dropped.
	 */
179 180 181
	if ((ipv6_addr_loopback(&hdr->saddr) ||
	     ipv6_addr_loopback(&hdr->daddr)) &&
	     !(dev->flags & IFF_LOOPBACK))
182 183
		goto err;

184 185 186 187 188 189 190 191 192 193 194 195
	/* RFC4291 Errata ID: 3480
	 * Interface-Local scope spans only a single interface on a
	 * node and is useful only for loopback transmission of
	 * multicast.  Packets with interface-local scope received
	 * from another node must be discarded.
	 */
	if (!(skb->pkt_type == PACKET_LOOPBACK ||
	      dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_is_multicast(&hdr->daddr) &&
	    IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1)
		goto err;

196 197 198 199 200 201 202 203 204 205
	/* If enabled, drop unicast packets that were encapsulated in link-layer
	 * multicast or broadcast to protected against the so-called "hole-196"
	 * attack in 802.11 wireless.
	 */
	if (!ipv6_addr_is_multicast(&hdr->daddr) &&
	    (skb->pkt_type == PACKET_BROADCAST ||
	     skb->pkt_type == PACKET_MULTICAST) &&
	    idev->cnf.drop_unicast_in_l2_multicast)
		goto err;

206 207 208 209 210 211 212 213 214
	/* RFC4291 2.7
	 * Nodes must not originate a packet to a multicast address whose scope
	 * field contains the reserved value 0; if such a packet is received, it
	 * must be silently dropped.
	 */
	if (ipv6_addr_is_multicast(&hdr->daddr) &&
	    IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0)
		goto err;

215 216 217 218 219 220 221 222
	/*
	 * RFC4291 2.7
	 * Multicast addresses must not be used as source addresses in IPv6
	 * packets or appear in any Routing header.
	 */
	if (ipv6_addr_is_multicast(&hdr->saddr))
		goto err;

223
	skb->transport_header = skb->network_header + sizeof(*hdr);
224 225
	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);

L
Linus Torvalds 已提交
226 227 228 229
	pkt_len = ntohs(hdr->payload_len);

	/* pkt_len may be zero if Jumbo payload option is present */
	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
230
		if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
E
Eric Dumazet 已提交
231 232
			__IP6_INC_STATS(net,
					idev, IPSTATS_MIB_INTRUNCATEDPKTS);
233 234
			goto drop;
		}
L
Linus Torvalds 已提交
235
		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
E
Eric Dumazet 已提交
236
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
L
Linus Torvalds 已提交
237 238
			goto drop;
		}
239
		hdr = ipv6_hdr(skb);
L
Linus Torvalds 已提交
240 241 242
	}

	if (hdr->nexthdr == NEXTHDR_HOP) {
243
		if (ipv6_parse_hopopts(skb) < 0) {
E
Eric Dumazet 已提交
244
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
245
			rcu_read_unlock();
246
			return NULL;
L
Linus Torvalds 已提交
247 248 249
		}
	}

250 251
	rcu_read_unlock();

252 253 254
	/* Must drop socket now because of tproxy. */
	skb_orphan(skb);

255
	return skb;
L
Linus Torvalds 已提交
256
err:
E
Eric Dumazet 已提交
257
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
L
Linus Torvalds 已提交
258
drop:
259
	rcu_read_unlock();
L
Linus Torvalds 已提交
260
	kfree_skb(skb);
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
	return NULL;
}

/*
 * Receive handler for a single IPv6 packet.
 *
 * Runs the packet through ip6_rcv_core() and, if it survives, through the
 * NF_INET_PRE_ROUTING hook with ip6_rcv_finish() as the continuation.
 * Returns NET_RX_DROP when ip6_rcv_core() returned no skb, otherwise the
 * result of NF_HOOK().  @pt and @orig_dev are not used.
 */
int ipv6_rcv(struct sk_buff *skb, struct net_device *dev,
	     struct packet_type *pt, struct net_device *orig_dev)
{
	struct net *net = dev_net(skb->dev);
	struct sk_buff *checked;

	checked = ip6_rcv_core(skb, dev, net);
	if (!checked)
		return NET_RX_DROP;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
		       net, NULL, checked, dev, NULL,
		       ip6_rcv_finish);
}

/*
 * Process one sublist of skbs for device @dev in namespace @net: run the
 * NF_INET_PRE_ROUTING hook over the list, then finish reception for the
 * skbs on @head via ip6_list_rcv_finish().
 */
static void ip6_sublist_rcv(struct list_head *head, struct net_device *dev,
			    struct net *net)
{
	struct sock *no_sk = NULL;	/* no socket is passed on this path */

	NF_HOOK_LIST(NFPROTO_IPV6, NF_INET_PRE_ROUTING, net, no_sk,
		     head, dev, NULL, ip6_rcv_finish);
	ip6_list_rcv_finish(net, no_sk, head);
}

/*
 * Receive a list of IPv6 packets.
 *
 * Each skb is taken off @head and validated with ip6_rcv_core().
 * Surviving skbs are batched into sublists of consecutive packets that
 * share the same device and network namespace; each sublist is dispatched
 * through ip6_sublist_rcv().  @pt and @orig_dev are not used.
 */
void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
		   struct net_device *orig_dev)
{
	struct net_device *curr_dev = NULL;
	struct net *curr_net = NULL;
	struct sk_buff *skb, *next;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct net_device *dev = skb->dev;
		struct net *net = dev_net(dev);

		/* Unlink and clear ->next: list_del() alone leaves poisoned
		 * list pointers in an skb that ip6_rcv_core() may free or
		 * replace.
		 * NOTE(review): relies on skb->list overlaying skb->next;
		 * skb_list_del_init() is the equivalent helper where present.
		 */
		list_del(&skb->list);
		skb->next = NULL;

		skb = ip6_rcv_core(skb, dev, net);
		if (!skb)
			continue;

		if (curr_dev != dev || curr_net != net) {
			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip6_sublist_rcv(&sublist, curr_dev, curr_net);
			/* start new sublist */
			INIT_LIST_HEAD(&sublist);
			curr_dev = dev;
			curr_net = net;
		}
		list_add_tail(&skb->list, &sublist);
	}
	/* dispatch final sublist; skip it when every packet was dropped, in
	 * which case curr_dev and curr_net may still be NULL
	 */
	if (!list_empty(&sublist))
		ip6_sublist_rcv(&sublist, curr_dev, curr_net);
}

/*
 *	Deliver the packet to the host
 */


323
static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
L
Linus Torvalds 已提交
324
{
325
	const struct inet6_protocol *ipprot;
326
	struct inet6_dev *idev;
L
Linus Torvalds 已提交
327
	unsigned int nhoff;
328 329
	int nexthdr;
	bool raw;
330
	bool have_final = false;
L
Linus Torvalds 已提交
331 332 333 334 335 336

	/*
	 *	Parse extension headers
	 */

	rcu_read_lock();
337
resubmit:
E
Eric Dumazet 已提交
338
	idev = ip6_dst_idev(skb_dst(skb));
339
	if (!pskb_pull(skb, skb_transport_offset(skb)))
L
Linus Torvalds 已提交
340
		goto discard;
341
	nhoff = IP6CB(skb)->nhoff;
342
	nexthdr = skb_network_header(skb)[nhoff];
L
Linus Torvalds 已提交
343

T
Tom Herbert 已提交
344
resubmit_final:
345
	raw = raw6_local_deliver(skb, nexthdr);
346
	ipprot = rcu_dereference(inet6_protos[nexthdr]);
347
	if (ipprot) {
L
Linus Torvalds 已提交
348
		int ret;
349

350 351 352 353 354 355 356 357 358 359
		if (have_final) {
			if (!(ipprot->flags & INET6_PROTO_FINAL)) {
				/* Once we've seen a final protocol don't
				 * allow encapsulation on any non-final
				 * ones. This allows foo in UDP encapsulation
				 * to work.
				 */
				goto discard;
			}
		} else if (ipprot->flags & INET6_PROTO_FINAL) {
360
			const struct ipv6hdr *hdr;
L
Linus Torvalds 已提交
361

362 363 364
			/* Only do this once for first final protocol */
			have_final = true;

365 366 367 368 369
			/* Free reference early: we don't need it any more,
			   and it may hold ip_conntrack module loaded
			   indefinitely. */
			nf_reset(skb);

370
			skb_postpull_rcsum(skb, skb_network_header(skb),
371
					   skb_network_header_len(skb));
372
			hdr = ipv6_hdr(skb);
L
Linus Torvalds 已提交
373 374 375
			if (ipv6_addr_is_multicast(&hdr->daddr) &&
			    !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
			    &hdr->saddr) &&
376
			    !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
L
Linus Torvalds 已提交
377 378 379
				goto discard;
		}
		if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
380
		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
L
Linus Torvalds 已提交
381
			goto discard;
382

383
		ret = ipprot->handler(skb);
T
Tom Herbert 已提交
384 385 386 387 388 389 390 391 392 393 394 395 396
		if (ret > 0) {
			if (ipprot->flags & INET6_PROTO_FINAL) {
				/* Not an extension header, most likely UDP
				 * encapsulation. Use return value as nexthdr
				 * protocol not nhoff (which presumably is
				 * not set by handler).
				 */
				nexthdr = ret;
				goto resubmit_final;
			} else {
				goto resubmit;
			}
		} else if (ret == 0) {
E
Eric Dumazet 已提交
397
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
T
Tom Herbert 已提交
398
		}
L
Linus Torvalds 已提交
399
	} else {
400
		if (!raw) {
L
Linus Torvalds 已提交
401
			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
E
Eric Dumazet 已提交
402 403
				__IP6_INC_STATS(net, idev,
						IPSTATS_MIB_INUNKNOWNPROTOS);
404
				icmpv6_send(skb, ICMPV6_PARAMPROB,
405
					    ICMPV6_UNK_NEXTHDR, nhoff);
L
Linus Torvalds 已提交
406
			}
407 408
			kfree_skb(skb);
		} else {
E
Eric Dumazet 已提交
409
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
410 411
			consume_skb(skb);
		}
L
Linus Torvalds 已提交
412 413 414 415 416
	}
	rcu_read_unlock();
	return 0;

discard:
E
Eric Dumazet 已提交
417
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
L
Linus Torvalds 已提交
418 419 420 421 422 423 424 425
	rcu_read_unlock();
	kfree_skb(skb);
	return 0;
}


int ip6_input(struct sk_buff *skb)
{
426 427
	return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
		       dev_net(skb->dev), NULL, skb, skb->dev, NULL,
428
		       ip6_input_finish);
L
Linus Torvalds 已提交
429
}
430
EXPORT_SYMBOL_GPL(ip6_input);
L
Linus Torvalds 已提交
431 432 433

int ip6_mc_input(struct sk_buff *skb)
{
434
	const struct ipv6hdr *hdr;
435
	bool deliver;
L
Linus Torvalds 已提交
436

437
	__IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
438
			 __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST,
439
			 skb->len);
L
Linus Torvalds 已提交
440

441
	hdr = ipv6_hdr(skb);
442
	deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
L
Linus Torvalds 已提交
443

444
#ifdef CONFIG_IPV6_MROUTE
L
Linus Torvalds 已提交
445
	/*
446
	 *      IPv6 multicast router mode is now supported ;)
L
Linus Torvalds 已提交
447
	 */
448
	if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
449 450
	    !(ipv6_addr_type(&hdr->daddr) &
	      (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
451 452 453 454 455 456 457 458 459
	    likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
		/*
		 * Okay, we try to forward - split and duplicate
		 * packets.
		 */
		struct sk_buff *skb2;
		struct inet6_skb_parm *opt = IP6CB(skb);

		/* Check for MLD */
460
		if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
461 462
			/* Check if this is a mld message */
			u8 nexthdr = hdr->nexthdr;
463
			__be16 frag_off;
464 465 466 467 468
			int offset;

			/* Check if the value of Router Alert
			 * is for MLD (0x0000).
			 */
469
			if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) {
470
				deliver = false;
471

472 473
				if (!ipv6_ext_hdr(nexthdr)) {
					/* BUG */
474
					goto out;
475 476
				}
				offset = ipv6_skip_exthdr(skb, sizeof(*hdr),
477
							  &nexthdr, &frag_off);
478
				if (offset < 0)
479
					goto out;
480

481 482
				if (ipv6_is_mld(skb, nexthdr, offset))
					deliver = true;
483

484
				goto out;
485 486 487
			}
			/* unknown RA - process it normally */
		}
L
Linus Torvalds 已提交
488

489 490 491 492 493 494
		if (deliver)
			skb2 = skb_clone(skb, GFP_ATOMIC);
		else {
			skb2 = skb;
			skb = NULL;
		}
495

496 497
		if (skb2) {
			ip6_mr_input(skb2);
L
Linus Torvalds 已提交
498 499
		}
	}
500
out:
501 502
#endif
	if (likely(deliver))
L
Linus Torvalds 已提交
503
		ip6_input(skb);
504 505 506
	else {
		/* discard */
		kfree_skb(skb);
L
Linus Torvalds 已提交
507 508 509 510
	}

	return 0;
}