flow_dissector.c 29.2 KB
Newer Older
1
#include <linux/kernel.h>
E
Eric Dumazet 已提交
2
#include <linux/skbuff.h>
3
#include <linux/export.h>
E
Eric Dumazet 已提交
4 5 6 7
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
E
Eric Dumazet 已提交
8
#include <net/ipv6.h>
9 10
#include <net/gre.h>
#include <net/pptp.h>
11 12 13 14
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
#include <linux/dccp.h>
E
Eric Dumazet 已提交
15 16 17
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
18
#include <linux/stddef.h>
19
#include <linux/if_ether.h>
20
#include <linux/mpls.h>
21
#include <net/flow_dissector.h>
22
#include <scsi/fc/fc_fcoe.h>
E
Eric Dumazet 已提交
23

24 25
static void dissector_set_key(struct flow_dissector *flow_dissector,
			      enum flow_dissector_key_id key_id)
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
{
	flow_dissector->used_keys |= (1 << key_id);
}

void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
			     const struct flow_dissector_key *key,
			     unsigned int key_count)
{
	unsigned int i;

	memset(flow_dissector, 0, sizeof(*flow_dissector));

	for (i = 0; i < key_count; i++, key++) {
		/* User should make sure that every key target offset is withing
		 * boundaries of unsigned short.
		 */
		BUG_ON(key->offset > USHRT_MAX);
43 44
		BUG_ON(dissector_uses_key(flow_dissector,
					  key->key_id));
45

46
		dissector_set_key(flow_dissector, key->key_id);
47 48 49
		flow_dissector->offset[key->key_id] = key->offset;
	}

50 51
	/* Ensure that the dissector always includes control and basic key.
	 * That way we are able to avoid handling lack of these in fast path.
52
	 */
53 54 55 56
	BUG_ON(!dissector_uses_key(flow_dissector,
				   FLOW_DISSECTOR_KEY_CONTROL));
	BUG_ON(!dissector_uses_key(flow_dissector,
				   FLOW_DISSECTOR_KEY_BASIC));
57 58 59
}
EXPORT_SYMBOL(skb_flow_dissector_init);

S
Simon Horman 已提交
60 61 62 63 64 65 66 67 68 69
/**
 * skb_flow_get_be16 - extract be16 entity
 * @skb: sk_buff to extract from
 * @poff: offset to extract at
 * @data: raw buffer pointer to the packet
 * @hlen: packet header length
 *
 * The function will try to retrieve a be32 entity at
 * offset poff
 */
70 71
static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
				void *data, int hlen)
S
Simon Horman 已提交
72 73 74 75 76 77 78 79 80 81
{
	__be16 *u, _u;

	u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
	if (u)
		return *u;

	return 0;
}

82
/**
83 84
 * __skb_flow_get_ports - extract the upper layer ports and return them
 * @skb: sk_buff to extract the ports from
85 86
 * @thoff: transport header offset
 * @ip_proto: protocol for which to get port offset
87 88
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
89 90 91 92
 *
 * The function will try to retrieve the ports at offset thoff + poff where poff
 * is the protocol port offset returned from proto_ports_offset
 */
93 94
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
			    void *data, int hlen)
95 96 97
{
	int poff = proto_ports_offset(ip_proto);

98 99 100 101 102
	if (!data) {
		data = skb->data;
		hlen = skb_headlen(skb);
	}

103 104 105
	if (poff >= 0) {
		__be32 *ports, _ports;

106 107
		ports = __skb_header_pointer(skb, thoff + poff,
					     sizeof(_ports), data, hlen, &_ports);
108 109 110 111 112 113
		if (ports)
			return *ports;
	}

	return 0;
}
114
EXPORT_SYMBOL(__skb_flow_get_ports);
115

116 117 118 119 120
/* Result of a per-protocol dissection helper; __skb_flow_dissect() maps
 * each value onto a jump to its out_good or out_bad label.
 */
enum flow_dissect_ret {
	FLOW_DISSECT_RET_OUT_GOOD,	/* caller jumps to out_good */
	FLOW_DISSECT_RET_OUT_BAD,	/* caller jumps to out_bad */
};

121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
/* Dissect an MPLS label stack for the entropy label.
 *
 * Nothing is done unless the dissector uses the MPLS entropy key.  The first
 * two label stack entries are read at @nhoff; if the first one carries the
 * entropy label indicator, the label bits of the second entry are stored as
 * the key id.  Returns OUT_BAD only when the two entries cannot be read.
 */
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container, void *data, int nhoff, int hlen)
{
	struct mpls_label *labels, label_buf[2];
	struct flow_dissector_key_keyid *entropy;

	if (!dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_MPLS_ENTROPY))
		return FLOW_DISSECT_RET_OUT_GOOD;

	labels = __skb_header_pointer(skb, nhoff, sizeof(label_buf), data,
				      hlen, &label_buf);
	if (!labels)
		return FLOW_DISSECT_RET_OUT_BAD;

	/* Not an entropy label indicator: nothing to record. */
	if (((ntohl(labels[0].entry) & MPLS_LS_LABEL_MASK) >>
	     MPLS_LS_LABEL_SHIFT) != MPLS_LABEL_ENTROPY)
		return FLOW_DISSECT_RET_OUT_GOOD;

	entropy = skb_flow_dissector_target(flow_dissector,
					    FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
					    target_container);
	entropy->keyid = labels[1].entry & htonl(MPLS_LS_LABEL_MASK);

	return FLOW_DISSECT_RET_OUT_GOOD;
}

148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
static enum flow_dissect_ret
__skb_flow_dissect_arp(const struct sk_buff *skb,
		       struct flow_dissector *flow_dissector,
		       void *target_container, void *data, int nhoff, int hlen)
{
	struct flow_dissector_key_arp *key_arp;
	struct {
		unsigned char ar_sha[ETH_ALEN];
		unsigned char ar_sip[4];
		unsigned char ar_tha[ETH_ALEN];
		unsigned char ar_tip[4];
	} *arp_eth, _arp_eth;
	const struct arphdr *arp;
	struct arphdr *_arp;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP))
		return FLOW_DISSECT_RET_OUT_GOOD;

	arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
				   hlen, &_arp);
	if (!arp)
		return FLOW_DISSECT_RET_OUT_BAD;

	if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
	    arp->ar_pro != htons(ETH_P_IP) ||
	    arp->ar_hln != ETH_ALEN ||
	    arp->ar_pln != 4 ||
	    (arp->ar_op != htons(ARPOP_REPLY) &&
	     arp->ar_op != htons(ARPOP_REQUEST)))
		return FLOW_DISSECT_RET_OUT_BAD;

	arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
				       sizeof(_arp_eth), data,
				       hlen, &_arp_eth);
	if (!arp_eth)
		return FLOW_DISSECT_RET_OUT_BAD;

	key_arp = skb_flow_dissector_target(flow_dissector,
					    FLOW_DISSECTOR_KEY_ARP,
					    target_container);

	memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip));
	memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip));

	/* Only store the lower byte of the opcode;
	 * this covers ARPOP_REPLY and ARPOP_REQUEST.
	 */
	key_arp->op = ntohs(arp->ar_op) & 0xff;

	ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
	ether_addr_copy(key_arp->tha, arp_eth->ar_tha);

	return FLOW_DISSECT_RET_OUT_GOOD;
}

203 204 205
/**
 * __skb_flow_dissect - extract the flow_keys struct and return it
 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
206 207
 * @flow_dissector: list of keys to dissect
 * @target_container: target structure to put dissected values into
208 209 210 211 212
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
213 214 215 216 217
 * The function will try to retrieve individual keys into target specified
 * by flow_dissector from either the skbuff or a raw buffer specified by the
 * rest parameters.
 *
 * Caller must take care of zeroing target container memory.
218
 */
219 220 221
bool __skb_flow_dissect(const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container,
222 223
			void *data, __be16 proto, int nhoff, int hlen,
			unsigned int flags)
E
Eric Dumazet 已提交
224
{
225
	struct flow_dissector_key_control *key_control;
226 227 228
	struct flow_dissector_key_basic *key_basic;
	struct flow_dissector_key_addrs *key_addrs;
	struct flow_dissector_key_ports *key_ports;
S
Simon Horman 已提交
229
	struct flow_dissector_key_icmp *key_icmp;
T
Tom Herbert 已提交
230
	struct flow_dissector_key_tags *key_tags;
231
	struct flow_dissector_key_vlan *key_vlan;
T
Tom Herbert 已提交
232
	struct flow_dissector_key_keyid *key_keyid;
233
	bool skip_vlan = false;
234
	u8 ip_proto = 0;
235
	bool ret;
E
Eric Dumazet 已提交
236

237 238
	if (!data) {
		data = skb->data;
239 240
		proto = skb_vlan_tag_present(skb) ?
			 skb->vlan_proto : skb->protocol;
241
		nhoff = skb_network_offset(skb);
242 243 244
		hlen = skb_headlen(skb);
	}

245 246 247 248 249 250 251
	/* It is ensured by skb_flow_dissector_init() that control key will
	 * be always present.
	 */
	key_control = skb_flow_dissector_target(flow_dissector,
						FLOW_DISSECTOR_KEY_CONTROL,
						target_container);

252 253 254 255 256 257
	/* It is ensured by skb_flow_dissector_init() that basic key will
	 * be always present.
	 */
	key_basic = skb_flow_dissector_target(flow_dissector,
					      FLOW_DISSECTOR_KEY_BASIC,
					      target_container);
E
Eric Dumazet 已提交
258

259 260
	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
261 262 263 264 265 266 267 268 269
		struct ethhdr *eth = eth_hdr(skb);
		struct flow_dissector_key_eth_addrs *key_eth_addrs;

		key_eth_addrs = skb_flow_dissector_target(flow_dissector,
							  FLOW_DISSECTOR_KEY_ETH_ADDRS,
							  target_container);
		memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
	}

270
proto_again:
E
Eric Dumazet 已提交
271
	switch (proto) {
272
	case htons(ETH_P_IP): {
E
Eric Dumazet 已提交
273 274 275
		const struct iphdr *iph;
		struct iphdr _iph;
ip:
276
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
277
		if (!iph || iph->ihl < 5)
278
			goto out_bad;
279
		nhoff += iph->ihl * 4;
E
Eric Dumazet 已提交
280

281 282
		ip_proto = iph->protocol;

283 284 285 286 287 288 289 290 291 292
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
							      target_container);

			memcpy(&key_addrs->v4addrs, &iph->saddr,
			       sizeof(key_addrs->v4addrs));
			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		}
293 294

		if (ip_is_fragment(iph)) {
295
			key_control->flags |= FLOW_DIS_IS_FRAGMENT;
296 297 298 299

			if (iph->frag_off & htons(IP_OFFSET)) {
				goto out_good;
			} else {
300
				key_control->flags |= FLOW_DIS_FIRST_FRAG;
301 302 303 304 305
				if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
					goto out_good;
			}
		}

306 307 308
		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
			goto out_good;

E
Eric Dumazet 已提交
309 310
		break;
	}
311
	case htons(ETH_P_IPV6): {
E
Eric Dumazet 已提交
312 313
		const struct ipv6hdr *iph;
		struct ipv6hdr _iph;
314

E
Eric Dumazet 已提交
315
ipv6:
316
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
E
Eric Dumazet 已提交
317
		if (!iph)
318
			goto out_bad;
E
Eric Dumazet 已提交
319 320 321

		ip_proto = iph->nexthdr;
		nhoff += sizeof(struct ipv6hdr);
322

323 324
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
325 326 327
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
							      target_container);
328

329 330
			memcpy(&key_addrs->v6addrs, &iph->saddr,
			       sizeof(key_addrs->v6addrs));
331
			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
332
		}
333

334 335 336 337 338 339
		if ((dissector_uses_key(flow_dissector,
					FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
		     (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
		    ip6_flowlabel(iph)) {
			__be32 flow_label = ip6_flowlabel(iph);

340 341
			if (dissector_uses_key(flow_dissector,
					       FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
342 343 344 345
				key_tags = skb_flow_dissector_target(flow_dissector,
								     FLOW_DISSECTOR_KEY_FLOW_LABEL,
								     target_container);
				key_tags->flow_label = ntohl(flow_label);
346
			}
347 348
			if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
				goto out_good;
349 350
		}

351 352 353
		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
			goto out_good;

E
Eric Dumazet 已提交
354 355
		break;
	}
356 357
	case htons(ETH_P_8021AD):
	case htons(ETH_P_8021Q): {
E
Eric Dumazet 已提交
358
		const struct vlan_hdr *vlan;
359 360
		struct vlan_hdr _vlan;
		bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
E
Eric Dumazet 已提交
361

362
		if (vlan_tag_present)
363 364
			proto = skb->protocol;

365
		if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
366 367 368 369 370 371 372
			vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
						    data, hlen, &_vlan);
			if (!vlan)
				goto out_bad;
			proto = vlan->h_vlan_encapsulated_proto;
			nhoff += sizeof(*vlan);
			if (skip_vlan)
373
				goto proto_again;
374
		}
E
Eric Dumazet 已提交
375

376
		skip_vlan = true;
377
		if (dissector_uses_key(flow_dissector,
378 379 380
				       FLOW_DISSECTOR_KEY_VLAN)) {
			key_vlan = skb_flow_dissector_target(flow_dissector,
							     FLOW_DISSECTOR_KEY_VLAN,
T
Tom Herbert 已提交
381 382
							     target_container);

383
			if (vlan_tag_present) {
384 385 386 387 388
				key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
				key_vlan->vlan_priority =
					(skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
			} else {
				key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
389
					VLAN_VID_MASK;
390 391 392 393
				key_vlan->vlan_priority =
					(ntohs(vlan->h_vlan_TCI) &
					 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
			}
T
Tom Herbert 已提交
394 395
		}

396
		goto proto_again;
E
Eric Dumazet 已提交
397
	}
398
	case htons(ETH_P_PPP_SES): {
E
Eric Dumazet 已提交
399 400 401 402
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;
403
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
E
Eric Dumazet 已提交
404
		if (!hdr)
405
			goto out_bad;
E
Eric Dumazet 已提交
406 407 408
		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
409
		case htons(PPP_IP):
E
Eric Dumazet 已提交
410
			goto ip;
411
		case htons(PPP_IPV6):
E
Eric Dumazet 已提交
412 413
			goto ipv6;
		default:
414
			goto out_bad;
E
Eric Dumazet 已提交
415 416
		}
	}
E
Erik Hugne 已提交
417 418 419 420 421 422 423
	case htons(ETH_P_TIPC): {
		struct {
			__be32 pre[3];
			__be32 srcnode;
		} *hdr, _hdr;
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
424
			goto out_bad;
425

426 427
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
428
			key_addrs = skb_flow_dissector_target(flow_dissector,
T
Tom Herbert 已提交
429
							      FLOW_DISSECTOR_KEY_TIPC_ADDRS,
430
							      target_container);
T
Tom Herbert 已提交
431 432
			key_addrs->tipcaddrs.srcnode = hdr->srcnode;
			key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
433
		}
434
		goto out_good;
E
Erik Hugne 已提交
435
	}
436 437

	case htons(ETH_P_MPLS_UC):
438
	case htons(ETH_P_MPLS_MC):
439
mpls:
440 441 442 443
		switch (__skb_flow_dissect_mpls(skb, flow_dissector,
						target_container, data,
						nhoff, hlen)) {
		case FLOW_DISSECT_RET_OUT_GOOD:
444
			goto out_good;
445 446
		case FLOW_DISSECT_RET_OUT_BAD:
			goto out_bad;
447
		}
448
	case htons(ETH_P_FCOE):
449 450 451 452 453
		if ((hlen - nhoff) < FCOE_HEADER_LEN)
			goto out_bad;

		nhoff += FCOE_HEADER_LEN;
		goto out_good;
S
Simon Horman 已提交
454 455

	case htons(ETH_P_ARP):
456 457 458 459 460 461 462
	case htons(ETH_P_RARP):
		switch (__skb_flow_dissect_arp(skb, flow_dissector,
					       target_container, data,
					       nhoff, hlen)) {
		case FLOW_DISSECT_RET_OUT_GOOD:
			goto out_good;
		case FLOW_DISSECT_RET_OUT_BAD:
S
Simon Horman 已提交
463 464
			goto out_bad;
		}
E
Eric Dumazet 已提交
465
	default:
466
		goto out_bad;
E
Eric Dumazet 已提交
467 468
	}

469
ip_proto_again:
E
Eric Dumazet 已提交
470 471
	switch (ip_proto) {
	case IPPROTO_GRE: {
472 473 474
		struct gre_base_hdr *hdr, _hdr;
		u16 gre_ver;
		int offset = 0;
E
Eric Dumazet 已提交
475

476
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
E
Eric Dumazet 已提交
477
		if (!hdr)
478
			goto out_bad;
479 480 481

		/* Only look inside GRE without routing */
		if (hdr->flags & GRE_ROUTING)
482
			goto out_good;
483

484 485 486
		/* Only look inside GRE for version 0 and 1 */
		gre_ver = ntohs(hdr->flags & GRE_VERSION);
		if (gre_ver > 1)
487
			goto out_good;
488 489 490 491 492

		proto = hdr->protocol;
		if (gre_ver) {
			/* Version1 must be PPTP, and check the flags */
			if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
493
				goto out_good;
494 495 496 497
		}

		offset += sizeof(struct gre_base_hdr);

498
		if (hdr->flags & GRE_CSUM)
499 500 501
			offset += sizeof(((struct gre_full_hdr *)0)->csum) +
				  sizeof(((struct gre_full_hdr *)0)->reserved1);

T
Tom Herbert 已提交
502 503 504 505
		if (hdr->flags & GRE_KEY) {
			const __be32 *keyid;
			__be32 _keyid;

506
			keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
T
Tom Herbert 已提交
507 508
						     data, hlen, &_keyid);
			if (!keyid)
509
				goto out_bad;
T
Tom Herbert 已提交
510

511 512
			if (dissector_uses_key(flow_dissector,
					       FLOW_DISSECTOR_KEY_GRE_KEYID)) {
T
Tom Herbert 已提交
513 514 515
				key_keyid = skb_flow_dissector_target(flow_dissector,
								      FLOW_DISSECTOR_KEY_GRE_KEYID,
								      target_container);
516 517 518 519
				if (gre_ver == 0)
					key_keyid->keyid = *keyid;
				else
					key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
T
Tom Herbert 已提交
520
			}
521
			offset += sizeof(((struct gre_full_hdr *)0)->key);
T
Tom Herbert 已提交
522
		}
523

524
		if (hdr->flags & GRE_SEQ)
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553
			offset += sizeof(((struct pptp_gre_header *)0)->seq);

		if (gre_ver == 0) {
			if (proto == htons(ETH_P_TEB)) {
				const struct ethhdr *eth;
				struct ethhdr _eth;

				eth = __skb_header_pointer(skb, nhoff + offset,
							   sizeof(_eth),
							   data, hlen, &_eth);
				if (!eth)
					goto out_bad;
				proto = eth->h_proto;
				offset += sizeof(*eth);

				/* Cap headers that we access via pointers at the
				 * end of the Ethernet header as our maximum alignment
				 * at that point is only 2 bytes.
				 */
				if (NET_IP_ALIGN)
					hlen = (nhoff + offset);
			}
		} else { /* version 1, must be PPTP */
			u8 _ppp_hdr[PPP_HDRLEN];
			u8 *ppp_hdr;

			if (hdr->flags & GRE_ACK)
				offset += sizeof(((struct pptp_gre_header *)0)->ack);

554 555 556
			ppp_hdr = __skb_header_pointer(skb, nhoff + offset,
						     sizeof(_ppp_hdr),
						     data, hlen, _ppp_hdr);
557
			if (!ppp_hdr)
558
				goto out_bad;
559 560 561 562 563 564 565 566 567 568 569 570 571 572

			switch (PPP_PROTOCOL(ppp_hdr)) {
			case PPP_IP:
				proto = htons(ETH_P_IP);
				break;
			case PPP_IPV6:
				proto = htons(ETH_P_IPV6);
				break;
			default:
				/* Could probably catch some more like MPLS */
				break;
			}

			offset += PPP_HDRLEN;
E
Eric Dumazet 已提交
573
		}
574

575
		nhoff += offset;
576
		key_control->flags |= FLOW_DIS_ENCAPSULATION;
577 578 579
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			goto out_good;

580
		goto proto_again;
E
Eric Dumazet 已提交
581
	}
582 583 584 585 586 587 588 589 590 591
	case NEXTHDR_HOP:
	case NEXTHDR_ROUTING:
	case NEXTHDR_DEST: {
		u8 _opthdr[2], *opthdr;

		if (proto != htons(ETH_P_IPV6))
			break;

		opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
					      data, hlen, &_opthdr);
592
		if (!opthdr)
593
			goto out_bad;
594

595 596
		ip_proto = opthdr[0];
		nhoff += (opthdr[1] + 1) << 3;
597 598 599

		goto ip_proto_again;
	}
600 601 602 603 604 605 606 607 608 609 610 611
	case NEXTHDR_FRAGMENT: {
		struct frag_hdr _fh, *fh;

		if (proto != htons(ETH_P_IPV6))
			break;

		fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
					  data, hlen, &_fh);

		if (!fh)
			goto out_bad;

612
		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
613 614

		nhoff += sizeof(_fh);
615
		ip_proto = fh->nexthdr;
616 617

		if (!(fh->frag_off & htons(IP6_OFFSET))) {
618
			key_control->flags |= FLOW_DIS_FIRST_FRAG;
619
			if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
620 621 622 623
				goto ip_proto_again;
		}
		goto out_good;
	}
E
Eric Dumazet 已提交
624
	case IPPROTO_IPIP:
T
Tom Herbert 已提交
625
		proto = htons(ETH_P_IP);
626

627
		key_control->flags |= FLOW_DIS_ENCAPSULATION;
628 629 630
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			goto out_good;

T
Tom Herbert 已提交
631
		goto ip;
632 633
	case IPPROTO_IPV6:
		proto = htons(ETH_P_IPV6);
634

635
		key_control->flags |= FLOW_DIS_ENCAPSULATION;
636 637 638
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			goto out_good;

639
		goto ipv6;
640 641 642
	case IPPROTO_MPLS:
		proto = htons(ETH_P_MPLS_UC);
		goto mpls;
E
Eric Dumazet 已提交
643 644 645 646
	default:
		break;
	}

647 648
	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_PORTS)) {
649 650 651 652 653 654
		key_ports = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_PORTS,
						      target_container);
		key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
							data, hlen);
	}
655

S
Simon Horman 已提交
656 657 658 659 660 661 662 663
	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_ICMP)) {
		key_icmp = skb_flow_dissector_target(flow_dissector,
						     FLOW_DISSECTOR_KEY_ICMP,
						     target_container);
		key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
	}

664 665 666
out_good:
	ret = true;

667 668
	key_control->thoff = (u16)nhoff;
out:
669 670 671 672
	key_basic->n_proto = proto;
	key_basic->ip_proto = ip_proto;

	return ret;
673 674 675 676 677

out_bad:
	ret = false;
	key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
	goto out;
E
Eric Dumazet 已提交
678
}
679
EXPORT_SYMBOL(__skb_flow_dissect);
680 681

static u32 hashrnd __read_mostly;
682 683 684 685 686
static __always_inline void __flow_hash_secret_init(void)
{
	net_get_random_once(&hashrnd, sizeof(hashrnd));
}

687 688
/* Hash @length 32-bit words starting at @words with jhash2(), using
 * @keyval as the initial value.
 */
static __always_inline u32 __flow_hash_words(const u32 *words, u32 length,
					     u32 keyval)
{
	return jhash2(words, length, keyval);
}

693
static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow)
694
{
695 696
	const void *p = flow;

697
	BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
698
	return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET);
699 700
}

701
static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
702
{
703
	size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
704
	BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
705 706 707 708 709 710 711 712 713 714
	BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
		     sizeof(*flow) - sizeof(flow->addrs));

	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		diff -= sizeof(flow->addrs.v4addrs);
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		diff -= sizeof(flow->addrs.v6addrs);
		break;
T
Tom Herbert 已提交
715 716 717
	case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
		diff -= sizeof(flow->addrs.tipcaddrs);
		break;
718 719 720 721 722 723 724 725 726 727 728 729
	}
	return (sizeof(*flow) - diff) / sizeof(u32);
}

__be32 flow_get_u32_src(const struct flow_keys *flow)
{
	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		return flow->addrs.v4addrs.src;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		return (__force __be32)ipv6_addr_hash(
			&flow->addrs.v6addrs.src);
T
Tom Herbert 已提交
730 731
	case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
		return flow->addrs.tipcaddrs.srcnode;
732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782
	default:
		return 0;
	}
}
EXPORT_SYMBOL(flow_get_u32_src);

/**
 * flow_get_u32_dst - reduce the destination address of a flow to 32 bits
 * @flow: dissected flow keys
 *
 * Returns the IPv4 destination address or the ipv6_addr_hash() of the IPv6
 * destination address, depending on the recorded address type; 0 for any
 * other address type.
 */
__be32 flow_get_u32_dst(const struct flow_keys *flow)
{
	if (flow->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
		return flow->addrs.v4addrs.dst;

	if (flow->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS)
		return (__force __be32)ipv6_addr_hash(&flow->addrs.v6addrs.dst);

	return 0;
}
EXPORT_SYMBOL(flow_get_u32_dst);

/* Put the addresses and ports of @keys into a direction-independent order
 * so that both directions of a flow produce the same key contents.
 *
 * For IPv4 and IPv6 flows, source and destination (addresses and ports) are
 * swapped when the destination address compares lower than the source, or
 * when the addresses are equal and the destination port is lower than the
 * source port.  Other address types are left untouched.
 */
static inline void __flow_hash_consistentify(struct flow_keys *keys)
{
	int addr_diff, i;

	switch (keys->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		addr_diff = (__force u32)keys->addrs.v4addrs.dst -
			    (__force u32)keys->addrs.v4addrs.src;
		if ((addr_diff < 0) ||
		    (addr_diff == 0 &&
		     ((__force u16)keys->ports.dst <
		      (__force u16)keys->ports.src))) {
			swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		addr_diff = memcmp(&keys->addrs.v6addrs.dst,
				   &keys->addrs.v6addrs.src,
				   sizeof(keys->addrs.v6addrs.dst));
		if ((addr_diff < 0) ||
		    (addr_diff == 0 &&
		     ((__force u16)keys->ports.dst <
		      (__force u16)keys->ports.src))) {
			for (i = 0; i < 4; i++)
				swap(keys->addrs.v6addrs.src.s6_addr32[i],
				     keys->addrs.v6addrs.dst.s6_addr32[i]);
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	}
}

T
Tom Herbert 已提交
785
/* Compute the flow hash of @keys seeded with @keyval.
 *
 * @keys is first put into direction-independent order (and so may be
 * modified).  The result is never 0: a computed hash of 0 is replaced by 1.
 */
static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
{
	u32 hash;

	__flow_hash_consistentify(keys);

	hash = __flow_hash_words(flow_keys_hash_start(keys),
				 flow_keys_hash_length(keys), keyval);
	if (!hash)
		hash = 1;

	return hash;
}

/**
 * flow_hash_from_keys - compute a flow hash from dissected keys
 * @keys: flow keys to hash; may be reordered by the computation
 *
 * Hashes @keys with the file-wide hashrnd seed, initialising the seed
 * first if needed.  Returns a non-zero hash.
 */
u32 flow_hash_from_keys(struct flow_keys *keys)
{
	__flow_hash_secret_init();
	return __flow_hash_from_keys(keys, hashrnd);
}
EXPORT_SYMBOL(flow_hash_from_keys);

T
Tom Herbert 已提交
806 807 808
/* Dissect @skb into @keys, stopping at an IPv6 flow label if one is found,
 * and return the flow hash of the result seeded with @keyval.
 */
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
				  struct flow_keys *keys, u32 keyval)
{
	skb_flow_dissect_flow_keys(skb, keys,
				   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);

	return __flow_hash_from_keys(keys, keyval);
}

T
Tom Herbert 已提交
815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833
/* Layout written into a struct flow_keys_digest by make_flow_keys_digest(). */
struct _flow_keys_digest_data {
	__be16	n_proto;	/* copied from flow->basic.n_proto */
	u8	ip_proto;	/* copied from flow->basic.ip_proto */
	u8	padding;	/* left zero by make_flow_keys_digest() */
	__be32	ports;		/* copied from flow->ports.ports */
	__be32	src;		/* copied from flow->addrs.v4addrs.src */
	__be32	dst;		/* copied from flow->addrs.v4addrs.dst */
};

/**
 * make_flow_keys_digest - condense flow keys into a digest
 * @digest: output digest; fully zeroed before being filled in
 * @flow: dissected flow keys to condense
 *
 * Stores the network and IP protocol, the ports and the v4addrs view of the
 * source and destination addresses of @flow into @digest.
 */
void make_flow_keys_digest(struct flow_keys_digest *digest,
			   const struct flow_keys *flow)
{
	struct _flow_keys_digest_data *data =
	    (struct _flow_keys_digest_data *)digest;

	/* The overlay must fit inside the opaque digest. */
	BUILD_BUG_ON(sizeof(*data) > sizeof(*digest));

	memset(digest, 0, sizeof(*digest));

	data->n_proto = flow->basic.n_proto;
	data->ip_proto = flow->basic.ip_proto;
	data->ports = flow->ports.ports;
	data->src = flow->addrs.v4addrs.src;
	data->dst = flow->addrs.v4addrs.dst;
}
EXPORT_SYMBOL(make_flow_keys_digest);

842 843
static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;

844
u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
845 846 847 848 849 850 851 852 853 854 855 856 857 858
{
	struct flow_keys keys;

	__flow_hash_secret_init();

	memset(&keys, 0, sizeof(keys));
	__skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
			   NULL, 0, 0, 0,
			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);

	return __flow_hash_from_keys(&keys, hashrnd);
}
EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);

859 860 861 862 863
/**
 * __skb_get_hash: calculate a flow hash
 * @skb: sk_buff to calculate flow hash from
 *
 * This function calculates a flow hash based on src/dst addresses
864 865
 * and src/dst port numbers.  Sets hash in skb to non-zero hash value
 * on success, zero indicates no valid hash.  Also, sets l4_hash in skb
866 867
 * if hash is a canonical 4-tuple hash over transport ports.
 */
868
void __skb_get_hash(struct sk_buff *skb)
869 870
{
	struct flow_keys keys;
871
	u32 hash;
872

T
Tom Herbert 已提交
873 874
	__flow_hash_secret_init();

875 876 877
	hash = ___skb_get_hash(skb, &keys, hashrnd);

	__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
878
}
879
EXPORT_SYMBOL(__skb_get_hash);
880

T
Tom Herbert 已提交
881 882 883 884 885 886 887 888
/**
 * skb_get_hash_perturb - calculate a flow hash with a caller-supplied seed
 * @skb: sk_buff to calculate the flow hash from
 * @perturb: seed to hash with instead of the file-wide hashrnd
 *
 * Returns the hash; nothing is stored in @skb.
 */
__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
{
	struct flow_keys dissected;
	__u32 hash = ___skb_get_hash(skb, &dissected, perturb);

	return hash;
}
EXPORT_SYMBOL(skb_get_hash_perturb);

889
__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905
{
	struct flow_keys keys;

	memset(&keys, 0, sizeof(keys));

	memcpy(&keys.addrs.v6addrs.src, &fl6->saddr,
	       sizeof(keys.addrs.v6addrs.src));
	memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr,
	       sizeof(keys.addrs.v6addrs.dst));
	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys.ports.src = fl6->fl6_sport;
	keys.ports.dst = fl6->fl6_dport;
	keys.keyid.keyid = fl6->fl6_gre_key;
	keys.tags.flow_label = (__force u32)fl6->flowlabel;
	keys.basic.ip_proto = fl6->flowi6_proto;

906 907
	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
			  flow_keys_have_l4(&keys));
908 909 910 911 912

	return skb->hash;
}
EXPORT_SYMBOL(__skb_get_hash_flowi6);

913
__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
914 915 916 917 918 919 920 921 922 923 924 925 926
{
	struct flow_keys keys;

	memset(&keys, 0, sizeof(keys));

	keys.addrs.v4addrs.src = fl4->saddr;
	keys.addrs.v4addrs.dst = fl4->daddr;
	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	keys.ports.src = fl4->fl4_sport;
	keys.ports.dst = fl4->fl4_dport;
	keys.keyid.keyid = fl4->fl4_gre_key;
	keys.basic.ip_proto = fl4->flowi4_proto;

927 928
	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
			  flow_keys_have_l4(&keys));
929 930 931 932 933

	return skb->hash;
}
EXPORT_SYMBOL(__skb_get_hash_flowi4);

934 935
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
		   const struct flow_keys *keys, int hlen)
936
{
937
	u32 poff = keys->control.thoff;
938

939 940 941 942 943
	/* skip L4 headers for fragments after the first */
	if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) &&
	    !(keys->control.flags & FLOW_DIS_FIRST_FRAG))
		return poff;

944
	switch (keys->basic.ip_proto) {
945
	case IPPROTO_TCP: {
946 947 948
		/* access doff as u8 to avoid unaligned access */
		const u8 *doff;
		u8 _doff;
949

950 951 952
		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
					    data, hlen, &_doff);
		if (!doff)
953 954
			return poff;

955
		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984
		break;
	}
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		poff += sizeof(struct udphdr);
		break;
	/* For the rest, we do not really care about header
	 * extensions at this point for now.
	 */
	case IPPROTO_ICMP:
		poff += sizeof(struct icmphdr);
		break;
	case IPPROTO_ICMPV6:
		poff += sizeof(struct icmp6hdr);
		break;
	case IPPROTO_IGMP:
		poff += sizeof(struct igmphdr);
		break;
	case IPPROTO_DCCP:
		poff += sizeof(struct dccp_hdr);
		break;
	case IPPROTO_SCTP:
		poff += sizeof(struct sctphdr);
		break;
	}

	return poff;
}

985 986 987 988 989 990
/**
 * skb_get_poff - get the offset to the payload
 * @skb: sk_buff to get the payload offset from
 *
 * The function will get the offset to the payload as far as it could
 * be dissected.  The main user is currently BPF, so that we can dynamically
 * truncate packets without needing to push actual payload to the user
 * space and can analyze headers only, instead.
 *
 * Returns 0 when the flow keys cannot be dissected.
 */
u32 skb_get_poff(const struct sk_buff *skb)
{
	struct flow_keys keys;

	if (!skb_flow_dissect_flow_keys(skb, &keys, 0))
		return 0;

	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
}
1003

1004
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022
{
	memset(keys, 0, sizeof(*keys));

	memcpy(&keys->addrs.v6addrs.src, &fl6->saddr,
	    sizeof(keys->addrs.v6addrs.src));
	memcpy(&keys->addrs.v6addrs.dst, &fl6->daddr,
	    sizeof(keys->addrs.v6addrs.dst));
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys->ports.src = fl6->fl6_sport;
	keys->ports.dst = fl6->fl6_dport;
	keys->keyid.keyid = fl6->fl6_gre_key;
	keys->tags.flow_label = (__force u32)fl6->flowlabel;
	keys->basic.ip_proto = fl6->flowi6_proto;

	return flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(__get_hash_from_flowi6);

1023
__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038
{
	memset(keys, 0, sizeof(*keys));

	keys->addrs.v4addrs.src = fl4->saddr;
	keys->addrs.v4addrs.dst = fl4->daddr;
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	keys->ports.src = fl4->fl4_sport;
	keys->ports.dst = fl4->fl4_dport;
	keys->keyid.keyid = fl4->fl4_gre_key;
	keys->basic.ip_proto = fl4->flowi4_proto;

	return flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(__get_hash_from_flowi4);

1039
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
1040 1041 1042 1043
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
1044 1045 1046 1047 1048 1049
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
1050 1051 1052 1053 1054
		.offset = offsetof(struct flow_keys, addrs.v4addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v6addrs),
1055
	},
T
Tom Herbert 已提交
1056 1057 1058 1059
	{
		.key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.tipcaddrs),
	},
1060 1061 1062 1063
	{
		.key_id = FLOW_DISSECTOR_KEY_PORTS,
		.offset = offsetof(struct flow_keys, ports),
	},
T
Tom Herbert 已提交
1064
	{
1065 1066
		.key_id = FLOW_DISSECTOR_KEY_VLAN,
		.offset = offsetof(struct flow_keys, vlan),
T
Tom Herbert 已提交
1067
	},
1068 1069 1070 1071
	{
		.key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
		.offset = offsetof(struct flow_keys, tags),
	},
T
Tom Herbert 已提交
1072 1073 1074 1075
	{
		.key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
		.offset = offsetof(struct flow_keys, keyid),
	},
1076 1077
};

1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100
/* Keys of flow_keys_dissector_symmetric: control, basic, IPv4/IPv6
 * addresses and ports only, each mapped to its struct flow_keys member.
 */
static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v4addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v6addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_PORTS,
		.offset = offsetof(struct flow_keys, ports),
	},
};

1101
static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
1102 1103 1104 1105
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
};

/* Default dissector, set up from flow_keys_dissector_keys. */
struct flow_dissector flow_keys_dissector __read_mostly;
EXPORT_SYMBOL(flow_keys_dissector);

/* Minimal dissector, set up from flow_keys_buf_dissector_keys. */
struct flow_dissector flow_keys_buf_dissector __read_mostly;

/* Initialise the three dissectors defined in this file from their key
 * tables.  Registered as a core initcall; always returns 0.
 */
static int __init init_default_flow_dissectors(void)
{
	skb_flow_dissector_init(&flow_keys_dissector,
				flow_keys_dissector_keys,
				ARRAY_SIZE(flow_keys_dissector_keys));
	skb_flow_dissector_init(&flow_keys_dissector_symmetric,
				flow_keys_dissector_symmetric_keys,
				ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
	skb_flow_dissector_init(&flow_keys_buf_dissector,
				flow_keys_buf_dissector_keys,
				ARRAY_SIZE(flow_keys_buf_dissector_keys));
	return 0;
}

core_initcall(init_default_flow_dissectors);