flow_dissector.c 24.0 KB
Newer Older
1
#include <linux/kernel.h>
E
Eric Dumazet 已提交
2
#include <linux/skbuff.h>
3
#include <linux/export.h>
E
Eric Dumazet 已提交
4 5 6 7
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
E
Eric Dumazet 已提交
8
#include <net/ipv6.h>
9 10 11 12
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
#include <linux/dccp.h>
E
Eric Dumazet 已提交
13 14 15
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
16
#include <linux/stddef.h>
17
#include <linux/if_ether.h>
18
#include <linux/mpls.h>
19
#include <net/flow_dissector.h>
20
#include <scsi/fc/fc_fcoe.h>
E
Eric Dumazet 已提交
21

22 23
static void dissector_set_key(struct flow_dissector *flow_dissector,
			      enum flow_dissector_key_id key_id)
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
{
	flow_dissector->used_keys |= (1 << key_id);
}

void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
			     const struct flow_dissector_key *key,
			     unsigned int key_count)
{
	unsigned int i;

	memset(flow_dissector, 0, sizeof(*flow_dissector));

	for (i = 0; i < key_count; i++, key++) {
		/* User should make sure that every key target offset is withing
		 * boundaries of unsigned short.
		 */
		BUG_ON(key->offset > USHRT_MAX);
41 42
		BUG_ON(dissector_uses_key(flow_dissector,
					  key->key_id));
43

44
		dissector_set_key(flow_dissector, key->key_id);
45 46 47
		flow_dissector->offset[key->key_id] = key->offset;
	}

48 49
	/* Ensure that the dissector always includes control and basic key.
	 * That way we are able to avoid handling lack of these in fast path.
50
	 */
51 52 53 54
	BUG_ON(!dissector_uses_key(flow_dissector,
				   FLOW_DISSECTOR_KEY_CONTROL));
	BUG_ON(!dissector_uses_key(flow_dissector,
				   FLOW_DISSECTOR_KEY_BASIC));
55 56 57
}
EXPORT_SYMBOL(skb_flow_dissector_init);

58
/**
59 60
 * __skb_flow_get_ports - extract the upper layer ports and return them
 * @skb: sk_buff to extract the ports from
61 62
 * @thoff: transport header offset
 * @ip_proto: protocol for which to get port offset
63 64
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
65 66 67 68
 *
 * The function will try to retrieve the ports at offset thoff + poff where poff
 * is the protocol port offset returned from proto_ports_offset
 */
69 70
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
			    void *data, int hlen)
71 72 73
{
	int poff = proto_ports_offset(ip_proto);

74 75 76 77 78
	if (!data) {
		data = skb->data;
		hlen = skb_headlen(skb);
	}

79 80 81
	if (poff >= 0) {
		__be32 *ports, _ports;

82 83
		ports = __skb_header_pointer(skb, thoff + poff,
					     sizeof(_ports), data, hlen, &_ports);
84 85 86 87 88 89
		if (ports)
			return *ports;
	}

	return 0;
}
90
EXPORT_SYMBOL(__skb_flow_get_ports);
91

92 93 94
/**
 * __skb_flow_dissect - extract the flow_keys struct and return it
 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
95 96
 * @flow_dissector: list of keys to dissect
 * @target_container: target structure to put dissected values into
97 98 99 100 101
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
102 103 104 105 106
 * The function will try to retrieve individual keys into target specified
 * by flow_dissector from either the skbuff or a raw buffer specified by the
 * rest parameters.
 *
 * Caller must take care of zeroing target container memory.
107
 */
108 109 110
bool __skb_flow_dissect(const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container,
111 112
			void *data, __be16 proto, int nhoff, int hlen,
			unsigned int flags)
E
Eric Dumazet 已提交
113
{
114
	struct flow_dissector_key_control *key_control;
115 116 117
	struct flow_dissector_key_basic *key_basic;
	struct flow_dissector_key_addrs *key_addrs;
	struct flow_dissector_key_ports *key_ports;
T
Tom Herbert 已提交
118
	struct flow_dissector_key_tags *key_tags;
T
Tom Herbert 已提交
119
	struct flow_dissector_key_keyid *key_keyid;
120
	u8 ip_proto = 0;
121
	bool ret = false;
E
Eric Dumazet 已提交
122

123 124
	if (!data) {
		data = skb->data;
125 126
		proto = skb->protocol;
		nhoff = skb_network_offset(skb);
127 128 129
		hlen = skb_headlen(skb);
	}

130 131 132 133 134 135 136
	/* It is ensured by skb_flow_dissector_init() that control key will
	 * be always present.
	 */
	key_control = skb_flow_dissector_target(flow_dissector,
						FLOW_DISSECTOR_KEY_CONTROL,
						target_container);

137 138 139 140 141 142
	/* It is ensured by skb_flow_dissector_init() that basic key will
	 * be always present.
	 */
	key_basic = skb_flow_dissector_target(flow_dissector,
					      FLOW_DISSECTOR_KEY_BASIC,
					      target_container);
E
Eric Dumazet 已提交
143

144 145
	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
146 147 148 149 150 151 152 153 154
		struct ethhdr *eth = eth_hdr(skb);
		struct flow_dissector_key_eth_addrs *key_eth_addrs;

		key_eth_addrs = skb_flow_dissector_target(flow_dissector,
							  FLOW_DISSECTOR_KEY_ETH_ADDRS,
							  target_container);
		memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
	}

E
Eric Dumazet 已提交
155 156
again:
	switch (proto) {
157
	case htons(ETH_P_IP): {
E
Eric Dumazet 已提交
158 159 160
		const struct iphdr *iph;
		struct iphdr _iph;
ip:
161
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
162
		if (!iph || iph->ihl < 5)
163
			goto out_bad;
164
		nhoff += iph->ihl * 4;
E
Eric Dumazet 已提交
165

166 167
		ip_proto = iph->protocol;

168 169 170 171 172 173 174 175 176 177
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
							      target_container);

			memcpy(&key_addrs->v4addrs, &iph->saddr,
			       sizeof(key_addrs->v4addrs));
			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		}
178 179

		if (ip_is_fragment(iph)) {
180
			key_control->flags |= FLOW_DIS_IS_FRAGMENT;
181 182 183 184

			if (iph->frag_off & htons(IP_OFFSET)) {
				goto out_good;
			} else {
185
				key_control->flags |= FLOW_DIS_FIRST_FRAG;
186 187 188 189 190
				if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
					goto out_good;
			}
		}

191 192 193
		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
			goto out_good;

E
Eric Dumazet 已提交
194 195
		break;
	}
196
	case htons(ETH_P_IPV6): {
E
Eric Dumazet 已提交
197 198
		const struct ipv6hdr *iph;
		struct ipv6hdr _iph;
199

E
Eric Dumazet 已提交
200
ipv6:
201
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
E
Eric Dumazet 已提交
202
		if (!iph)
203
			goto out_bad;
E
Eric Dumazet 已提交
204 205 206

		ip_proto = iph->nexthdr;
		nhoff += sizeof(struct ipv6hdr);
207

208 209
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
210 211 212
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
							      target_container);
213

214 215
			memcpy(&key_addrs->v6addrs, &iph->saddr,
			       sizeof(key_addrs->v6addrs));
216
			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
217
		}
218

219 220 221 222 223 224
		if ((dissector_uses_key(flow_dissector,
					FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
		     (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
		    ip6_flowlabel(iph)) {
			__be32 flow_label = ip6_flowlabel(iph);

225 226
			if (dissector_uses_key(flow_dissector,
					       FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
227 228 229 230
				key_tags = skb_flow_dissector_target(flow_dissector,
								     FLOW_DISSECTOR_KEY_FLOW_LABEL,
								     target_container);
				key_tags->flow_label = ntohl(flow_label);
231
			}
232 233
			if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
				goto out_good;
234 235
		}

236 237 238
		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
			goto out_good;

E
Eric Dumazet 已提交
239 240
		break;
	}
241 242
	case htons(ETH_P_8021AD):
	case htons(ETH_P_8021Q): {
E
Eric Dumazet 已提交
243 244 245
		const struct vlan_hdr *vlan;
		struct vlan_hdr _vlan;

246
		vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
E
Eric Dumazet 已提交
247
		if (!vlan)
248
			goto out_bad;
E
Eric Dumazet 已提交
249

250 251
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_VLANID)) {
T
Tom Herbert 已提交
252 253 254 255 256 257 258
			key_tags = skb_flow_dissector_target(flow_dissector,
							     FLOW_DISSECTOR_KEY_VLANID,
							     target_container);

			key_tags->vlan_id = skb_vlan_tag_get_id(skb);
		}

E
Eric Dumazet 已提交
259 260 261 262
		proto = vlan->h_vlan_encapsulated_proto;
		nhoff += sizeof(*vlan);
		goto again;
	}
263
	case htons(ETH_P_PPP_SES): {
E
Eric Dumazet 已提交
264 265 266 267
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;
268
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
E
Eric Dumazet 已提交
269
		if (!hdr)
270
			goto out_bad;
E
Eric Dumazet 已提交
271 272 273
		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
274
		case htons(PPP_IP):
E
Eric Dumazet 已提交
275
			goto ip;
276
		case htons(PPP_IPV6):
E
Eric Dumazet 已提交
277 278
			goto ipv6;
		default:
279
			goto out_bad;
E
Eric Dumazet 已提交
280 281
		}
	}
E
Erik Hugne 已提交
282 283 284 285 286 287 288
	case htons(ETH_P_TIPC): {
		struct {
			__be32 pre[3];
			__be32 srcnode;
		} *hdr, _hdr;
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
289
			goto out_bad;
290

291 292
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
293
			key_addrs = skb_flow_dissector_target(flow_dissector,
T
Tom Herbert 已提交
294
							      FLOW_DISSECTOR_KEY_TIPC_ADDRS,
295
							      target_container);
T
Tom Herbert 已提交
296 297
			key_addrs->tipcaddrs.srcnode = hdr->srcnode;
			key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
298
		}
299
		goto out_good;
E
Erik Hugne 已提交
300
	}
301 302 303 304 305 306 307 308

	case htons(ETH_P_MPLS_UC):
	case htons(ETH_P_MPLS_MC): {
		struct mpls_label *hdr, _hdr[2];
mpls:
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
					   hlen, &_hdr);
		if (!hdr)
309
			goto out_bad;
310

311 312
		if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
		     MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
313 314
			if (dissector_uses_key(flow_dissector,
					       FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
315 316 317 318 319 320 321
				key_keyid = skb_flow_dissector_target(flow_dissector,
								      FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
								      target_container);
				key_keyid->keyid = hdr[1].entry &
					htonl(MPLS_LS_LABEL_MASK);
			}

322
			goto out_good;
323 324
		}

325
		goto out_good;
326 327
	}

328
	case htons(ETH_P_FCOE):
329 330 331 332 333
		if ((hlen - nhoff) < FCOE_HEADER_LEN)
			goto out_bad;

		nhoff += FCOE_HEADER_LEN;
		goto out_good;
E
Eric Dumazet 已提交
334
	default:
335
		goto out_bad;
E
Eric Dumazet 已提交
336 337
	}

338
ip_proto_again:
E
Eric Dumazet 已提交
339 340 341 342 343 344 345
	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		} *hdr, _hdr;

346
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
E
Eric Dumazet 已提交
347
		if (!hdr)
348
			goto out_bad;
E
Eric Dumazet 已提交
349 350 351 352
		/*
		 * Only look inside GRE if version zero and no
		 * routing
		 */
353 354 355 356 357 358
		if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
			break;

		proto = hdr->proto;
		nhoff += 4;
		if (hdr->flags & GRE_CSUM)
E
Eric Dumazet 已提交
359
			nhoff += 4;
T
Tom Herbert 已提交
360 361 362 363 364 365 366 367
		if (hdr->flags & GRE_KEY) {
			const __be32 *keyid;
			__be32 _keyid;

			keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
						     data, hlen, &_keyid);

			if (!keyid)
368
				goto out_bad;
T
Tom Herbert 已提交
369

370 371
			if (dissector_uses_key(flow_dissector,
					       FLOW_DISSECTOR_KEY_GRE_KEYID)) {
T
Tom Herbert 已提交
372 373 374 375 376
				key_keyid = skb_flow_dissector_target(flow_dissector,
								      FLOW_DISSECTOR_KEY_GRE_KEYID,
								      target_container);
				key_keyid->keyid = *keyid;
			}
377
			nhoff += 4;
T
Tom Herbert 已提交
378
		}
379 380 381 382 383 384 385 386 387 388
		if (hdr->flags & GRE_SEQ)
			nhoff += 4;
		if (proto == htons(ETH_P_TEB)) {
			const struct ethhdr *eth;
			struct ethhdr _eth;

			eth = __skb_header_pointer(skb, nhoff,
						   sizeof(_eth),
						   data, hlen, &_eth);
			if (!eth)
389
				goto out_bad;
390 391
			proto = eth->h_proto;
			nhoff += sizeof(*eth);
392 393 394 395 396 397 398

			/* Cap headers that we access via pointers at the
			 * end of the Ethernet header as our maximum alignment
			 * at that point is only 2 bytes.
			 */
			if (NET_IP_ALIGN)
				hlen = nhoff;
E
Eric Dumazet 已提交
399
		}
400

401
		key_control->flags |= FLOW_DIS_ENCAPSULATION;
402 403 404
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			goto out_good;

405
		goto again;
E
Eric Dumazet 已提交
406
	}
407 408 409 410 411 412 413 414 415 416
	case NEXTHDR_HOP:
	case NEXTHDR_ROUTING:
	case NEXTHDR_DEST: {
		u8 _opthdr[2], *opthdr;

		if (proto != htons(ETH_P_IPV6))
			break;

		opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
					      data, hlen, &_opthdr);
417
		if (!opthdr)
418
			goto out_bad;
419

420 421
		ip_proto = opthdr[0];
		nhoff += (opthdr[1] + 1) << 3;
422 423 424

		goto ip_proto_again;
	}
425 426 427 428 429 430 431 432 433 434 435 436
	case NEXTHDR_FRAGMENT: {
		struct frag_hdr _fh, *fh;

		if (proto != htons(ETH_P_IPV6))
			break;

		fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
					  data, hlen, &_fh);

		if (!fh)
			goto out_bad;

437
		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
438 439

		nhoff += sizeof(_fh);
440
		ip_proto = fh->nexthdr;
441 442

		if (!(fh->frag_off & htons(IP6_OFFSET))) {
443
			key_control->flags |= FLOW_DIS_FIRST_FRAG;
444
			if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
445 446 447 448
				goto ip_proto_again;
		}
		goto out_good;
	}
E
Eric Dumazet 已提交
449
	case IPPROTO_IPIP:
T
Tom Herbert 已提交
450
		proto = htons(ETH_P_IP);
451

452
		key_control->flags |= FLOW_DIS_ENCAPSULATION;
453 454 455
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			goto out_good;

T
Tom Herbert 已提交
456
		goto ip;
457 458
	case IPPROTO_IPV6:
		proto = htons(ETH_P_IPV6);
459

460
		key_control->flags |= FLOW_DIS_ENCAPSULATION;
461 462 463
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			goto out_good;

464
		goto ipv6;
465 466 467
	case IPPROTO_MPLS:
		proto = htons(ETH_P_MPLS_UC);
		goto mpls;
E
Eric Dumazet 已提交
468 469 470 471
	default:
		break;
	}

472 473
	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_PORTS)) {
474 475 476 477 478 479
		key_ports = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_PORTS,
						      target_container);
		key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
							data, hlen);
	}
480

481 482 483 484 485 486 487 488 489
out_good:
	ret = true;

out_bad:
	key_basic->n_proto = proto;
	key_basic->ip_proto = ip_proto;
	key_control->thoff = (u16)nhoff;

	return ret;
E
Eric Dumazet 已提交
490
}
491
EXPORT_SYMBOL(__skb_flow_dissect);
492 493

static u32 hashrnd __read_mostly;
494 495 496 497 498
/* Fill hashrnd through net_get_random_once(). */
static __always_inline void __flow_hash_secret_init(void)
{
	net_get_random_once(&hashrnd, sizeof(hashrnd));
}

499 500
/* Hash @length 32-bit words starting at @words with jhash2(), using
 * @keyval as the initial value.
 */
static __always_inline u32 __flow_hash_words(const u32 *words, u32 length,
					     u32 keyval)
{
	u32 digest = jhash2(words, length, keyval);

	return digest;
}

505
static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow)
506
{
507 508
	const void *p = flow;

509
	BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
510
	return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET);
511 512
}

513
static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
514
{
515
	size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
516
	BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
517 518 519 520 521 522 523 524 525 526
	BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
		     sizeof(*flow) - sizeof(flow->addrs));

	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		diff -= sizeof(flow->addrs.v4addrs);
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		diff -= sizeof(flow->addrs.v6addrs);
		break;
T
Tom Herbert 已提交
527 528 529
	case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
		diff -= sizeof(flow->addrs.tipcaddrs);
		break;
530 531 532 533 534 535 536 537 538 539 540 541
	}
	return (sizeof(*flow) - diff) / sizeof(u32);
}

__be32 flow_get_u32_src(const struct flow_keys *flow)
{
	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		return flow->addrs.v4addrs.src;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		return (__force __be32)ipv6_addr_hash(
			&flow->addrs.v6addrs.src);
T
Tom Herbert 已提交
542 543
	case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
		return flow->addrs.tipcaddrs.srcnode;
544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594
	default:
		return 0;
	}
}
EXPORT_SYMBOL(flow_get_u32_src);

/* Return a 32-bit value representing the destination address of @flow: the
 * IPv4 destination address or ipv6_addr_hash() of the IPv6 destination
 * address.  Returns 0 for any other address type.
 */
__be32 flow_get_u32_dst(const struct flow_keys *flow)
{
	if (flow->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
		return flow->addrs.v4addrs.dst;

	if (flow->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS)
		return (__force __be32)ipv6_addr_hash(
			&flow->addrs.v6addrs.dst);

	return 0;
}
EXPORT_SYMBOL(flow_get_u32_dst);

static inline void __flow_hash_consistentify(struct flow_keys *keys)
{
	int addr_diff, i;

	switch (keys->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		addr_diff = (__force u32)keys->addrs.v4addrs.dst -
			    (__force u32)keys->addrs.v4addrs.src;
		if ((addr_diff < 0) ||
		    (addr_diff == 0 &&
		     ((__force u16)keys->ports.dst <
		      (__force u16)keys->ports.src))) {
			swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		addr_diff = memcmp(&keys->addrs.v6addrs.dst,
				   &keys->addrs.v6addrs.src,
				   sizeof(keys->addrs.v6addrs.dst));
		if ((addr_diff < 0) ||
		    (addr_diff == 0 &&
		     ((__force u16)keys->ports.dst <
		      (__force u16)keys->ports.src))) {
			for (i = 0; i < 4; i++)
				swap(keys->addrs.v6addrs.src.s6_addr32[i],
				     keys->addrs.v6addrs.dst.s6_addr32[i]);
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	}
595 596
}

T
Tom Herbert 已提交
597
/* Canonicalise @keys in place, then hash the hashable region of the keys
 * with @keyval.  A computed hash of 0 is replaced by 1, so the result is
 * never 0.
 */
static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
{
	const u32 *first_word;
	size_t nwords;
	u32 result;

	__flow_hash_consistentify(keys);

	first_word = flow_keys_hash_start(keys);
	nwords = flow_keys_hash_length(keys);
	result = __flow_hash_words(first_word, nwords, keyval);

	return result ? result : 1;
}

u32 flow_hash_from_keys(struct flow_keys *keys)
{
T
Tom Herbert 已提交
613 614
	__flow_hash_secret_init();
	return __flow_hash_from_keys(keys, hashrnd);
615 616 617
}
EXPORT_SYMBOL(flow_hash_from_keys);

T
Tom Herbert 已提交
618 619 620
/* Dissect @skb into @keys, stopping at the IPv6 flow label when one is
 * found, and return the hash of the resulting keys computed with @keyval.
 */
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
				  struct flow_keys *keys, u32 keyval)
{
	const unsigned int dissect_flags = FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL;

	skb_flow_dissect_flow_keys(skb, keys, dissect_flags);

	return __flow_hash_from_keys(keys, keyval);
}

T
Tom Herbert 已提交
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645
/* Layout that make_flow_keys_digest() overlays on a struct flow_keys_digest.
 * Each field is filled from the matching member of struct flow_keys.
 */
struct _flow_keys_digest_data {
	__be16	n_proto;	/* from flow->basic.n_proto */
	u8	ip_proto;	/* from flow->basic.ip_proto */
	u8	padding;	/* never assigned; left zero by the memset */
	__be32	ports;		/* from flow->ports.ports */
	__be32	src;		/* from flow->addrs.v4addrs.src */
	__be32	dst;		/* from flow->addrs.v4addrs.dst */
};

/* Build a compact digest of @flow in @digest.
 *
 * @digest is zeroed and then filled with the protocol numbers, the port
 * word and the v4addrs source/destination members of @flow.  The address
 * members are read regardless of flow->control.addr_type.
 */
void make_flow_keys_digest(struct flow_keys_digest *digest,
			   const struct flow_keys *flow)
{
	struct _flow_keys_digest_data *out;

	/* The overlay structure must fit inside the public digest. */
	BUILD_BUG_ON(sizeof(*out) > sizeof(*digest));

	memset(digest, 0, sizeof(*digest));
	out = (struct _flow_keys_digest_data *)digest;

	out->n_proto = flow->basic.n_proto;
	out->ip_proto = flow->basic.ip_proto;
	out->ports = flow->ports.ports;
	out->src = flow->addrs.v4addrs.src;
	out->dst = flow->addrs.v4addrs.dst;
}
EXPORT_SYMBOL(make_flow_keys_digest);

654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670
static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;

u32 __skb_get_hash_symmetric(struct sk_buff *skb)
{
	struct flow_keys keys;

	__flow_hash_secret_init();

	memset(&keys, 0, sizeof(keys));
	__skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
			   NULL, 0, 0, 0,
			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);

	return __flow_hash_from_keys(&keys, hashrnd);
}
EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);

671 672 673 674 675
/**
 * __skb_get_hash: calculate a flow hash
 * @skb: sk_buff to calculate flow hash from
 *
 * This function calculates a flow hash based on src/dst addresses
676 677
 * and src/dst port numbers.  Sets hash in skb to non-zero hash value
 * on success, zero indicates no valid hash.  Also, sets l4_hash in skb
678 679
 * if hash is a canonical 4-tuple hash over transport ports.
 */
680
void __skb_get_hash(struct sk_buff *skb)
681 682 683
{
	struct flow_keys keys;

T
Tom Herbert 已提交
684 685
	__flow_hash_secret_init();

686
	__skb_set_sw_hash(skb, ___skb_get_hash(skb, &keys, hashrnd),
687
			  flow_keys_have_l4(&keys));
688
}
689
EXPORT_SYMBOL(__skb_get_hash);
690

T
Tom Herbert 已提交
691 692 693 694 695 696 697 698
/* Return a flow hash of @skb computed with the caller-supplied @perturb
 * value instead of the file-wide secret.  The hash is not stored in @skb.
 */
__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
{
	struct flow_keys keys;
	__u32 hash;

	hash = ___skb_get_hash(skb, &keys, perturb);

	return hash;
}
EXPORT_SYMBOL(skb_get_hash_perturb);

699
__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
{
	struct flow_keys keys;

	memset(&keys, 0, sizeof(keys));

	memcpy(&keys.addrs.v6addrs.src, &fl6->saddr,
	       sizeof(keys.addrs.v6addrs.src));
	memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr,
	       sizeof(keys.addrs.v6addrs.dst));
	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys.ports.src = fl6->fl6_sport;
	keys.ports.dst = fl6->fl6_dport;
	keys.keyid.keyid = fl6->fl6_gre_key;
	keys.tags.flow_label = (__force u32)fl6->flowlabel;
	keys.basic.ip_proto = fl6->flowi6_proto;

716 717
	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
			  flow_keys_have_l4(&keys));
718 719 720 721 722

	return skb->hash;
}
EXPORT_SYMBOL(__skb_get_hash_flowi6);

723
__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
724 725 726 727 728 729 730 731 732 733 734 735 736
{
	struct flow_keys keys;

	memset(&keys, 0, sizeof(keys));

	keys.addrs.v4addrs.src = fl4->saddr;
	keys.addrs.v4addrs.dst = fl4->daddr;
	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	keys.ports.src = fl4->fl4_sport;
	keys.ports.dst = fl4->fl4_dport;
	keys.keyid.keyid = fl4->fl4_gre_key;
	keys.basic.ip_proto = fl4->flowi4_proto;

737 738
	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
			  flow_keys_have_l4(&keys));
739 740 741 742 743

	return skb->hash;
}
EXPORT_SYMBOL(__skb_get_hash_flowi4);

744 745
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
		   const struct flow_keys *keys, int hlen)
746
{
747
	u32 poff = keys->control.thoff;
748

749 750 751 752 753
	/* skip L4 headers for fragments after the first */
	if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) &&
	    !(keys->control.flags & FLOW_DIS_FIRST_FRAG))
		return poff;

754
	switch (keys->basic.ip_proto) {
755
	case IPPROTO_TCP: {
756 757 758
		/* access doff as u8 to avoid unaligned access */
		const u8 *doff;
		u8 _doff;
759

760 761 762
		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
					    data, hlen, &_doff);
		if (!doff)
763 764
			return poff;

765
		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794
		break;
	}
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		poff += sizeof(struct udphdr);
		break;
	/* For the rest, we do not really care about header
	 * extensions at this point for now.
	 */
	case IPPROTO_ICMP:
		poff += sizeof(struct icmphdr);
		break;
	case IPPROTO_ICMPV6:
		poff += sizeof(struct icmp6hdr);
		break;
	case IPPROTO_IGMP:
		poff += sizeof(struct igmphdr);
		break;
	case IPPROTO_DCCP:
		poff += sizeof(struct dccp_hdr);
		break;
	case IPPROTO_SCTP:
		poff += sizeof(struct sctphdr);
		break;
	}

	return poff;
}

795 796 797 798 799 800
/**
 * skb_get_poff - get the offset to the payload
 * @skb: sk_buff to get the payload offset from
 *
 * The function will get the offset to the payload as far as it could
 * be dissected.  The main user is currently BPF, so that we can dynamically
801 802 803 804 805 806 807
 * truncate packets without needing to push actual payload to the user
 * space and can analyze headers only, instead.
 */
u32 skb_get_poff(const struct sk_buff *skb)
{
	struct flow_keys keys;

808
	if (!skb_flow_dissect_flow_keys(skb, &keys, 0))
809 810 811 812
		return 0;

	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
}
813

814
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832
{
	memset(keys, 0, sizeof(*keys));

	memcpy(&keys->addrs.v6addrs.src, &fl6->saddr,
	    sizeof(keys->addrs.v6addrs.src));
	memcpy(&keys->addrs.v6addrs.dst, &fl6->daddr,
	    sizeof(keys->addrs.v6addrs.dst));
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys->ports.src = fl6->fl6_sport;
	keys->ports.dst = fl6->fl6_dport;
	keys->keyid.keyid = fl6->fl6_gre_key;
	keys->tags.flow_label = (__force u32)fl6->flowlabel;
	keys->basic.ip_proto = fl6->flowi6_proto;

	return flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(__get_hash_from_flowi6);

833
__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
834 835 836 837 838 839 840 841 842 843 844 845 846 847 848
{
	memset(keys, 0, sizeof(*keys));

	keys->addrs.v4addrs.src = fl4->saddr;
	keys->addrs.v4addrs.dst = fl4->daddr;
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	keys->ports.src = fl4->fl4_sport;
	keys->ports.dst = fl4->fl4_dport;
	keys->keyid.keyid = fl4->fl4_gre_key;
	keys->basic.ip_proto = fl4->flowi4_proto;

	return flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(__get_hash_from_flowi4);

849
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
850 851 852 853
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
854 855 856 857 858 859
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
860 861 862 863 864
		.offset = offsetof(struct flow_keys, addrs.v4addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v6addrs),
865
	},
T
Tom Herbert 已提交
866 867 868 869
	{
		.key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.tipcaddrs),
	},
870 871 872 873
	{
		.key_id = FLOW_DISSECTOR_KEY_PORTS,
		.offset = offsetof(struct flow_keys, ports),
	},
T
Tom Herbert 已提交
874 875 876 877
	{
		.key_id = FLOW_DISSECTOR_KEY_VLANID,
		.offset = offsetof(struct flow_keys, tags),
	},
878 879 880 881
	{
		.key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
		.offset = offsetof(struct flow_keys, tags),
	},
T
Tom Herbert 已提交
882 883 884 885
	{
		.key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
		.offset = offsetof(struct flow_keys, keyid),
	},
886 887
};

888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910
/* Key list of the symmetric dissector: control, basic, IPv4/IPv6 addresses
 * and ports only, each mapped to its member of struct flow_keys.
 */
static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
	{ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
	  .offset = offsetof(struct flow_keys, control) },
	{ .key_id = FLOW_DISSECTOR_KEY_BASIC,
	  .offset = offsetof(struct flow_keys, basic) },
	{ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
	  .offset = offsetof(struct flow_keys, addrs.v4addrs) },
	{ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
	  .offset = offsetof(struct flow_keys, addrs.v6addrs) },
	{ .key_id = FLOW_DISSECTOR_KEY_PORTS,
	  .offset = offsetof(struct flow_keys, ports) },
};

911
static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
912 913 914 915
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
};

struct flow_dissector flow_keys_dissector __read_mostly;
EXPORT_SYMBOL(flow_keys_dissector);

struct flow_dissector flow_keys_buf_dissector __read_mostly;

/* Initialise the three dissectors defined in this file (default, symmetric
 * and buffer) from their respective key lists.  Always returns 0.
 */
static int __init init_default_flow_dissectors(void)
{
	/* Default dissector. */
	skb_flow_dissector_init(&flow_keys_dissector,
				flow_keys_dissector_keys,
				ARRAY_SIZE(flow_keys_dissector_keys));

	/* Symmetric dissector used by __skb_get_hash_symmetric(). */
	skb_flow_dissector_init(&flow_keys_dissector_symmetric,
				flow_keys_dissector_symmetric_keys,
				ARRAY_SIZE(flow_keys_dissector_symmetric_keys));

	/* Buffer dissector: control and basic keys only. */
	skb_flow_dissector_init(&flow_keys_buf_dissector,
				flow_keys_buf_dissector_keys,
				ARRAY_SIZE(flow_keys_buf_dissector_keys));

	return 0;
}

late_initcall_sync(init_default_flow_dissectors);