flow_dissector.c 26.7 KB
Newer Older
1
#include <linux/kernel.h>
E
Eric Dumazet 已提交
2
#include <linux/skbuff.h>
3
#include <linux/export.h>
E
Eric Dumazet 已提交
4 5 6 7
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
E
Eric Dumazet 已提交
8
#include <net/ipv6.h>
9 10
#include <net/gre.h>
#include <net/pptp.h>
11 12 13 14
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
#include <linux/dccp.h>
E
Eric Dumazet 已提交
15 16 17
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
18
#include <linux/stddef.h>
19
#include <linux/if_ether.h>
20
#include <linux/mpls.h>
21
#include <net/flow_dissector.h>
22
#include <scsi/fc/fc_fcoe.h>
E
Eric Dumazet 已提交
23

24 25
static void dissector_set_key(struct flow_dissector *flow_dissector,
			      enum flow_dissector_key_id key_id)
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
{
	flow_dissector->used_keys |= (1 << key_id);
}

void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
			     const struct flow_dissector_key *key,
			     unsigned int key_count)
{
	unsigned int i;

	memset(flow_dissector, 0, sizeof(*flow_dissector));

	for (i = 0; i < key_count; i++, key++) {
		/* User should make sure that every key target offset is withing
		 * boundaries of unsigned short.
		 */
		BUG_ON(key->offset > USHRT_MAX);
43 44
		BUG_ON(dissector_uses_key(flow_dissector,
					  key->key_id));
45

46
		dissector_set_key(flow_dissector, key->key_id);
47 48 49
		flow_dissector->offset[key->key_id] = key->offset;
	}

50 51
	/* Ensure that the dissector always includes control and basic key.
	 * That way we are able to avoid handling lack of these in fast path.
52
	 */
53 54 55 56
	BUG_ON(!dissector_uses_key(flow_dissector,
				   FLOW_DISSECTOR_KEY_CONTROL));
	BUG_ON(!dissector_uses_key(flow_dissector,
				   FLOW_DISSECTOR_KEY_BASIC));
57 58 59
}
EXPORT_SYMBOL(skb_flow_dissector_init);

S
Simon Horman 已提交
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
/**
 * skb_flow_get_be16 - extract a be16 entity
 * @skb: sk_buff to extract from
 * @poff: offset to extract at
 * @data: raw buffer pointer to the packet
 * @hlen: packet header length
 *
 * Tries to read a big-endian 16-bit value at offset @poff.
 *
 * Return: the value read, or 0 if __skb_header_pointer() could not
 * provide the two bytes.
 */
__be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, void *data,
			 int hlen)
{
	__be16 scratch;
	const __be16 *val;

	val = __skb_header_pointer(skb, poff, sizeof(scratch), data, hlen,
				   &scratch);

	return val ? *val : 0;
}

82
/**
83 84
 * __skb_flow_get_ports - extract the upper layer ports and return them
 * @skb: sk_buff to extract the ports from
85 86
 * @thoff: transport header offset
 * @ip_proto: protocol for which to get port offset
87 88
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
89 90 91 92
 *
 * The function will try to retrieve the ports at offset thoff + poff where poff
 * is the protocol port offset returned from proto_ports_offset
 */
93 94
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
			    void *data, int hlen)
95 96 97
{
	int poff = proto_ports_offset(ip_proto);

98 99 100 101 102
	if (!data) {
		data = skb->data;
		hlen = skb_headlen(skb);
	}

103 104 105
	if (poff >= 0) {
		__be32 *ports, _ports;

106 107
		ports = __skb_header_pointer(skb, thoff + poff,
					     sizeof(_ports), data, hlen, &_ports);
108 109 110 111 112 113
		if (ports)
			return *ports;
	}

	return 0;
}
114
EXPORT_SYMBOL(__skb_flow_get_ports);
115

116 117 118
/**
 * __skb_flow_dissect - extract the flow_keys struct and return it
 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
119 120
 * @flow_dissector: list of keys to dissect
 * @target_container: target structure to put dissected values into
121 122 123 124 125
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
126 127 128 129 130
 * The function will try to retrieve individual keys into target specified
 * by flow_dissector from either the skbuff or a raw buffer specified by the
 * rest parameters.
 *
 * Caller must take care of zeroing target container memory.
131
 */
132 133 134
bool __skb_flow_dissect(const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container,
135 136
			void *data, __be16 proto, int nhoff, int hlen,
			unsigned int flags)
E
Eric Dumazet 已提交
137
{
138
	struct flow_dissector_key_control *key_control;
139 140 141
	struct flow_dissector_key_basic *key_basic;
	struct flow_dissector_key_addrs *key_addrs;
	struct flow_dissector_key_ports *key_ports;
S
Simon Horman 已提交
142
	struct flow_dissector_key_icmp *key_icmp;
T
Tom Herbert 已提交
143
	struct flow_dissector_key_tags *key_tags;
144
	struct flow_dissector_key_vlan *key_vlan;
T
Tom Herbert 已提交
145
	struct flow_dissector_key_keyid *key_keyid;
146
	bool skip_vlan = false;
147
	u8 ip_proto = 0;
148
	bool ret;
E
Eric Dumazet 已提交
149

150 151
	if (!data) {
		data = skb->data;
152 153
		proto = skb_vlan_tag_present(skb) ?
			 skb->vlan_proto : skb->protocol;
154
		nhoff = skb_network_offset(skb);
155 156 157
		hlen = skb_headlen(skb);
	}

158 159 160 161 162 163 164
	/* It is ensured by skb_flow_dissector_init() that control key will
	 * be always present.
	 */
	key_control = skb_flow_dissector_target(flow_dissector,
						FLOW_DISSECTOR_KEY_CONTROL,
						target_container);

165 166 167 168 169 170
	/* It is ensured by skb_flow_dissector_init() that basic key will
	 * be always present.
	 */
	key_basic = skb_flow_dissector_target(flow_dissector,
					      FLOW_DISSECTOR_KEY_BASIC,
					      target_container);
E
Eric Dumazet 已提交
171

172 173
	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
174 175 176 177 178 179 180 181 182
		struct ethhdr *eth = eth_hdr(skb);
		struct flow_dissector_key_eth_addrs *key_eth_addrs;

		key_eth_addrs = skb_flow_dissector_target(flow_dissector,
							  FLOW_DISSECTOR_KEY_ETH_ADDRS,
							  target_container);
		memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
	}

E
Eric Dumazet 已提交
183 184
again:
	switch (proto) {
185
	case htons(ETH_P_IP): {
E
Eric Dumazet 已提交
186 187 188
		const struct iphdr *iph;
		struct iphdr _iph;
ip:
189
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
190
		if (!iph || iph->ihl < 5)
191
			goto out_bad;
192
		nhoff += iph->ihl * 4;
E
Eric Dumazet 已提交
193

194 195
		ip_proto = iph->protocol;

196 197 198 199 200 201 202 203 204 205
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
							      target_container);

			memcpy(&key_addrs->v4addrs, &iph->saddr,
			       sizeof(key_addrs->v4addrs));
			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		}
206 207

		if (ip_is_fragment(iph)) {
208
			key_control->flags |= FLOW_DIS_IS_FRAGMENT;
209 210 211 212

			if (iph->frag_off & htons(IP_OFFSET)) {
				goto out_good;
			} else {
213
				key_control->flags |= FLOW_DIS_FIRST_FRAG;
214 215 216 217 218
				if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
					goto out_good;
			}
		}

219 220 221
		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
			goto out_good;

E
Eric Dumazet 已提交
222 223
		break;
	}
224
	case htons(ETH_P_IPV6): {
E
Eric Dumazet 已提交
225 226
		const struct ipv6hdr *iph;
		struct ipv6hdr _iph;
227

E
Eric Dumazet 已提交
228
ipv6:
229
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
E
Eric Dumazet 已提交
230
		if (!iph)
231
			goto out_bad;
E
Eric Dumazet 已提交
232 233 234

		ip_proto = iph->nexthdr;
		nhoff += sizeof(struct ipv6hdr);
235

236 237
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
238 239 240
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
							      target_container);
241

242 243
			memcpy(&key_addrs->v6addrs, &iph->saddr,
			       sizeof(key_addrs->v6addrs));
244
			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
245
		}
246

247 248 249 250 251 252
		if ((dissector_uses_key(flow_dissector,
					FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
		     (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
		    ip6_flowlabel(iph)) {
			__be32 flow_label = ip6_flowlabel(iph);

253 254
			if (dissector_uses_key(flow_dissector,
					       FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
255 256 257 258
				key_tags = skb_flow_dissector_target(flow_dissector,
								     FLOW_DISSECTOR_KEY_FLOW_LABEL,
								     target_container);
				key_tags->flow_label = ntohl(flow_label);
259
			}
260 261
			if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
				goto out_good;
262 263
		}

264 265 266
		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
			goto out_good;

E
Eric Dumazet 已提交
267 268
		break;
	}
269 270
	case htons(ETH_P_8021AD):
	case htons(ETH_P_8021Q): {
E
Eric Dumazet 已提交
271
		const struct vlan_hdr *vlan;
272 273
		struct vlan_hdr _vlan;
		bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
E
Eric Dumazet 已提交
274

275
		if (vlan_tag_present)
276 277
			proto = skb->protocol;

278
		if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
279 280 281 282 283 284 285 286 287
			vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
						    data, hlen, &_vlan);
			if (!vlan)
				goto out_bad;
			proto = vlan->h_vlan_encapsulated_proto;
			nhoff += sizeof(*vlan);
			if (skip_vlan)
				goto again;
		}
E
Eric Dumazet 已提交
288

289
		skip_vlan = true;
290
		if (dissector_uses_key(flow_dissector,
291 292 293
				       FLOW_DISSECTOR_KEY_VLAN)) {
			key_vlan = skb_flow_dissector_target(flow_dissector,
							     FLOW_DISSECTOR_KEY_VLAN,
T
Tom Herbert 已提交
294 295
							     target_container);

296
			if (vlan_tag_present) {
297 298 299 300 301
				key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
				key_vlan->vlan_priority =
					(skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
			} else {
				key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
302
					VLAN_VID_MASK;
303 304 305 306
				key_vlan->vlan_priority =
					(ntohs(vlan->h_vlan_TCI) &
					 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
			}
T
Tom Herbert 已提交
307 308
		}

E
Eric Dumazet 已提交
309 310
		goto again;
	}
311
	case htons(ETH_P_PPP_SES): {
E
Eric Dumazet 已提交
312 313 314 315
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;
316
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
E
Eric Dumazet 已提交
317
		if (!hdr)
318
			goto out_bad;
E
Eric Dumazet 已提交
319 320 321
		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
322
		case htons(PPP_IP):
E
Eric Dumazet 已提交
323
			goto ip;
324
		case htons(PPP_IPV6):
E
Eric Dumazet 已提交
325 326
			goto ipv6;
		default:
327
			goto out_bad;
E
Eric Dumazet 已提交
328 329
		}
	}
E
Erik Hugne 已提交
330 331 332 333 334 335 336
	case htons(ETH_P_TIPC): {
		struct {
			__be32 pre[3];
			__be32 srcnode;
		} *hdr, _hdr;
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
337
			goto out_bad;
338

339 340
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
341
			key_addrs = skb_flow_dissector_target(flow_dissector,
T
Tom Herbert 已提交
342
							      FLOW_DISSECTOR_KEY_TIPC_ADDRS,
343
							      target_container);
T
Tom Herbert 已提交
344 345
			key_addrs->tipcaddrs.srcnode = hdr->srcnode;
			key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
346
		}
347
		goto out_good;
E
Erik Hugne 已提交
348
	}
349 350 351 352 353 354 355 356

	case htons(ETH_P_MPLS_UC):
	case htons(ETH_P_MPLS_MC): {
		struct mpls_label *hdr, _hdr[2];
mpls:
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
					   hlen, &_hdr);
		if (!hdr)
357
			goto out_bad;
358

359 360
		if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
		     MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
361 362
			if (dissector_uses_key(flow_dissector,
					       FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
363 364 365 366 367 368 369
				key_keyid = skb_flow_dissector_target(flow_dissector,
								      FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
								      target_container);
				key_keyid->keyid = hdr[1].entry &
					htonl(MPLS_LS_LABEL_MASK);
			}

370
			goto out_good;
371 372
		}

373
		goto out_good;
374 375
	}

376
	case htons(ETH_P_FCOE):
377 378 379 380 381
		if ((hlen - nhoff) < FCOE_HEADER_LEN)
			goto out_bad;

		nhoff += FCOE_HEADER_LEN;
		goto out_good;
E
Eric Dumazet 已提交
382
	default:
383
		goto out_bad;
E
Eric Dumazet 已提交
384 385
	}

386
ip_proto_again:
E
Eric Dumazet 已提交
387 388
	switch (ip_proto) {
	case IPPROTO_GRE: {
389 390 391
		struct gre_base_hdr *hdr, _hdr;
		u16 gre_ver;
		int offset = 0;
E
Eric Dumazet 已提交
392

393
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
E
Eric Dumazet 已提交
394
		if (!hdr)
395
			goto out_bad;
396 397 398

		/* Only look inside GRE without routing */
		if (hdr->flags & GRE_ROUTING)
399 400
			break;

401 402 403 404 405 406 407 408 409 410 411 412 413 414
		/* Only look inside GRE for version 0 and 1 */
		gre_ver = ntohs(hdr->flags & GRE_VERSION);
		if (gre_ver > 1)
			break;

		proto = hdr->protocol;
		if (gre_ver) {
			/* Version1 must be PPTP, and check the flags */
			if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
				break;
		}

		offset += sizeof(struct gre_base_hdr);

415
		if (hdr->flags & GRE_CSUM)
416 417 418
			offset += sizeof(((struct gre_full_hdr *)0)->csum) +
				  sizeof(((struct gre_full_hdr *)0)->reserved1);

T
Tom Herbert 已提交
419 420 421 422
		if (hdr->flags & GRE_KEY) {
			const __be32 *keyid;
			__be32 _keyid;

423
			keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
T
Tom Herbert 已提交
424 425
						     data, hlen, &_keyid);
			if (!keyid)
426
				goto out_bad;
T
Tom Herbert 已提交
427

428 429
			if (dissector_uses_key(flow_dissector,
					       FLOW_DISSECTOR_KEY_GRE_KEYID)) {
T
Tom Herbert 已提交
430 431 432
				key_keyid = skb_flow_dissector_target(flow_dissector,
								      FLOW_DISSECTOR_KEY_GRE_KEYID,
								      target_container);
433 434 435 436
				if (gre_ver == 0)
					key_keyid->keyid = *keyid;
				else
					key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
T
Tom Herbert 已提交
437
			}
438
			offset += sizeof(((struct gre_full_hdr *)0)->key);
T
Tom Herbert 已提交
439
		}
440

441
		if (hdr->flags & GRE_SEQ)
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473
			offset += sizeof(((struct pptp_gre_header *)0)->seq);

		if (gre_ver == 0) {
			if (proto == htons(ETH_P_TEB)) {
				const struct ethhdr *eth;
				struct ethhdr _eth;

				eth = __skb_header_pointer(skb, nhoff + offset,
							   sizeof(_eth),
							   data, hlen, &_eth);
				if (!eth)
					goto out_bad;
				proto = eth->h_proto;
				offset += sizeof(*eth);

				/* Cap headers that we access via pointers at the
				 * end of the Ethernet header as our maximum alignment
				 * at that point is only 2 bytes.
				 */
				if (NET_IP_ALIGN)
					hlen = (nhoff + offset);
			}
		} else { /* version 1, must be PPTP */
			u8 _ppp_hdr[PPP_HDRLEN];
			u8 *ppp_hdr;

			if (hdr->flags & GRE_ACK)
				offset += sizeof(((struct pptp_gre_header *)0)->ack);

			ppp_hdr = skb_header_pointer(skb, nhoff + offset,
						     sizeof(_ppp_hdr), _ppp_hdr);
			if (!ppp_hdr)
474
				goto out_bad;
475 476 477 478 479 480 481 482 483 484 485 486 487 488

			switch (PPP_PROTOCOL(ppp_hdr)) {
			case PPP_IP:
				proto = htons(ETH_P_IP);
				break;
			case PPP_IPV6:
				proto = htons(ETH_P_IPV6);
				break;
			default:
				/* Could probably catch some more like MPLS */
				break;
			}

			offset += PPP_HDRLEN;
E
Eric Dumazet 已提交
489
		}
490

491
		nhoff += offset;
492
		key_control->flags |= FLOW_DIS_ENCAPSULATION;
493 494 495
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			goto out_good;

496
		goto again;
E
Eric Dumazet 已提交
497
	}
498 499 500 501 502 503 504 505 506 507
	case NEXTHDR_HOP:
	case NEXTHDR_ROUTING:
	case NEXTHDR_DEST: {
		u8 _opthdr[2], *opthdr;

		if (proto != htons(ETH_P_IPV6))
			break;

		opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
					      data, hlen, &_opthdr);
508
		if (!opthdr)
509
			goto out_bad;
510

511 512
		ip_proto = opthdr[0];
		nhoff += (opthdr[1] + 1) << 3;
513 514 515

		goto ip_proto_again;
	}
516 517 518 519 520 521 522 523 524 525 526 527
	case NEXTHDR_FRAGMENT: {
		struct frag_hdr _fh, *fh;

		if (proto != htons(ETH_P_IPV6))
			break;

		fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
					  data, hlen, &_fh);

		if (!fh)
			goto out_bad;

528
		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
529 530

		nhoff += sizeof(_fh);
531
		ip_proto = fh->nexthdr;
532 533

		if (!(fh->frag_off & htons(IP6_OFFSET))) {
534
			key_control->flags |= FLOW_DIS_FIRST_FRAG;
535
			if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
536 537 538 539
				goto ip_proto_again;
		}
		goto out_good;
	}
E
Eric Dumazet 已提交
540
	case IPPROTO_IPIP:
T
Tom Herbert 已提交
541
		proto = htons(ETH_P_IP);
542

543
		key_control->flags |= FLOW_DIS_ENCAPSULATION;
544 545 546
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			goto out_good;

T
Tom Herbert 已提交
547
		goto ip;
548 549
	case IPPROTO_IPV6:
		proto = htons(ETH_P_IPV6);
550

551
		key_control->flags |= FLOW_DIS_ENCAPSULATION;
552 553 554
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			goto out_good;

555
		goto ipv6;
556 557 558
	case IPPROTO_MPLS:
		proto = htons(ETH_P_MPLS_UC);
		goto mpls;
E
Eric Dumazet 已提交
559 560 561 562
	default:
		break;
	}

563 564
	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_PORTS)) {
565 566 567 568 569 570
		key_ports = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_PORTS,
						      target_container);
		key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
							data, hlen);
	}
571

S
Simon Horman 已提交
572 573 574 575 576 577 578 579
	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_ICMP)) {
		key_icmp = skb_flow_dissector_target(flow_dissector,
						     FLOW_DISSECTOR_KEY_ICMP,
						     target_container);
		key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
	}

580 581 582
out_good:
	ret = true;

583 584
	key_control->thoff = (u16)nhoff;
out:
585 586 587 588
	key_basic->n_proto = proto;
	key_basic->ip_proto = ip_proto;

	return ret;
589 590 591 592 593

out_bad:
	ret = false;
	key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
	goto out;
E
Eric Dumazet 已提交
594
}
595
EXPORT_SYMBOL(__skb_flow_dissect);
596 597

static u32 hashrnd __read_mostly;
598 599 600 601 602
/* Fill the file-scope hash seed hashrnd through net_get_random_once().
 * NOTE(review): presumably only the first call actually generates the
 * random value - confirm against net_get_random_once().
 */
static __always_inline void __flow_hash_secret_init(void)
{
	net_get_random_once(&hashrnd, sizeof(hashrnd));
}

603 604
/* Hash @length 32-bit words starting at @words with jhash2(), using
 * @keyval as the initial value.
 */
static __always_inline u32
__flow_hash_words(const u32 *words, u32 length, u32 keyval)
{
	u32 digest = jhash2(words, length, keyval);

	return digest;
}

609
static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow)
610
{
611 612
	const void *p = flow;

613
	BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
614
	return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET);
615 616
}

617
static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
618
{
619
	size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
620
	BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
621 622 623 624 625 626 627 628 629 630
	BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
		     sizeof(*flow) - sizeof(flow->addrs));

	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		diff -= sizeof(flow->addrs.v4addrs);
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		diff -= sizeof(flow->addrs.v6addrs);
		break;
T
Tom Herbert 已提交
631 632 633
	case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
		diff -= sizeof(flow->addrs.tipcaddrs);
		break;
634 635 636 637 638 639 640 641 642 643 644 645
	}
	return (sizeof(*flow) - diff) / sizeof(u32);
}

__be32 flow_get_u32_src(const struct flow_keys *flow)
{
	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		return flow->addrs.v4addrs.src;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		return (__force __be32)ipv6_addr_hash(
			&flow->addrs.v6addrs.src);
T
Tom Herbert 已提交
646 647
	case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
		return flow->addrs.tipcaddrs.srcnode;
648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698
	default:
		return 0;
	}
}
EXPORT_SYMBOL(flow_get_u32_src);

/* Return a 32-bit representation of the destination address stored in
 * @flow: the IPv4 destination address or ipv6_addr_hash() of the IPv6
 * destination address, depending on control.addr_type.  Returns 0 for
 * any other address type.
 */
__be32 flow_get_u32_dst(const struct flow_keys *flow)
{
	__be32 dst = 0;

	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		dst = flow->addrs.v4addrs.dst;
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		dst = (__force __be32)ipv6_addr_hash(
			&flow->addrs.v6addrs.dst);
		break;
	default:
		break;
	}

	return dst;
}
EXPORT_SYMBOL(flow_get_u32_dst);

static inline void __flow_hash_consistentify(struct flow_keys *keys)
{
	int addr_diff, i;

	switch (keys->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		addr_diff = (__force u32)keys->addrs.v4addrs.dst -
			    (__force u32)keys->addrs.v4addrs.src;
		if ((addr_diff < 0) ||
		    (addr_diff == 0 &&
		     ((__force u16)keys->ports.dst <
		      (__force u16)keys->ports.src))) {
			swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		addr_diff = memcmp(&keys->addrs.v6addrs.dst,
				   &keys->addrs.v6addrs.src,
				   sizeof(keys->addrs.v6addrs.dst));
		if ((addr_diff < 0) ||
		    (addr_diff == 0 &&
		     ((__force u16)keys->ports.dst <
		      (__force u16)keys->ports.src))) {
			for (i = 0; i < 4; i++)
				swap(keys->addrs.v6addrs.src.s6_addr32[i],
				     keys->addrs.v6addrs.dst.s6_addr32[i]);
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	}
699 700
}

T
Tom Herbert 已提交
701
/* Canonicalise @keys in place and hash the result with @keyval as the
 * seed.  A computed hash of 0 is replaced by 1, so this never returns 0.
 */
static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
{
	const u32 *first_word;
	size_t nr_words;
	u32 digest;

	__flow_hash_consistentify(keys);

	first_word = flow_keys_hash_start(keys);
	nr_words = flow_keys_hash_length(keys);
	digest = __flow_hash_words(first_word, nr_words, keyval);

	return digest ? digest : 1;
}

/* Hash @keys with the file-scope seed hashrnd, initialising the seed
 * first.  @keys is modified: __flow_hash_from_keys() canonicalises it.
 */
u32 flow_hash_from_keys(struct flow_keys *keys)
{
	u32 digest;

	__flow_hash_secret_init();
	digest = __flow_hash_from_keys(keys, hashrnd);

	return digest;
}
EXPORT_SYMBOL(flow_hash_from_keys);

T
Tom Herbert 已提交
722 723 724
/* Dissect @skb into @keys, passing FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL
 * as the dissection flag, and return the hash of those keys seeded with
 * @keyval.  The dissector's return value is not checked.
 */
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
				  struct flow_keys *keys, u32 keyval)
{
	const unsigned int dissect_flags = FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL;

	skb_flow_dissect_flow_keys(skb, keys, dissect_flags);

	return __flow_hash_from_keys(keys, keyval);
}

T
Tom Herbert 已提交
731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749
/* Field layout that make_flow_keys_digest() writes into the start of a
 * struct flow_keys_digest.
 */
struct _flow_keys_digest_data {
	__be16	n_proto;	/* copied from flow->basic.n_proto */
	u8	ip_proto;	/* copied from flow->basic.ip_proto */
	u8	padding;	/* never assigned; stays 0 from the memset */
	__be32	ports;		/* copied from flow->ports.ports */
	__be32	src;		/* copied from flow->addrs.v4addrs.src */
	__be32	dst;		/* copied from flow->addrs.v4addrs.dst */
};

/**
 * make_flow_keys_digest - build a compact digest of a set of flow keys
 * @digest: output; zeroed and then filled in
 * @flow: flow keys to summarise
 *
 * Copies n_proto, ip_proto, the ports and the v4addrs source and
 * destination fields of @flow into @digest, laid out as
 * struct _flow_keys_digest_data.  The v4addrs fields are copied
 * regardless of @flow's address type.
 */
void make_flow_keys_digest(struct flow_keys_digest *digest,
			   const struct flow_keys *flow)
{
	struct _flow_keys_digest_data *out;

	BUILD_BUG_ON(sizeof(*out) > sizeof(*digest));

	memset(digest, 0, sizeof(*digest));
	out = (struct _flow_keys_digest_data *)digest;

	out->src = flow->addrs.v4addrs.src;
	out->dst = flow->addrs.v4addrs.dst;
	out->ports = flow->ports.ports;
	out->ip_proto = flow->basic.ip_proto;
	out->n_proto = flow->basic.n_proto;
}
EXPORT_SYMBOL(make_flow_keys_digest);

758 759
static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;

760
u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
761 762 763 764 765 766 767 768 769 770 771 772 773 774
{
	struct flow_keys keys;

	__flow_hash_secret_init();

	memset(&keys, 0, sizeof(keys));
	__skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
			   NULL, 0, 0, 0,
			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);

	return __flow_hash_from_keys(&keys, hashrnd);
}
EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);

775 776 777 778 779
/**
 * __skb_get_hash: calculate a flow hash
 * @skb: sk_buff to calculate flow hash from
 *
 * This function calculates a flow hash based on src/dst addresses
780 781
 * and src/dst port numbers.  Sets hash in skb to non-zero hash value
 * on success, zero indicates no valid hash.  Also, sets l4_hash in skb
782 783
 * if hash is a canonical 4-tuple hash over transport ports.
 */
784
void __skb_get_hash(struct sk_buff *skb)
785 786
{
	struct flow_keys keys;
787
	u32 hash;
788

T
Tom Herbert 已提交
789 790
	__flow_hash_secret_init();

791 792 793
	hash = ___skb_get_hash(skb, &keys, hashrnd);

	__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
794
}
795
EXPORT_SYMBOL(__skb_get_hash);
796

T
Tom Herbert 已提交
797 798 799 800 801 802 803 804
/* Return a flow hash for @skb seeded with the caller-supplied @perturb
 * instead of the file-scope seed.  The dissected keys are discarded and
 * @skb is not modified here.
 */
__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
{
	struct flow_keys keys;

	return ___skb_get_hash(skb, &keys, perturb);
}
EXPORT_SYMBOL(skb_get_hash_perturb);

805
__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821
{
	struct flow_keys keys;

	memset(&keys, 0, sizeof(keys));

	memcpy(&keys.addrs.v6addrs.src, &fl6->saddr,
	       sizeof(keys.addrs.v6addrs.src));
	memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr,
	       sizeof(keys.addrs.v6addrs.dst));
	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys.ports.src = fl6->fl6_sport;
	keys.ports.dst = fl6->fl6_dport;
	keys.keyid.keyid = fl6->fl6_gre_key;
	keys.tags.flow_label = (__force u32)fl6->flowlabel;
	keys.basic.ip_proto = fl6->flowi6_proto;

822 823
	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
			  flow_keys_have_l4(&keys));
824 825 826 827 828

	return skb->hash;
}
EXPORT_SYMBOL(__skb_get_hash_flowi6);

829
__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
830 831 832 833 834 835 836 837 838 839 840 841 842
{
	struct flow_keys keys;

	memset(&keys, 0, sizeof(keys));

	keys.addrs.v4addrs.src = fl4->saddr;
	keys.addrs.v4addrs.dst = fl4->daddr;
	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	keys.ports.src = fl4->fl4_sport;
	keys.ports.dst = fl4->fl4_dport;
	keys.keyid.keyid = fl4->fl4_gre_key;
	keys.basic.ip_proto = fl4->flowi4_proto;

843 844
	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
			  flow_keys_have_l4(&keys));
845 846 847 848 849

	return skb->hash;
}
EXPORT_SYMBOL(__skb_get_hash_flowi4);

850 851
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
		   const struct flow_keys *keys, int hlen)
852
{
853
	u32 poff = keys->control.thoff;
854

855 856 857 858 859
	/* skip L4 headers for fragments after the first */
	if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) &&
	    !(keys->control.flags & FLOW_DIS_FIRST_FRAG))
		return poff;

860
	switch (keys->basic.ip_proto) {
861
	case IPPROTO_TCP: {
862 863 864
		/* access doff as u8 to avoid unaligned access */
		const u8 *doff;
		u8 _doff;
865

866 867 868
		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
					    data, hlen, &_doff);
		if (!doff)
869 870
			return poff;

871
		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900
		break;
	}
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		poff += sizeof(struct udphdr);
		break;
	/* For the rest, we do not really care about header
	 * extensions at this point for now.
	 */
	case IPPROTO_ICMP:
		poff += sizeof(struct icmphdr);
		break;
	case IPPROTO_ICMPV6:
		poff += sizeof(struct icmp6hdr);
		break;
	case IPPROTO_IGMP:
		poff += sizeof(struct igmphdr);
		break;
	case IPPROTO_DCCP:
		poff += sizeof(struct dccp_hdr);
		break;
	case IPPROTO_SCTP:
		poff += sizeof(struct sctphdr);
		break;
	}

	return poff;
}

901 902 903 904 905 906
/**
 * skb_get_poff - get the offset to the payload
 * @skb: sk_buff to get the payload offset from
 *
 * The function will get the offset to the payload as far as it could
 * be dissected.  The main user is currently BPF, so that we can dynamically
907 908 909 910 911 912 913
 * truncate packets without needing to push actual payload to the user
 * space and can analyze headers only, instead.
 */
u32 skb_get_poff(const struct sk_buff *skb)
{
	struct flow_keys keys;

914
	if (!skb_flow_dissect_flow_keys(skb, &keys, 0))
915 916 917 918
		return 0;

	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
}
919

920
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938
{
	memset(keys, 0, sizeof(*keys));

	memcpy(&keys->addrs.v6addrs.src, &fl6->saddr,
	    sizeof(keys->addrs.v6addrs.src));
	memcpy(&keys->addrs.v6addrs.dst, &fl6->daddr,
	    sizeof(keys->addrs.v6addrs.dst));
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys->ports.src = fl6->fl6_sport;
	keys->ports.dst = fl6->fl6_dport;
	keys->keyid.keyid = fl6->fl6_gre_key;
	keys->tags.flow_label = (__force u32)fl6->flowlabel;
	keys->basic.ip_proto = fl6->flowi6_proto;

	return flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(__get_hash_from_flowi6);

939
__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
940 941 942 943 944 945 946 947 948 949 950 951 952 953 954
{
	memset(keys, 0, sizeof(*keys));

	keys->addrs.v4addrs.src = fl4->saddr;
	keys->addrs.v4addrs.dst = fl4->daddr;
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	keys->ports.src = fl4->fl4_sport;
	keys->ports.dst = fl4->fl4_dport;
	keys->keyid.keyid = fl4->fl4_gre_key;
	keys->basic.ip_proto = fl4->flowi4_proto;

	return flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(__get_hash_from_flowi4);

955
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
956 957 958 959
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
960 961 962 963 964 965
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
966 967 968 969 970
		.offset = offsetof(struct flow_keys, addrs.v4addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v6addrs),
971
	},
T
Tom Herbert 已提交
972 973 974 975
	{
		.key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.tipcaddrs),
	},
976 977 978 979
	{
		.key_id = FLOW_DISSECTOR_KEY_PORTS,
		.offset = offsetof(struct flow_keys, ports),
	},
T
Tom Herbert 已提交
980
	{
981 982
		.key_id = FLOW_DISSECTOR_KEY_VLAN,
		.offset = offsetof(struct flow_keys, vlan),
T
Tom Herbert 已提交
983
	},
984 985 986 987
	{
		.key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
		.offset = offsetof(struct flow_keys, tags),
	},
T
Tom Herbert 已提交
988 989 990 991
	{
		.key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
		.offset = offsetof(struct flow_keys, keyid),
	},
992 993
};

994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016
/* Key table for flow_keys_dissector_symmetric: control, basic, IPv4 and
 * IPv6 addresses and ports, each mapped to its struct flow_keys member.
 */
static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
	{ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
	  .offset = offsetof(struct flow_keys, control) },
	{ .key_id = FLOW_DISSECTOR_KEY_BASIC,
	  .offset = offsetof(struct flow_keys, basic) },
	{ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
	  .offset = offsetof(struct flow_keys, addrs.v4addrs) },
	{ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
	  .offset = offsetof(struct flow_keys, addrs.v6addrs) },
	{ .key_id = FLOW_DISSECTOR_KEY_PORTS,
	  .offset = offsetof(struct flow_keys, ports) },
};

1017
static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
1018 1019 1020 1021
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
};

struct flow_dissector flow_keys_dissector __read_mostly;
EXPORT_SYMBOL(flow_keys_dissector);

struct flow_dissector flow_keys_buf_dissector __read_mostly;

/* Initialise the three built-in dissectors from their key tables.
 * Registered as a core_initcall; always returns 0.
 */
static int __init init_default_flow_dissectors(void)
{
	/* Full key set. */
	skb_flow_dissector_init(&flow_keys_dissector,
				flow_keys_dissector_keys,
				ARRAY_SIZE(flow_keys_dissector_keys));

	/* Key set used by __skb_get_hash_symmetric(). */
	skb_flow_dissector_init(&flow_keys_dissector_symmetric,
				flow_keys_dissector_symmetric_keys,
				ARRAY_SIZE(flow_keys_dissector_symmetric_keys));

	/* Control and basic keys only. */
	skb_flow_dissector_init(&flow_keys_buf_dissector,
				flow_keys_buf_dissector_keys,
				ARRAY_SIZE(flow_keys_buf_dissector_keys));

	return 0;
}

1047
core_initcall(init_default_flow_dissectors);