#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
#include <linux/dccp.h>
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>

/* Report whether @key_id has its bit set in flow_dissector->used_keys.
 *
 * The mask is built from an unsigned constant: shifting a signed 1 into
 * the sign bit is undefined behaviour, so a key id equal to the top bit
 * of an int must not go through "1 << key_id".
 */
static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector,
					enum flow_dissector_key_id key_id)
{
	return flow_dissector->used_keys & (1U << key_id);
}

/* Set the bit for @key_id in flow_dissector->used_keys.
 *
 * The mask is built from an unsigned constant: shifting a signed 1 into
 * the sign bit is undefined behaviour, so a key id equal to the top bit
 * of an int must not go through "1 << key_id".
 */
static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector,
				       enum flow_dissector_key_id key_id)
{
	flow_dissector->used_keys |= (1U << key_id);
}

/* Locate the storage for @key_id inside @target_container.
 *
 * The result is @target_container advanced by the byte offset recorded
 * for @key_id in flow_dissector->offset[].
 */
static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
				       enum flow_dissector_key_id key_id,
				       void *target_container)
{
	char *base = target_container;

	return &base[flow_dissector->offset[key_id]];
}

void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
			     const struct flow_dissector_key *key,
			     unsigned int key_count)
{
	unsigned int i;

	memset(flow_dissector, 0, sizeof(*flow_dissector));

	for (i = 0; i < key_count; i++, key++) {
		/* User should make sure that every key target offset is withing
		 * boundaries of unsigned short.
		 */
		BUG_ON(key->offset > USHRT_MAX);
		BUG_ON(skb_flow_dissector_uses_key(flow_dissector,
						   key->key_id));

		skb_flow_dissector_set_key(flow_dissector, key->key_id);
		flow_dissector->offset[key->key_id] = key->offset;
	}

	/* Ensure that the dissector always includes basic key. That way
	 * we are able to avoid handling lack of it in fast path.
	 */
	BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
					    FLOW_DISSECTOR_KEY_BASIC));
}
EXPORT_SYMBOL(skb_flow_dissector_init);

66 67 68 69 70 71 72 73 74 75
/* copy saddr & daddr, possibly using 64bit load/store
 * Equivalent to :	flow->src = iph->saddr;
 *			flow->dst = iph->daddr;
 */
static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
{
	BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
		     offsetof(typeof(*flow), src) + sizeof(flow->src));
	memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
}
E
Eric Dumazet 已提交
76

77
/**
78 79
 * __skb_flow_get_ports - extract the upper layer ports and return them
 * @skb: sk_buff to extract the ports from
80 81
 * @thoff: transport header offset
 * @ip_proto: protocol for which to get port offset
82 83
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
84 85 86 87
 *
 * The function will try to retrieve the ports at offset thoff + poff where poff
 * is the protocol port offset returned from proto_ports_offset
 */
88 89
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
			    void *data, int hlen)
90 91 92
{
	int poff = proto_ports_offset(ip_proto);

93 94 95 96 97
	if (!data) {
		data = skb->data;
		hlen = skb_headlen(skb);
	}

98 99 100
	if (poff >= 0) {
		__be32 *ports, _ports;

101 102
		ports = __skb_header_pointer(skb, thoff + poff,
					     sizeof(_ports), data, hlen, &_ports);
103 104 105 106 107 108
		if (ports)
			return *ports;
	}

	return 0;
}
109
EXPORT_SYMBOL(__skb_flow_get_ports);
110

111 112 113 114 115 116 117 118 119 120 121 122 123
/**
 * __skb_flow_dissect - extract the flow_keys struct and return it
 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
 * @flow: flow_keys to fill in; it is zeroed before dissection starts
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
 * The function will try to retrieve the struct flow_keys from either the skbuff
 * or a raw buffer specified by the rest parameters
 *
 * VLAN, PPPoE session and GRE (version zero, no routing) encapsulations as
 * well as IPIP and IPv6-in-IP tunnels are walked through until an IPv4 or
 * IPv6 header with another payload is reached.
 *
 * Return: true when @flow was filled in, false when a required header
 * could not be read or the protocol is not handled.  ETH_P_FCOE records
 * @flow->thoff past the FCoE header but still returns false.
 */
bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
			void *data, __be16 proto, int nhoff, int hlen)
{
	u8 ip_proto;

	if (!data) {
		data = skb->data;
		proto = skb->protocol;
		nhoff = skb_network_offset(skb);
		hlen = skb_headlen(skb);
	}

	memset(flow, 0, sizeof(*flow));

again:
	switch (proto) {
	case htons(ETH_P_IP): {
		const struct iphdr *iph;
		struct iphdr _iph;
ip:
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		if (!iph || iph->ihl < 5)
			return false;
		nhoff += iph->ihl * 4;

		ip_proto = iph->protocol;
		/* Fragments are not dissected past the IP header */
		if (ip_is_fragment(iph))
			ip_proto = 0;

		/* skip the address processing if skb is NULL.  The assumption
		 * here is that if there is no skb we are not looking for flow
		 * info but lengths and protocols.
		 */
		if (!skb)
			break;

		iph_to_flow_copy_addrs(flow, iph);
		break;
	}
	case htons(ETH_P_IPV6): {
		const struct ipv6hdr *iph;
		struct ipv6hdr _iph;
		__be32 flow_label;

ipv6:
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		if (!iph)
			return false;

		ip_proto = iph->nexthdr;
		nhoff += sizeof(struct ipv6hdr);

		/* see comment above in IPv4 section */
		if (!skb)
			break;

		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);

		flow_label = ip6_flowlabel(iph);
		if (flow_label) {
			/* Awesome, IPv6 packet has a flow label so we can
			 * use that to represent the ports without any
			 * further dissection.
			 */
			flow->n_proto = proto;
			flow->ip_proto = ip_proto;
			flow->ports = flow_label;
			flow->thoff = (u16)nhoff;

			return true;
		}

		break;
	}
	case htons(ETH_P_8021AD):
	case htons(ETH_P_8021Q): {
		const struct vlan_hdr *vlan;
		struct vlan_hdr _vlan;

		vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
		if (!vlan)
			return false;

		proto = vlan->h_vlan_encapsulated_proto;
		nhoff += sizeof(*vlan);
		goto again;
	}
	case htons(ETH_P_PPP_SES): {
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
			return false;
		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
		case htons(PPP_IP):
			/* Record the ethertype rather than the PPP protocol
			 * id in flow->n_proto, as the IPPROTO_IPIP path does.
			 */
			proto = htons(ETH_P_IP);
			goto ip;
		case htons(PPP_IPV6):
			/* Same translation as above, for IPv6 */
			proto = htons(ETH_P_IPV6);
			goto ipv6;
		default:
			return false;
		}
	}
	case htons(ETH_P_TIPC): {
		struct {
			__be32 pre[3];
			__be32 srcnode;
		} *hdr, _hdr;
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
			return false;
		flow->src = hdr->srcnode;
		flow->dst = 0;
		flow->n_proto = proto;
		flow->thoff = (u16)nhoff;
		return true;
	}
	case htons(ETH_P_FCOE):
		flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
		/* fall through */
	default:
		return false;
	}

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		} *hdr, _hdr;

		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
			return false;
		/*
		 * Only look inside GRE if version zero and no
		 * routing
		 */
		if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
			proto = hdr->proto;
			/* Base header, then 4 bytes per optional field */
			nhoff += 4;
			if (hdr->flags & GRE_CSUM)
				nhoff += 4;
			if (hdr->flags & GRE_KEY)
				nhoff += 4;
			if (hdr->flags & GRE_SEQ)
				nhoff += 4;
			if (proto == htons(ETH_P_TEB)) {
				const struct ethhdr *eth;
				struct ethhdr _eth;

				eth = __skb_header_pointer(skb, nhoff,
							   sizeof(_eth),
							   data, hlen, &_eth);
				if (!eth)
					return false;
				proto = eth->h_proto;
				nhoff += sizeof(*eth);
			}
			goto again;
		}
		break;
	}
	case IPPROTO_IPIP:
		proto = htons(ETH_P_IP);
		goto ip;
	case IPPROTO_IPV6:
		proto = htons(ETH_P_IPV6);
		goto ipv6;
	default:
		break;
	}

	flow->n_proto = proto;
	flow->ip_proto = ip_proto;
	flow->thoff = (u16) nhoff;

	/* unless skb is set we don't need to record port info */
	if (skb)
		flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
						   data, hlen);

	return true;
}
EXPORT_SYMBOL(__skb_flow_dissect);
311 312

static u32 hashrnd __read_mostly;
313 314 315 316 317
static __always_inline void __flow_hash_secret_init(void)
{
	net_get_random_once(&hashrnd, sizeof(hashrnd));
}

T
Tom Herbert 已提交
318
/* Hash the three words @a, @b and @c with jhash_3words(), passing
 * @keyval as its last argument.
 */
static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval)
{
	return jhash_3words(a, b, c, keyval);
}

T
Tom Herbert 已提交
323
/* Compute a direction-independent, never-zero hash over @keys.
 *
 * @keys is modified in place: src/dst and the two 16-bit port halves are
 * swapped when needed so that both directions of a flow present the same
 * values to the hash.  A computed hash of 0 is replaced by 1.
 */
static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
{
	u32 hash;

	/* get a consistent hash (same value on both flow directions) */
	if (((__force u32)keys->dst < (__force u32)keys->src) ||
	    (((__force u32)keys->dst == (__force u32)keys->src) &&
	     ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
		swap(keys->dst, keys->src);
		swap(keys->port16[0], keys->port16[1]);
	}

	hash = __flow_hash_3words((__force u32)keys->dst,
				  (__force u32)keys->src,
				  (__force u32)keys->ports,
				  keyval);
	if (!hash)
		hash = 1;

	return hash;
}

/* Hash @keys with the file-wide hashrnd key, initialising hashrnd first.
 * @keys may be reordered by __flow_hash_from_keys().
 */
u32 flow_hash_from_keys(struct flow_keys *keys)
{
	__flow_hash_secret_init();
	return __flow_hash_from_keys(keys, hashrnd);
}
EXPORT_SYMBOL(flow_hash_from_keys);

T
Tom Herbert 已提交
352 353 354 355 356 357 358 359 360
/* Dissect @skb into @keys and hash the result with @keyval.
 * Yields 0 when skb_flow_dissect() fails.
 */
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
				  struct flow_keys *keys, u32 keyval)
{
	bool dissected = skb_flow_dissect(skb, keys);

	return dissected ? __flow_hash_from_keys(keys, keyval) : 0;
}

T
Tom Herbert 已提交
361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387
/* Field layout that make_flow_keys_digest() writes into a
 * struct flow_keys_digest by casting the digest pointer to this type.
 */
struct _flow_keys_digest_data {
	__be16	n_proto;	/* copied from flow_keys.n_proto */
	u8	ip_proto;	/* copied from flow_keys.ip_proto */
	u8	padding;	/* never assigned; stays 0 from the memset */
	__be32	ports;		/* copied from flow_keys.ports */
	__be32	src;		/* copied from flow_keys.src */
	__be32	dst;		/* copied from flow_keys.dst */
};

/**
 * make_flow_keys_digest - pack selected flow keys into a digest
 * @digest: destination; cleared completely before being filled
 * @flow: source flow keys
 *
 * Copies n_proto, ip_proto, ports, src and dst from @flow into @digest,
 * laid out as struct _flow_keys_digest_data.  A build-time check makes
 * sure that layout fits into the digest.
 */
void make_flow_keys_digest(struct flow_keys_digest *digest,
			   const struct flow_keys *flow)
{
	struct _flow_keys_digest_data *out;

	BUILD_BUG_ON(sizeof(struct _flow_keys_digest_data) > sizeof(*digest));

	memset(digest, 0, sizeof(*digest));

	out = (struct _flow_keys_digest_data *)digest;
	out->dst = flow->dst;
	out->src = flow->src;
	out->ports = flow->ports;
	out->ip_proto = flow->ip_proto;
	out->n_proto = flow->n_proto;
}
EXPORT_SYMBOL(make_flow_keys_digest);

388 389 390 391 392
/**
 * __skb_get_hash: calculate a flow hash
 * @skb: sk_buff to calculate flow hash from
 *
 * This function calculates a flow hash based on src/dst addresses
393 394
 * and src/dst port numbers.  Sets hash in skb to non-zero hash value
 * on success, zero indicates no valid hash.  Also, sets l4_hash in skb
395 396
 * if hash is a canonical 4-tuple hash over transport ports.
 */
397
void __skb_get_hash(struct sk_buff *skb)
398 399
{
	struct flow_keys keys;
T
Tom Herbert 已提交
400
	u32 hash;
401

T
Tom Herbert 已提交
402 403 404 405
	__flow_hash_secret_init();

	hash = ___skb_get_hash(skb, &keys, hashrnd);
	if (!hash)
406 407
		return;
	if (keys.ports)
408
		skb->l4_hash = 1;
409
	skb->sw_hash = 1;
T
Tom Herbert 已提交
410
	skb->hash = hash;
411
}
412
EXPORT_SYMBOL(__skb_get_hash);
413

T
Tom Herbert 已提交
414 415 416 417 418 419 420 421
/**
 * skb_get_hash_perturb - flow hash of an skb keyed by a caller value
 * @skb: sk_buff to dissect and hash
 * @perturb: key handed to the hash instead of the file-wide hashrnd
 *
 * The skb itself is not modified.  Returns 0 when the flow could not be
 * dissected.
 */
__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
{
	struct flow_keys scratch;
	__u32 hash = ___skb_get_hash(skb, &scratch, perturb);

	return hash;
}
EXPORT_SYMBOL(skb_get_hash_perturb);

422 423
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
		   const struct flow_keys *keys, int hlen)
424
{
425
	u32 poff = keys->thoff;
426

427
	switch (keys->ip_proto) {
428
	case IPPROTO_TCP: {
429 430 431
		/* access doff as u8 to avoid unaligned access */
		const u8 *doff;
		u8 _doff;
432

433 434 435
		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
					    data, hlen, &_doff);
		if (!doff)
436 437
			return poff;

438
		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
		break;
	}
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		poff += sizeof(struct udphdr);
		break;
	/* For the rest, we do not really care about header
	 * extensions at this point for now.
	 */
	case IPPROTO_ICMP:
		poff += sizeof(struct icmphdr);
		break;
	case IPPROTO_ICMPV6:
		poff += sizeof(struct icmp6hdr);
		break;
	case IPPROTO_IGMP:
		poff += sizeof(struct igmphdr);
		break;
	case IPPROTO_DCCP:
		poff += sizeof(struct dccp_hdr);
		break;
	case IPPROTO_SCTP:
		poff += sizeof(struct sctphdr);
		break;
	}

	return poff;
}

468 469 470 471 472 473
/**
 * skb_get_poff - get the offset to the payload
 * @skb: sk_buff to get the payload offset from
 *
 * The function will get the offset to the payload as far as it could
 * be dissected.  The main user is currently BPF, so that we can dynamically
 * truncate packets without needing to push actual payload to the user
 * space and can analyze headers only, instead.
 *
 * Return: the payload offset computed by __skb_get_poff(), or 0 when
 * skb_flow_dissect() fails.
 */
u32 skb_get_poff(const struct sk_buff *skb)
{
	struct flow_keys keys;

	if (!skb_flow_dissect(skb, &keys))
		return 0;

	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
}