/*
 *	IPV4 GSO/GRO offload support
 *	Linux INET implementation
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	UDPv4 GSO support
 */

#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/protocol.h>

static DEFINE_SPINLOCK(udp_offload_lock);
static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;

#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))

struct udp_offload_priv {
	struct udp_offload	*offload;
	possible_net_t	net;
	struct rcu_head		rcu;
	struct udp_offload_priv __rcu *next;
};

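/* Segment a UDP tunnel GSO skb: strip the outer headers, segment the
 * inner packet with gso_inner_segment(), then restore the tunnel headers
 * on each resulting segment and fix up the outer UDP length and checksum.
 */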
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
	netdev_features_t features,
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features),
	__be16 new_protocol, bool is_ipv6)
{
	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
	bool remcsum, need_csum, offload_csum, ufo;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct udphdr *uh = udp_hdr(skb);
	u16 mac_offset = skb->mac_header;
	__be16 protocol = skb->protocol;
	u16 mac_len = skb->mac_len;
	int udp_offset, outer_hlen;
	__wsum partial;

	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
		goto out;

	/* Adjust partial header checksum to negate old length.
	 * We cannot rely on the value contained in uh->len as it is
	 * possible that the actual value exceeds the boundaries of the
	 * 16 bit length field due to the header being added outside of an
	 * IP or IPv6 frame that was already limited to 64K - 1.
	 */
	partial = csum_sub(csum_unfold(uh->check),
			   (__force __wsum)htonl(skb->len));

	/* setup inner skb. */
	skb->encapsulation = 0;
	SKB_GSO_CB(skb)->encap_level = 0;
	__skb_pull(skb, tnl_hlen);
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, skb_inner_network_offset(skb));
	skb->mac_len = skb_inner_network_offset(skb);
	skb->protocol = new_protocol;

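	/* SKB_GSO_UDP_TUNNEL_CSUM means the outer UDP header carries a
	 * checksum, so one must be filled in for every segment below.
	 */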
	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
	skb->encap_hdr_csum = need_csum;

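	/* With remote checksum offload the receiver derives the inner
	 * checksum from the outer UDP checksum, so the segments are left
	 * with CHECKSUM_NONE further down.
	 */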
	remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
	skb->remcsum_offload = remcsum;

	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);

	/* Try to offload checksum if possible */
	offload_csum = !!(need_csum &&
			  (skb->dev->features &
			   (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
				      (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));

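	/* Inner segmentation may only use features the device supports on
	 * encapsulated traffic.
	 */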
	features &= skb->dev->hw_enc_features;

	/* The only checksum offload we care about from here on out is the
	 * outer one so strip the existing checksum feature flags and
	 * instead set the flag based on our outer checksum offload value.
	 */
	if (remcsum || ufo) {
		features &= ~NETIF_F_CSUM_MASK;
		if (!need_csum || offload_csum)
			features |= NETIF_F_HW_CSUM;
	}

	/* segment inner packet. */
	segs = gso_inner_segment(skb, features);
	if (IS_ERR_OR_NULL(segs)) {
		skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
				     mac_len);
		goto out;
	}

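	/* Push the tunnel headers back onto each segment and rewrite the
	 * outer UDP header to match the segment's new length.
	 */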
	outer_hlen = skb_tnl_header_len(skb);
	udp_offset = outer_hlen - tnl_hlen;
	skb = segs;
	do {
		__be16 len;

		if (remcsum)
			skb->ip_summed = CHECKSUM_NONE;

		/* Set up inner headers if we are offloading inner checksum */
		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			skb_reset_inner_headers(skb);
			skb->encapsulation = 1;
		}

		skb->mac_len = mac_len;
		skb->protocol = protocol;

		__skb_push(skb, outer_hlen);
		skb_reset_mac_header(skb);
		skb_set_network_header(skb, mac_len);
		skb_set_transport_header(skb, udp_offset);
		len = htons(skb->len - udp_offset);
		uh = udp_hdr(skb);
		uh->len = len;

		if (!need_csum)
			continue;

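		/* Fold the precomputed pseudo-header checksum together with
		 * this segment's new length to form the outer UDP checksum.
		 */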
		uh->check = ~csum_fold(csum_add(partial, (__force __wsum)len));

		if (skb->encapsulation || !offload_csum) {
			uh->check = gso_make_checksum(skb, ~uh->check);
			if (uh->check == 0)
				uh->check = CSUM_MANGLED_0;
		} else {
			skb->ip_summed = CHECKSUM_PARTIAL;
			skb->csum_start = skb_transport_header(skb) - skb->head;
			skb->csum_offset = offsetof(struct udphdr, check);
		}
	} while ((skb = skb->next));
out:
	return segs;
}

struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
				       netdev_features_t features,
				       bool is_ipv6)
{
	__be16 protocol = skb->protocol;
	const struct net_offload **offloads;
	const struct net_offload *ops;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features);

	rcu_read_lock();

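	/* Choose how to segment the inner packet: Ethernet-framed inner
	 * packets go through the generic MAC GSO path, while bare IP
	 * payloads are dispatched via the per-protocol offload table.
	 */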
	switch (skb->inner_protocol_type) {
	case ENCAP_TYPE_ETHER:
		protocol = skb->inner_protocol;
		gso_inner_segment = skb_mac_gso_segment;
		break;
	case ENCAP_TYPE_IPPROTO:
		offloads = is_ipv6 ? inet6_offloads : inet_offloads;
		ops = rcu_dereference(offloads[skb->inner_ipproto]);
		if (!ops || !ops->callbacks.gso_segment)
			goto out_unlock;
		gso_inner_segment = ops->callbacks.gso_segment;
		break;
	default:
		goto out_unlock;
	}

	segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
					protocol, is_ipv6);

out_unlock:
	rcu_read_unlock();

	return segs;
}

static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
					 netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	unsigned int mss;
	__wsum csum;
	struct udphdr *uh;
	struct iphdr *iph;

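	/* Tunnel-encapsulated packets (e.g. vxlan) are handed to the UDP
	 * tunnel segmentation path rather than plain UFO.
	 */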
	if (skb->encapsulation &&
	    (skb_shinfo(skb)->gso_type &
	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
		segs = skb_udp_tunnel_segment(skb, features, false);
		goto out;
	}

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto out;

	mss = skb_shinfo(skb)->gso_size;
	if (unlikely(skb->len <= mss))
		goto out;

	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* Packet is from an untrusted source, reset gso_segs. */
		int type = skb_shinfo(skb)->gso_type;

		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
				      SKB_GSO_UDP_TUNNEL |
				      SKB_GSO_UDP_TUNNEL_CSUM |
				      SKB_GSO_TUNNEL_REMCSUM |
				      SKB_GSO_IPIP |
				      SKB_GSO_GRE | SKB_GSO_GRE_CSUM) ||
			     !(type & (SKB_GSO_UDP))))
			goto out;

		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

		segs = NULL;
		goto out;
	}

	/* Do software UFO. Complete and fill in the UDP checksum as
	 * HW cannot do checksum of UDP packets sent as multiple
	 * IP fragments.
	 */

	uh = udp_hdr(skb);
	iph = ip_hdr(skb);

	uh->check = 0;
	csum = skb_checksum(skb, 0, skb->len, 0);
	uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

	skb->ip_summed = CHECKSUM_NONE;

	/* If there is no outer header we can fake a checksum offload
	 * due to the fact that we have already done the checksum in
	 * software prior to segmenting the frame.
	 */
	if (!skb->encap_hdr_csum)
		features |= NETIF_F_HW_CSUM;

	/* Fragment the skb. IP headers of the fragments are updated in
	 * inet_gso_segment()
	 */
	segs = skb_segment(skb, features);
out:
	return segs;
}

int udp_add_offload(struct net *net, struct udp_offload *uo)
{
	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);

	if (!new_offload)
		return -ENOMEM;

	write_pnet(&new_offload->net, net);
	new_offload->offload = uo;

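	/* Publish the new entry at the head of the list; GRO readers walk
	 * it under RCU without taking the lock.
	 */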
	spin_lock(&udp_offload_lock);
	new_offload->next = udp_offload_base;
	rcu_assign_pointer(udp_offload_base, new_offload);
	spin_unlock(&udp_offload_lock);

	return 0;
}
EXPORT_SYMBOL(udp_add_offload);

static void udp_offload_free_routine(struct rcu_head *head)
{
	struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu);
	kfree(ou_priv);
}

void udp_del_offload(struct udp_offload *uo)
{
	struct udp_offload_priv __rcu **head = &udp_offload_base;
	struct udp_offload_priv *uo_priv;

	spin_lock(&udp_offload_lock);

	uo_priv = udp_deref_protected(*head);
	for (; uo_priv != NULL;
	     uo_priv = udp_deref_protected(*head)) {
		if (uo_priv->offload == uo) {
			rcu_assign_pointer(*head,
					   udp_deref_protected(uo_priv->next));
			goto unlock;
		}
		head = &uo_priv->next;
	}
	pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port));
unlock:
	spin_unlock(&udp_offload_lock);
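	/* Free the entry only after all RCU readers are done with it. */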
	if (uo_priv)
		call_rcu(&uo_priv->rcu, udp_offload_free_routine);
}
EXPORT_SYMBOL(udp_del_offload);

struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
				 struct udphdr *uh)
{
	struct udp_offload_priv *uo_priv;
	struct sk_buff *p, **pp = NULL;
	struct udphdr *uh2;
	unsigned int off = skb_gro_offset(skb);
	int flush = 1;

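	/* Run tunnel GRO at most once per packet, and only when the outer
	 * checksum is already valid, locally generated (CHECKSUM_PARTIAL),
	 * or covered by an unclaimed hardware-verified checksum.
	 */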
	if (NAPI_GRO_CB(skb)->encap_mark ||
	    (skb->ip_summed != CHECKSUM_PARTIAL &&
	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
	     !NAPI_GRO_CB(skb)->csum_valid))
		goto out;

	/* mark that this skb passed once through the tunnel gro layer */
	NAPI_GRO_CB(skb)->encap_mark = 1;

	rcu_read_lock();
	uo_priv = rcu_dereference(udp_offload_base);
	for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
		if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
		    uo_priv->offload->port == uh->dest &&
		    uo_priv->offload->callbacks.gro_receive)
			goto unflush;
	}
	goto out_unlock;

unflush:
	flush = 0;

	for (p = *head; p; p = p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		uh2 = (struct udphdr *)(p->data + off);

		/* Match ports, and require that the checksums are either
		 * both zero or both nonzero.
		 */
		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) ||
		    (!uh->check ^ !uh2->check)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
	NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
	pp = uo_priv->offload->callbacks.gro_receive(head, skb,
						     uo_priv->offload);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;
	return pp;
}

static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb)
{
	struct udphdr *uh = udp_gro_udphdr(skb);

	if (unlikely(!uh))
		goto flush;

	/* Don't bother verifying checksum if we're going to flush anyway. */
	if (NAPI_GRO_CB(skb)->flush)
		goto skip;

	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
						 inet_gro_compute_pseudo))
		goto flush;
	else if (uh->check)
		skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
					     inet_gro_compute_pseudo);
skip:
	NAPI_GRO_CB(skb)->is_ipv6 = 0;
	return udp_gro_receive(head, skb, uh);

flush:
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}

int udp_gro_complete(struct sk_buff *skb, int nhoff)
{
	struct udp_offload_priv *uo_priv;
	__be16 newlen = htons(skb->len - nhoff);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
	int err = -ENOSYS;

	uh->len = newlen;

	rcu_read_lock();

	uo_priv = rcu_dereference(udp_offload_base);
	for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
		if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
		    uo_priv->offload->port == uh->dest &&
		    uo_priv->offload->callbacks.gro_complete)
			break;
	}

	if (uo_priv) {
		NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
		err = uo_priv->offload->callbacks.gro_complete(skb,
				nhoff + sizeof(struct udphdr),
				uo_priv->offload);
	}

	rcu_read_unlock();

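	/* GRO merged the payloads; tag the skb so it can be resegmented
	 * correctly (e.g. when forwarded) by the GSO path.
	 */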
	if (skb->remcsum_offload)
		skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;

	skb->encapsulation = 1;
	skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr));

	return err;
}

static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

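	/* A nonzero outer checksum on the merged packet means resegmented
	 * packets must carry one too, so pick the matching gso_type.
	 */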
	if (uh->check) {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
					  iph->daddr, 0);
	} else {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
	}

	return udp_gro_complete(skb, nhoff);
}

static const struct net_offload udpv4_offload = {
	.callbacks = {
		.gso_segment = udp4_ufo_fragment,
		.gro_receive  =	udp4_gro_receive,
		.gro_complete =	udp4_gro_complete,
	},
};

int __init udpv4_offload_init(void)
{
	return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
}