/*
 *	IPV4 GSO/GRO offload support
 *	Linux INET implementation
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	UDPv4 GSO support
 */

#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/protocol.h>

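
/* Registry of UDP port offloads: a singly linked, RCU-protected list
 * headed by udp_offload_base.  Writers serialize on udp_offload_lock;
 * GRO readers walk the list under rcu_read_lock().
 */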
static DEFINE_SPINLOCK(udp_offload_lock);
static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;

#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))

struct udp_offload_priv {
	struct udp_offload	*offload;
	possible_net_t	net;
	struct rcu_head		rcu;
	struct udp_offload_priv __rcu *next;
};

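/* Strip the outer headers of a UDP tunnel skb, GSO-segment the inner
 * packet with gso_inner_segment(), then restore the outer encapsulation
 * and fix up the outer UDP header (length, and checksum when required)
 * on every resulting segment.
 */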
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
	netdev_features_t features,
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features),
	__be16 new_protocol, bool is_ipv6)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	u16 mac_offset = skb->mac_header;
	int mac_len = skb->mac_len;
	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
	__be16 protocol = skb->protocol;
	int udp_offset, outer_hlen;
	unsigned int oldlen;
	bool need_csum = !!(skb_shinfo(skb)->gso_type &
			    SKB_GSO_UDP_TUNNEL_CSUM);
	bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
	bool offload_csum = false, dont_encap = (need_csum || remcsum);

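	/* One's complement of the original length; delta = htonl(oldlen + len)
	 * below is then (new length - old length) in one's-complement
	 * arithmetic, used to adjust each segment's outer UDP checksum.
	 */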
	oldlen = (u16)~skb->len;

	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
		goto out;

	skb->encapsulation = 0;
	__skb_pull(skb, tnl_hlen);
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, skb_inner_network_offset(skb));
	skb->mac_len = skb_inner_network_offset(skb);
	skb->protocol = new_protocol;
	skb->encap_hdr_csum = need_csum;
	skb->remcsum_offload = remcsum;

	/* Try to offload checksum if possible */
	offload_csum = !!(need_csum &&
			  ((skb->dev->features & NETIF_F_HW_CSUM) ||
			   (skb->dev->features & (is_ipv6 ?
			    NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM))));

	features &= skb->dev->hw_enc_features;

	/* segment inner packet. */
	segs = gso_inner_segment(skb, features);
	if (IS_ERR_OR_NULL(segs)) {
		skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
				     mac_len);
		goto out;
	}

	outer_hlen = skb_tnl_header_len(skb);
	udp_offset = outer_hlen - tnl_hlen;
	skb = segs;
	do {
		struct udphdr *uh;
		int len;
		__be32 delta;

		if (dont_encap) {
			skb->encapsulation = 0;
			skb->ip_summed = CHECKSUM_NONE;
		} else {
			/* Only set up inner headers if we might be offloading
			 * inner checksum.
			 */
			skb_reset_inner_headers(skb);
			skb->encapsulation = 1;
		}

		skb->mac_len = mac_len;
		skb->protocol = protocol;

		skb_push(skb, outer_hlen);
		skb_reset_mac_header(skb);
		skb_set_network_header(skb, mac_len);
		skb_set_transport_header(skb, udp_offset);
		len = skb->len - udp_offset;
		uh = udp_hdr(skb);
		uh->len = htons(len);

		if (!need_csum)
			continue;

		delta = htonl(oldlen + len);

		uh->check = ~csum_fold((__force __wsum)
				       ((__force u32)uh->check +
					(__force u32)delta));
		if (offload_csum) {
			skb->ip_summed = CHECKSUM_PARTIAL;
			skb->csum_start = skb_transport_header(skb) - skb->head;
			skb->csum_offset = offsetof(struct udphdr, check);
		} else if (remcsum) {
			/* Need to calculate the checksum from scratch,
			 * since inner checksums are never offloaded when
			 * doing remote checksum offload.
			 */

			skb->csum = skb_checksum(skb, udp_offset,
						 skb->len - udp_offset,
						 0);
			uh->check = csum_fold(skb->csum);
			if (uh->check == 0)
				uh->check = CSUM_MANGLED_0;
		} else {
			uh->check = gso_make_checksum(skb, ~uh->check);

			if (uh->check == 0)
				uh->check = CSUM_MANGLED_0;
		}
	} while ((skb = skb->next));
out:
	return segs;
}

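/* Pick the inner segmentation routine based on the skb's inner protocol
 * type (an Ethernet frame or a raw IP protocol) and hand off to
 * __skb_udp_tunnel_segment().
 */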
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
				       netdev_features_t features,
				       bool is_ipv6)
{
	__be16 protocol = skb->protocol;
	const struct net_offload **offloads;
	const struct net_offload *ops;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features);

	rcu_read_lock();

	switch (skb->inner_protocol_type) {
	case ENCAP_TYPE_ETHER:
		protocol = skb->inner_protocol;
		gso_inner_segment = skb_mac_gso_segment;
		break;
	case ENCAP_TYPE_IPPROTO:
		offloads = is_ipv6 ? inet6_offloads : inet_offloads;
		ops = rcu_dereference(offloads[skb->inner_ipproto]);
		if (!ops || !ops->callbacks.gso_segment)
			goto out_unlock;
		gso_inner_segment = ops->callbacks.gso_segment;
		break;
	default:
		goto out_unlock;
	}

	segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
					protocol, is_ipv6);

out_unlock:
	rcu_read_unlock();

	return segs;
}

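/* GSO callback for IPv4 UDP.  Tunnel skbs are dispatched to the tunnel
 * segmentation path; plain datagrams get software UFO, completing the
 * UDP checksum here because hardware cannot checksum a datagram that is
 * sent as multiple IP fragments.
 */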
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
					 netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	unsigned int mss;
	__wsum csum;
	struct udphdr *uh;
	struct iphdr *iph;

	if (skb->encapsulation &&
	    (skb_shinfo(skb)->gso_type &
	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
		segs = skb_udp_tunnel_segment(skb, features, false);
		goto out;
	}

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto out;

	mss = skb_shinfo(skb)->gso_size;
	if (unlikely(skb->len <= mss))
		goto out;

	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* Packet is from an untrusted source, reset gso_segs. */
		int type = skb_shinfo(skb)->gso_type;

		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
				      SKB_GSO_UDP_TUNNEL |
				      SKB_GSO_UDP_TUNNEL_CSUM |
				      SKB_GSO_TUNNEL_REMCSUM |
				      SKB_GSO_IPIP |
				      SKB_GSO_GRE | SKB_GSO_GRE_CSUM) ||
			     !(type & (SKB_GSO_UDP))))
			goto out;

		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

		segs = NULL;
		goto out;
	}

	/* Do software UFO. Complete and fill in the UDP checksum as
	 * HW cannot do checksum of UDP packets sent as multiple
	 * IP fragments.
	 */

	uh = udp_hdr(skb);
	iph = ip_hdr(skb);

	uh->check = 0;
	csum = skb_checksum(skb, 0, skb->len, 0);
	uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

	skb->ip_summed = CHECKSUM_NONE;

	/* Fragment the skb. IP headers of the fragments are updated in
	 * inet_gso_segment()
	 */
	segs = skb_segment(skb, features);
out:
	return segs;
}

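/* Register a UDP port offload.  The entry is inserted at the head of the
 * list, so it takes precedence over older entries for the same port, and
 * the GFP_ATOMIC allocation allows registration from atomic context.
 */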
int udp_add_offload(struct net *net, struct udp_offload *uo)
{
	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);

	if (!new_offload)
		return -ENOMEM;

	write_pnet(&new_offload->net, net);
	new_offload->offload = uo;

	spin_lock(&udp_offload_lock);
	new_offload->next = udp_offload_base;
	rcu_assign_pointer(udp_offload_base, new_offload);
	spin_unlock(&udp_offload_lock);

	return 0;
}
EXPORT_SYMBOL(udp_add_offload);

static void udp_offload_free_routine(struct rcu_head *head)
{
	struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu);
	kfree(ou_priv);
}

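/* Unlink an offload from the list under the lock, then free it via
 * call_rcu() so that GRO readers still traversing the list can finish
 * with the entry safely.
 */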
void udp_del_offload(struct udp_offload *uo)
{
	struct udp_offload_priv __rcu **head = &udp_offload_base;
	struct udp_offload_priv *uo_priv;

	spin_lock(&udp_offload_lock);

	uo_priv = udp_deref_protected(*head);
	for (; uo_priv != NULL;
	     uo_priv = udp_deref_protected(*head)) {
		if (uo_priv->offload == uo) {
			rcu_assign_pointer(*head,
					   udp_deref_protected(uo_priv->next));
			goto unlock;
		}
		head = &uo_priv->next;
	}
	pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port));
unlock:
	spin_unlock(&udp_offload_lock);
	if (uo_priv)
		call_rcu(&uo_priv->rcu, udp_offload_free_routine);
}
EXPORT_SYMBOL(udp_del_offload);
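
/* Illustrative sketch (not from this file) of how a tunnel driver might
 * use the registration API above.  The names and the port number are
 * made up; struct udp_offload is assumed to carry the .port, .ipproto
 * and .callbacks members referenced elsewhere in this file:
 *
 *	static struct udp_offload my_tun_offload = {
 *		.port	 = htons(4789),
 *		.ipproto = IPPROTO_UDP,
 *		.callbacks = {
 *			.gro_receive  = my_tun_gro_receive,
 *			.gro_complete = my_tun_gro_complete,
 *		},
 *	};
 *
 *	err = udp_add_offload(net, &my_tun_offload);
 *	if (err)
 *		goto fail;
 *	...
 *	udp_del_offload(&my_tun_offload);
 */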

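/* Common UDP GRO entry point: find a registered offload matching the
 * packet's destination port and netns, mark held packets whose ports or
 * checksum coverage differ as belonging to another flow, then pull the
 * UDP header and call the offload's gro_receive() on the payload.
 */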
struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
				 struct udphdr *uh)
{
	struct udp_offload_priv *uo_priv;
	struct sk_buff *p, **pp = NULL;
	struct udphdr *uh2;
	unsigned int off = skb_gro_offset(skb);
	int flush = 1;

	if (NAPI_GRO_CB(skb)->udp_mark ||
	    (skb->ip_summed != CHECKSUM_PARTIAL &&
	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
	     !NAPI_GRO_CB(skb)->csum_valid))
		goto out;

	/* mark that this skb passed once through the udp gro layer */
	NAPI_GRO_CB(skb)->udp_mark = 1;

	rcu_read_lock();
	uo_priv = rcu_dereference(udp_offload_base);
	for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
		if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
		    uo_priv->offload->port == uh->dest &&
		    uo_priv->offload->callbacks.gro_receive)
			goto unflush;
	}
	goto out_unlock;

unflush:
	flush = 0;

	for (p = *head; p; p = p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		uh2 = (struct udphdr *)(p->data + off);

		/* Match ports, and require that the checksums are either
		 * both zero or both nonzero.
		 */
		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) ||
		    (!uh->check ^ !uh2->check)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
	NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
	pp = uo_priv->offload->callbacks.gro_receive(head, skb,
						     uo_priv->offload);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;
	return pp;
}

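/* IPv4 GRO receive: validate the UDP checksum against the IPv4
 * pseudo-header (a zero checksum is acceptable for UDP over IPv4)
 * before handing off to the protocol-independent udp_gro_receive().
 */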
static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb)
{
	struct udphdr *uh = udp_gro_udphdr(skb);

	if (unlikely(!uh))
		goto flush;

	/* Don't bother verifying checksum if we're going to flush anyway. */
	if (NAPI_GRO_CB(skb)->flush)
		goto skip;

	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
						 inet_gro_compute_pseudo))
		goto flush;
	else if (uh->check)
		skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
					     inet_gro_compute_pseudo);
skip:
	NAPI_GRO_CB(skb)->is_ipv6 = 0;
	return udp_gro_receive(head, skb, uh);

flush:
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}

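/* Finish GRO for a coalesced UDP packet whose UDP header sits at offset
 * nhoff: rewrite uh->len for the merged payload, then let the matching
 * offload's gro_complete() fix up the encapsulated headers.
 */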
int udp_gro_complete(struct sk_buff *skb, int nhoff)
{
	struct udp_offload_priv *uo_priv;
	__be16 newlen = htons(skb->len - nhoff);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
	int err = -ENOSYS;

	uh->len = newlen;

	rcu_read_lock();

	uo_priv = rcu_dereference(udp_offload_base);
	for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
		if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
		    uo_priv->offload->port == uh->dest &&
		    uo_priv->offload->callbacks.gro_complete)
			break;
	}

	if (uo_priv) {
		NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
		err = uo_priv->offload->callbacks.gro_complete(skb,
				nhoff + sizeof(struct udphdr),
				uo_priv->offload);
	}

	rcu_read_unlock();

	if (skb->remcsum_offload)
		skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;

	skb->encapsulation = 1;
	skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr));

	return err;
}

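/* IPv4 GRO complete: tag the GSO tunnel type, recomputing the outer UDP
 * pseudo-header checksum when the tunnel uses checksums, then defer to
 * udp_gro_complete().
 */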
static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

	if (uh->check) {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
					  iph->daddr, 0);
	} else {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
	}

	return udp_gro_complete(skb, nhoff);
}

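/* Hook the UDP GSO/GRO callbacks above into the IPv4 protocol offload
 * table; registered from udpv4_offload_init().
 */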
static const struct net_offload udpv4_offload = {
	.callbacks = {
		.gso_segment = udp4_ufo_fragment,
		.gro_receive  =	udp4_gro_receive,
		.gro_complete =	udp4_gro_complete,
	},
};

int __init udpv4_offload_init(void)
{
	return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
}