/*
 *	IPV4 GSO/GRO offload support
 *	Linux INET implementation
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	UDPv4 GSO support
 */

#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/protocol.h>

static DEFINE_SPINLOCK(udp_offload_lock);
static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;

#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))

struct udp_offload_priv {
	struct udp_offload	*offload;
	possible_net_t	net;
	struct rcu_head		rcu;
	struct udp_offload_priv __rcu *next;
};

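/* Segment a UDP-encapsulated skb: strip the tunnel header, let the
 * inner protocol's GSO handler segment the encapsulated packet, then
 * restore the outer headers on every resulting segment, fixing up the
 * outer UDP length and checksum.
 */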
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
	netdev_features_t features,
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features),
	__be16 new_protocol, bool is_ipv6)
{
	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
	bool remcsum, need_csum, offload_csum, ufo;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct udphdr *uh = udp_hdr(skb);
	u16 mac_offset = skb->mac_header;
	__be16 protocol = skb->protocol;
	u16 mac_len = skb->mac_len;
	int udp_offset, outer_hlen;
	__wsum partial;

	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
		goto out;

	/* Adjust partial header checksum to negate old length.
	 * We cannot rely on the value contained in uh->len as it is
	 * possible that the actual value exceeds the boundaries of the
	 * 16 bit length field due to the header being added outside of an
	 * IP or IPv6 frame that was already limited to 64K - 1.
	 */
	partial = csum_sub(csum_unfold(uh->check),
			   (__force __wsum)htonl(skb->len));

	/* setup inner skb. */
	skb->encapsulation = 0;
	__skb_pull(skb, tnl_hlen);
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, skb_inner_network_offset(skb));
	skb->mac_len = skb_inner_network_offset(skb);
	skb->protocol = new_protocol;

	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
	skb->encap_hdr_csum = need_csum;

	remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
	skb->remcsum_offload = remcsum;

	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);

	/* Try to offload checksum if possible */
	offload_csum = !!(need_csum &&
			  (skb->dev->features &
			   (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
				      (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));

	features &= skb->dev->hw_enc_features;

	/* The only checksum offload we care about from here on out is the
	 * outer one, so strip the existing checksum feature flags and
	 * instead set the flag based on our outer checksum offload value.
	 */
	if (remcsum || ufo) {
		features &= ~NETIF_F_CSUM_MASK;
		if (!need_csum || offload_csum)
			features |= NETIF_F_HW_CSUM;
	}

	/* segment inner packet. */
	segs = gso_inner_segment(skb, features);
	if (IS_ERR_OR_NULL(segs)) {
		skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
				     mac_len);
		goto out;
	}

	outer_hlen = skb_tnl_header_len(skb);
	udp_offset = outer_hlen - tnl_hlen;
	skb = segs;
	do {
		__be16 len;

		if (remcsum)
			skb->ip_summed = CHECKSUM_NONE;

		/* Set up inner headers if we are offloading inner checksum */
		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			skb_reset_inner_headers(skb);
			skb->encapsulation = 1;
		}

		skb->mac_len = mac_len;
		skb->protocol = protocol;

		__skb_push(skb, outer_hlen);
		skb_reset_mac_header(skb);
		skb_set_network_header(skb, mac_len);
		skb_set_transport_header(skb, udp_offset);
		len = htons(skb->len - udp_offset);
		uh = udp_hdr(skb);
		uh->len = len;

		if (!need_csum)
			continue;

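		/* Re-add this segment's length to the length-adjusted
		 * partial checksum computed before segmentation.
		 */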
		uh->check = ~csum_fold(csum_add(partial, (__force __wsum)len));

		if (skb->encapsulation || !offload_csum) {
			uh->check = gso_make_checksum(skb, ~uh->check);
			if (uh->check == 0)
				uh->check = CSUM_MANGLED_0;
		} else {
			skb->ip_summed = CHECKSUM_PARTIAL;
			skb->csum_start = skb_transport_header(skb) - skb->head;
			skb->csum_offset = offsetof(struct udphdr, check);
		}
	} while ((skb = skb->next));
out:
	return segs;
}

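/* Choose the GSO handler for the inner payload (an Ethernet frame or
 * a raw IP protocol, depending on how the tunnel set inner_protocol)
 * and run the generic tunnel segmentation above.
 */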
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
				       netdev_features_t features,
				       bool is_ipv6)
{
	__be16 protocol = skb->protocol;
	const struct net_offload **offloads;
	const struct net_offload *ops;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features);

	rcu_read_lock();

	switch (skb->inner_protocol_type) {
	case ENCAP_TYPE_ETHER:
		protocol = skb->inner_protocol;
		gso_inner_segment = skb_mac_gso_segment;
		break;
	case ENCAP_TYPE_IPPROTO:
		offloads = is_ipv6 ? inet6_offloads : inet_offloads;
		ops = rcu_dereference(offloads[skb->inner_ipproto]);
		if (!ops || !ops->callbacks.gso_segment)
			goto out_unlock;
		gso_inner_segment = ops->callbacks.gso_segment;
		break;
	default:
		goto out_unlock;
	}

	segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
					protocol, is_ipv6);

out_unlock:
	rcu_read_unlock();

	return segs;
}

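/* GSO callback for UDP over IPv4: tunnel packets are handed to
 * skb_udp_tunnel_segment(); plain UDP packets get software UFO, i.e.
 * one full checksum up front followed by IP fragmentation.
 */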
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
					 netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	unsigned int mss;
	__wsum csum;
	struct udphdr *uh;
	struct iphdr *iph;

	if (skb->encapsulation &&
	    (skb_shinfo(skb)->gso_type &
	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
		segs = skb_udp_tunnel_segment(skb, features, false);
		goto out;
	}

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto out;

	mss = skb_shinfo(skb)->gso_size;
	if (unlikely(skb->len <= mss))
		goto out;

	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* Packet is from an untrusted source; reset gso_segs. */
		int type = skb_shinfo(skb)->gso_type;

		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
				      SKB_GSO_UDP_TUNNEL |
				      SKB_GSO_UDP_TUNNEL_CSUM |
				      SKB_GSO_TUNNEL_REMCSUM |
				      SKB_GSO_IPIP |
				      SKB_GSO_GRE | SKB_GSO_GRE_CSUM) ||
			     !(type & (SKB_GSO_UDP))))
			goto out;

		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

		segs = NULL;
		goto out;
	}

	/* Do software UFO. Complete and fill in the UDP checksum as
	 * HW cannot do checksum of UDP packets sent as multiple
	 * IP fragments.
	 */
	uh = udp_hdr(skb);
	iph = ip_hdr(skb);

	uh->check = 0;
	csum = skb_checksum(skb, 0, skb->len, 0);
	uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

	skb->ip_summed = CHECKSUM_NONE;

	/* If there is no outer header we can fake a checksum offload
	 * because we have already checksummed the frame in software
	 * prior to segmenting it.
	 */
	if (!skb->encap_hdr_csum)
		features |= NETIF_F_HW_CSUM;

	/* Fragment the skb. IP headers of the fragments are updated in
	 * inet_gso_segment()
	 */
	segs = skb_segment(skb, features);
out:
	return segs;
}

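/* Register a per-port UDP GRO handler. Entries are prepended to a
 * global RCU-protected list, so the most recently added offload for
 * a given port is found first on lookup.
 */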
int udp_add_offload(struct net *net, struct udp_offload *uo)
{
	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);

	if (!new_offload)
		return -ENOMEM;

	write_pnet(&new_offload->net, net);
	new_offload->offload = uo;

	spin_lock(&udp_offload_lock);
	new_offload->next = udp_offload_base;
	rcu_assign_pointer(udp_offload_base, new_offload);
	spin_unlock(&udp_offload_lock);

	return 0;
}
EXPORT_SYMBOL(udp_add_offload);

static void udp_offload_free_routine(struct rcu_head *head)
{
	struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu);
	kfree(ou_priv);
}

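/* Unlink a previously registered offload and free it after an RCU
 * grace period so that concurrent GRO lookups can finish safely.
 */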
void udp_del_offload(struct udp_offload *uo)
{
	struct udp_offload_priv __rcu **head = &udp_offload_base;
	struct udp_offload_priv *uo_priv;

	spin_lock(&udp_offload_lock);

	uo_priv = udp_deref_protected(*head);
	for (; uo_priv != NULL;
	     uo_priv = udp_deref_protected(*head)) {
		if (uo_priv->offload == uo) {
			rcu_assign_pointer(*head,
					   udp_deref_protected(uo_priv->next));
			goto unlock;
		}
		head = &uo_priv->next;
	}
	pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port));
unlock:
	spin_unlock(&udp_offload_lock);
	if (uo_priv)
		call_rcu(&uo_priv->rcu, udp_offload_free_routine);
}
EXPORT_SYMBOL(udp_del_offload);
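
/* Usage sketch (not part of this file): a UDP tunnel module would
 * register its GRO callbacks roughly like this, assuming hypothetical
 * my_gro_receive()/my_gro_complete() handlers with the signatures
 * declared in struct udp_offload_callbacks:
 *
 *	static struct udp_offload my_offload = {
 *		.port = htons(4789),
 *		.callbacks = {
 *			.gro_receive  = my_gro_receive,
 *			.gro_complete = my_gro_complete,
 *		},
 *	};
 *
 *	err = udp_add_offload(net, &my_offload);
 *	...
 *	udp_del_offload(&my_offload);
 */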

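/* Generic UDP GRO entry point: look up a registered offload matching
 * the destination port and netns, flag held packets whose outer UDP
 * headers cannot merge with this one, and hand off to the tunnel's
 * gro_receive() callback.
 */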
struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
				 struct udphdr *uh)
{
	struct udp_offload_priv *uo_priv;
	struct sk_buff *p, **pp = NULL;
	struct udphdr *uh2;
	unsigned int off = skb_gro_offset(skb);
	int flush = 1;

	if (NAPI_GRO_CB(skb)->udp_mark ||
	    (skb->ip_summed != CHECKSUM_PARTIAL &&
	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
	     !NAPI_GRO_CB(skb)->csum_valid))
		goto out;

	/* mark that this skb passed once through the udp gro layer */
	NAPI_GRO_CB(skb)->udp_mark = 1;

	rcu_read_lock();
	uo_priv = rcu_dereference(udp_offload_base);
	for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
		if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
		    uo_priv->offload->port == uh->dest &&
		    uo_priv->offload->callbacks.gro_receive)
			goto unflush;
	}
	goto out_unlock;

unflush:
	flush = 0;

	for (p = *head; p; p = p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		uh2 = (struct udphdr *)(p->data + off);

		/* Match ports, and require that the checksums are either
		 * both zero or both nonzero.
		 */
		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) ||
		    (!uh->check ^ !uh2->check)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
	NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
	pp = uo_priv->offload->callbacks.gro_receive(head, skb,
						     uo_priv->offload);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;
	return pp;
}

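/* IPv4 GRO receive: validate (or zero-check) the UDP checksum against
 * the IPv4 pseudo-header before entering the generic path above.
 */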
static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb)
{
	struct udphdr *uh = udp_gro_udphdr(skb);

	if (unlikely(!uh))
		goto flush;

	/* Don't bother verifying checksum if we're going to flush anyway. */
	if (NAPI_GRO_CB(skb)->flush)
		goto skip;

	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
						 inet_gro_compute_pseudo))
		goto flush;
	else if (uh->check)
		skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
					     inet_gro_compute_pseudo);
skip:
	NAPI_GRO_CB(skb)->is_ipv6 = 0;
	return udp_gro_receive(head, skb, uh);

flush:
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}

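/* Finish a merged UDP packet: rewrite the outer UDP length, invoke the
 * matching offload's gro_complete(), and mark the skb as encapsulated
 * so GSO can later undo the merge.
 */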
int udp_gro_complete(struct sk_buff *skb, int nhoff)
{
	struct udp_offload_priv *uo_priv;
	__be16 newlen = htons(skb->len - nhoff);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
	int err = -ENOSYS;

	uh->len = newlen;

	rcu_read_lock();

	uo_priv = rcu_dereference(udp_offload_base);
	for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
		if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
		    uo_priv->offload->port == uh->dest &&
		    uo_priv->offload->callbacks.gro_complete)
			break;
	}

	if (uo_priv) {
		NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
		err = uo_priv->offload->callbacks.gro_complete(skb,
				nhoff + sizeof(struct udphdr),
				uo_priv->offload);
	}

	rcu_read_unlock();

	if (skb->remcsum_offload)
		skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;

	skb->encapsulation = 1;
	skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr));

	return err;
}

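/* IPv4 GRO complete: pick SKB_GSO_UDP_TUNNEL{,_CSUM} depending on
 * whether the outer checksum is in use, then run the generic
 * completion above.
 */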
static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

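	/* A nonzero outer checksum must be regenerated when the merged
	 * packet is resegmented, so select the _CSUM tunnel type and
	 * reset uh->check to the pseudo-header checksum.
	 */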
	if (uh->check) {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
					  iph->daddr, 0);
	} else {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
	}

	return udp_gro_complete(skb, nhoff);
}

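/* GSO/GRO handlers for UDP over IPv4, registered for IPPROTO_UDP in
 * the inet offload table by udpv4_offload_init().
 */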
static const struct net_offload udpv4_offload = {
	.callbacks = {
		.gso_segment = udp4_ufo_fragment,
		.gro_receive  =	udp4_gro_receive,
		.gro_complete =	udp4_gro_complete,
	},
};

int __init udpv4_offload_init(void)
{
	return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
}