/*
 *	IPV4 GSO/GRO offload support
 *	Linux INET implementation
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	UDPv4 GSO support
 */

#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/protocol.h>

static DEFINE_SPINLOCK(udp_offload_lock);
static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;

#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))

struct udp_offload_priv {
	struct udp_offload	*offload;
	possible_net_t	net;
	struct rcu_head		rcu;
	struct udp_offload_priv __rcu *next;
};

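/* Segment a UDP tunnel GSO skb: strip the outer headers, segment the
 * inner packet via gso_inner_segment(), then restore the outer headers
 * on every resulting segment and fix up the outer UDP length and
 * checksum, offloading the latter as CHECKSUM_PARTIAL when the device
 * can handle it.
 */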
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
	netdev_features_t features,
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features),
	__be16 new_protocol, bool is_ipv6)
{
	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	bool remcsum, need_csum, offload_csum;
	struct udphdr *uh = udp_hdr(skb);
	u16 mac_offset = skb->mac_header;
	__be16 protocol = skb->protocol;
	u16 mac_len = skb->mac_len;
	int udp_offset, outer_hlen;
	u32 partial;

	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
		goto out;

	/* adjust partial header checksum to negate old length */
	partial = (__force u32)uh->check + (__force u16)~uh->len;

	/* setup inner skb. */
	skb->encapsulation = 0;
	__skb_pull(skb, tnl_hlen);
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, skb_inner_network_offset(skb));
	skb->mac_len = skb_inner_network_offset(skb);
	skb->protocol = new_protocol;

	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
	skb->encap_hdr_csum = need_csum;

	remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
	skb->remcsum_offload = remcsum;

	/* Try to offload checksum if possible */
	offload_csum = !!(need_csum &&
			  (skb->dev->features &
			   (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
				      (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));

	features &= skb->dev->hw_enc_features;

	/* The only checksum offload we care about from here on out is the
	 * outer one, so strip the existing checksum feature flags and
	 * instead set the flag based on our outer checksum offload value.
	 */
	if (remcsum) {
		features &= ~NETIF_F_CSUM_MASK;
		if (offload_csum)
			features |= NETIF_F_HW_CSUM;
	}

	/* segment inner packet. */
	segs = gso_inner_segment(skb, features);
	if (IS_ERR_OR_NULL(segs)) {
		skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
				     mac_len);
		goto out;
	}

	outer_hlen = skb_tnl_header_len(skb);
	udp_offset = outer_hlen - tnl_hlen;
	skb = segs;
	do {
		__be16 len;

		if (remcsum)
			skb->ip_summed = CHECKSUM_NONE;

		/* Set up inner headers if we are offloading inner checksum */
		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			skb_reset_inner_headers(skb);
			skb->encapsulation = 1;
		}

		skb->mac_len = mac_len;
		skb->protocol = protocol;

		__skb_push(skb, outer_hlen);
		skb_reset_mac_header(skb);
		skb_set_network_header(skb, mac_len);
		skb_set_transport_header(skb, udp_offset);
		len = htons(skb->len - udp_offset);
		uh = udp_hdr(skb);
		uh->len = len;

		if (!need_csum)
			continue;

		uh->check = ~csum_fold((__force __wsum)
				       ((__force u32)len + partial));

		if (skb->encapsulation || !offload_csum) {
			uh->check = gso_make_checksum(skb, ~uh->check);
			if (uh->check == 0)
				uh->check = CSUM_MANGLED_0;
		} else {
			skb->ip_summed = CHECKSUM_PARTIAL;
			skb->csum_start = skb_transport_header(skb) - skb->head;
			skb->csum_offset = offsetof(struct udphdr, check);
		}
	} while ((skb = skb->next));
out:
	return segs;
}

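/* Select the inner segmentation handler based on how the tunnel driver
 * described the encapsulated payload (an Ethernet frame or a raw IP
 * protocol) and hand off to __skb_udp_tunnel_segment().
 */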
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
				       netdev_features_t features,
				       bool is_ipv6)
{
	__be16 protocol = skb->protocol;
	const struct net_offload **offloads;
	const struct net_offload *ops;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features);

	rcu_read_lock();

	switch (skb->inner_protocol_type) {
	case ENCAP_TYPE_ETHER:
		protocol = skb->inner_protocol;
		gso_inner_segment = skb_mac_gso_segment;
		break;
	case ENCAP_TYPE_IPPROTO:
		offloads = is_ipv6 ? inet6_offloads : inet_offloads;
		ops = rcu_dereference(offloads[skb->inner_ipproto]);
		if (!ops || !ops->callbacks.gso_segment)
			goto out_unlock;
		gso_inner_segment = ops->callbacks.gso_segment;
		break;
	default:
		goto out_unlock;
	}

	segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
					protocol, is_ipv6);

out_unlock:
	rcu_read_unlock();

	return segs;
}

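/* GSO callback for IPv4 UDP. Tunnel skbs are handed to
 * skb_udp_tunnel_segment(); for plain datagrams the UDP checksum is
 * completed in software before fragmentation, since hardware cannot
 * checksum a datagram sent as multiple IP fragments.
 */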
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
					 netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	unsigned int mss;
	__wsum csum;
	struct udphdr *uh;
	struct iphdr *iph;

	if (skb->encapsulation &&
	    (skb_shinfo(skb)->gso_type &
	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
		segs = skb_udp_tunnel_segment(skb, features, false);
		goto out;
	}

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto out;

	mss = skb_shinfo(skb)->gso_size;
	if (unlikely(skb->len <= mss))
		goto out;

	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* Packet is from an untrusted source, reset gso_segs. */
		int type = skb_shinfo(skb)->gso_type;

		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
				      SKB_GSO_UDP_TUNNEL |
				      SKB_GSO_UDP_TUNNEL_CSUM |
				      SKB_GSO_TUNNEL_REMCSUM |
				      SKB_GSO_IPIP |
				      SKB_GSO_GRE | SKB_GSO_GRE_CSUM) ||
			     !(type & (SKB_GSO_UDP))))
			goto out;

		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

		segs = NULL;
		goto out;
	}

	/* Do software UFO. Complete and fill in the UDP checksum here, as
	 * hardware cannot checksum a UDP packet that is sent as multiple
	 * IP fragments.
	 */

	uh = udp_hdr(skb);
	iph = ip_hdr(skb);

	uh->check = 0;
	csum = skb_checksum(skb, 0, skb->len, 0);
	uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

	skb->ip_summed = CHECKSUM_NONE;

	/* Fragment the skb. IP headers of the fragments are updated in
	 * inet_gso_segment()
	 */
	segs = skb_segment(skb, features);
out:
	return segs;
}

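/* Register a per-port tunnel offload (vxlan, fou, ...): the new entry
 * is pushed onto the head of the RCU-protected udp_offload_base list
 * under udp_offload_lock.
 */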
int udp_add_offload(struct net *net, struct udp_offload *uo)
{
	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);

	if (!new_offload)
		return -ENOMEM;

	write_pnet(&new_offload->net, net);
	new_offload->offload = uo;

	spin_lock(&udp_offload_lock);
	new_offload->next = udp_offload_base;
	rcu_assign_pointer(udp_offload_base, new_offload);
	spin_unlock(&udp_offload_lock);

	return 0;
}
EXPORT_SYMBOL(udp_add_offload);

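/* RCU callback: free an unlinked offload entry once no readers remain. */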
static void udp_offload_free_routine(struct rcu_head *head)
{
	struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu);
	kfree(ou_priv);
}

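/* Unlink the matching entry under udp_offload_lock and defer the free
 * to an RCU grace period so concurrent GRO lookups never touch freed
 * memory.
 */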
void udp_del_offload(struct udp_offload *uo)
{
	struct udp_offload_priv __rcu **head = &udp_offload_base;
	struct udp_offload_priv *uo_priv;

	spin_lock(&udp_offload_lock);

	uo_priv = udp_deref_protected(*head);
	for (; uo_priv != NULL;
	     uo_priv = udp_deref_protected(*head)) {
		if (uo_priv->offload == uo) {
			rcu_assign_pointer(*head,
					   udp_deref_protected(uo_priv->next));
			goto unlock;
		}
		head = &uo_priv->next;
	}
	pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port));
unlock:
	spin_unlock(&udp_offload_lock);
	if (uo_priv)
		call_rcu(&uo_priv->rcu, udp_offload_free_routine);
}
EXPORT_SYMBOL(udp_del_offload);

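/* Common UDP GRO receive path. The skb is flushed unless a registered
 * offload matches its netns and destination port; otherwise held
 * packets are compared on ports and checksum presence, and the skb is
 * passed on to the matching tunnel's gro_receive() callback.
 */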
struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
				 struct udphdr *uh)
{
	struct udp_offload_priv *uo_priv;
	struct sk_buff *p, **pp = NULL;
	struct udphdr *uh2;
	unsigned int off = skb_gro_offset(skb);
	int flush = 1;

	if (NAPI_GRO_CB(skb)->udp_mark ||
	    (skb->ip_summed != CHECKSUM_PARTIAL &&
	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
	     !NAPI_GRO_CB(skb)->csum_valid))
		goto out;

	/* mark that this skb passed once through the udp gro layer */
	NAPI_GRO_CB(skb)->udp_mark = 1;

	rcu_read_lock();
	uo_priv = rcu_dereference(udp_offload_base);
	for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
		if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
		    uo_priv->offload->port == uh->dest &&
		    uo_priv->offload->callbacks.gro_receive)
			goto unflush;
	}
	goto out_unlock;

unflush:
	flush = 0;

	for (p = *head; p; p = p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		uh2 = (struct udphdr *)(p->data + off);

		/* Match ports, and the checksums must be either both zero
		 * or both nonzero.
		 */
		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) ||
		    (!uh->check ^ !uh2->check)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
	NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
	pp = uo_priv->offload->callbacks.gro_receive(head, skb,
						     uo_priv->offload);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;
	return pp;
}

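/* IPv4 GRO entry point: validate the UDP checksum, converting a valid
 * one to CHECKSUM_UNNECESSARY, before handing the skb to the common
 * udp_gro_receive().
 */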
static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb)
{
	struct udphdr *uh = udp_gro_udphdr(skb);

	if (unlikely(!uh))
		goto flush;

	/* Don't bother verifying checksum if we're going to flush anyway. */
	if (NAPI_GRO_CB(skb)->flush)
		goto skip;

	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
						 inet_gro_compute_pseudo))
		goto flush;
	else if (uh->check)
		skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
					     inet_gro_compute_pseudo);
skip:
	NAPI_GRO_CB(skb)->is_ipv6 = 0;
	return udp_gro_receive(head, skb, uh);

flush:
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}

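/* Finish GRO for a merged tunnel skb: rewrite the outer UDP length,
 * mark the skb as encapsulated and let the matching offload's
 * gro_complete() callback fix up the inner headers.
 */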
int udp_gro_complete(struct sk_buff *skb, int nhoff)
{
	struct udp_offload_priv *uo_priv;
	__be16 newlen = htons(skb->len - nhoff);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
	int err = -ENOSYS;

	uh->len = newlen;

	rcu_read_lock();

	uo_priv = rcu_dereference(udp_offload_base);
	for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
		if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
		    uo_priv->offload->port == uh->dest &&
		    uo_priv->offload->callbacks.gro_complete)
			break;
	}

	if (uo_priv) {
		NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
		err = uo_priv->offload->callbacks.gro_complete(skb,
				nhoff + sizeof(struct udphdr),
				uo_priv->offload);
	}

	rcu_read_unlock();

	if (skb->remcsum_offload)
		skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;

	skb->encapsulation = 1;
	skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr));

	return err;
}

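/* IPv4 completion: pick the tunnel gso_type from the presence of the
 * outer checksum and, if there is one, reseed it with the pseudo-header
 * checksum that GSO expects.
 */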
static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

	if (uh->check) {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
					  iph->daddr, 0);
	} else {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
	}

	return udp_gro_complete(skb, nhoff);
}

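/* Offload callbacks registered with the IPv4 stack for IPPROTO_UDP. */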
static const struct net_offload udpv4_offload = {
	.callbacks = {
		.gso_segment = udp4_ufo_fragment,
		.gro_receive  =	udp4_gro_receive,
		.gro_complete =	udp4_gro_complete,
	},
};

int __init udpv4_offload_init(void)
{
	return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
}