udp_offload.c
/*
 *	IPV4 GSO/GRO offload support
 *	Linux INET implementation
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	UDPv4 GSO support
 */

#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/protocol.h>

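/* Segment a GSO skb carrying a UDP tunnel header: strip the outer
 * headers, segment the inner packet via the caller-supplied
 * gso_inner_segment() callback, then restore the outer headers on
 * every resulting segment and fix up each outer UDP length and
 * checksum.
 */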
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
	netdev_features_t features,
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features),
	__be16 new_protocol, bool is_ipv6)
{
	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
	bool remcsum, need_csum, offload_csum, ufo, gso_partial;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct udphdr *uh = udp_hdr(skb);
	u16 mac_offset = skb->mac_header;
	__be16 protocol = skb->protocol;
	u16 mac_len = skb->mac_len;
	int udp_offset, outer_hlen;
	__wsum partial;
	bool need_ipsec;

	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
		goto out;

	/* Adjust partial header checksum to negate old length.
	 * We cannot rely on the value contained in uh->len as it is
	 * possible that the actual value exceeds the boundaries of the
	 * 16 bit length field due to the header being added outside of an
	 * IP or IPv6 frame that was already limited to 64K - 1.
	 */
	if (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL)
		partial = (__force __wsum)uh->len;
	else
		partial = (__force __wsum)htonl(skb->len);
	partial = csum_sub(csum_unfold(uh->check), partial);

	/* setup inner skb. */
	skb->encapsulation = 0;
	SKB_GSO_CB(skb)->encap_level = 0;
	__skb_pull(skb, tnl_hlen);
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, skb_inner_network_offset(skb));
	skb->mac_len = skb_inner_network_offset(skb);
	skb->protocol = new_protocol;

	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
	skb->encap_hdr_csum = need_csum;

	remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
	skb->remcsum_offload = remcsum;

	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);

	need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
	/* Try to offload checksum if possible */
	offload_csum = !!(need_csum &&
			  !need_ipsec &&
			  (skb->dev->features &
			   (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
				      (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));

	features &= skb->dev->hw_enc_features;

	/* The only checksum offload we care about from here on out is the
	 * outer one so strip the existing checksum feature flags and
	 * instead set the flag based on our outer checksum offload value.
	 */
	if (remcsum || ufo) {
		features &= ~NETIF_F_CSUM_MASK;
		if (!need_csum || offload_csum)
			features |= NETIF_F_HW_CSUM;
	}

	/* segment inner packet. */
	segs = gso_inner_segment(skb, features);
	if (IS_ERR_OR_NULL(segs)) {
		skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
				     mac_len);
		goto out;
	}

	gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);

	outer_hlen = skb_tnl_header_len(skb);
	udp_offset = outer_hlen - tnl_hlen;
	skb = segs;
	do {
		unsigned int len;

		if (remcsum)
			skb->ip_summed = CHECKSUM_NONE;

		/* Set up inner headers if we are offloading inner checksum */
		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			skb_reset_inner_headers(skb);
			skb->encapsulation = 1;
		}

		skb->mac_len = mac_len;
		skb->protocol = protocol;

		__skb_push(skb, outer_hlen);
		skb_reset_mac_header(skb);
		skb_set_network_header(skb, mac_len);
		skb_set_transport_header(skb, udp_offset);
		len = skb->len - udp_offset;
		uh = udp_hdr(skb);

		/* If we are only performing partial GSO the inner header
		 * will be using a length value equal to only one MSS sized
		 * segment instead of the entire frame.
		 */
		if (gso_partial) {
			uh->len = htons(skb_shinfo(skb)->gso_size +
					SKB_GSO_CB(skb)->data_offset +
					skb->head - (unsigned char *)uh);
		} else {
			uh->len = htons(len);
		}

		if (!need_csum)
			continue;

		uh->check = ~csum_fold(csum_add(partial,
				       (__force __wsum)htonl(len)));

		if (skb->encapsulation || !offload_csum) {
			uh->check = gso_make_checksum(skb, ~uh->check);
			if (uh->check == 0)
				uh->check = CSUM_MANGLED_0;
		} else {
			skb->ip_summed = CHECKSUM_PARTIAL;
			skb->csum_start = skb_transport_header(skb) - skb->head;
			skb->csum_offset = offsetof(struct udphdr, check);
		}
	} while ((skb = skb->next));
out:
	return segs;
}

struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
				       netdev_features_t features,
				       bool is_ipv6)
{
	__be16 protocol = skb->protocol;
	const struct net_offload **offloads;
	const struct net_offload *ops;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features);

	rcu_read_lock();

	switch (skb->inner_protocol_type) {
	case ENCAP_TYPE_ETHER:
		protocol = skb->inner_protocol;
		gso_inner_segment = skb_mac_gso_segment;
		break;
	case ENCAP_TYPE_IPPROTO:
		offloads = is_ipv6 ? inet6_offloads : inet_offloads;
		ops = rcu_dereference(offloads[skb->inner_ipproto]);
		if (!ops || !ops->callbacks.gso_segment)
			goto out_unlock;
		gso_inner_segment = ops->callbacks.gso_segment;
		break;
	default:
		goto out_unlock;
	}

	segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
					protocol, is_ipv6);

out_unlock:
	rcu_read_unlock();

	return segs;
}
EXPORT_SYMBOL(skb_udp_tunnel_segment);
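/* Entry point for UDP tunnel segmentation: choose the inner
 * segmentation routine based on whether the encapsulated packet
 * starts with an Ethernet header (ENCAP_TYPE_ETHER) or an IP
 * protocol (ENCAP_TYPE_IPPROTO), then defer to
 * __skb_udp_tunnel_segment().
 */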

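/* GSO callback for IPv4 UDP: tunnel skbs are handed to
 * skb_udp_tunnel_segment(); plain UDP datagrams get software UFO,
 * i.e. the UDP checksum is computed over the whole datagram before
 * it is split into mss-sized IP fragments.
 */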
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
					 netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	unsigned int mss;
	__wsum csum;
	struct udphdr *uh;
	struct iphdr *iph;

	if (skb->encapsulation &&
	    (skb_shinfo(skb)->gso_type &
	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
		segs = skb_udp_tunnel_segment(skb, features, false);
		goto out;
	}

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto out;

	mss = skb_shinfo(skb)->gso_size;
	if (unlikely(skb->len <= mss))
		goto out;

	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* Packet is from an untrusted source, reset gso_segs. */

		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

		segs = NULL;
		goto out;
	}

	/* Do software UFO. Complete and fill in the UDP checksum as
	 * HW cannot do checksum of UDP packets sent as multiple
	 * IP fragments.
	 */
	uh = udp_hdr(skb);
	iph = ip_hdr(skb);

	uh->check = 0;
	csum = skb_checksum(skb, 0, skb->len, 0);
	uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

	skb->ip_summed = CHECKSUM_NONE;

	/* If there is no outer header we can fake a checksum offload
	 * because we have already computed the checksum in software
	 * prior to segmenting the frame.
	 */
	if (!skb->encap_hdr_csum)
		features |= NETIF_F_HW_CSUM;

	/* Fragment the skb. IP headers of the fragments are updated in
	 * inet_gso_segment().
	 */
	segs = skb_segment(skb, features);
out:
	return segs;
}

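/* Common GRO receive path for UDP encapsulations (IPv4 and IPv6).
 * Packets are only aggregated when a matching socket has installed a
 * gro_receive callback (e.g. a vxlan socket); everything else is
 * flushed to the normal receive path.
 */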
struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
				 struct udphdr *uh, udp_lookup_t lookup)
{
	struct sk_buff *p, **pp = NULL;
	struct udphdr *uh2;
	unsigned int off = skb_gro_offset(skb);
	int flush = 1;
	struct sock *sk;

	if (NAPI_GRO_CB(skb)->encap_mark ||
	    (skb->ip_summed != CHECKSUM_PARTIAL &&
	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
	     !NAPI_GRO_CB(skb)->csum_valid))
		goto out;

	/* mark that this skb passed once through the tunnel gro layer */
	NAPI_GRO_CB(skb)->encap_mark = 1;

	rcu_read_lock();
	sk = (*lookup)(skb, uh->source, uh->dest);

	if (sk && udp_sk(sk)->gro_receive)
		goto unflush;
	goto out_unlock;

unflush:
	flush = 0;

	for (p = *head; p; p = p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		uh2 = (struct udphdr *)(p->data + off);

		/* Match ports, and require that the checksums are either
		 * both zero or both nonzero.
		 */
		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) ||
		    (!uh->check ^ !uh2->check)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
	pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;
	return pp;
}
EXPORT_SYMBOL(udp_gro_receive);
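
/* For reference, a tunnel driver opts into the GRO path above by
 * installing per-socket callbacks, typically with
 * setup_udp_tunnel_sock(). A minimal sketch, where my_tunnel,
 * my_encap_rcv, my_gro_receive and my_gro_complete are placeholder
 * names for the driver's own context and handlers:
 *
 *	struct udp_tunnel_sock_cfg cfg = { };
 *
 *	cfg.sk_user_data = my_tunnel;
 *	cfg.encap_type   = 1;
 *	cfg.encap_rcv    = my_encap_rcv;
 *	cfg.gro_receive  = my_gro_receive;
 *	cfg.gro_complete = my_gro_complete;
 *	setup_udp_tunnel_sock(net, sock, &cfg);
 */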

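/* IPv4 GRO receive callback for UDP: validate the UDP checksum
 * against the IPv4 pseudo-header before deferring to the
 * protocol-independent udp_gro_receive().
 */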
static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb)
{
	struct udphdr *uh = udp_gro_udphdr(skb);

	if (unlikely(!uh))
		goto flush;

	/* Don't bother verifying checksum if we're going to flush anyway. */
	if (NAPI_GRO_CB(skb)->flush)
		goto skip;

	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
						 inet_gro_compute_pseudo))
		goto flush;
	else if (uh->check)
		skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
					     inet_gro_compute_pseudo);
skip:
	NAPI_GRO_CB(skb)->is_ipv6 = 0;
	return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb);

flush:
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}

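/* Common GRO completion for UDP encapsulations: rewrite the outer UDP
 * length of the merged skb and let the owning socket's gro_complete
 * callback fix up the inner headers.
 */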
int udp_gro_complete(struct sk_buff *skb, int nhoff,
		     udp_lookup_t lookup)
{
	__be16 newlen = htons(skb->len - nhoff);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
	int err = -ENOSYS;
	struct sock *sk;

	uh->len = newlen;

	/* Set encapsulation before calling into inner gro_complete() functions
	 * to make them set up the inner offsets.
	 */
	skb->encapsulation = 1;

	rcu_read_lock();
	sk = (*lookup)(skb, uh->source, uh->dest);
	if (sk && udp_sk(sk)->gro_complete)
		err = udp_sk(sk)->gro_complete(sk, skb,
				nhoff + sizeof(struct udphdr));
	rcu_read_unlock();

	if (skb->remcsum_offload)
		skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;

	return err;
}
EXPORT_SYMBOL(udp_gro_complete);

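/* IPv4 GRO completion callback for UDP: restore the pseudo-header
 * checksum, mark the skb with the matching tunnel GSO type so it can
 * be resegmented on output, then defer to udp_gro_complete().
 */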
static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

	if (uh->check) {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
					  iph->daddr, 0);
	} else {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
	}

	return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
}
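
/* Protocol offload hooks for IPPROTO_UDP. udpv4_offload_init() runs
 * during IPv4 stack initialization to register them with
 * inet_add_offload().
 */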

static const struct net_offload udpv4_offload = {
	.callbacks = {
		.gso_segment  = udp4_ufo_fragment,
		.gro_receive  = udp4_gro_receive,
		.gro_complete = udp4_gro_complete,
	},
};

int __init udpv4_offload_init(void)
{
	return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
}