#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/genetlink.h>
#include <net/gue.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/xfrm.h>
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>

struct fou {
	struct socket *sock;
	u8 protocol;
22
	u8 flags;
W
WANG Cong 已提交
23
	__be16 port;
T
Tom Herbert 已提交
24
	struct udp_offload udp_offloads;
25 26 27
	struct list_head list;
};

28 29
#define FOU_F_REMCSUM_NOPARTIAL BIT(0)

30
struct fou_cfg {
31
	u16 type;
32
	u8 protocol;
33
	u8 flags;
34 35 36
	struct udp_port_cfg udp_config;
};

/* Per-network-namespace state: the list of active FOU ports */
static unsigned int fou_net_id;

struct fou_net {
	struct list_head fou_list;	/* all struct fou in this netns */
	struct mutex fou_lock;		/* protects fou_list */
};

44 45 46 47 48
static inline struct fou *fou_from_sock(struct sock *sk)
{
	return sk->sk_user_data;
}

49
static void fou_recv_pull(struct sk_buff *skb, size_t len)
50 51 52 53
{
	struct iphdr *iph = ip_hdr(skb);

	/* Remove 'len' bytes from the packet (UDP header and
54
	 * FOU header if present).
55 56 57 58 59 60 61 62 63 64 65 66 67 68
	 */
	iph->tot_len = htons(ntohs(iph->tot_len) - len);
	__skb_pull(skb, len);
	skb_postpull_rcsum(skb, udp_hdr(skb), len);
	skb_reset_transport_header(skb);
}

static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
{
	struct fou *fou = fou_from_sock(sk);

	if (!fou)
		return 1;

69 70 71 72 73
	fou_recv_pull(skb, sizeof(struct udphdr));

	return -fou->protocol;
}

74
static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
75 76
				  void *data, size_t hdrlen, u8 ipproto,
				  bool nopartial)
77 78
{
	__be16 *pd = data;
T
Tom Herbert 已提交
79 80 81
	size_t start = ntohs(pd[0]);
	size_t offset = ntohs(pd[1]);
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
82 83 84 85 86

	if (!pskb_may_pull(skb, plen))
		return NULL;
	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

87 88
	skb_remcsum_process(skb, (void *)guehdr + hdrlen,
			    start, offset, nopartial);
89 90 91 92

	return guehdr;
}

/* Handle a GUE control message. No control messages are supported yet,
 * so the packet is dropped. Returns 0 (packet consumed).
 */
static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
{
	/* No support yet */
	kfree_skb(skb);
	return 0;
}

100 101 102
static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
{
	struct fou *fou = fou_from_sock(sk);
103
	size_t len, optlen, hdrlen;
104
	struct guehdr *guehdr;
105
	void *data;
106
	u16 doffset = 0;
107 108 109 110 111 112 113 114

	if (!fou)
		return 1;

	len = sizeof(struct udphdr) + sizeof(struct guehdr);
	if (!pskb_may_pull(skb, len))
		goto drop;

115 116 117 118
	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	optlen = guehdr->hlen << 2;
	len += optlen;
119 120 121 122

	if (!pskb_may_pull(skb, len))
		goto drop;

123 124
	/* guehdr may change after pull */
	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
125

126
	hdrlen = sizeof(struct guehdr) + optlen;
127

128
	if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
129
		goto drop;
130

131 132 133 134 135 136 137 138
	hdrlen = sizeof(struct guehdr) + optlen;

	ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);

	/* Pull csum through the guehdr now . This can be used if
	 * there is a remote checksum offload.
	 */
	skb_postpull_rcsum(skb, udp_hdr(skb), len);
139 140 141 142

	data = &guehdr[1];

	if (guehdr->flags & GUE_FLAG_PRIV) {
143 144 145 146 147 148
		__be32 flags = *(__be32 *)(data + doffset);

		doffset += GUE_LEN_PRIV;

		if (flags & GUE_PFLAG_REMCSUM) {
			guehdr = gue_remcsum(skb, guehdr, data + doffset,
149 150 151
					     hdrlen, guehdr->proto_ctype,
					     !!(fou->flags &
						FOU_F_REMCSUM_NOPARTIAL));
152 153 154 155
			if (!guehdr)
				goto drop;

			data = &guehdr[1];
156

157 158
			doffset += GUE_PLEN_REMCSUM;
		}
159 160
	}

161 162 163
	if (unlikely(guehdr->control))
		return gue_control_message(skb, guehdr);

T
Tom Herbert 已提交
164
	__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
165 166
	skb_reset_transport_header(skb);

167 168
	return -guehdr->proto_ctype;

169 170 171 172 173
drop:
	kfree_skb(skb);
	return 0;
}

T
Tom Herbert 已提交
174
static struct sk_buff **fou_gro_receive(struct sk_buff **head,
175 176
					struct sk_buff *skb,
					struct udp_offload *uoff)
T
Tom Herbert 已提交
177 178 179 180
{
	const struct net_offload *ops;
	struct sk_buff **pp = NULL;
	u8 proto = NAPI_GRO_CB(skb)->proto;
181
	const struct net_offload **offloads;
T
Tom Herbert 已提交
182 183

	rcu_read_lock();
184
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
T
Tom Herbert 已提交
185 186 187 188 189 190 191 192 193 194 195 196
	ops = rcu_dereference(offloads[proto]);
	if (!ops || !ops->callbacks.gro_receive)
		goto out_unlock;

	pp = ops->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();

	return pp;
}

197 198
static int fou_gro_complete(struct sk_buff *skb, int nhoff,
			    struct udp_offload *uoff)
T
Tom Herbert 已提交
199 200 201 202
{
	const struct net_offload *ops;
	u8 proto = NAPI_GRO_CB(skb)->proto;
	int err = -ENOSYS;
203
	const struct net_offload **offloads;
T
Tom Herbert 已提交
204

205 206
	udp_tunnel_gro_complete(skb, nhoff);

T
Tom Herbert 已提交
207
	rcu_read_lock();
208
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
T
Tom Herbert 已提交
209 210 211 212 213 214 215 216 217 218 219 220
	ops = rcu_dereference(offloads[proto]);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
		goto out_unlock;

	err = ops->callbacks.gro_complete(skb, nhoff);

out_unlock:
	rcu_read_unlock();

	return err;
}

221 222
static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
				      struct guehdr *guehdr, void *data,
223
				      size_t hdrlen, u8 ipproto,
224
				      struct gro_remcsum *grc, bool nopartial)
225 226
{
	__be16 *pd = data;
T
Tom Herbert 已提交
227 228 229
	size_t start = ntohs(pd[0]);
	size_t offset = ntohs(pd[1]);
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
230 231

	if (skb->remcsum_offload)
232
		return NULL;
233

T
Tom Herbert 已提交
234
	if (!NAPI_GRO_CB(skb)->csum_valid)
235 236 237 238 239 240 241 242 243
		return NULL;

	/* Pull checksum that will be written */
	if (skb_gro_header_hard(skb, off + plen)) {
		guehdr = skb_gro_header_slow(skb, off + plen, off);
		if (!guehdr)
			return NULL;
	}

244
	skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen,
245
				start, offset, grc, nopartial);
246 247 248 249 250 251

	skb->remcsum_offload = 1;

	return guehdr;
}

252
static struct sk_buff **gue_gro_receive(struct sk_buff **head,
253 254
					struct sk_buff *skb,
					struct udp_offload *uoff)
255 256 257 258 259 260
{
	const struct net_offload **offloads;
	const struct net_offload *ops;
	struct sk_buff **pp = NULL;
	struct sk_buff *p;
	struct guehdr *guehdr;
261 262
	size_t len, optlen, hdrlen, off;
	void *data;
263
	u16 doffset = 0;
264
	int flush = 1;
265
	struct fou *fou = container_of(uoff, struct fou, udp_offloads);
266 267 268
	struct gro_remcsum grc;

	skb_gro_remcsum_init(&grc);
269 270

	off = skb_gro_offset(skb);
271 272
	len = off + sizeof(*guehdr);

273
	guehdr = skb_gro_header_fast(skb, off);
274 275
	if (skb_gro_header_hard(skb, len)) {
		guehdr = skb_gro_header_slow(skb, len, off);
276 277 278 279
		if (unlikely(!guehdr))
			goto out;
	}

280 281
	optlen = guehdr->hlen << 2;
	len += optlen;
282

283 284 285 286 287
	if (skb_gro_header_hard(skb, len)) {
		guehdr = skb_gro_header_slow(skb, len, off);
		if (unlikely(!guehdr))
			goto out;
	}
288

289 290 291
	if (unlikely(guehdr->control) || guehdr->version != 0 ||
	    validate_gue_flags(guehdr, optlen))
		goto out;
292

293 294
	hdrlen = sizeof(*guehdr) + optlen;

295 296 297
	/* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr,
	 * this is needed if there is a remote checkcsum offload.
	 */
298 299 300 301 302
	skb_gro_postpull_rcsum(skb, guehdr, hdrlen);

	data = &guehdr[1];

	if (guehdr->flags & GUE_FLAG_PRIV) {
303
		__be32 flags = *(__be32 *)(data + doffset);
304

305 306 307 308 309
		doffset += GUE_LEN_PRIV;

		if (flags & GUE_PFLAG_REMCSUM) {
			guehdr = gue_gro_remcsum(skb, off, guehdr,
						 data + doffset, hdrlen,
310 311 312
						 guehdr->proto_ctype, &grc,
						 !!(fou->flags &
						    FOU_F_REMCSUM_NOPARTIAL));
313 314 315 316 317 318 319
			if (!guehdr)
				goto out;

			data = &guehdr[1];

			doffset += GUE_PLEN_REMCSUM;
		}
320 321
	}

322 323
	skb_gro_pull(skb, hdrlen);

324 325 326 327 328 329 330 331 332 333 334
	flush = 0;

	for (p = *head; p; p = p->next) {
		const struct guehdr *guehdr2;

		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		guehdr2 = (struct guehdr *)(p->data + off);

		/* Compare base GUE header to be equal (covers
335
		 * hlen, version, proto_ctype, and flags.
336 337 338 339 340 341 342 343 344 345 346 347 348 349
		 */
		if (guehdr->word != guehdr2->word) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* Compare optional fields are the same. */
		if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
					   guehdr->hlen << 2)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

350 351 352 353 354
	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[guehdr->proto_ctype]);
	if (WARN_ON(!ops || !ops->callbacks.gro_receive))
		goto out_unlock;
355 356 357 358 359 360 361

	pp = ops->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;
362
	skb_gro_remcsum_cleanup(skb, &grc);
363 364 365 366

	return pp;
}

367 368
static int gue_gro_complete(struct sk_buff *skb, int nhoff,
			    struct udp_offload *uoff)
369 370 371 372 373 374 375 376
{
	const struct net_offload **offloads;
	struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
	const struct net_offload *ops;
	unsigned int guehlen;
	u8 proto;
	int err = -ENOENT;

377
	proto = guehdr->proto_ctype;
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393

	guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
		goto out_unlock;

	err = ops->callbacks.gro_complete(skb, nhoff + guehlen);

out_unlock:
	rcu_read_unlock();
	return err;
}

W
WANG Cong 已提交
394
static int fou_add_to_port_list(struct net *net, struct fou *fou)
395
{
W
WANG Cong 已提交
396
	struct fou_net *fn = net_generic(net, fou_net_id);
397 398
	struct fou *fout;

W
WANG Cong 已提交
399 400
	mutex_lock(&fn->fou_lock);
	list_for_each_entry(fout, &fn->fou_list, list) {
401
		if (fou->port == fout->port) {
W
WANG Cong 已提交
402
			mutex_unlock(&fn->fou_lock);
403 404 405 406
			return -EALREADY;
		}
	}

W
WANG Cong 已提交
407 408
	list_add(&fou->list, &fn->fou_list);
	mutex_unlock(&fn->fou_lock);
409 410 411 412 413 414 415 416 417

	return 0;
}

static void fou_release(struct fou *fou)
{
	struct socket *sock = fou->sock;
	struct sock *sk = sock->sk;

418 419
	if (sk->sk_family == AF_INET)
		udp_del_offload(&fou->udp_offloads);
420
	list_del(&fou->list);
W
WANG Cong 已提交
421
	udp_tunnel_sock_release(sock);
422 423 424 425

	kfree(fou);
}

426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
{
	udp_sk(sk)->encap_rcv = fou_udp_recv;
	fou->protocol = cfg->protocol;
	fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
	fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
	fou->udp_offloads.port = cfg->udp_config.local_udp_port;
	fou->udp_offloads.ipproto = cfg->protocol;

	return 0;
}

/* Set up an encap socket for GUE encapsulation. */
static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
{
	fou->udp_offloads.callbacks.gro_receive = gue_gro_receive;
	fou->udp_offloads.callbacks.gro_complete = gue_gro_complete;
	fou->udp_offloads.port = cfg->udp_config.local_udp_port;
	udp_sk(sk)->encap_rcv = gue_udp_recv;

	return 0;
}

448 449 450 451
static int fou_create(struct net *net, struct fou_cfg *cfg,
		      struct socket **sockp)
{
	struct socket *sock = NULL;
W
WANG Cong 已提交
452
	struct fou *fou = NULL;
453
	struct sock *sk;
W
WANG Cong 已提交
454
	int err;
455 456 457 458 459 460 461 462 463 464 465 466 467 468 469

	/* Open UDP socket */
	err = udp_sock_create(net, &cfg->udp_config, &sock);
	if (err < 0)
		goto error;

	/* Allocate FOU port structure */
	fou = kzalloc(sizeof(*fou), GFP_KERNEL);
	if (!fou) {
		err = -ENOMEM;
		goto error;
	}

	sk = sock->sk;

470
	fou->flags = cfg->flags;
471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
	fou->port = cfg->udp_config.local_udp_port;

	/* Initial for fou type */
	switch (cfg->type) {
	case FOU_ENCAP_DIRECT:
		err = fou_encap_init(sk, fou, cfg);
		if (err)
			goto error;
		break;
	case FOU_ENCAP_GUE:
		err = gue_encap_init(sk, fou, cfg);
		if (err)
			goto error;
		break;
	default:
		err = -EINVAL;
		goto error;
	}
489 490 491 492 493 494 495

	udp_sk(sk)->encap_type = 1;
	udp_encap_enable();

	sk->sk_user_data = fou;
	fou->sock = sock;

496
	inet_inc_convert_csum(sk);
497 498 499

	sk->sk_allocation = GFP_ATOMIC;

T
Tom Herbert 已提交
500 501 502 503 504 505
	if (cfg->udp_config.family == AF_INET) {
		err = udp_add_offload(&fou->udp_offloads);
		if (err)
			goto error;
	}

W
WANG Cong 已提交
506
	err = fou_add_to_port_list(net, fou);
507 508 509 510 511 512 513 514 515 516 517
	if (err)
		goto error;

	if (sockp)
		*sockp = sock;

	return 0;

error:
	kfree(fou);
	if (sock)
W
WANG Cong 已提交
518
		udp_tunnel_sock_release(sock);
519 520 521 522 523 524

	return err;
}

static int fou_destroy(struct net *net, struct fou_cfg *cfg)
{
W
WANG Cong 已提交
525
	struct fou_net *fn = net_generic(net, fou_net_id);
W
WANG Cong 已提交
526
	__be16 port = cfg->udp_config.local_udp_port;
527
	int err = -EINVAL;
W
WANG Cong 已提交
528
	struct fou *fou;
529

W
WANG Cong 已提交
530 531
	mutex_lock(&fn->fou_lock);
	list_for_each_entry(fou, &fn->fou_list, list) {
532 533 534 535 536 537
		if (fou->port == port) {
			fou_release(fou);
			err = 0;
			break;
		}
	}
W
WANG Cong 已提交
538
	mutex_unlock(&fn->fou_lock);
539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555

	return err;
}

/* Generic netlink family used to configure FOU ports */
static struct genl_family fou_nl_family = {
	.id		= GENL_ID_GENERATE,	/* dynamic family id */
	.hdrsize	= 0,
	.name		= FOU_GENL_NAME,
	.version	= FOU_GENL_VERSION,
	.maxattr	= FOU_ATTR_MAX,
	.netnsok	= true,			/* per network namespace */
};

static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
	[FOU_ATTR_PORT] = { .type = NLA_U16, },
	[FOU_ATTR_AF] = { .type = NLA_U8, },
	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
556
	[FOU_ATTR_TYPE] = { .type = NLA_U8, },
557
	[FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576
};

static int parse_nl_config(struct genl_info *info,
			   struct fou_cfg *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->udp_config.family = AF_INET;

	if (info->attrs[FOU_ATTR_AF]) {
		u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);

		if (family != AF_INET && family != AF_INET6)
			return -EINVAL;

		cfg->udp_config.family = family;
	}

	if (info->attrs[FOU_ATTR_PORT]) {
W
WANG Cong 已提交
577
		__be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);
578 579 580 581 582 583 584

		cfg->udp_config.local_udp_port = port;
	}

	if (info->attrs[FOU_ATTR_IPPROTO])
		cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);

585 586 587
	if (info->attrs[FOU_ATTR_TYPE])
		cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);

588 589 590
	if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
		cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;

591 592 593 594 595
	return 0;
}

static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
{
W
WANG Cong 已提交
596
	struct net *net = genl_info_net(info);
597 598 599 600 601 602 603
	struct fou_cfg cfg;
	int err;

	err = parse_nl_config(info, &cfg);
	if (err)
		return err;

W
WANG Cong 已提交
604
	return fou_create(net, &cfg, NULL);
605 606 607 608
}

static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
{
W
WANG Cong 已提交
609
	struct net *net = genl_info_net(info);
610
	struct fou_cfg cfg;
611
	int err;
612

613 614 615
	err = parse_nl_config(info, &cfg);
	if (err)
		return err;
616

W
WANG Cong 已提交
617
	return fou_destroy(net, &cfg);
618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634
}

/* Generic netlink operations: add/delete FOU ports (admin only) */
static const struct genl_ops fou_nl_ops[] = {
	{
		.cmd = FOU_CMD_ADD,
		.doit = fou_nl_cmd_add_port,
		.policy = fou_nl_policy,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = FOU_CMD_DEL,
		.doit = fou_nl_cmd_rm_port,
		.policy = fou_nl_policy,
		.flags = GENL_ADMIN_PERM,
	},
};

635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658
size_t fou_encap_hlen(struct ip_tunnel_encap *e)
{
	return sizeof(struct udphdr);
}
EXPORT_SYMBOL(fou_encap_hlen);

/* Encapsulation overhead for GUE: UDP header, GUE header, plus the
 * private option block when remote checksum offload is enabled.
 */
size_t gue_encap_hlen(struct ip_tunnel_encap *e)
{
	size_t len = sizeof(struct udphdr) + sizeof(struct guehdr);

	if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM)
		len += GUE_LEN_PRIV + GUE_PLEN_REMCSUM;

	return len;
}
EXPORT_SYMBOL(gue_encap_hlen);

659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704
static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
			  struct flowi4 *fl4, u8 *protocol, __be16 sport)
{
	struct udphdr *uh;

	skb_push(skb, sizeof(struct udphdr));
	skb_reset_transport_header(skb);

	uh = udp_hdr(skb);

	uh->dest = e->dport;
	uh->source = sport;
	uh->len = htons(skb->len);
	uh->check = 0;
	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
		     fl4->saddr, fl4->daddr, skb->len);

	*protocol = IPPROTO_UDP;
}

/* ip_tunnel encap build_header for direct FOU: prepare GSO offload
 * state and push the outer UDP header. Returns 0 or a negative errno.
 */
int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
		     u8 *protocol, struct flowi4 *fl4)
{
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
	__be16 sport;

	skb = iptunnel_handle_offloads(skb, csum, type);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* Fixed source port if configured, otherwise flow-hash based */
	if (e->sport)
		sport = e->sport;
	else
		sport = udp_flow_src_port(dev_net(skb->dev),
					  skb, 0, 0, false);

	fou_build_udp(skb, e, fl4, protocol, sport);

	return 0;
}
EXPORT_SYMBOL(fou_build_header);

int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
		     u8 *protocol, struct flowi4 *fl4)
{
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
	struct guehdr *guehdr;
705
	size_t hdrlen, optlen = 0;
706
	__be16 sport;
707 708 709
	void *data;
	bool need_priv = false;

710 711 712 713 714 715 716 717
	if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
	    skb->ip_summed == CHECKSUM_PARTIAL) {
		csum = false;
		optlen += GUE_PLEN_REMCSUM;
		type |= SKB_GSO_TUNNEL_REMCSUM;
		need_priv = true;
	}

718
	optlen += need_priv ? GUE_LEN_PRIV : 0;
719 720 721 722 723 724 725 726 727 728

	skb = iptunnel_handle_offloads(skb, csum, type);

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* Get source port (based on flow hash) before skb_push */
	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);

729 730 731
	hdrlen = sizeof(struct guehdr) + optlen;

	skb_push(skb, hdrlen);
732 733 734

	guehdr = (struct guehdr *)skb->data;

735
	guehdr->control = 0;
736
	guehdr->version = 0;
737
	guehdr->hlen = optlen >> 2;
738
	guehdr->flags = 0;
739 740 741 742 743 744 745 746 747 748 749
	guehdr->proto_ctype = *protocol;

	data = &guehdr[1];

	if (need_priv) {
		__be32 *flags = data;

		guehdr->flags |= GUE_FLAG_PRIV;
		*flags = 0;
		data += GUE_LEN_PRIV;

750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769
		if (type & SKB_GSO_TUNNEL_REMCSUM) {
			u16 csum_start = skb_checksum_start_offset(skb);
			__be16 *pd = data;

			if (csum_start < hdrlen)
				return -EINVAL;

			csum_start -= hdrlen;
			pd[0] = htons(csum_start);
			pd[1] = htons(csum_start + skb->csum_offset);

			if (!skb_is_gso(skb)) {
				skb->ip_summed = CHECKSUM_NONE;
				skb->encapsulation = 0;
			}

			*flags |= GUE_PFLAG_REMCSUM;
			data += GUE_PLEN_REMCSUM;
		}

770
	}
771 772 773 774 775 776 777

	fou_build_udp(skb, e, fl4, protocol, sport);

	return 0;
}
EXPORT_SYMBOL(gue_build_header);

#ifdef CONFIG_NET_FOU_IP_TUNNELS

/* ip_tunnel encap ops for plain FOU encapsulation */
static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = {
	.encap_hlen = fou_encap_hlen,
	.build_header = fou_build_header,
};

/* ip_tunnel encap ops for GUE encapsulation */
static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = {
	.encap_hlen = gue_encap_hlen,
	.build_header = gue_build_header,
};

/* Register FOU and GUE with the ip_tunnel encap framework.
 * Rolls back the FOU registration if the GUE registration fails.
 */
static int ip_tunnel_encap_add_fou_ops(void)
{
	int ret;

	ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
	if (ret < 0) {
		pr_err("can't add fou ops\n");
		return ret;
	}

	ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
	if (ret < 0) {
		pr_err("can't add gue ops\n");
		ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
		return ret;
	}

	return 0;
}

/* Unregister both encap ops. */
static void ip_tunnel_encap_del_fou_ops(void)
{
	ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
	ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
}

#else

/* Stubs when ip_tunnel encap support is not configured */
static int ip_tunnel_encap_add_fou_ops(void)
{
	return 0;
}

static void ip_tunnel_encap_del_fou_ops(void)
{
}

#endif

W
WANG Cong 已提交
829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856
static __net_init int fou_init_net(struct net *net)
{
	struct fou_net *fn = net_generic(net, fou_net_id);

	INIT_LIST_HEAD(&fn->fou_list);
	mutex_init(&fn->fou_lock);
	return 0;
}

/* Per-netns teardown: release every remaining FOU port in this netns. */
static __net_exit void fou_exit_net(struct net *net)
{
	struct fou_net *fn = net_generic(net, fou_net_id);
	struct fou *fou, *next;

	/* Close all the FOU sockets */
	mutex_lock(&fn->fou_lock);
	list_for_each_entry_safe(fou, next, &fn->fou_list, list)
		fou_release(fou);
	mutex_unlock(&fn->fou_lock);
}

/* Pernet operations: per-netns FOU state lifetime */
static struct pernet_operations fou_net_ops = {
	.init = fou_init_net,
	.exit = fou_exit_net,
	.id   = &fou_net_id,
	.size = sizeof(struct fou_net),
};

857 858 859 860
static int __init fou_init(void)
{
	int ret;

W
WANG Cong 已提交
861 862 863 864
	ret = register_pernet_device(&fou_net_ops);
	if (ret)
		goto exit;

865 866
	ret = genl_register_family_with_ops(&fou_nl_family,
					    fou_nl_ops);
867
	if (ret < 0)
W
WANG Cong 已提交
868
		goto unregister;
869 870

	ret = ip_tunnel_encap_add_fou_ops();
W
WANG Cong 已提交
871 872
	if (ret == 0)
		return 0;
873

W
WANG Cong 已提交
874 875 876
	genl_unregister_family(&fou_nl_family);
unregister:
	unregister_pernet_device(&fou_net_ops);
877
exit:
878 879 880 881 882
	return ret;
}

/* Module exit: undo fou_init in reverse order. */
static void __exit fou_fini(void)
{
	ip_tunnel_encap_del_fou_ops();
	genl_unregister_family(&fou_nl_family);
	unregister_pernet_device(&fou_net_ops);
}

/* Module registration boilerplate */
module_init(fou_init);
module_exit(fou_fini);
MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
MODULE_LICENSE("GPL");