xfrm_input.c 12.7 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
L
Linus Torvalds 已提交
2 3 4 5 6 7
/*
 * xfrm_input.c
 *
 * Changes:
 * 	YOSHIFUJI Hideaki @USAGI
 * 		Split up af-specific portion
 *
 */

11
#include <linux/bottom_half.h>
12
#include <linux/cache.h>
13
#include <linux/interrupt.h>
L
Linus Torvalds 已提交
14 15
#include <linux/slab.h>
#include <linux/module.h>
16
#include <linux/netdevice.h>
17
#include <linux/percpu.h>
18
#include <net/dst.h>
L
Linus Torvalds 已提交
19 20
#include <net/ip.h>
#include <net/xfrm.h>
21 22
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>
L
Linus Torvalds 已提交
23

24 25 26 27 28 29
/*
 * Deferred-reinjection context; one instance exists per CPU (see the
 * DEFINE_PER_CPU of the same name in this file).
 */
struct xfrm_trans_tasklet {
	/* Tasklet that runs xfrm_trans_reinject() for this instance. */
	struct tasklet_struct tasklet;
	/* skbs queued by xfrm_trans_queue() and waiting for that tasklet. */
	struct sk_buff_head queue;
};

struct xfrm_trans_cb {
30 31 32 33 34 35
	union {
		struct inet_skb_parm	h4;
#if IS_ENABLED(CONFIG_IPV6)
		struct inet6_skb_parm	h6;
#endif
	} header;
36 37 38 39 40
	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
};

/* View the start of an skb's control buffer (skb->cb) as a struct xfrm_trans_cb. */
#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))

41
/*
 * Slab cache for struct sec_path objects: created in xfrm_input_init(),
 * allocated from in secpath_dup(), freed to in __secpath_destroy().
 */
static struct kmem_cache *secpath_cachep __ro_after_init;
L
Linus Torvalds 已提交
42

43
/* Serialises updates to xfrm_input_afinfo[] in the register/unregister paths. */
static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
44
/*
 * Registered input handlers, indexed by address family (valid indices are
 * 0..AF_INET6).  Slots are RCU-protected pointers: readers use
 * rcu_dereference() under rcu_read_lock().
 */
static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
45

46 47 48
/*
 * GRO cells that xfrm_input() hands processed packets to via
 * gro_cells_receive(); set up in xfrm_input_init().
 */
static struct gro_cells gro_cells;
/* Dummy net_device (init_dummy_netdev()) that gro_cells is initialised against. */
static struct net_device xfrm_napi_dev;

49 50
/*
 * Per-CPU reinjection queue + tasklet; initialised for every possible CPU in
 * xfrm_input_init() and fed by xfrm_trans_queue().
 */
static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);

51
int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
52 53 54
{
	int err = 0;

55
	if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo)))
56
		return -EAFNOSUPPORT;
57

58 59
	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
60
		err = -EEXIST;
61 62 63 64 65 66 67
	else
		rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_input_register_afinfo);

68
int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo)
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
{
	int err = 0;

	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL);
	}
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	synchronize_rcu();
	return err;
}
EXPORT_SYMBOL(xfrm_input_unregister_afinfo);

85
static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family)
86
{
87
	const struct xfrm_input_afinfo *afinfo;
88

89
	if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo)))
90
		return NULL;
91

92 93 94 95 96 97 98 99 100 101 102
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_input_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

/*
 * Invoke the ->callback() handler registered for @family, passing @skb,
 * @protocol and @err through.
 *
 * Returns the handler's result, or -EAFNOSUPPORT when no handlers are
 * available for @family.
 */
static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
		       int err)
{
	const struct xfrm_input_afinfo *handlers;
	int rc;

	handlers = xfrm_input_get_afinfo(family);
	if (handlers == NULL)
		return -EAFNOSUPPORT;

	rc = handlers->callback(skb, protocol, err);
	/* Pairs with the rcu_read_lock() left held by xfrm_input_get_afinfo(). */
	rcu_read_unlock();

	return rc;
}

L
Linus Torvalds 已提交
114 115 116 117
void __secpath_destroy(struct sec_path *sp)
{
	int i;
	for (i = 0; i < sp->len; i++)
118
		xfrm_state_put(sp->xvec[i]);
L
Linus Torvalds 已提交
119 120 121 122 123 124 125 126
	kmem_cache_free(secpath_cachep, sp);
}
EXPORT_SYMBOL(__secpath_destroy);

struct sec_path *secpath_dup(struct sec_path *src)
{
	struct sec_path *sp;

127
	sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC);
L
Linus Torvalds 已提交
128 129 130 131
	if (!sp)
		return NULL;

	sp->len = 0;
132 133
	sp->olen = 0;

134 135
	memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH]));

L
Linus Torvalds 已提交
136 137 138 139 140
	if (src) {
		int i;

		memcpy(sp, src, sizeof(*sp));
		for (i = 0; i < sp->len; i++)
141
			xfrm_state_hold(sp->xvec[i]);
L
Linus Torvalds 已提交
142
	}
143
	refcount_set(&sp->refcnt, 1);
L
Linus Torvalds 已提交
144 145 146 147
	return sp;
}
EXPORT_SYMBOL(secpath_dup);

148 149 150 151 152
int secpath_set(struct sk_buff *skb)
{
	struct sec_path *sp;

	/* Allocate new secpath or COW existing one. */
153
	if (!skb->sp || refcount_read(&skb->sp->refcnt) != 1) {
154 155 156 157 158 159 160 161 162 163 164 165
		sp = secpath_dup(skb->sp);
		if (!sp)
			return -ENOMEM;

		if (skb->sp)
			secpath_put(skb->sp);
		skb->sp = sp;
	}
	return 0;
}
EXPORT_SYMBOL(secpath_set);

L
Linus Torvalds 已提交
166 167
/* Fetch spi and seq from ipsec header */

A
Al Viro 已提交
168
int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
L
Linus Torvalds 已提交
169 170
{
	int offset, offset_seq;
171
	int hlen;
L
Linus Torvalds 已提交
172 173 174

	switch (nexthdr) {
	case IPPROTO_AH:
175
		hlen = sizeof(struct ip_auth_hdr);
L
Linus Torvalds 已提交
176 177 178 179
		offset = offsetof(struct ip_auth_hdr, spi);
		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
		break;
	case IPPROTO_ESP:
180
		hlen = sizeof(struct ip_esp_hdr);
L
Linus Torvalds 已提交
181 182 183 184 185 186
		offset = offsetof(struct ip_esp_hdr, spi);
		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
		break;
	case IPPROTO_COMP:
		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
			return -EINVAL;
187
		*spi = htonl(ntohs(*(__be16 *)(skb_transport_header(skb) + 2)));
L
Linus Torvalds 已提交
188 189 190 191 192 193
		*seq = 0;
		return 0;
	default:
		return 1;
	}

194
	if (!pskb_may_pull(skb, hlen))
L
Linus Torvalds 已提交
195 196
		return -EINVAL;

197 198
	*spi = *(__be32 *)(skb_transport_header(skb) + offset);
	*seq = *(__be32 *)(skb_transport_header(skb) + offset_seq);
L
Linus Torvalds 已提交
199 200
	return 0;
}
S
Steffen Klassert 已提交
201
EXPORT_SYMBOL(xfrm_parse_spi);
L
Linus Torvalds 已提交
202

203 204
int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
{
205
	struct xfrm_mode *inner_mode = x->inner_mode;
206 207 208 209 210 211
	int err;

	err = x->outer_mode->afinfo->extract_input(x, skb);
	if (err)
		return err;

212 213 214 215 216 217 218 219
	if (x->sel.family == AF_UNSPEC) {
		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
		if (inner_mode == NULL)
			return -EAFNOSUPPORT;
	}

	skb->protocol = inner_mode->afinfo->eth_proto;
	return inner_mode->input2(x, skb);
220 221 222
}
EXPORT_SYMBOL(xfrm_prepare_input);

223 224
/*
 * Main receive path for transformed (IPsec) packets.
 *
 * @skb:        packet to process.
 * @nexthdr:    protocol of the header to process; on async resumption it
 *              carries the result of the transform's input handler instead.
 * @spi:        SPI if already known, 0 to have it parsed from the packet.
 * @encap_type: >= 0  regular call, compared against the state's encap type;
 *              == -1 resumption after a handler returned -EINPROGRESS;
 *              <  -1 GRO call, the state is taken from the skb.
 *
 * Each loop iteration looks up a state, validates it under x->lock (state,
 * encap type, replay window, expiry), runs the transform's input handler,
 * updates replay/lifetime accounting and applies the inner mode.  The loop
 * ends on a tunnel-mode state or when no further ipsec header is found.
 *
 * Returns 0 when the skb was consumed here (dropped, handed to gro_cells for
 * a decapsulated packet, or left with an in-progress async transform);
 * otherwise returns the result of the inner mode's transport_finish().
 */
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
{
	struct net *net = dev_net(skb->dev);
	int err;
	__be32 seq;
	__be32 seq_hi;
	struct xfrm_state *x = NULL;
	xfrm_address_t *daddr;
	struct xfrm_mode *inner_mode;
	u32 mark = skb->mark;
	unsigned int family = AF_UNSPEC;
	int decaps = 0;
	int async = 0;
	bool xfrm_gro = false;
	bool crypto_done = false;
	struct xfrm_offload *xo = xfrm_offload(skb);

	if (encap_type < 0) {
		x = xfrm_input_state(skb);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			if (x->km.state == XFRM_STATE_ACQ)
				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
			else
				XFRM_INC_STATS(net,
					       LINUX_MIB_XFRMINSTATEINVALID);

			/*
			 * On async resumption the device reference taken by
			 * dev_hold() before the handler returned -EINPROGRESS
			 * is still held; the matching dev_put() lives at
			 * "resume:", which this early exit bypasses.
			 */
			if (encap_type == -1)
				dev_put(skb->dev);
			goto drop;
		}

		family = x->outer_mode->afinfo->family;

		/* An encap_type of -1 indicates async resumption. */
		if (encap_type == -1) {
			async = 1;
			seq = XFRM_SKB_CB(skb)->seq.input.low;
			goto resume;
		}

		/* encap_type < -1 indicates a GRO call. */
		encap_type = 0;
		seq = XFRM_SPI_SKB_CB(skb)->seq;

		if (xo && (xo->flags & CRYPTO_DONE)) {
			/* Crypto already ran (offload); only the tail is left. */
			crypto_done = true;
			family = XFRM_SPI_SKB_CB(skb)->family;

			if (!(xo->status & CRYPTO_SUCCESS)) {
				if (xo->status &
				    (CRYPTO_TRANSPORT_AH_AUTH_FAILED |
				     CRYPTO_TRANSPORT_ESP_AUTH_FAILED |
				     CRYPTO_TUNNEL_AH_AUTH_FAILED |
				     CRYPTO_TUNNEL_ESP_AUTH_FAILED)) {

					xfrm_audit_state_icvfail(x, skb,
								 x->type->proto);
					x->stats.integrity_failed++;
					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
					goto drop;
				}

				if (xo->status & CRYPTO_INVALID_PROTOCOL) {
					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
					goto drop;
				}

				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
				goto drop;
			}

			if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
				goto drop;
			}
		}

		/* State is already known: skip the lookup inside the loop. */
		goto lock;
	}

	family = XFRM_SPI_SKB_CB(skb)->family;

	/* if tunnel is present override skb->mark value with tunnel i_key */
	switch (family) {
	case AF_INET:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
		break;
	case AF_INET6:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
		break;
	}

	err = secpath_set(skb);
	if (err) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
		goto drop;
	}

	seq = 0;
	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
		secpath_reset(skb);
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		goto drop;
	}

	daddr = (xfrm_address_t *)(skb_network_header(skb) +
				   XFRM_SPI_SKB_CB(skb)->daddroff);
	do {
		if (skb->sp->len == XFRM_MAX_DEPTH) {
			secpath_reset(skb);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
			goto drop;
		}

		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
		if (x == NULL) {
			secpath_reset(skb);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
			xfrm_audit_state_notfound(skb, family, spi, seq);
			goto drop;
		}

		skb->mark = xfrm_smark_get(skb->mark, x);

		/* Record the state in the skb's sec_path. */
		skb->sp->xvec[skb->sp->len++] = x;

lock:
		spin_lock(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			if (x->km.state == XFRM_STATE_ACQ)
				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
			else
				XFRM_INC_STATS(net,
					       LINUX_MIB_XFRMINSTATEINVALID);
			goto drop_unlock;
		}

		if ((x->encap ? x->encap->encap_type : 0) != encap_type) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
			goto drop_unlock;
		}

		if (x->repl->check(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		if (xfrm_state_check_expire(x)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED);
			goto drop_unlock;
		}

		spin_unlock(&x->lock);

		if (xfrm_tunnel_check(skb, x, family)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		seq_hi = htonl(xfrm_replay_seqhi(x, seq));

		XFRM_SKB_CB(skb)->seq.input.low = seq;
		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;

		skb_dst_force(skb);
		/* Held across the handler; released at "resume:" below. */
		dev_hold(skb->dev);

		if (crypto_done)
			nexthdr = x->type_offload->input_tail(x, skb);
		else
			nexthdr = x->type->input(x, skb);

		/* Handler went async; processing re-enters with encap_type -1. */
		if (nexthdr == -EINPROGRESS)
			return 0;
resume:
		dev_put(skb->dev);

		spin_lock(&x->lock);
		if (nexthdr <= 0) {
			if (nexthdr == -EBADMSG) {
				xfrm_audit_state_icvfail(x, skb,
							 x->type->proto);
				x->stats.integrity_failed++;
			}
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
			goto drop_unlock;
		}

		/* only the first xfrm gets the encap type */
		encap_type = 0;

		if (async && x->repl->recheck(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		x->repl->advance(x, seq);

		x->curlft.bytes += skb->len;
		x->curlft.packets++;

		spin_unlock(&x->lock);

		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;

		inner_mode = x->inner_mode;

		if (x->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
			if (inner_mode == NULL) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
				goto drop;
			}
		}

		if (inner_mode->input(x, skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
			decaps = 1;
			break;
		}

		/*
		 * We need the inner address.  However, we only get here for
		 * transport mode so the outer address is identical.
		 */
		daddr = &x->id.daddr;
		family = x->outer_mode->afinfo->family;

		/* err == 0: another ipsec header follows; err == 1: done. */
		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
		if (err < 0) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
			goto drop;
		}
	} while (!err);

	err = xfrm_rcv_cb(skb, family, x->type->proto, 0);
	if (err)
		goto drop;

	nf_reset(skb);

	if (decaps) {
		if (skb->sp)
			skb->sp->olen = 0;
		skb_dst_drop(skb);
		gro_cells_receive(&gro_cells, skb);
		return 0;
	} else {
		xo = xfrm_offload(skb);
		if (xo)
			xfrm_gro = xo->flags & XFRM_GRO;

		err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
		if (xfrm_gro) {
			if (skb->sp)
				skb->sp->olen = 0;
			skb_dst_drop(skb);
			gro_cells_receive(&gro_cells, skb);
			return err;
		}

		return err;
	}

drop_unlock:
	spin_unlock(&x->lock);
drop:
	/* Tell the per-family callback about the failure, then free the skb. */
	xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(xfrm_input);

501 502 503 504 505 506
/*
 * Continue input processing of @skb after an asynchronous transform has
 * completed.  @nexthdr is forwarded to xfrm_input() together with an SPI of
 * 0 and encap_type -1, the value xfrm_input() treats as async resumption.
 */
int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
{
	const int async_resume = -1;

	return xfrm_input(skb, nexthdr, 0, async_resume);
}
EXPORT_SYMBOL(xfrm_input_resume);

507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
static void xfrm_trans_reinject(unsigned long data)
{
	struct xfrm_trans_tasklet *trans = (void *)data;
	struct sk_buff_head queue;
	struct sk_buff *skb;

	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&trans->queue, &queue);

	while ((skb = __skb_dequeue(&queue)))
		XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
}

/*
 * Defer @finish for @skb to the current CPU's reinjection tasklet.
 *
 * The callback is stashed in the skb's control buffer, the skb is appended
 * to this CPU's queue and the tasklet is scheduled.
 *
 * Returns 0 on success, or -ENOBUFS (skb untouched) when the queue already
 * holds netdev_max_backlog or more packets.
 */
int xfrm_trans_queue(struct sk_buff *skb,
		     int (*finish)(struct net *, struct sock *,
				   struct sk_buff *))
{
	struct xfrm_trans_tasklet *trans = this_cpu_ptr(&xfrm_trans_tasklet);
	struct sk_buff_head *backlog = &trans->queue;

	if (skb_queue_len(backlog) >= netdev_max_backlog)
		return -ENOBUFS;

	XFRM_TRANS_SKB_CB(skb)->finish = finish;
	__skb_queue_tail(backlog, skb);
	tasklet_schedule(&trans->tasklet);

	return 0;
}
EXPORT_SYMBOL(xfrm_trans_queue);

L
Linus Torvalds 已提交
538 539
/*
 * Boot-time setup of the xfrm input path:
 *  - initialise the dummy net_device and the GRO cells bound to it (on
 *    failure gro_cells.cells is set to NULL);
 *  - create the "secpath_cache" slab cache (SLAB_PANIC is passed, so no
 *    error check follows);
 *  - initialise the reinjection queue and tasklet of every possible CPU.
 */
void __init xfrm_input_init(void)
{
	int cpu;

	init_dummy_netdev(&xfrm_napi_dev);
	if (gro_cells_init(&gro_cells, &xfrm_napi_dev) != 0)
		gro_cells.cells = NULL;

	secpath_cachep = kmem_cache_create("secpath_cache",
					   sizeof(struct sec_path), 0,
					   SLAB_HWCACHE_ALIGN | SLAB_PANIC,
					   NULL);

	for_each_possible_cpu(cpu) {
		struct xfrm_trans_tasklet *trans = &per_cpu(xfrm_trans_tasklet, cpu);

		__skb_queue_head_init(&trans->queue);
		tasklet_init(&trans->tasklet, xfrm_trans_reinject,
			     (unsigned long)trans);
	}
}