xfrm_input.c 12.5 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
L
Linus Torvalds 已提交
2 3 4 5 6 7
/*
 * xfrm_input.c
 *
 * Changes:
 * 	YOSHIFUJI Hideaki @USAGI
 * 		Split up af-specific portion
 *
 */

11 12
#include <linux/bottom_half.h>
#include <linux/interrupt.h>
L
Linus Torvalds 已提交
13 14
#include <linux/slab.h>
#include <linux/module.h>
15
#include <linux/netdevice.h>
16
#include <linux/percpu.h>
17
#include <net/dst.h>
L
Linus Torvalds 已提交
18 19
#include <net/ip.h>
#include <net/xfrm.h>
20 21
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>
L
Linus Torvalds 已提交
22

23 24 25 26 27 28 29 30 31 32 33
/*
 * Deferred-completion context; one instance exists per CPU (see the
 * xfrm_trans_tasklet per-CPU variable in this file).
 */
struct xfrm_trans_tasklet {
	/* Tasklet whose handler is xfrm_trans_reinject(). */
	struct tasklet_struct tasklet;
	/* skbs appended by xfrm_trans_queue() and drained by the tasklet. */
	struct sk_buff_head queue;
};

/*
 * Data kept at the start of skb->cb[] while an skb waits on a
 * xfrm_trans_tasklet queue.
 */
struct xfrm_trans_cb {
	/*
	 * Continuation stored by xfrm_trans_queue(); xfrm_trans_reinject()
	 * calls it with the skb's device netns and a NULL socket.
	 */
	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
};

/* Interpret the beginning of an skb's cb[] area as a struct xfrm_trans_cb. */
#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))

34
/* Slab cache for struct sec_path objects; created in xfrm_input_init(). */
static struct kmem_cache *secpath_cachep __read_mostly;
L
Linus Torvalds 已提交
35

36
/* Held (BH-disabled) by the register/unregister paths that modify xfrm_input_afinfo[]. */
static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
37
/*
 * Registered input handlers, indexed by address family (slots 0..AF_INET6).
 * Entries are published with rcu_assign_pointer() and read under
 * rcu_read_lock() in xfrm_input_get_afinfo().
 */
static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
38

39 40 41
/*
 * GRO cells that xfrm_input() hands processed packets to, and the dummy
 * net_device the cells are initialised against in xfrm_input_init().
 */
static struct gro_cells gro_cells;
static struct net_device xfrm_napi_dev;

42 43
/* Per-CPU deferral queue and tasklet; each instance is set up in xfrm_input_init(). */
static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);

44
int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
45 46 47
{
	int err = 0;

48
	if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo)))
49
		return -EAFNOSUPPORT;
50

51 52
	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
53
		err = -EEXIST;
54 55 56 57 58 59 60
	else
		rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_input_register_afinfo);

61
int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo)
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
{
	int err = 0;

	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL);
	}
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	synchronize_rcu();
	return err;
}
EXPORT_SYMBOL(xfrm_input_unregister_afinfo);

78
static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family)
79
{
80
	const struct xfrm_input_afinfo *afinfo;
81

82
	if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo)))
83
		return NULL;
84

85 86 87 88 89 90 91 92 93 94 95
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_input_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

/*
 * Invoke the per-family callback for @skb.
 *
 * Returns the callback's result, or -EAFNOSUPPORT when no handler is
 * available for @family.
 */
static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
		       int err)
{
	const struct xfrm_input_afinfo *handler;
	int rc;

	handler = xfrm_input_get_afinfo(family);
	if (handler == NULL)
		return -EAFNOSUPPORT;

	rc = handler->callback(skb, protocol, err);

	/* Pairs with the rcu_read_lock() left held by xfrm_input_get_afinfo(). */
	rcu_read_unlock();

	return rc;
}

L
Linus Torvalds 已提交
107 108 109 110
void __secpath_destroy(struct sec_path *sp)
{
	int i;
	for (i = 0; i < sp->len; i++)
111
		xfrm_state_put(sp->xvec[i]);
L
Linus Torvalds 已提交
112 113 114 115 116 117 118 119
	kmem_cache_free(secpath_cachep, sp);
}
EXPORT_SYMBOL(__secpath_destroy);

struct sec_path *secpath_dup(struct sec_path *src)
{
	struct sec_path *sp;

120
	sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC);
L
Linus Torvalds 已提交
121 122 123 124
	if (!sp)
		return NULL;

	sp->len = 0;
125 126
	sp->olen = 0;

127 128
	memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH]));

L
Linus Torvalds 已提交
129 130 131 132 133
	if (src) {
		int i;

		memcpy(sp, src, sizeof(*sp));
		for (i = 0; i < sp->len; i++)
134
			xfrm_state_hold(sp->xvec[i]);
L
Linus Torvalds 已提交
135
	}
136
	refcount_set(&sp->refcnt, 1);
L
Linus Torvalds 已提交
137 138 139 140
	return sp;
}
EXPORT_SYMBOL(secpath_dup);

141 142 143 144 145
int secpath_set(struct sk_buff *skb)
{
	struct sec_path *sp;

	/* Allocate new secpath or COW existing one. */
146
	if (!skb->sp || refcount_read(&skb->sp->refcnt) != 1) {
147 148 149 150 151 152 153 154 155 156 157 158
		sp = secpath_dup(skb->sp);
		if (!sp)
			return -ENOMEM;

		if (skb->sp)
			secpath_put(skb->sp);
		skb->sp = sp;
	}
	return 0;
}
EXPORT_SYMBOL(secpath_set);

L
Linus Torvalds 已提交
159 160
/* Fetch spi and seq from ipsec header */

A
Al Viro 已提交
161
int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
L
Linus Torvalds 已提交
162 163
{
	int offset, offset_seq;
164
	int hlen;
L
Linus Torvalds 已提交
165 166 167

	switch (nexthdr) {
	case IPPROTO_AH:
168
		hlen = sizeof(struct ip_auth_hdr);
L
Linus Torvalds 已提交
169 170 171 172
		offset = offsetof(struct ip_auth_hdr, spi);
		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
		break;
	case IPPROTO_ESP:
173
		hlen = sizeof(struct ip_esp_hdr);
L
Linus Torvalds 已提交
174 175 176 177 178 179
		offset = offsetof(struct ip_esp_hdr, spi);
		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
		break;
	case IPPROTO_COMP:
		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
			return -EINVAL;
180
		*spi = htonl(ntohs(*(__be16 *)(skb_transport_header(skb) + 2)));
L
Linus Torvalds 已提交
181 182 183 184 185 186
		*seq = 0;
		return 0;
	default:
		return 1;
	}

187
	if (!pskb_may_pull(skb, hlen))
L
Linus Torvalds 已提交
188 189
		return -EINVAL;

190 191
	*spi = *(__be32 *)(skb_transport_header(skb) + offset);
	*seq = *(__be32 *)(skb_transport_header(skb) + offset_seq);
L
Linus Torvalds 已提交
192 193
	return 0;
}
S
Steffen Klassert 已提交
194
EXPORT_SYMBOL(xfrm_parse_spi);
L
Linus Torvalds 已提交
195

196 197
int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
{
198
	struct xfrm_mode *inner_mode = x->inner_mode;
199 200 201 202 203 204
	int err;

	err = x->outer_mode->afinfo->extract_input(x, skb);
	if (err)
		return err;

205 206 207 208 209 210 211 212
	if (x->sel.family == AF_UNSPEC) {
		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
		if (inner_mode == NULL)
			return -EAFNOSUPPORT;
	}

	skb->protocol = inner_mode->afinfo->eth_proto;
	return inner_mode->input2(x, skb);
213 214 215
}
EXPORT_SYMBOL(xfrm_prepare_input);

216 217
/*
 * Main receive path: apply one or more inbound transforms to @skb.
 *
 * @skb:        packet to process; consumed on every path
 * @nexthdr:    protocol of the header to process (or, on async resumption,
 *              the result of the transform's input handler)
 * @spi:        SPI to look up; when 0 it is parsed from the packet
 * @encap_type: >= 0 is the expected encapsulation type of the first
 *              transform; -1 resumes after asynchronous crypto; values
 *              below -1 indicate a GRO call.  For negative values the state
 *              is taken from xfrm_input_state() instead of being looked up.
 *
 * Each loop iteration runs replay, lifetime and tunnel checks, calls the
 * transform's input handler, updates replay state and byte/packet counters,
 * then runs the inner mode's input handler.  The loop ends at the first
 * tunnel-mode state or when no further AH/ESP/IPcomp header follows.
 *
 * Returns 0 when the packet was dropped, when the input handler returned
 * -EINPROGRESS, or after a tunnel-mode packet was passed to the GRO cells;
 * otherwise returns the result of the inner mode's transport_finish hook.
 */
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
{
	struct net *net = dev_net(skb->dev);
	int err;
	__be32 seq;
	__be32 seq_hi;
	struct xfrm_state *x = NULL;
	xfrm_address_t *daddr;
	struct xfrm_mode *inner_mode;
	u32 mark = skb->mark;
	unsigned int family = AF_UNSPEC;
	int decaps = 0;
	int async = 0;
	bool xfrm_gro = false;
	bool crypto_done = false;
	struct xfrm_offload *xo = xfrm_offload(skb);

	if (encap_type < 0) {
		x = xfrm_input_state(skb);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			if (x->km.state == XFRM_STATE_ACQ)
				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
			else
				XFRM_INC_STATS(net,
					       LINUX_MIB_XFRMINSTATEINVALID);
			goto drop;
		}

		family = x->outer_mode->afinfo->family;

		/* An encap_type of -1 indicates async resumption. */
		if (encap_type == -1) {
			async = 1;
			seq = XFRM_SKB_CB(skb)->seq.input.low;
			goto resume;
		}

		/* encap_type < -1 indicates a GRO call. */
		encap_type = 0;
		seq = XFRM_SPI_SKB_CB(skb)->seq;

		if (xo && (xo->flags & CRYPTO_DONE)) {
			crypto_done = true;
			x = xfrm_input_state(skb);
			family = XFRM_SPI_SKB_CB(skb)->family;

			if (!(xo->status & CRYPTO_SUCCESS)) {
				if (xo->status &
				    (CRYPTO_TRANSPORT_AH_AUTH_FAILED |
				     CRYPTO_TRANSPORT_ESP_AUTH_FAILED |
				     CRYPTO_TUNNEL_AH_AUTH_FAILED |
				     CRYPTO_TUNNEL_ESP_AUTH_FAILED)) {

					xfrm_audit_state_icvfail(x, skb,
								 x->type->proto);
					x->stats.integrity_failed++;
					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
					goto drop;
				}

				if (xo->status & CRYPTO_INVALID_PROTOCOL) {
					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
					goto drop;
				}

				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
				goto drop;
			}

			if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
				goto drop;
			}
		}

		/* State is already known: skip the lookup at the loop head. */
		goto lock;
	}

	family = XFRM_SPI_SKB_CB(skb)->family;

	/* if tunnel is present override skb->mark value with tunnel i_key */
	switch (family) {
	case AF_INET:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
		break;
	case AF_INET6:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
		break;
	}

	err = secpath_set(skb);
	if (err) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
		goto drop;
	}

	seq = 0;
	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		goto drop;
	}

	daddr = (xfrm_address_t *)(skb_network_header(skb) +
				   XFRM_SPI_SKB_CB(skb)->daddroff);
	do {
		if (skb->sp->len == XFRM_MAX_DEPTH) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
			goto drop;
		}

		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
		if (x == NULL) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
			xfrm_audit_state_notfound(skb, family, spi, seq);
			goto drop;
		}

		/* Record the state in the skb's secpath. */
		skb->sp->xvec[skb->sp->len++] = x;

lock:
		spin_lock(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			if (x->km.state == XFRM_STATE_ACQ)
				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
			else
				XFRM_INC_STATS(net,
					       LINUX_MIB_XFRMINSTATEINVALID);
			goto drop_unlock;
		}

		if ((x->encap ? x->encap->encap_type : 0) != encap_type) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
			goto drop_unlock;
		}

		if (x->repl->check(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		if (xfrm_state_check_expire(x)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED);
			goto drop_unlock;
		}

		spin_unlock(&x->lock);

		if (xfrm_tunnel_check(skb, x, family)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		seq_hi = htonl(xfrm_replay_seqhi(x, seq));

		/* Saved in the skb so an async resumption can read seq back. */
		XFRM_SKB_CB(skb)->seq.input.low = seq;
		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;

		skb_dst_force(skb);
		/* Released by the dev_put() at the resume label. */
		dev_hold(skb->dev);

		if (crypto_done)
			nexthdr = x->type_offload->input_tail(x, skb);
		else
			nexthdr = x->type->input(x, skb);

		/* Completion will re-enter this function with encap_type -1. */
		if (nexthdr == -EINPROGRESS)
			return 0;
resume:
		dev_put(skb->dev);

		spin_lock(&x->lock);
		if (nexthdr <= 0) {
			if (nexthdr == -EBADMSG) {
				xfrm_audit_state_icvfail(x, skb,
							 x->type->proto);
				x->stats.integrity_failed++;
			}
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
			goto drop_unlock;
		}

		/* only the first xfrm gets the encap type */
		encap_type = 0;

		if (async && x->repl->recheck(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		x->repl->advance(x, seq);

		x->curlft.bytes += skb->len;
		x->curlft.packets++;

		spin_unlock(&x->lock);

		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;

		inner_mode = x->inner_mode;

		if (x->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
			if (inner_mode == NULL) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
				goto drop;
			}
		}

		if (inner_mode->input(x, skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
			decaps = 1;
			break;
		}

		/*
		 * We need the inner address.  However, we only get here for
		 * transport mode so the outer address is identical.
		 */
		daddr = &x->id.daddr;
		family = x->outer_mode->afinfo->family;

		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
		if (err < 0) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
			goto drop;
		}

		/*
		 * crypto_done was derived from the skb's offload flags for the
		 * state supplied on entry.  Any further state found by lookup
		 * in the next iteration must go through x->type->input(), so
		 * clear the flag before looping.
		 */
		crypto_done = false;
	} while (!err);

	err = xfrm_rcv_cb(skb, family, x->type->proto, 0);
	if (err)
		goto drop;

	nf_reset(skb);

	if (decaps) {
		if (skb->sp)
			skb->sp->olen = 0;
		skb_dst_drop(skb);
		gro_cells_receive(&gro_cells, skb);
		return 0;
	} else {
		xo = xfrm_offload(skb);
		if (xo)
			xfrm_gro = xo->flags & XFRM_GRO;

		err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
		if (xfrm_gro) {
			if (skb->sp)
				skb->sp->olen = 0;
			skb_dst_drop(skb);
			gro_cells_receive(&gro_cells, skb);
			return err;
		}

		return err;
	}

drop_unlock:
	spin_unlock(&x->lock);
drop:
	/* x may still be NULL here if no state was ever found. */
	xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(xfrm_input);

490 491 492 493 494 495
int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
{
	return xfrm_input(skb, nexthdr, 0, -1);
}
EXPORT_SYMBOL(xfrm_input_resume);

496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
static void xfrm_trans_reinject(unsigned long data)
{
	struct xfrm_trans_tasklet *trans = (void *)data;
	struct sk_buff_head queue;
	struct sk_buff *skb;

	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&trans->queue, &queue);

	while ((skb = __skb_dequeue(&queue)))
		XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
}

/*
 * Queue @skb on this CPU's deferral queue and schedule the tasklet that will
 * later call @finish for it (see xfrm_trans_reinject()).
 *
 * @finish is stored in the skb's control block, so the caller must not need
 * the start of skb->cb[] for anything else while the skb is queued.
 *
 * Returns 0 on success or -ENOBUFS when the queue already holds
 * netdev_max_backlog or more packets; in that case the skb is not queued
 * and remains owned by the caller.
 */
int xfrm_trans_queue(struct sk_buff *skb,
		     int (*finish)(struct net *, struct sock *,
				   struct sk_buff *))
{
	struct xfrm_trans_tasklet *trans;

	trans = this_cpu_ptr(&xfrm_trans_tasklet);

	if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
		return -ENOBUFS;

	XFRM_TRANS_SKB_CB(skb)->finish = finish;
	/*
	 * The queue is per-CPU, is set up with __skb_queue_head_init() and is
	 * drained without taking its lock, so use the matching unlocked
	 * enqueue.  skb_queue_tail() would take a spinlock that was never
	 * initialised.
	 */
	__skb_queue_tail(&trans->queue, skb);
	tasklet_schedule(&trans->tasklet);
	return 0;
}
EXPORT_SYMBOL(xfrm_trans_queue);

L
Linus Torvalds 已提交
527 528
/*
 * One-time initialisation of the input path: the dummy device and GRO cells,
 * the secpath slab cache, and every possible CPU's deferral queue and tasklet.
 */
void __init xfrm_input_init(void)
{
	int cpu;

	init_dummy_netdev(&xfrm_napi_dev);
	/* On failure leave the cells pointer NULL. */
	if (gro_cells_init(&gro_cells, &xfrm_napi_dev))
		gro_cells.cells = NULL;

	secpath_cachep = kmem_cache_create("secpath_cache",
					   sizeof(struct sec_path),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	for_each_possible_cpu(cpu) {
		struct xfrm_trans_tasklet *slot = &per_cpu(xfrm_trans_tasklet, cpu);

		__skb_queue_head_init(&slot->queue);
		/* The tasklet gets its own per-CPU context as its argument. */
		tasklet_init(&slot->tasklet, xfrm_trans_reinject,
			     (unsigned long)slot);
	}
}