xfrm_policy.c 80.0 KB
Newer Older
1
/*
L
Linus Torvalds 已提交
2 3 4 5 6 7 8 9 10 11 12
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 * 	Kazunori MIYAZAWA @USAGI
 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 * 		IPv6 support
 * 	Kazunori MIYAZAWA @USAGI
 * 	YOSHIFUJI Hideaki
 * 		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
13
 *
L
Linus Torvalds 已提交
14 15
 */

16
#include <linux/err.h>
L
Linus Torvalds 已提交
17 18 19 20 21 22 23
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
24
#include <linux/netfilter.h>
L
Linus Torvalds 已提交
25
#include <linux/module.h>
26
#include <linux/cache.h>
P
Paul Moore 已提交
27
#include <linux/audit.h>
28
#include <net/dst.h>
29
#include <net/flow.h>
L
Linus Torvalds 已提交
30 31
#include <net/xfrm.h>
#include <net/ip.h>
32 33 34
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
L
Linus Torvalds 已提交
35

36 37
#include "xfrm_hash.h"

38 39 40 41
#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN	100

42 43 44 45 46
/* Small carrier pairing a dst entry with a flags byte. Its users are not
 * visible in this part of the file.
 */
struct xfrm_flo {
	struct dst_entry *dst_orig;	/* presumably the caller's original route - TODO confirm */
	u8 flags;			/* NOTE(review): flag namespace not visible here */
};

47 48 49
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
						__read_mostly;
L
Linus Torvalds 已提交
50

51
static struct kmem_cache *xfrm_dst_cache __read_mostly;
L
Linus Torvalds 已提交
52

53
static void xfrm_init_pmtu(struct dst_entry *dst);
54
static int stale_bundle(struct dst_entry *dst);
55
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
56
static void xfrm_policy_queue_process(unsigned long arg);
L
Linus Torvalds 已提交
57

58
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
W
Wei Yongjun 已提交
59 60 61
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);

62
static inline bool
63
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
64
{
65 66
	const struct flowi4 *fl4 = &fl->u.ip4;

67 68
	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
69 70 71 72
		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
73 74
}

75
static inline bool
76
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
77
{
78 79 80 81 82 83 84 85
	const struct flowi6 *fl6 = &fl->u.ip6;

	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
86 87
}

88 89
bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
			 unsigned short family)
90 91 92 93 94 95 96
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
97
	return false;
98 99
}

E
Eric Dumazet 已提交
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
/* Look up the per-family policy operations.
 *
 * Returns NULL when @family is out of range or has no registered entry.
 * On success the RCU read lock taken here is still held when the function
 * returns; the caller releases it through xfrm_policy_put_afinfo().
 */
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *ops;

	if (unlikely(family >= NPROTO))
		return NULL;

	rcu_read_lock();
	ops = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!ops)) {
		/* nothing to hand out, so do not leave the lock held */
		rcu_read_unlock();
		return NULL;
	}

	return ops;
}

/* Counterpart of xfrm_policy_get_afinfo(): drops the RCU read lock that a
 * successful lookup left held. @afinfo itself is not touched.
 */
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}

D
David Ahern 已提交
118 119
static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
						  int tos, int oif,
120 121
						  const xfrm_address_t *saddr,
						  const xfrm_address_t *daddr,
122 123 124 125 126 127 128 129 130
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

D
David Ahern 已提交
131
	dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
132 133 134 135 136 137

	xfrm_policy_put_afinfo(afinfo);

	return dst;
}

D
David Ahern 已提交
138 139
/* Route lookup for one xfrm state.
 *
 * The addresses default to the state's own source/destination. When the
 * state type carries XFRM_TYPE_LOCAL_COADDR or XFRM_TYPE_REMOTE_COADDR the
 * care-of address and the caller-supplied previous address are used
 * instead (the REMOTE flag is applied last and therefore wins if both are
 * set). On a successful lookup the addresses actually used are copied back
 * into @prev_saddr / @prev_daddr.
 */
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
						int tos, int oif,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	struct net *net = xs_net(x);
	xfrm_address_t *src = &x->props.saddr;
	xfrm_address_t *dest = &x->id.daddr;
	struct dst_entry *route;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		src = x->coaddr;
		dest = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		src = prev_saddr;
		dest = x->coaddr;
	}

	route = __xfrm_dst_lookup(net, tos, oif, src, dest, family);
	if (IS_ERR(route))
		return route;

	/* remember what was used, unless it already is the caller's buffer */
	if (prev_saddr != src)
		memcpy(prev_saddr, src, sizeof(*prev_saddr));
	if (prev_daddr != dest)
		memcpy(prev_daddr, dest, sizeof(*prev_daddr));

	return route;
}

/* Convert a number of seconds into jiffies, saturating at
 * MAX_SCHEDULE_TIMEOUT-1 so that the multiplication by HZ is only done for
 * values below that limit.
 */
static inline unsigned long make_jiffies(long secs)
{
	const long limit = (MAX_SCHEDULE_TIMEOUT-1)/HZ;

	if (secs < limit)
		return secs*HZ;

	return MAX_SCHEDULE_TIMEOUT-1;
}

static void xfrm_policy_timer(unsigned long data)
{
180
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
181
	unsigned long now = get_seconds();
L
Linus Torvalds 已提交
182 183 184 185 186 187
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

188
	if (unlikely(xp->walk.dead))
L
Linus Torvalds 已提交
189 190
		goto out;

191
	dir = xfrm_policy_id2dir(xp->index);
L
Linus Torvalds 已提交
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
231
		km_policy_expired(xp, dir, 0, 0);
L
Linus Torvalds 已提交
232 233 234 235 236 237 238 239 240 241 242
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
243
	if (!xfrm_policy_delete(xp, dir))
244
		km_policy_expired(xp, dir, 1, 0);
L
Linus Torvalds 已提交
245 246 247
	xfrm_pol_put(xp);
}

248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
/* flow_cache_ops.get: take a reference on the policy embedding @flo.
 *
 * Returns @flo with the policy reference count raised, or NULL (and no
 * reference) when the policy is already marked dead.
 */
static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_policy *policy = container_of(flo, struct xfrm_policy, flo);

	if (unlikely(policy->walk.dead))
		return NULL;

	xfrm_pol_hold(policy);
	return flo;
}

static int xfrm_policy_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);

	return !pol->walk.dead;
}

/* flow_cache_ops.delete: drop one reference on the policy embedding @flo. */
static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_policy *policy = container_of(flo, struct xfrm_policy, flo);

	xfrm_pol_put(policy);
}

/* Flow-cache callbacks for objects embedded in struct xfrm_policy;
 * installed into policy->flo.ops by xfrm_policy_alloc().
 */
static const struct flow_cache_ops xfrm_policy_fc_ops = {
	.get = xfrm_policy_flo_get,		/* take a reference unless dead */
	.check = xfrm_policy_flo_check,		/* non-zero while not dead */
	.delete = xfrm_policy_flo_delete,	/* drop a reference */
};
L
Linus Torvalds 已提交
277 278 279 280 281

/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

282
/* Allocate and initialise a zeroed policy bound to @net.
 *
 * The new policy starts with a reference count of 1, empty list/hash
 * linkage, an empty hold queue, both timers set up (not armed) and the
 * flow-cache ops installed. Returns NULL if the allocation fails.
 */
struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *policy = kzalloc(sizeof(*policy), gfp);

	if (!policy)
		return NULL;

	write_pnet(&policy->xp_net, net);

	/* list and hash linkage */
	INIT_LIST_HEAD(&policy->walk.all);
	INIT_HLIST_NODE(&policy->bydst);
	INIT_HLIST_NODE(&policy->byidx);

	rwlock_init(&policy->lock);
	atomic_set(&policy->refcnt, 1);
	skb_queue_head_init(&policy->polq.hold_queue);

	/* both timers receive the policy itself as their argument */
	setup_timer(&policy->timer, xfrm_policy_timer,
		    (unsigned long)policy);
	setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
		    (unsigned long)policy);

	policy->flo.ops = &xfrm_policy_fc_ops;

	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

/* Destroy xfrm_policy: descendant resources must already have been released by this point. */

308
void xfrm_policy_destroy(struct xfrm_policy *policy)
L
Linus Torvalds 已提交
309
{
H
Herbert Xu 已提交
310
	BUG_ON(!policy->walk.dead);
L
Linus Torvalds 已提交
311

312
	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
L
Linus Torvalds 已提交
313 314
		BUG();

315
	security_xfrm_policy_free(policy->security);
L
Linus Torvalds 已提交
316 317
	kfree(policy);
}
318
EXPORT_SYMBOL(xfrm_policy_destroy);
L
Linus Torvalds 已提交
319 320 321 322 323 324 325

/* Rule must be locked. Release descendant resources and announce the
 * entry dead. The rule must already be unlinked from the lists by now.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
H
Herbert Xu 已提交
326
	policy->walk.dead = 1;
L
Linus Torvalds 已提交
327

328
	atomic_inc(&policy->genid);
L
Linus Torvalds 已提交
329

330 331
	if (del_timer(&policy->polq.hold_timer))
		xfrm_pol_put(policy);
332
	skb_queue_purge(&policy->polq.hold_queue);
333

334 335 336 337
	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	xfrm_pol_put(policy);
L
Linus Torvalds 已提交
338 339
}

340 341
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

342
/* Bucket number of policy @index in @net's by-index hash table. */
static inline unsigned int idx_hash(struct net *net, u32 index)
{
	unsigned int mask = net->xfrm.policy_idx_hmask;

	return __idx_hash(index, mask);
}

347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
/* Fetch the destination/source hash-threshold bits configured for the
 * given direction and address family. Families other than AF_INET and
 * AF_INET6 yield zero for both.
 */
static void __get_hash_thresh(struct net *net,
			      unsigned short family, int dir,
			      u8 *dbits, u8 *sbits)
{
	if (family == AF_INET) {
		*dbits = net->xfrm.policy_bydst[dir].dbits4;
		*sbits = net->xfrm.policy_bydst[dir].sbits4;
	} else if (family == AF_INET6) {
		*dbits = net->xfrm.policy_bydst[dir].dbits6;
		*sbits = net->xfrm.policy_bydst[dir].sbits6;
	} else {
		*dbits = 0;
		*sbits = 0;
	}
}

369 370 371
static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
372
{
373
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
374 375 376 377 378 379
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __sel_hash(sel, family, hmask, dbits, sbits);
380 381

	return (hash == hmask + 1 ?
382 383
		&net->xfrm.policy_inexact[dir] :
		net->xfrm.policy_bydst[dir].table + hash);
384 385
}

386 387 388 389
static struct hlist_head *policy_hash_direct(struct net *net,
					     const xfrm_address_t *daddr,
					     const xfrm_address_t *saddr,
					     unsigned short family, int dir)
390
{
391
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
392 393 394 395 396 397
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
398

399
	return net->xfrm.policy_bydst[dir].table + hash;
400 401
}

402 403
static void xfrm_dst_hash_transfer(struct net *net,
				   struct hlist_head *list,
404
				   struct hlist_head *ndsttable,
405 406
				   unsigned int nhashmask,
				   int dir)
407
{
408
	struct hlist_node *tmp, *entry0 = NULL;
409
	struct xfrm_policy *pol;
410
	unsigned int h0 = 0;
411 412
	u8 dbits;
	u8 sbits;
413

414
redo:
415
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
416 417
		unsigned int h;

418
		__get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
419
		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
420
				pol->family, nhashmask, dbits, sbits);
421
		if (!entry0) {
422
			hlist_del(&pol->bydst);
423 424 425 426 427
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
428
			hlist_del(&pol->bydst);
429
			hlist_add_behind(&pol->bydst, entry0);
430
		}
431
		entry0 = &pol->bydst;
432 433 434 435
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
436 437 438 439 440 441 442
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
443
	struct hlist_node *tmp;
444 445
	struct xfrm_policy *pol;

446
	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
447 448 449 450 451 452 453 454 455 456 457 458
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

/* Hash mask for a table with twice as many buckets: 2 * old_hmask + 1,
 * computed in unsigned int arithmetic.
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	/* shifting in a one bit equals ((old_hmask + 1) * 2) - 1 */
	return (old_hmask << 1) | 1;
}

459
static void xfrm_bydst_resize(struct net *net, int dir)
460
{
461
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
462 463
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
464
	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
465
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
466 467 468 469 470
	int i;

	if (!ndst)
		return;

F
Fan Du 已提交
471
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
472 473

	for (i = hmask; i >= 0; i--)
474
		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
475

476 477
	net->xfrm.policy_bydst[dir].table = ndst;
	net->xfrm.policy_bydst[dir].hmask = nhashmask;
478

F
Fan Du 已提交
479
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
480

481
	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
482 483
}

484
static void xfrm_byidx_resize(struct net *net, int total)
485
{
486
	unsigned int hmask = net->xfrm.policy_idx_hmask;
487 488
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
489
	struct hlist_head *oidx = net->xfrm.policy_byidx;
490
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
491 492 493 494 495
	int i;

	if (!nidx)
		return;

F
Fan Du 已提交
496
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
497 498 499 500

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

501 502
	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;
503

F
Fan Du 已提交
504
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
505

506
	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
507 508
}

509
static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
510
{
511 512
	unsigned int cnt = net->xfrm.policy_count[dir];
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
513 514 515 516 517 518 519 520 521 522 523

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

524
static inline int xfrm_byidx_should_resize(struct net *net, int total)
525
{
526
	unsigned int hmask = net->xfrm.policy_idx_hmask;
527 528 529 530 531 532 533 534

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

535
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
J
Jamal Hadi Salim 已提交
536
{
F
Fan Du 已提交
537
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
538 539 540 541 542 543 544
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
J
Jamal Hadi Salim 已提交
545
	si->spdhmcnt = xfrm_policy_hashmax;
F
Fan Du 已提交
546
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
J
Jamal Hadi Salim 已提交
547 548
}
EXPORT_SYMBOL(xfrm_spd_getinfo);
549

J
Jamal Hadi Salim 已提交
550
static DEFINE_MUTEX(hash_resize_mutex);
551
static void xfrm_hash_resize(struct work_struct *work)
552
{
553
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
554 555 556 557 558
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
H
Herbert Xu 已提交
559
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
560 561
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
562
	}
563 564
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);
565 566 567 568

	mutex_unlock(&hash_resize_mutex);
}

569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598
/* Work handler: rebuild the bydst/inexact tables after the selector
 * prefix-length thresholds changed.
 *
 * Serialised against resizing by hash_resize_mutex. The thresholds are
 * sampled under the hthresh seqlock, then, with the policy write lock held,
 * all bydst and inexact chains are emptied, the per-direction threshold
 * bits are updated and every policy on policy_all is re-inserted in
 * priority order.
 *
 * policy_all does not hold policies only: xfrm_policy_walk() parks walker
 * cursors on it, and those are marked dead by xfrm_policy_walk_init().
 * Such entries are not embedded in a struct xfrm_policy and must be
 * skipped, exactly as xfrm_policy_walk() skips them. Entries whose index
 * maps to a direction outside [0, XFRM_POLICY_MAX) are skipped as well,
 * since only those directions had their tables reset above.
 */
static void xfrm_hash_rebuild(struct work_struct *work)
{
	struct net *net = container_of(work, struct net,
				       xfrm.policy_hthresh.work);
	unsigned int hmask;
	struct xfrm_policy *pol;
	struct xfrm_policy *policy;
	struct hlist_head *chain;
	struct hlist_head *odst;
	struct hlist_node *newpos;
	int i;
	int dir;
	unsigned seq;
	u8 lbits4, rbits4, lbits6, rbits6;

	mutex_lock(&hash_resize_mutex);

	/* read selector prefixlen thresholds */
	do {
		seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);

		lbits4 = net->xfrm.policy_hthresh.lbits4;
		rbits4 = net->xfrm.policy_hthresh.rbits4;
		lbits6 = net->xfrm.policy_hthresh.lbits6;
		rbits6 = net->xfrm.policy_hthresh.rbits6;
	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	/* reset the bydst and inexact table in all directions */
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
		hmask = net->xfrm.policy_bydst[dir].hmask;
		odst = net->xfrm.policy_bydst[dir].table;
		for (i = hmask; i >= 0; i--)
			INIT_HLIST_HEAD(odst + i);
		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
			/* dir out => dst = remote, src = local */
			net->xfrm.policy_bydst[dir].dbits4 = rbits4;
			net->xfrm.policy_bydst[dir].sbits4 = lbits4;
			net->xfrm.policy_bydst[dir].dbits6 = rbits6;
			net->xfrm.policy_bydst[dir].sbits6 = lbits6;
		} else {
			/* dir in/fwd => dst = local, src = remote */
			net->xfrm.policy_bydst[dir].dbits4 = lbits4;
			net->xfrm.policy_bydst[dir].sbits4 = rbits4;
			net->xfrm.policy_bydst[dir].dbits6 = lbits6;
			net->xfrm.policy_bydst[dir].sbits6 = rbits6;
		}
	}

	/* re-insert all policies by order of creation */
	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
		/* walker cursors share this list and are flagged dead */
		if (policy->walk.dead)
			continue;

		/* only directions whose tables were reset are re-hashed */
		dir = xfrm_policy_id2dir(policy->index);
		if (dir < 0 || dir >= XFRM_POLICY_MAX)
			continue;

		newpos = NULL;
		chain = policy_hash_bysel(net, &policy->selector,
					  policy->family, dir);
		hlist_for_each_entry(pol, chain, bydst) {
			if (policy->priority >= pol->priority)
				newpos = &pol->bydst;
			else
				break;
		}
		if (newpos)
			hlist_add_behind(&policy->bydst, newpos);
		else
			hlist_add_head(&policy->bydst, chain);
	}

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	mutex_unlock(&hash_resize_mutex);
}

/* Schedule @net's policy_hthresh work item. That is the same work member
 * xfrm_hash_rebuild() uses to recover its net, so presumably it is the
 * handler bound to it (the binding is not visible in this part of the file).
 */
void xfrm_policy_hash_rebuild(struct net *net)
{
	schedule_work(&net->xfrm.policy_hthresh.work);
}
EXPORT_SYMBOL(xfrm_policy_hash_rebuild);

L
Linus Torvalds 已提交
649 650
/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute unpredictability of ordering of rules. This will not pass. */
651
/* Produce a policy index that is not yet present in the by-index table.
 *
 * A non-zero @index is tried first. After that, candidates are taken from
 * a static generator that advances in steps of 8 and is OR-ed with @dir.
 * A candidate of 0 is replaced by 8. The first candidate not found in its
 * hash chain is returned.
 */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_head *bucket;
		struct xfrm_policy *p;
		bool taken = false;
		u32 idx;

		if (index) {
			/* caller's wish is tried exactly once */
			idx = index;
			index = 0;
		} else {
			idx = (idx_generator | dir);
			idx_generator += 8;
		}

		if (idx == 0)
			idx = 8;

		bucket = net->xfrm.policy_byidx + idx_hash(net, idx);
		hlist_for_each_entry(p, bucket, byidx) {
			if (p->index == idx) {
				taken = true;
				break;
			}
		}

		if (!taken)
			return idx;
	}
}

684 685 686 687 688 689 690 691 692 693 694 695 696 697 698
static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

699 700 701 702 703 704
static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

705 706 707
	if (skb_queue_empty(&pq->hold_queue))
		return;

708 709 710 711
	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
712 713
	if (del_timer(&pq->hold_timer))
		xfrm_pol_put(old);
714 715 716 717 718 719 720
	spin_unlock_bh(&pq->hold_queue.lock);

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
721 722
	if (!mod_timer(&pq->hold_timer, jiffies))
		xfrm_pol_hold(new);
723 724 725
	spin_unlock_bh(&pq->hold_queue.lock);
}

726 727 728 729 730 731 732 733 734 735 736 737 738 739 740
/* Decide whether @policy and @pol are considered to carry the same mark.
 *
 * True when value and mask are identical, or when @policy's masked mark
 * matches @pol's mark (under @pol's mask) and both have equal priority.
 */
static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
				   struct xfrm_policy *pol)
{
	u32 masked = policy->mark.v & policy->mark.m;
	bool identical = policy->mark.v == pol->mark.v &&
			 policy->mark.m == pol->mark.m;
	bool overlapping = (masked & pol->mark.m) == pol->mark.v &&
			   policy->priority == pol->priority;

	return identical || overlapping;
}

L
Linus Torvalds 已提交
741 742
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
743
	struct net *net = xp_net(policy);
744 745 746
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
747
	struct hlist_node *newpos;
L
Linus Torvalds 已提交
748

F
Fan Du 已提交
749
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
750
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
751 752
	delpol = NULL;
	newpos = NULL;
753
	hlist_for_each_entry(pol, chain, bydst) {
H
Herbert Xu 已提交
754
		if (pol->type == policy->type &&
755
		    !selector_cmp(&pol->selector, &policy->selector) &&
756
		    xfrm_policy_mark_match(policy, pol) &&
H
Herbert Xu 已提交
757 758
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
L
Linus Torvalds 已提交
759
			if (excl) {
F
Fan Du 已提交
760
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
761 762 763 764 765 766
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
H
Herbert Xu 已提交
767
			newpos = &pol->bydst;
L
Linus Torvalds 已提交
768 769 770 771 772 773
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
774
		hlist_add_behind(&policy->bydst, newpos);
775 776
	else
		hlist_add_head(&policy->bydst, chain);
777
	__xfrm_policy_link(policy, dir);
778
	atomic_inc(&net->xfrm.flow_cache_genid);
F
fan.du 已提交
779 780 781 782 783 784 785

	/* After previous checking, family can either be AF_INET or AF_INET6 */
	if (policy->family == AF_INET)
		rt_genid_bump_ipv4(net);
	else
		rt_genid_bump_ipv6(net);

786 787
	if (delpol) {
		xfrm_policy_requeue(delpol, policy);
W
Wei Yongjun 已提交
788
		__xfrm_policy_unlink(delpol, dir);
789
	}
790
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
791
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
792
	policy->curlft.add_time = get_seconds();
L
Linus Torvalds 已提交
793 794 795
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
F
Fan Du 已提交
796
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
797

798
	if (delpol)
L
Linus Torvalds 已提交
799
		xfrm_policy_kill(delpol);
800 801
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);
802

L
Linus Torvalds 已提交
803 804 805 806
	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

807 808
/* Find (and optionally delete) the policy matching type, mark, selector
 * and security context in direction @dir.
 *
 * Returns the policy with an extra reference held, or NULL when none
 * matches. With @delete set the security module is consulted first: if it
 * refuses, *err carries its error and the still-linked policy is returned;
 * otherwise the policy is unlinked under the lock and killed after the
 * lock is released. *err is 0 in every other case.
 */
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
					  int dir, struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_head *chain;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, sel, sel->family, dir);

	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type != type ||
		    (mark & pol->mark.m) != pol->mark.v ||
		    selector_cmp(sel, &pol->selector) ||
		    !xfrm_sec_ctx_match(ctx, pol->security))
			continue;

		xfrm_pol_hold(pol);
		if (delete) {
			*err = security_xfrm_policy_delete(pol->security);
			if (*err) {
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				return pol;
			}
			__xfrm_policy_unlink(pol, dir);
		}
		ret = pol;
		break;
	}

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);

	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
L
Linus Torvalds 已提交
845

846 847
/* Find (and optionally delete) the policy with index @id.
 *
 * When the direction encoded in @id differs from @dir, *err is set to
 * -ENOENT and NULL is returned. Otherwise the by-index chain is searched
 * for a policy of the given type whose mark matches; it is returned with
 * an extra reference, or NULL with *err == 0 when absent. Deletion follows
 * the same rules as xfrm_policy_bysel_ctx(): a refusal from the security
 * module is reported through *err with the policy left linked.
 */
struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
				     int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_head *chain;

	if (xfrm_policy_id2dir(id) != dir) {
		*err = -ENOENT;
		return NULL;
	}

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = net->xfrm.policy_byidx + idx_hash(net, id);

	hlist_for_each_entry(pol, chain, byidx) {
		if (pol->type != type || pol->index != id ||
		    (mark & pol->mark.m) != pol->mark.v)
			continue;

		xfrm_pol_hold(pol);
		if (delete) {
			*err = security_xfrm_policy_delete(pol->security);
			if (*err) {
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				return pol;
			}
			__xfrm_policy_unlink(pol, dir);
		}
		ret = pol;
		break;
	}

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);

	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);

885 886
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
887
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
L
Linus Torvalds 已提交
888
{
889 890 891 892 893 894
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

895
		hlist_for_each_entry(pol,
896
				     &net->xfrm.policy_inexact[dir], bydst) {
897 898
			if (pol->type != type)
				continue;
899
			err = security_xfrm_policy_delete(pol->security);
900
			if (err) {
901
				xfrm_audit_policy_delete(pol, 0, task_valid);
902 903
				return err;
			}
904
		}
905
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
906
			hlist_for_each_entry(pol,
907
					     net->xfrm.policy_bydst[dir].table + i,
908 909 910
					     bydst) {
				if (pol->type != type)
					continue;
911 912
				err = security_xfrm_policy_delete(
								pol->security);
913
				if (err) {
J
Joy Latten 已提交
914
					xfrm_audit_policy_delete(pol, 0,
915
								 task_valid);
916 917 918 919 920 921 922 923 924
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
925
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
926 927 928 929 930
{
	return 0;
}
#endif

931
/* Remove every policy of @type from all directions.
 *
 * The security check runs first; its error aborts the flush. Each victim
 * is unlinked under the policy write lock, the lock is dropped while the
 * deletion is audited and the policy killed, and after re-taking the lock
 * the same chain is searched again from its head. Returns 0 when at least
 * one policy was removed, -ESRCH when none was found, or the security
 * error.
 */
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
	int dir, err = 0, cnt = 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

		/* inexact list: take the first match until none is left */
		for (;;) {
			struct xfrm_policy *victim = NULL;

			hlist_for_each_entry(pol,
					     &net->xfrm.policy_inexact[dir],
					     bydst) {
				if (pol->type == type) {
					victim = pol;
					break;
				}
			}
			if (!victim)
				break;

			__xfrm_policy_unlink(victim, dir);
			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
			cnt++;

			xfrm_audit_policy_delete(victim, 1, task_valid);
			xfrm_policy_kill(victim);

			write_lock_bh(&net->xfrm.xfrm_policy_lock);
		}

		/* hashed chains, handled the same way one bucket at a time */
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
			for (;;) {
				struct xfrm_policy *victim = NULL;

				hlist_for_each_entry(pol,
						     net->xfrm.policy_bydst[dir].table + i,
						     bydst) {
					if (pol->type == type) {
						victim = pol;
						break;
					}
				}
				if (!victim)
					break;

				__xfrm_policy_unlink(victim, dir);
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				cnt++;

				xfrm_audit_policy_delete(victim, 1, task_valid);
				xfrm_policy_kill(victim);

				write_lock_bh(&net->xfrm.xfrm_policy_lock);
			}
		}
	}

	if (!cnt)
		err = -ESRCH;
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

990
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
991
		     int (*func)(struct xfrm_policy *, int, int, void*),
L
Linus Torvalds 已提交
992 993
		     void *data)
{
H
Herbert Xu 已提交
994 995
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
996 997 998 999 1000
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;
L
Linus Torvalds 已提交
1001

H
Herbert Xu 已提交
1002
	if (list_empty(&walk->walk.all) && walk->seq != 0)
1003 1004
		return 0;

F
Fan Du 已提交
1005
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
H
Herbert Xu 已提交
1006
	if (list_empty(&walk->walk.all))
1007
		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
H
Herbert Xu 已提交
1008
	else
1009 1010 1011
		x = list_first_entry(&walk->walk.all,
				     struct xfrm_policy_walk_entry, all);

1012
	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
H
Herbert Xu 已提交
1013
		if (x->dead)
1014
			continue;
H
Herbert Xu 已提交
1015 1016 1017 1018 1019 1020 1021 1022 1023
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
1024
		}
H
Herbert Xu 已提交
1025
		walk->seq++;
L
Linus Torvalds 已提交
1026
	}
H
Herbert Xu 已提交
1027
	if (walk->seq == 0) {
J
Jamal Hadi Salim 已提交
1028 1029 1030
		error = -ENOENT;
		goto out;
	}
H
Herbert Xu 已提交
1031
	list_del_init(&walk->walk.all);
L
Linus Torvalds 已提交
1032
out:
F
Fan Du 已提交
1033
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1034 1035 1036 1037
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

H
Herbert Xu 已提交
1038 1039 1040 1041 1042 1043 1044 1045 1046
/*
 * Prepare @walk for a new iteration restricted to policies of @type.
 * The embedded list entry starts out unlinked and flagged dead, and the
 * sequence counter is reset to zero.
 */
void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
	walk->type = type;
	walk->seq = 0;
	walk->walk.dead = 1;
	INIT_LIST_HEAD(&walk->walk.all);
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

F
Fan Du 已提交
1047
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
H
Herbert Xu 已提交
1048 1049 1050 1051
{
	if (list_empty(&walk->walk.all))
		return;

F
Fan Du 已提交
1052
	write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
H
Herbert Xu 已提交
1053
	list_del(&walk->walk.all);
F
Fan Du 已提交
1054
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
H
Herbert Xu 已提交
1055 1056 1057
}
EXPORT_SYMBOL(xfrm_policy_walk_done);

1058 1059 1060 1061 1062
/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
1063 1064
static int xfrm_policy_match(const struct xfrm_policy *pol,
			     const struct flowi *fl,
1065
			     u8 type, u16 family, int dir)
L
Linus Torvalds 已提交
1066
{
1067
	const struct xfrm_selector *sel = &pol->selector;
1068 1069
	int ret = -ESRCH;
	bool match;
L
Linus Torvalds 已提交
1070

1071
	if (pol->family != family ||
1072
	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
1073
	    pol->type != type)
1074
		return ret;
L
Linus Torvalds 已提交
1075

1076
	match = xfrm_selector_match(sel, fl, family);
1077
	if (match)
1078
		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
1079
						  dir);
1080

1081
	return ret;
1082
}
L
Linus Torvalds 已提交
1083

A
Alexey Dobriyan 已提交
1084
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
1085
						     const struct flowi *fl,
1086 1087
						     u16 family, u8 dir)
{
1088
	int err;
1089
	struct xfrm_policy *pol, *ret;
1090
	const xfrm_address_t *daddr, *saddr;
1091
	struct hlist_head *chain;
1092
	u32 priority = ~0U;
1093

1094 1095 1096 1097 1098
	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

F
Fan Du 已提交
1099
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
A
Alexey Dobriyan 已提交
1100
	chain = policy_hash_direct(net, daddr, saddr, family, dir);
1101
	ret = NULL;
1102
	hlist_for_each_entry(pol, chain, bydst) {
1103 1104 1105 1106 1107 1108 1109 1110 1111
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
1112
			ret = pol;
1113
			priority = ret->priority;
1114 1115 1116
			break;
		}
	}
A
Alexey Dobriyan 已提交
1117
	chain = &net->xfrm.policy_inexact[dir];
1118
	hlist_for_each_entry(pol, chain, bydst) {
1119 1120 1121
		if ((pol->priority >= priority) && ret)
			break;

1122 1123 1124 1125 1126 1127 1128 1129
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
1130
		} else {
1131 1132
			ret = pol;
			break;
L
Linus Torvalds 已提交
1133 1134
		}
	}
1135 1136

	xfrm_pol_hold(ret);
1137
fail:
F
Fan Du 已提交
1138
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
1139

1140
	return ret;
1141 1142
}

1143
static struct xfrm_policy *
1144
__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_policy *pol;

	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (pol != NULL)
		return pol;
#endif
	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}

1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173
/*
 * Translate a FLOW_DIR_* value into the corresponding XFRM_POLICY_* value.
 * When the two sets of constants are numerically identical the input is
 * returned untouched; otherwise unknown values map to XFRM_POLICY_IN.
 */
static int flow_to_policy_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;

	if (dir == FLOW_DIR_OUT)
		return XFRM_POLICY_OUT;
	if (dir == FLOW_DIR_FWD)
		return XFRM_POLICY_FWD;

	/* FLOW_DIR_IN and anything unrecognised */
	return XFRM_POLICY_IN;
}

1174
static struct flow_cache_object *
1175
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
1176
		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
1177 1178
{
	struct xfrm_policy *pol;
1179 1180 1181

	if (old_obj)
		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
1182

1183
	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
1184
	if (IS_ERR_OR_NULL(pol))
1185 1186 1187 1188 1189 1190 1191
		return ERR_CAST(pol);

	/* Resolver returns two references:
	 * one for cache and one for caller of flow_cache_lookup() */
	xfrm_pol_hold(pol);

	return &pol->flo;
L
Linus Torvalds 已提交
1192 1193
}

1194 1195 1196
/*
 * Translate an XFRM_POLICY_* direction into the corresponding FLOW_DIR_*
 * value.  When the two sets of constants are numerically identical the
 * input is returned untouched; otherwise unknown values map to FLOW_DIR_IN.
 */
static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;

	if (dir == XFRM_POLICY_OUT)
		return FLOW_DIR_OUT;
	if (dir == XFRM_POLICY_FWD)
		return FLOW_DIR_FWD;

	/* XFRM_POLICY_IN and anything unrecognised */
	return FLOW_DIR_IN;
}

1211 1212
static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
						 const struct flowi *fl)
L
Linus Torvalds 已提交
1213 1214
{
	struct xfrm_policy *pol;
F
Fan Du 已提交
1215
	struct net *net = sock_net(sk);
L
Linus Torvalds 已提交
1216

F
Fan Du 已提交
1217
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1218
	if ((pol = sk->sk_policy[dir]) != NULL) {
1219 1220
		bool match = xfrm_selector_match(&pol->selector, fl,
						 sk->sk_family);
1221
		int err = 0;
1222

1223
		if (match) {
J
Jamal Hadi Salim 已提交
1224 1225 1226 1227
			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
				pol = NULL;
				goto out;
			}
1228
			err = security_xfrm_policy_lookup(pol->security,
1229
						      fl->flowi_secid,
1230
						      policy_to_flow_dir(dir));
1231 1232 1233 1234 1235 1236 1237
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
L
Linus Torvalds 已提交
1238 1239
			pol = NULL;
	}
J
Jamal Hadi Salim 已提交
1240
out:
F
Fan Du 已提交
1241
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1242 1243 1244 1245 1246
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
1247
	struct net *net = xp_net(pol);
1248

1249 1250
	list_add(&pol->walk.all, &net->xfrm.policy_all);
	net->xfrm.policy_count[dir]++;
L
Linus Torvalds 已提交
1251 1252 1253 1254 1255 1256
	xfrm_pol_hold(pol);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
1257 1258
	struct net *net = xp_net(pol);

H
Herbert Xu 已提交
1259
	if (list_empty(&pol->walk.all))
1260
		return NULL;
L
Linus Torvalds 已提交
1261

H
Herbert Xu 已提交
1262 1263 1264 1265 1266 1267 1268
	/* Socket policies are not hashed. */
	if (!hlist_unhashed(&pol->bydst)) {
		hlist_del(&pol->bydst);
		hlist_del(&pol->byidx);
	}

	list_del_init(&pol->walk.all);
1269
	net->xfrm.policy_count[dir]--;
1270 1271

	return pol;
L
Linus Torvalds 已提交
1272 1273
}

H
Herbert Xu 已提交
1274 1275 1276 1277 1278 1279 1280 1281 1282 1283
/*
 * Link a per-socket policy.  Socket policies are accounted in the counter
 * slots that follow the XFRM_POLICY_MAX regular directions.
 */
static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
{
	int sk_dir = XFRM_POLICY_MAX + dir;

	__xfrm_policy_link(pol, sk_dir);
}

/*
 * Unlink a per-socket policy, using the counter slot at
 * XFRM_POLICY_MAX + @dir.  The result of the unlink is ignored.
 */
static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
{
	int sk_dir = XFRM_POLICY_MAX + dir;

	__xfrm_policy_unlink(pol, sk_dir);
}

1284
int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
L
Linus Torvalds 已提交
1285
{
F
Fan Du 已提交
1286 1287 1288
	struct net *net = xp_net(pol);

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1289
	pol = __xfrm_policy_unlink(pol, dir);
F
Fan Du 已提交
1290
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1291 1292
	if (pol) {
		xfrm_policy_kill(pol);
1293
		return 0;
L
Linus Torvalds 已提交
1294
	}
1295
	return -ENOENT;
L
Linus Torvalds 已提交
1296
}
1297
EXPORT_SYMBOL(xfrm_policy_delete);
L
Linus Torvalds 已提交
1298 1299 1300

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
1301
	struct net *net = xp_net(pol);
L
Linus Torvalds 已提交
1302 1303
	struct xfrm_policy *old_pol;

1304 1305 1306 1307 1308
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

F
Fan Du 已提交
1309
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1310 1311 1312
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
1313
		pol->curlft.add_time = get_seconds();
1314
		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
H
Herbert Xu 已提交
1315
		xfrm_sk_policy_link(pol, dir);
L
Linus Torvalds 已提交
1316
	}
1317 1318 1319 1320
	if (old_pol) {
		if (pol)
			xfrm_policy_requeue(old_pol, pol);

1321 1322 1323
		/* Unlinking succeeds always. This is the only function
		 * allowed to delete or replace socket policy.
		 */
H
Herbert Xu 已提交
1324
		xfrm_sk_policy_unlink(old_pol, dir);
1325
	}
F
Fan Du 已提交
1326
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1327 1328 1329 1330 1331 1332 1333

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

1334
static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
L
Linus Torvalds 已提交
1335
{
1336
	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
F
Fan Du 已提交
1337
	struct net *net = xp_net(old);
L
Linus Torvalds 已提交
1338 1339 1340

	if (newp) {
		newp->selector = old->selector;
1341 1342
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
1343 1344 1345
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
L
Linus Torvalds 已提交
1346 1347
		newp->lft = old->lft;
		newp->curlft = old->curlft;
1348
		newp->mark = old->mark;
L
Linus Torvalds 已提交
1349 1350 1351 1352
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
1353
		newp->type = old->type;
L
Linus Torvalds 已提交
1354 1355
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
F
Fan Du 已提交
1356
		write_lock_bh(&net->xfrm.xfrm_policy_lock);
H
Herbert Xu 已提交
1357
		xfrm_sk_policy_link(newp, dir);
F
Fan Du 已提交
1358
		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376
		xfrm_pol_put(newp);
	}
	return newp;
}

/*
 * Replace the two per-socket policy slots of @sk with private clones.
 * Both slots are cleared first; each previously set slot is then refilled
 * with a clone of its old policy.
 *
 * Returns 0 on success or -ENOMEM as soon as a clone fails (the failing
 * slot and any later slot are left NULL).
 */
int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *src[2] = { sk->sk_policy[0], sk->sk_policy[1] };
	int dir;

	sk->sk_policy[0] = NULL;
	sk->sk_policy[1] = NULL;

	for (dir = 0; dir < 2; dir++) {
		if (!src[dir])
			continue;

		sk->sk_policy[dir] = clone_policy(src[dir], dir);
		if (sk->sk_policy[dir] == NULL)
			return -ENOMEM;
	}

	return 0;
}

1377
static int
D
David Ahern 已提交
1378 1379
xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
	       xfrm_address_t *remote, unsigned short family)
1380 1381 1382 1383 1384 1385
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
D
David Ahern 已提交
1386
	err = afinfo->get_saddr(net, oif, local, remote);
1387 1388 1389 1390
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

L
Linus Torvalds 已提交
1391 1392 1393
/* Resolve list of templates for the flow, given policy. */

static int
1394 1395
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
		      struct xfrm_state **xfrm, unsigned short family)
L
Linus Torvalds 已提交
1396
{
A
Alexey Dobriyan 已提交
1397
	struct net *net = xp_net(policy);
L
Linus Torvalds 已提交
1398 1399 1400 1401
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1402
	xfrm_address_t tmp;
L
Linus Torvalds 已提交
1403

1404
	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
L
Linus Torvalds 已提交
1405 1406 1407 1408 1409
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

1410 1411
		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
L
Linus Torvalds 已提交
1412 1413
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
1414
			if (xfrm_addr_any(local, tmpl->encap_family)) {
D
David Ahern 已提交
1415 1416 1417
				error = xfrm_get_saddr(net, fl->flowi_oif,
						       &tmp, remote,
						       tmpl->encap_family);
1418 1419 1420 1421
				if (error)
					goto fail;
				local = &tmp;
			}
L
Linus Torvalds 已提交
1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
W
Weilong Chen 已提交
1436
		} else if (error == -ESRCH) {
1437
			error = -EAGAIN;
W
Weilong Chen 已提交
1438
		}
L
Linus Torvalds 已提交
1439 1440 1441 1442 1443 1444 1445

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
1446
	for (nx--; nx >= 0; nx--)
L
Linus Torvalds 已提交
1447 1448 1449 1450
		xfrm_state_put(xfrm[nx]);
	return error;
}

1451
static int
1452 1453
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
		  struct xfrm_state **xfrm, unsigned short family)
1454
{
1455 1456
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1457 1458 1459 1460 1461 1462 1463 1464 1465 1466
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}
1467 1468

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1469 1470 1471 1472 1473 1474 1475
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

1476 1477 1478 1479
	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

1480 1481 1482
	return cnx;

 fail:
1483
	for (cnx--; cnx >= 0; cnx--)
1484
		xfrm_state_put(tpp[cnx]);
1485 1486 1487 1488
	return error;

}

L
Linus Torvalds 已提交
1489 1490 1491 1492
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

1493
static inline int xfrm_get_tos(const struct flowi *fl, int family)
1494 1495 1496
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;
L
Linus Torvalds 已提交
1497

1498 1499 1500 1501 1502 1503 1504 1505 1506 1507
	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}

1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518
/*
 * Flow cache ->get hook for bundles.
 *
 * Returns @flo with a dst reference taken when the cached bundle may be
 * used as is, or NULL when it must be resolved again:
 *  - a bundle without a route that still has xfrms to resolve,
 *  - a bundle flagged DST_XFRM_QUEUE,
 *  - a routed bundle that stale_bundle() reports as stale.
 */
static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (!xdst->route) {
		/* Dummy bundle - if it has xfrms we were not
		 * able to build bundle as template resolution failed.
		 * It means we need to try again resolving. */
		if (xdst->num_xfrms > 0)
			return NULL;
	} else if ((dst->flags & DST_XFRM_QUEUE) || stale_bundle(dst)) {
		/* Queueing bundle, or real bundle gone stale */
		return NULL;
	}

	dst_hold(dst);
	return flo;
}

/*
 * Flow cache ->check hook for bundles: returns 1 when the bundle has a
 * route and stale_bundle() does not report it stale, 0 otherwise.
 */
static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);

	return xdst->route && !stale_bundle(&xdst->u.dst);
}

/*
 * Flow cache ->delete hook for bundles: frees the dst entry embedded in
 * the bundle that owns @flo.
 */
static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);

	dst_free(&xdst->u.dst);
}

/* Flow cache callbacks installed on every bundle by xfrm_alloc_dst(). */
static const struct flow_cache_ops xfrm_bundle_fc_ops = {
	.delete	= xfrm_bundle_flo_delete,
	.check	= xfrm_bundle_flo_check,
	.get	= xfrm_bundle_flo_get,
};

1558
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
L
Linus Torvalds 已提交
1559 1560
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1561
	struct dst_ops *dst_ops;
1562 1563 1564 1565 1566
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

1567 1568 1569 1570
	switch (family) {
	case AF_INET:
		dst_ops = &net->xfrm.xfrm4_dst_ops;
		break;
E
Eric Dumazet 已提交
1571
#if IS_ENABLED(CONFIG_IPV6)
1572 1573 1574 1575 1576 1577 1578
	case AF_INET6:
		dst_ops = &net->xfrm.xfrm6_dst_ops;
		break;
#endif
	default:
		BUG();
	}
1579
	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);
1580

1581
	if (likely(xdst)) {
1582 1583 1584
		struct dst_entry *dst = &xdst->u.dst;

		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
1585
		xdst->flo.ops = &xfrm_bundle_fc_ops;
1586
	} else
1587
		xdst = ERR_PTR(-ENOBUFS);
1588

1589 1590
	xfrm_policy_put_afinfo(afinfo);

1591 1592 1593
	return xdst;
}

1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610
/*
 * Let the address-family backend of @dst initialise the path information
 * of bundle @path.  Returns the backend's result, or -EINVAL when no
 * backend is registered for the family of @dst.
 */
static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo;
	int ret = -EINVAL;

	afinfo = xfrm_policy_get_afinfo(dst->ops->family);
	if (afinfo) {
		ret = afinfo->init_path(path, dst, nfheader_len);
		xfrm_policy_put_afinfo(afinfo);
	}

	return ret;
}

H
Herbert Xu 已提交
1611
static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1612
				const struct flowi *fl)
1613 1614 1615 1616 1617 1618
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
L
Linus Torvalds 已提交
1619
		return -EINVAL;
1620

H
Herbert Xu 已提交
1621
	err = afinfo->fill_dst(xdst, dev, fl);
1622

L
Linus Torvalds 已提交
1623
	xfrm_policy_put_afinfo(afinfo);
1624

L
Linus Torvalds 已提交
1625 1626 1627
	return err;
}

1628

1629 1630 1631 1632 1633 1634
/* Allocate a chain of dst_entry's, attach the known xfrm's and calculate
 * all the metrics; in short, build a bundle.
 *
 * One xfrm_dst is allocated for each of the @nx states in @xfrm and the
 * entries are chained through ->child, ending at the route that is current
 * when the loop finishes.  @dst is the route the first entry is built on.
 *
 * Returns the first dst of the chain, or an ERR_PTR().  On the error paths
 * that go through put_states, the references on the states from the
 * current index onwards are dropped.
 */

static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
1635
					    const struct flowi *fl,
1636 1637
					    struct dst_entry *dst)
{
1638
	struct net *net = xp_net(policy);
1639 1640
	unsigned long now = jiffies;
	struct net_device *dev;
1641
	struct xfrm_mode *inner_mode;
1642 1643 1644 1645 1646
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
1647
	int nfheader_len = 0;
1648 1649 1650
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;
1651 1652 1653
	xfrm_address_t saddr, daddr;

	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1654 1655 1656 1657 1658 1659 1660 1661 1662

	/* A negative TOS is an errno from xfrm_get_tos() */
	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	/* Build one bundle entry per state */
	for (; i < nx; i++) {
1663
		struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1664 1665 1666 1667 1668 1669 1670 1671
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682
		/* State without a fixed selector family: derive the inner
		 * mode from the current family. */
		if (xfrm[i]->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(xfrm[i],
							xfrm_af2proto(family));
			if (!inner_mode) {
				err = -EAFNOSUPPORT;
				dst_release(dst);
				goto put_states;
			}
		} else
			inner_mode = xfrm[i]->inner_mode;

1683 1684 1685 1686 1687 1688 1689 1690
		/* First entry becomes the head; later ones hang off the
		 * previous entry and are flagged DST_NOHASH. */
		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
1691
		dst_copy_metrics(dst1, dst);
1692 1693 1694

		/* Anything but transport mode switches to the state's own
		 * family and needs a fresh route lookup; transport mode
		 * keeps the current route and takes another reference. */
		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
D
David Ahern 已提交
1695 1696
			dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
					      &saddr, &daddr, family);
1697 1698 1699 1700 1701 1702 1703
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
1704
		xdst->xfrm_genid = xfrm[i]->genid;
1705

1706
		dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
1707 1708 1709 1710
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
1711
		dst1->output = inner_mode->afinfo->output;
1712 1713 1714 1715 1716

		dst1->next = dst_prev;
		dst_prev = dst1;

		/* Accumulate the overhead of all states */
		header_len += xfrm[i]->props.header_len;
1717 1718
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729
		trailer_len += xfrm[i]->props.trailer_len;
	}

	/* Terminate the chain with the final route */
	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

1730
	/* The return value of xfrm_init_path() is not checked here */
	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1731 1732 1733 1734 1735
	xfrm_init_pmtu(dst_prev);

	/* Second pass: fill each entry and give it the header/trailer
	 * length that remains from that entry onwards. */
	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

H
Herbert Xu 已提交
1736
		err = xfrm_fill_dst(xdst, dev, fl);
1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	/* Drop the states from index i onwards */
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}

1759
#ifdef CONFIG_XFRM_SUB_POLICY
/*
 * Copy @size bytes from @src into the buffer *@target, allocating the
 * buffer with GFP_ATOMIC first when *@target is still NULL.
 * Returns 0 on success or -ENOMEM when the allocation failed.
 */
static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
{
	void *buf = *target;

	if (!buf) {
		buf = kmalloc(size, GFP_ATOMIC);
		if (!buf)
			return -ENOMEM;
		*target = buf;
	}

	memcpy(buf, src, size);
	return 0;
}
#endif
1772

1773 1774
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of selector @sel in the
 * bundle's ->partner buffer (allocated on first use) and return the result
 * of that copy.  Without sub-policy support this does nothing and
 * returns 0.
 */
static int xfrm_dst_update_parent(struct dst_entry *dst,
				  const struct xfrm_selector *sel)
{
	int err = 0;
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

	err = xfrm_dst_alloc_copy((void **)&(xdst->partner),
				  sel, sizeof(*sel));
#endif
	return err;
}

1785 1786
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of flow @fl in the bundle's
 * ->origin buffer (allocated on first use) and return the result of that
 * copy.  Without sub-policy support this does nothing and returns 0.
 */
static int xfrm_dst_update_origin(struct dst_entry *dst,
				  const struct flowi *fl)
{
	int err = 0;
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

	err = xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#endif
	return err;
}
L
Linus Torvalds 已提交
1795

1796
/*
 * Complete the policy array @pols after the first policy was looked up.
 *
 * On entry pols[0] holds the lookup result and *@num_pols the number of
 * valid entries.  On return *@num_pols and *@num_xfrms describe the
 * policies to apply:
 *  - no policy: both are set to 0;
 *  - with CONFIG_XFRM_SUB_POLICY, an allowing non-main pols[0] triggers an
 *    additional main policy lookup (direction XFRM_POLICY_OUT) whose
 *    result, if any, is stored in pols[1];
 *  - *@num_xfrms is the sum of the template counts, or -1 as soon as one
 *    of the policies does not have action XFRM_POLICY_ALLOW.
 *
 * Returns 0, or the error encoded in pols[0] / pols[1].  When the second
 * lookup fails, the policies counted so far are released.
 */
static int xfrm_expand_policies(const struct flowi *fl, u16 family,
				struct xfrm_policy **pols,
				int *num_pols, int *num_xfrms)
{
	int idx;

	if (*num_pols == 0 || pols[0] == NULL) {
		*num_pols = 0;
		*num_xfrms = 0;
		return 0;
	}
	if (IS_ERR(pols[0]))
		return PTR_ERR(pols[0]);

	*num_xfrms = pols[0]->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->action == XFRM_POLICY_ALLOW &&
	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
						    XFRM_POLICY_TYPE_MAIN,
						    fl, family,
						    XFRM_POLICY_OUT);
		if (IS_ERR(pols[1])) {
			xfrm_pols_put(pols, *num_pols);
			return PTR_ERR(pols[1]);
		}
		if (pols[1]) {
			*num_pols += 1;
			*num_xfrms += pols[1]->xfrm_nr;
		}
	}
#endif
	/* A single non-allowing policy marks the whole set with -1 */
	for (idx = 0; idx < *num_pols; idx++) {
		if (pols[idx]->action != XFRM_POLICY_ALLOW) {
			*num_xfrms = -1;
			break;
		}
	}

	return 0;
}

/*
 * Resolve the templates of @pols for flow @fl and build a bundle on top of
 * @dst_orig.
 *
 * Returns the new bundle with the policy pointers and the genid of pols[0]
 * recorded in it; NULL when template resolution yielded no states; or an
 * ERR_PTR().  The matching XFRM MIB error counter is bumped for every
 * failure except -EAGAIN from template resolution.
 */
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
			       const struct flowi *fl, u16 family,
			       struct dst_entry *dst_orig)
{
	struct net *net = xp_net(pols[0]);
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct xfrm_dst *xdst;
	struct dst_entry *dst;
	int nx;
	int err;

	/* Try to instantiate a bundle */
	nx = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
	if (nx <= 0) {
		if (nx != 0 && nx != -EAGAIN)
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
		/* nx == 0 turns into a NULL return here */
		return ERR_PTR(nx);
	}

	dst = xfrm_bundle_create(pols[0], xfrm, nx, fl, dst_orig);
	if (IS_ERR(dst)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
		return ERR_CAST(dst);
	}

	xdst = (struct xfrm_dst *)dst;
	xdst->num_xfrms = nx;

	if (num_pols > 1)
		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
	else
		err = xfrm_dst_update_origin(dst, fl);
	if (unlikely(err)) {
		dst_free(dst);
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
		return ERR_PTR(err);
	}

	xdst->num_pols = num_pols;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
	xdst->policy_genid = atomic_read(&pols[0]->genid);

	return xdst;
}

1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895
/*
 * Timer handler for a policy's hold queue; @arg is the policy pointer.
 *
 * The first queued packet is used to redo the xfrm lookup.  If the lookup
 * fails the queue is purged.  If the result is still a queueing bundle
 * (DST_XFRM_QUEUE), the timeout is doubled and the timer re-armed, unless
 * the timeout has already reached XFRM_QUEUE_TMO_MAX, in which case the
 * queue is purged.  Otherwise all queued packets are taken off the queue,
 * looked up again one by one and sent with dst_output(); packets whose
 * lookup fails are freed.
 *
 * Every exit path drops one reference on the policy.
 */
static void xfrm_policy_queue_process(unsigned long arg)
{
	struct sk_buff *skb;
	struct sock *sk;
	struct dst_entry *dst;
	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
	struct xfrm_policy_queue *pq = &pol->polq;
	struct flowi fl;
	struct sk_buff_head list;

	/* Peek at the head packet and decode its flow under the queue lock */
	spin_lock(&pq->hold_queue.lock);
	skb = skb_peek(&pq->hold_queue);
1896 1897 1898 1899
	if (!skb) {
		spin_unlock(&pq->hold_queue.lock);
		goto out;
	}
1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917
	dst = skb_dst(skb);
	sk = skb->sk;
	xfrm_decode_session(skb, &fl, dst->ops->family);
	spin_unlock(&pq->hold_queue.lock);

	/* Redo the lookup on the underlying path of the packet's dst */
	dst_hold(dst->path);
	dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
			  sk, 0);
	if (IS_ERR(dst))
		goto purge_queue;

	/* Still only a queueing bundle: back off and try again later */
	if (dst->flags & DST_XFRM_QUEUE) {
		dst_release(dst);

		if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
			goto purge_queue;

		pq->timeout = pq->timeout << 1;
1918 1919 1920
		/* Take a policy reference when mod_timer() returns 0 */
		if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
			xfrm_pol_hold(pol);
	goto out;
1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947
	}

	dst_release(dst);

	__skb_queue_head_init(&list);

	/* Move the whole hold queue onto a private list and reset the
	 * timeout, so the packets can be processed without the lock. */
	spin_lock(&pq->hold_queue.lock);
	pq->timeout = 0;
	skb_queue_splice_init(&pq->hold_queue, &list);
	spin_unlock(&pq->hold_queue.lock);

	while (!skb_queue_empty(&list)) {
		skb = __skb_dequeue(&list);

		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
		dst_hold(skb_dst(skb)->path);
		dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
				  &fl, skb->sk, 0);
		if (IS_ERR(dst)) {
			kfree_skb(skb);
			continue;
		}

		/* Swap the packet's dst for the looked-up one and send it */
		nf_reset(skb);
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);

1948
		dst_output(skb->sk, skb);
1949 1950
	}

1951 1952
out:
	xfrm_pol_put(pol);
1953 1954 1955 1956
	return;

purge_queue:
	pq->timeout = 0;
1957
	skb_queue_purge(&pq->hold_queue);
1958
	xfrm_pol_put(pol);
1959 1960
}

1961
static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
1962 1963 1964 1965
{
	unsigned long sched_next;
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
1966 1967
	struct xfrm_policy *pol = xdst->pols[0];
	struct xfrm_policy_queue *pq = &pol->polq;
1968

1969
	if (unlikely(skb_fclone_busy(sk, skb))) {
1970 1971 1972
		kfree_skb(skb);
		return 0;
	}
1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990

	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
		kfree_skb(skb);
		return -EAGAIN;
	}

	skb_dst_force(skb);

	spin_lock_bh(&pq->hold_queue.lock);

	if (!pq->timeout)
		pq->timeout = XFRM_QUEUE_TMO_MIN;

	sched_next = jiffies + pq->timeout;

	if (del_timer(&pq->hold_timer)) {
		if (time_before(pq->hold_timer.expires, sched_next))
			sched_next = pq->hold_timer.expires;
1991
		xfrm_pol_put(pol);
1992 1993 1994
	}

	__skb_queue_tail(&pq->hold_queue, skb);
1995 1996
	if (!mod_timer(&pq->hold_timer, sched_next))
		xfrm_pol_hold(pol);
1997 1998 1999 2000 2001 2002 2003

	spin_unlock_bh(&pq->hold_queue.lock);

	return 0;
}

/*
 * Create a placeholder bundle for a flow whose policies were found but
 * for which no real bundle can be instantiated.
 *
 * A bare xfrm_dst is returned when queueing was not requested
 * (XFRM_LOOKUP_QUEUE not set in xflo->flags), when larval drop is enabled
 * for @net, or when @num_xfrms is not positive.  Otherwise the bundle is
 * set up on top of xflo->dst_orig as a queueing bundle (DST_XFRM_QUEUE)
 * whose output handler is xdst_queue_output().
 *
 * Returns the bundle or an ERR_PTR().
 */
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
						 struct xfrm_flo *xflo,
						 const struct flowi *fl,
						 int num_xfrms,
						 u16 family)
{
	struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
	struct dst_entry *route;
	struct dst_entry *bundle;
	struct net_device *dev;
	int err;

	if (IS_ERR(xdst))
		return xdst;

	if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
	    net->xfrm.sysctl_larval_drop ||
	    num_xfrms <= 0)
		return xdst;

	route = xflo->dst_orig;
	bundle = &xdst->u.dst;

	/* One route reference for xdst->route ... */
	dst_hold(route);
	xdst->route = route;

	dst_copy_metrics(bundle, route);

	bundle->obsolete = DST_OBSOLETE_FORCE_CHK;
	bundle->flags |= DST_HOST | DST_XFRM_QUEUE;
	bundle->lastuse = jiffies;

	bundle->input = dst_discard;
	bundle->output = xdst_queue_output;

	/* ... and a second one before it is set as child and path */
	dst_hold(route);
	bundle->child = route;
	bundle->path = route;

	xfrm_init_path((struct xfrm_dst *)bundle, route, 0);

	dev = route->dev;
	if (!dev) {
		err = -ENODEV;
		goto fail;
	}

	err = xfrm_fill_dst(xdst, dev, fl);
	if (err)
		goto fail;

	return xdst;

fail:
	dst_release(bundle);
	return ERR_PTR(err);
}

2062
/*
 * Flow cache resolver for bundles.
 *
 * @oldflo, when set, is the previously cached bundle; its policies are
 * reused unless one of them is marked dead, in which case the old bundle
 * is freed and the policies are looked up from scratch.  @ctx carries the
 * struct xfrm_flo with the original route and the lookup flags.
 *
 * Returns the flow cache object of the bundle to use (with a dst reference
 * taken for the caller), NULL when no policy applies, or an ERR_PTR().
 */
static struct flow_cache_object *
2063
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
2064 2065
		   struct flow_cache_object *oldflo, void *ctx)
{
2066
	struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct xfrm_dst *xdst, *new_xdst;
	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;

	/* Check if the policies from old bundle are usable */
	xdst = NULL;
	if (oldflo) {
		xdst = container_of(oldflo, struct xfrm_dst, flo);
		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		pol_dead = 0;
		for (i = 0; i < num_pols; i++) {
			pols[i] = xdst->pols[i];
			pol_dead |= pols[i]->walk.dead;
		}
		/* Any dead policy invalidates the old bundle entirely */
		if (pol_dead) {
			dst_free(&xdst->u.dst);
			xdst = NULL;
			num_pols = 0;
			num_xfrms = 0;
			oldflo = NULL;
		}
	}

	/* Resolve policies to use if we couldn't get them from
	 * previous cache entry */
	if (xdst == NULL) {
		num_pols = 1;
2095 2096
		pols[0] = __xfrm_policy_lookup(net, fl, family,
					       flow_to_policy_dir(dir));
2097 2098 2099 2100 2101 2102 2103 2104 2105 2106
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto inc_error;
		if (num_pols == 0)
			return NULL;
		if (num_xfrms <= 0)
			goto make_dummy_bundle;
	}

2107 2108
	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
						  xflo->dst_orig);
2109 2110 2111 2112 2113 2114 2115 2116
	if (IS_ERR(new_xdst)) {
		err = PTR_ERR(new_xdst);
		if (err != -EAGAIN)
			goto error;
		/* -EAGAIN: keep using the old bundle if there is one,
		 * otherwise fall back to a dummy bundle */
		if (oldflo == NULL)
			goto make_dummy_bundle;
		dst_hold(&xdst->u.dst);
		return oldflo;
2117 2118 2119 2120 2121 2122 2123
	} else if (new_xdst == NULL) {
		/* Resolution produced no states at all */
		num_xfrms = 0;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		xdst->num_xfrms = 0;
		dst_hold(&xdst->u.dst);
		return oldflo;
2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141
	}

	/* Kill the previous bundle */
	if (xdst) {
		/* The policies were stolen for newly generated bundle */
		xdst->num_pols = 0;
		dst_free(&xdst->u.dst);
	}

	/* Flow cache does not have reference, it dst_free()'s,
	 * but we do need to return one reference for original caller */
	dst_hold(&new_xdst->u.dst);
	return &new_xdst->flo;

make_dummy_bundle:
	/* We found policies, but there's no bundles to instantiate:
	 * either because the policy blocks, has no transformations or
	 * we could not build template (no xfrm_states).*/
2142
	xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
2143 2144 2145 2146 2147 2148
	if (IS_ERR(xdst)) {
		xfrm_pols_put(pols, num_pols);
		return ERR_CAST(xdst);
	}
	/* The dummy bundle takes over the policy pointers */
	xdst->num_pols = num_pols;
	xdst->num_xfrms = num_xfrms;
2149
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162

	dst_hold(&xdst->u.dst);
	return &xdst->flo;

inc_error:
	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
	/* With an old bundle, free it; otherwise release the policies
	 * directly */
	if (xdst != NULL)
		dst_free(&xdst->u.dst);
	else
		xfrm_pols_put(pols, num_pols);
	return ERR_PTR(err);
}
L
Linus Torvalds 已提交
2163

2164 2165 2166 2167 2168 2169 2170 2171
/* Ask the per-family policy ops for a blackhole route based on @dst_orig.
 *
 * Returns whatever the family's ->blackhole_route() hook returns, or
 * ERR_PTR(-EINVAL) when no afinfo is registered for @family; in that
 * case the reference on @dst_orig is dropped here.
 */
static struct dst_entry *make_blackhole(struct net *net, u16 family,
					struct dst_entry *dst_orig)
{
	struct xfrm_policy_afinfo *ops;
	struct dst_entry *blackhole;

	ops = xfrm_policy_get_afinfo(family);
	if (ops == NULL) {
		dst_release(dst_orig);
		return ERR_PTR(-EINVAL);
	}

	blackhole = ops->blackhole_route(net, dst_orig);
	xfrm_policy_put_afinfo(ops);

	return blackhole;
}

L
Linus Torvalds 已提交
2181 2182 2183 2184 2185
/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
2186 2187 2188
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl,
			      struct sock *sk, int flags)
L
Linus Torvalds 已提交
2189
{
2190
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2191 2192
	struct flow_cache_object *flo;
	struct xfrm_dst *xdst;
2193
	struct dst_entry *dst, *route;
2194
	u16 family = dst_orig->ops->family;
2195
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
2196
	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
2197

2198 2199 2200
	dst = NULL;
	xdst = NULL;
	route = NULL;
2201

2202
	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
2203 2204 2205 2206 2207
		num_pols = 1;
		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
2208
			goto dropdst;
2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222

		if (num_pols) {
			if (num_xfrms <= 0) {
				drop_pols = num_pols;
				goto no_transform;
			}

			xdst = xfrm_resolve_and_create_bundle(
					pols, num_pols, fl,
					family, dst_orig);
			if (IS_ERR(xdst)) {
				xfrm_pols_put(pols, num_pols);
				err = PTR_ERR(xdst);
				goto dropdst;
2223 2224 2225 2226
			} else if (xdst == NULL) {
				num_xfrms = 0;
				drop_pols = num_pols;
				goto no_transform;
2227 2228
			}

2229 2230
			dst_hold(&xdst->u.dst);
			xdst->u.dst.flags |= DST_NOCACHE;
2231
			route = xdst->route;
2232
		}
2233
	}
L
Linus Torvalds 已提交
2234

2235
	if (xdst == NULL) {
2236 2237 2238 2239 2240
		struct xfrm_flo xflo;

		xflo.dst_orig = dst_orig;
		xflo.flags = flags;

L
Linus Torvalds 已提交
2241
		/* To accelerate a bit...  */
2242
		if ((dst_orig->flags & DST_NOXFRM) ||
A
Alexey Dobriyan 已提交
2243
		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
2244
			goto nopol;
L
Linus Torvalds 已提交
2245

2246
		flo = flow_cache_lookup(net, fl, family, dir,
2247
					xfrm_bundle_lookup, &xflo);
2248 2249
		if (flo == NULL)
			goto nopol;
2250
		if (IS_ERR(flo)) {
2251
			err = PTR_ERR(flo);
2252
			goto dropdst;
2253
		}
2254 2255 2256 2257
		xdst = container_of(flo, struct xfrm_dst, flo);

		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
2258
		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271
		route = xdst->route;
	}

	dst = &xdst->u.dst;
	if (route == NULL && num_xfrms > 0) {
		/* The only case when xfrm_bundle_lookup() returns a
		 * bundle with null route, is when the template could
		 * not be resolved. It means policies are there, but
		 * bundle could not be created, since we don't yet
		 * have the xfrm_state's. We need to wait for KM to
		 * negotiate new SA's or bail out with error.*/
		if (net->xfrm.sysctl_larval_drop) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
2272 2273
			err = -EREMOTE;
			goto error;
2274 2275
		}

2276
		err = -EAGAIN;
2277 2278 2279

		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
		goto error;
L
Linus Torvalds 已提交
2280 2281
	}

2282 2283
no_transform:
	if (num_pols == 0)
2284
		goto nopol;
L
Linus Torvalds 已提交
2285

2286 2287 2288
	if ((flags & XFRM_LOOKUP_ICMP) &&
	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
		err = -ENOENT;
2289
		goto error;
2290
	}
2291

2292 2293
	for (i = 0; i < num_pols; i++)
		pols[i]->curlft.use_time = get_seconds();
2294

2295
	if (num_xfrms < 0) {
L
Linus Torvalds 已提交
2296
		/* Prohibit the flow */
A
Alexey Dobriyan 已提交
2297
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
2298 2299
		err = -EPERM;
		goto error;
2300 2301 2302 2303 2304 2305
	} else if (num_xfrms > 0) {
		/* Flow transformed */
		dst_release(dst_orig);
	} else {
		/* Flow passes untransformed */
		dst_release(dst);
2306
		dst = dst_orig;
L
Linus Torvalds 已提交
2307
	}
2308 2309
ok:
	xfrm_pols_put(pols, drop_pols);
G
Gao feng 已提交
2310 2311 2312
	if (dst && dst->xfrm &&
	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
		dst->flags |= DST_XFRM_TUNNEL;
2313
	return dst;
L
Linus Torvalds 已提交
2314

2315
nopol:
2316 2317
	if (!(flags & XFRM_LOOKUP_ICMP)) {
		dst = dst_orig;
2318
		goto ok;
2319
	}
2320
	err = -ENOENT;
L
Linus Torvalds 已提交
2321
error:
2322
	dst_release(dst);
2323
dropdst:
2324 2325
	if (!(flags & XFRM_LOOKUP_KEEP_DST_REF))
		dst_release(dst_orig);
2326
	xfrm_pols_put(pols, drop_pols);
2327
	return ERR_PTR(err);
L
Linus Torvalds 已提交
2328 2329 2330
}
EXPORT_SYMBOL(xfrm_lookup);

2331 2332 2333 2334 2335 2336 2337
/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
 * Otherwise we may send out blackholed packets.
 */
struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
				    const struct flowi *fl,
				    struct sock *sk, int flags)
{
2338
	struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
2339 2340
					    flags | XFRM_LOOKUP_QUEUE |
					    XFRM_LOOKUP_KEEP_DST_REF);
2341 2342 2343 2344 2345 2346 2347 2348

	if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
		return make_blackhole(net, dst_orig->ops->family, dst_orig);

	return dst;
}
EXPORT_SYMBOL(xfrm_lookup_route);

2349
static inline int
2350
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
2351 2352 2353 2354 2355 2356 2357 2358
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
2359
	return x->type->reject(x, skb, fl);
2360 2361
}

L
Linus Torvalds 已提交
2362 2363 2364 2365 2366 2367 2368
/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we make this in maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have policy cached at them.
 */

static inline int
2369
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
L
Linus Torvalds 已提交
2370 2371 2372
	      unsigned short family)
{
	if (xfrm_state_kern(x))
2373
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
L
Linus Torvalds 已提交
2374 2375 2376 2377
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
2378
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
2379
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
2380 2381
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
L
Linus Torvalds 已提交
2382 2383
}

2384 2385 2386 2387 2388 2389 2390
/*
 * A value >= 0 is returned when validation succeeds: either the template
 * is bypassed because it is optional and in transport mode, or the value
 * is the index following the secpath state that matched the template.
 * -1 is returned when no matching state is found.
 * Otherwise "-2 - errored_index" is returned.
 */
L
Linus Torvalds 已提交
2391
static inline int
2392
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
L
Linus Torvalds 已提交
2393 2394 2395 2396 2397
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
2398
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
L
Linus Torvalds 已提交
2399 2400 2401 2402
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
2403
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
L
Linus Torvalds 已提交
2404
			return ++idx;
2405 2406 2407
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2-idx;
L
Linus Torvalds 已提交
2408
			break;
2409
		}
L
Linus Torvalds 已提交
2410 2411 2412 2413
	}
	return start;
}

2414 2415
int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
L
Linus Torvalds 已提交
2416 2417
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2418
	int err;
L
Linus Torvalds 已提交
2419 2420 2421 2422

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

2423
	afinfo->decode_session(skb, fl, reverse);
2424
	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
L
Linus Torvalds 已提交
2425
	xfrm_policy_put_afinfo(afinfo);
2426
	return err;
L
Linus Torvalds 已提交
2427
}
2428
EXPORT_SYMBOL(__xfrm_decode_session);
L
Linus Torvalds 已提交
2429

2430
static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
L
Linus Torvalds 已提交
2431 2432
{
	for (; k < sp->len; k++) {
2433
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
2434
			*idxp = k;
L
Linus Torvalds 已提交
2435
			return 1;
2436
		}
L
Linus Torvalds 已提交
2437 2438 2439 2440 2441
	}

	return 0;
}

2442
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
L
Linus Torvalds 已提交
2443 2444
			unsigned short family)
{
2445
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
2446
	struct xfrm_policy *pol;
2447 2448 2449 2450
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
2451
	int reverse;
L
Linus Torvalds 已提交
2452
	struct flowi fl;
2453
	u8 fl_dir;
2454
	int xerr_idx = -1;
L
Linus Torvalds 已提交
2455

2456 2457 2458 2459
	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

2460
	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
A
Alexey Dobriyan 已提交
2461
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
L
Linus Torvalds 已提交
2462
		return 0;
2463 2464
	}

2465
	nf_nat_decode_session(skb, &fl, family);
L
Linus Torvalds 已提交
2466 2467 2468 2469 2470

	/* First, check used SA against their selectors. */
	if (skb->sp) {
		int i;

2471
		for (i = skb->sp->len-1; i >= 0; i--) {
2472
			struct xfrm_state *x = skb->sp->xvec[i];
2473
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
A
Alexey Dobriyan 已提交
2474
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
L
Linus Torvalds 已提交
2475
				return 0;
2476
			}
L
Linus Torvalds 已提交
2477 2478 2479 2480
		}
	}

	pol = NULL;
2481
	if (sk && sk->sk_policy[dir]) {
2482
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
2483
		if (IS_ERR(pol)) {
A
Alexey Dobriyan 已提交
2484
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2485
			return 0;
2486
		}
2487
	}
L
Linus Torvalds 已提交
2488

2489 2490 2491 2492 2493 2494 2495 2496 2497 2498
	if (!pol) {
		struct flow_cache_object *flo;

		flo = flow_cache_lookup(net, &fl, family, fl_dir,
					xfrm_policy_lookup, NULL);
		if (IS_ERR_OR_NULL(flo))
			pol = ERR_CAST(flo);
		else
			pol = container_of(flo, struct xfrm_policy, flo);
	}
L
Linus Torvalds 已提交
2499

2500
	if (IS_ERR(pol)) {
A
Alexey Dobriyan 已提交
2501
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2502
		return 0;
2503
	}
2504

2505
	if (!pol) {
2506
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
2507
			xfrm_secpath_reject(xerr_idx, skb, &fl);
A
Alexey Dobriyan 已提交
2508
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
2509 2510 2511 2512
			return 0;
		}
		return 1;
	}
L
Linus Torvalds 已提交
2513

2514
	pol->curlft.use_time = get_seconds();
L
Linus Torvalds 已提交
2515

2516
	pols[0] = pol;
2517
	npols++;
2518 2519
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
2520
		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
2521 2522 2523
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
2524
			if (IS_ERR(pols[1])) {
A
Alexey Dobriyan 已提交
2525
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2526
				return 0;
2527
			}
2528
			pols[1]->curlft.use_time = get_seconds();
2529
			npols++;
2530 2531 2532 2533
		}
	}
#endif

L
Linus Torvalds 已提交
2534 2535 2536
	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
2537
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
2538
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
2539 2540
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
L
Linus Torvalds 已提交
2541 2542 2543 2544 2545
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

2546 2547
		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
2548
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
A
Alexey Dobriyan 已提交
2549
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2550
				goto reject;
2551 2552
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
A
Alexey Dobriyan 已提交
2553
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
2554
				goto reject_error;
2555
			}
2556 2557 2558 2559
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
2560
		if (npols > 1) {
F
Fan Du 已提交
2561
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
2562 2563
			tpp = stp;
		}
2564

L
Linus Torvalds 已提交
2565 2566 2567 2568 2569 2570
		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
2571 2572
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
2573
			if (k < 0) {
2574 2575 2576
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
A
Alexey Dobriyan 已提交
2577
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
L
Linus Torvalds 已提交
2578
				goto reject;
2579
			}
L
Linus Torvalds 已提交
2580 2581
		}

2582
		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
A
Alexey Dobriyan 已提交
2583
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
L
Linus Torvalds 已提交
2584
			goto reject;
2585
		}
L
Linus Torvalds 已提交
2586

2587
		xfrm_pols_put(pols, npols);
L
Linus Torvalds 已提交
2588 2589
		return 1;
	}
A
Alexey Dobriyan 已提交
2590
	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
L
Linus Torvalds 已提交
2591 2592

reject:
2593
	xfrm_secpath_reject(xerr_idx, skb, &fl);
2594 2595
reject_error:
	xfrm_pols_put(pols, npols);
L
Linus Torvalds 已提交
2596 2597 2598 2599 2600 2601
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
2602
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
2603
	struct flowi fl;
E
Eric Dumazet 已提交
2604
	struct dst_entry *dst;
E
Eric Dumazet 已提交
2605
	int res = 1;
L
Linus Torvalds 已提交
2606

2607
	if (xfrm_decode_session(skb, &fl, family) < 0) {
2608
		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
L
Linus Torvalds 已提交
2609
		return 0;
2610
	}
L
Linus Torvalds 已提交
2611

2612
	skb_dst_force(skb);
E
Eric Dumazet 已提交
2613

2614
	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
2615
	if (IS_ERR(dst)) {
E
Eric Dumazet 已提交
2616
		res = 0;
2617 2618
		dst = NULL;
	}
E
Eric Dumazet 已提交
2619 2620
	skb_dst_set(skb, dst);
	return res;
L
Linus Torvalds 已提交
2621 2622 2623
}
EXPORT_SYMBOL(__xfrm_route_forward);

2624 2625
/* Optimize later using cookies and generation ids. */

L
Linus Torvalds 已提交
2626 2627
static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
	 * get validated by dst_ops->check on every use.  We do this
	 * because when a normal route referenced by an XFRM dst is
	 * obsoleted we do not go looking around for all parent
	 * referencing XFRM dsts so that we can invalidate them.  It
	 * is just too much work.  Instead we make the checks here on
	 * every use.  For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
	 * dst which causes its ->obsolete field to be set to
	 * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
	 * this, we want to force a new route lookup.
	 */
	if (dst->obsolete >= 0)
		return NULL;

	return stale_bundle(dst) ? NULL : dst;
}

/* Returns non-zero when the bundle headed by @dst fails xfrm_bundle_ok(). */
static int stale_bundle(struct dst_entry *dst)
{
	struct xfrm_dst *bundle = (struct xfrm_dst *)dst;

	return xfrm_bundle_ok(bundle) ? 0 : 1;
}

H
Herbert Xu 已提交
2660
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
L
Linus Torvalds 已提交
2661 2662
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2663
		dst->dev = dev_net(dev)->loopback_dev;
2664
		dev_hold(dst->dev);
L
Linus Torvalds 已提交
2665 2666 2667
		dev_put(dev);
	}
}
H
Herbert Xu 已提交
2668
EXPORT_SYMBOL(xfrm_dst_ifdown);
L
Linus Torvalds 已提交
2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685

/* Default ->link_failure handler installed for xfrm dst_ops; does nothing. */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such a dst must be popped before it reaches the point of failure. */
}

/* Default ->negative_advice handler: release an obsolete dst and return
 * NULL in its place; a NULL or non-obsolete dst is returned unchanged.
 */
static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (dst == NULL || !dst->obsolete)
		return dst;

	dst_release(dst);
	return NULL;
}

2686
/* Flush the flow cache of @net via flow_cache_flush(). */
void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush(net);
}
2690
EXPORT_SYMBOL(xfrm_garbage_collect);
2691 2692 2693

/* Request a flow cache flush for @net via flow_cache_flush_deferred().
 * Installed as the default afinfo->garbage_collect hook.
 */
static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred(net);
}

2697
static void xfrm_init_pmtu(struct dst_entry *dst)
L
Linus Torvalds 已提交
2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

2714
		dst_metric_set(dst, RTAX_MTU, pmtu);
L
Linus Torvalds 已提交
2715 2716 2717 2718 2719 2720 2721
	} while ((dst = dst->next));
}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

2722
static int xfrm_bundle_ok(struct xfrm_dst *first)
L
Linus Torvalds 已提交
2723 2724 2725 2726 2727
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

2728
	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
L
Linus Torvalds 已提交
2729 2730 2731
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

2732 2733 2734
	if (dst->flags & DST_XFRM_QUEUE)
		return 1;

L
Linus Torvalds 已提交
2735 2736 2737 2738 2739 2740 2741
	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
2742 2743
		if (xdst->xfrm_genid != dst->xfrm->genid)
			return 0;
2744 2745
		if (xdst->num_pols > 0 &&
		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2746
			return 0;
2747

L
Linus Torvalds 已提交
2748 2749 2750 2751 2752 2753
		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

2754
		if (!dst_check(xdst->route, xdst->route_cookie))
L
Linus Torvalds 已提交
2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
2775
		dst_metric_set(dst, RTAX_MTU, mtu);
L
Linus Torvalds 已提交
2776 2777 2778 2779

		if (last == first)
			break;

2780
		last = (struct xfrm_dst *)last->u.dst.next;
L
Linus Torvalds 已提交
2781 2782 2783 2784 2785 2786
		last->child_mtu_cached = mtu;
	}

	return 1;
}

2787 2788 2789 2790 2791
/* Default ->default_advmss handler: report the advmss metric of dst->path. */
static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
	return dst_metric_advmss(dst->path);
}

2792
static unsigned int xfrm_mtu(const struct dst_entry *dst)
2793
{
2794 2795 2796
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst_mtu(dst->path);
2797 2798
}

2799 2800 2801
static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
2802
{
2803
	return dst->path->ops->neigh_lookup(dst, skb, daddr);
2804 2805
}

L
Linus Torvalds 已提交
2806 2807
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
2808
	struct net *net;
L
Linus Torvalds 已提交
2809 2810 2811 2812 2813
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
E
Eric Dumazet 已提交
2814
	spin_lock(&xfrm_policy_afinfo_lock);
L
Linus Torvalds 已提交
2815
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2816
		err = -EEXIST;
L
Linus Torvalds 已提交
2817 2818 2819 2820 2821 2822
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
2823 2824
		if (likely(dst_ops->default_advmss == NULL))
			dst_ops->default_advmss = xfrm_default_advmss;
2825 2826
		if (likely(dst_ops->mtu == NULL))
			dst_ops->mtu = xfrm_mtu;
L
Linus Torvalds 已提交
2827 2828 2829 2830
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
2831 2832
		if (likely(dst_ops->neigh_lookup == NULL))
			dst_ops->neigh_lookup = xfrm_neigh_lookup;
L
Linus Torvalds 已提交
2833
		if (likely(afinfo->garbage_collect == NULL))
2834
			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
2835
		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
L
Linus Torvalds 已提交
2836
	}
E
Eric Dumazet 已提交
2837
	spin_unlock(&xfrm_policy_afinfo_lock);
2838 2839 2840 2841 2842 2843 2844 2845 2846

	rtnl_lock();
	for_each_net(net) {
		struct dst_ops *xfrm_dst_ops;

		switch (afinfo->family) {
		case AF_INET:
			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
			break;
E
Eric Dumazet 已提交
2847
#if IS_ENABLED(CONFIG_IPV6)
2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858
		case AF_INET6:
			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
			break;
#endif
		default:
			BUG();
		}
		*xfrm_dst_ops = *afinfo->dst_ops;
	}
	rtnl_unlock();

L
Linus Torvalds 已提交
2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869
	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
E
Eric Dumazet 已提交
2870
	spin_lock(&xfrm_policy_afinfo_lock);
L
Linus Torvalds 已提交
2871 2872 2873
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
E
Eric Dumazet 已提交
2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888
		else
			RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
					 NULL);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);
	if (!err) {
		struct dst_ops *dst_ops = afinfo->dst_ops;

		synchronize_rcu();

		dst_ops->kmem_cachep = NULL;
		dst_ops->check = NULL;
		dst_ops->negative_advice = NULL;
		dst_ops->link_failure = NULL;
		afinfo->garbage_collect = NULL;
L
Linus Torvalds 已提交
2889 2890 2891 2892 2893
	}
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

2894 2895 2896 2897
/* Seed the xfrm4/xfrm6 dst_ops of a new namespace from the afinfo that
 * is currently registered for AF_INET / AF_INET6, if any.
 */
static void __net_init xfrm_dst_ops_init(struct net *net)
{
	struct xfrm_policy_afinfo *ops;

	rcu_read_lock();

	ops = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
	if (ops != NULL)
		net->xfrm.xfrm4_dst_ops = *ops->dst_ops;

#if IS_ENABLED(CONFIG_IPV6)
	ops = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
	if (ops != NULL)
		net->xfrm.xfrm6_dst_ops = *ops->dst_ops;
#endif

	rcu_read_unlock();
}

L
Linus Torvalds 已提交
2910 2911
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
2912
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2913

L
Linus Torvalds 已提交
2914 2915
	switch (event) {
	case NETDEV_DOWN:
2916
		xfrm_garbage_collect(dev_net(dev));
L
Linus Torvalds 已提交
2917 2918 2919 2920 2921
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
A
Alexey Dobriyan 已提交
2922
	.notifier_call	= xfrm_dev_event,
L
Linus Torvalds 已提交
2923 2924
};

2925
#ifdef CONFIG_XFRM_STATISTICS
A
Alexey Dobriyan 已提交
2926
/* Allocate the per-cpu xfrm MIB of @net and set up its proc entries.
 * Returns -ENOMEM when the allocation fails; otherwise the result of
 * xfrm_proc_init(), freeing the MIB again if that result is negative.
 */
static int __net_init xfrm_statistics_init(struct net *net)
{
	int err;

	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
	if (net->mib.xfrm_statistics == NULL)
		return -ENOMEM;

	err = xfrm_proc_init(net);
	if (err >= 0)
		return err;

	free_percpu(net->mib.xfrm_statistics);
	return err;
}
A
Alexey Dobriyan 已提交
2937 2938 2939

static void xfrm_statistics_fini(struct net *net)
{
2940
	xfrm_proc_fini(net);
W
WANG Cong 已提交
2941
	free_percpu(net->mib.xfrm_statistics);
A
Alexey Dobriyan 已提交
2942 2943 2944 2945 2946 2947 2948 2949 2950 2951
}
#else
/* Stub used when CONFIG_XFRM_STATISTICS is not set: nothing to set up. */
static int __net_init xfrm_statistics_init(struct net *net)
{
	return 0;
}

/* Stub used when CONFIG_XFRM_STATISTICS is not set: nothing to tear down. */
static void xfrm_statistics_fini(struct net *net)
{
}
2952 2953
#endif

2954
/* Set up the policy database of @net: the by-index hash, one by-dst hash
 * per direction, the hash thresholds and the resize/rebuild work items.
 * For init_net it also creates the xfrm dst slab cache and registers the
 * netdevice notifier.
 *
 * Returns 0 on success or -ENOMEM when a hash table cannot be
 * allocated; tables allocated up to that point are freed again.
 */
static int __net_init xfrm_policy_init(struct net *net)
{
	/* Every table starts with 8 buckets. */
	const unsigned int hmask = 8 - 1;
	const unsigned int sz = (hmask+1) * sizeof(struct hlist_head);
	int dir;

	if (net_eq(net, &init_net))
		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (!net->xfrm.policy_byidx)
		return -ENOMEM;
	net->xfrm.policy_idx_hmask = hmask;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy_hash *htab = &net->xfrm.policy_bydst[dir];

		net->xfrm.policy_count[dir] = 0;
		net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);

		htab->table = xfrm_hash_alloc(sz);
		if (!htab->table)
			goto out_bydst;

		htab->hmask = hmask;
		htab->dbits4 = 32;
		htab->sbits4 = 32;
		htab->dbits6 = 128;
		htab->sbits6 = 128;
	}

	net->xfrm.policy_hthresh.lbits4 = 32;
	net->xfrm.policy_hthresh.rbits4 = 32;
	net->xfrm.policy_hthresh.lbits6 = 128;
	net->xfrm.policy_hthresh.rbits6 = 128;
	seqlock_init(&net->xfrm.policy_hthresh.lock);

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
	INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);

	if (net_eq(net, &init_net))
		register_netdevice_notifier(&xfrm_dev_notifier);

	return 0;

out_bydst:
	/* Free the by-dst tables of the directions already set up. */
	while (--dir >= 0)
		xfrm_hash_free(net->xfrm.policy_bydst[dir].table, sz);
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
	return -ENOMEM;
}

static void xfrm_policy_fini(struct net *net)
{
3018
	unsigned int sz;
3019
	int dir;
3020

3021 3022
	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
3023
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
3024
#endif
3025
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
3026

3027
	WARN_ON(!list_empty(&net->xfrm.policy_all));
3028

H
Herbert Xu 已提交
3029
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
3030 3031
		struct xfrm_policy_hash *htab;

3032
		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
3033 3034

		htab = &net->xfrm.policy_bydst[dir];
3035
		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
3036 3037
		WARN_ON(!hlist_empty(htab->table));
		xfrm_hash_free(htab->table, sz);
3038 3039
	}

3040
	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
3041 3042
	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
L
Linus Torvalds 已提交
3043 3044
}

3045 3046 3047 3048
/* Per-namespace xfrm initialisation: statistics, state database, policy
 * database, dst_ops, sysctls and the flow cache, in that order.
 *
 * Returns 0 on success. On failure the negative error of the failing
 * step is returned after the steps already completed are undone in
 * reverse order.
 */
static int __net_init xfrm_net_init(struct net *net)
{
	int rv;

	/* Initialize the per-net locks first: the unwind path below
	 * already runs xfrm_policy_fini()/xfrm_state_fini(), so the locks
	 * must be valid before any step that can fail.
	 */
	spin_lock_init(&net->xfrm.xfrm_state_lock);
	rwlock_init(&net->xfrm.xfrm_policy_lock);
	mutex_init(&net->xfrm.xfrm_cfg_mutex);

	rv = xfrm_statistics_init(net);
	if (rv < 0)
		goto out_statistics;
	rv = xfrm_state_init(net);
	if (rv < 0)
		goto out_state;
	rv = xfrm_policy_init(net);
	if (rv < 0)
		goto out_policy;
	xfrm_dst_ops_init(net);
	rv = xfrm_sysctl_init(net);
	if (rv < 0)
		goto out_sysctl;
	rv = flow_cache_init(net);
	if (rv < 0)
		goto out;

	return 0;

out:
	xfrm_sysctl_fini(net);
out_sysctl:
	xfrm_policy_fini(net);
out_policy:
	xfrm_state_fini(net);
out_state:
	xfrm_statistics_fini(net);
out_statistics:
	return rv;
}

/* Per-namespace teardown: undo the steps of xfrm_net_init() in reverse
 * order (flow cache, sysctls, policies, states, statistics).
 */
static void __net_exit xfrm_net_exit(struct net *net)
{
	flow_cache_fini(net);
	xfrm_sysctl_fini(net);

	xfrm_policy_fini(net);
	xfrm_state_fini(net);

	xfrm_statistics_fini(net);
}

/* Per-namespace init/exit hooks, registered from xfrm_init(). */
static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};

L
Linus Torvalds 已提交
3099 3100
/* Boot-time entry point: register the per-namespace operations, then
 * initialise the xfrm input path.
 */
void __init xfrm_init(void)
{
	register_pernet_subsys(&xfrm_net_ops);

	xfrm_input_init();
}

J
Joy Latten 已提交
3105
#ifdef CONFIG_AUDITSYSCALL
3106 3107
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
J
Joy Latten 已提交
3108
{
3109 3110 3111 3112
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
J
Joy Latten 已提交
3113
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
3114
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
J
Joy Latten 已提交
3115

3116
	switch (sel->family) {
J
Joy Latten 已提交
3117
	case AF_INET:
H
Harvey Harrison 已提交
3118
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
3119 3120 3121
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
H
Harvey Harrison 已提交
3122
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
3123 3124 3125
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
J
Joy Latten 已提交
3126 3127
		break;
	case AF_INET6:
H
Harvey Harrison 已提交
3128
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
3129 3130 3131
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
H
Harvey Harrison 已提交
3132
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
3133 3134 3135
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
J
Joy Latten 已提交
3136 3137 3138 3139
		break;
	}
}

3140
/*
 * Emit an "SPD-add" audit record for policy @xp.
 *
 * @result is logged as the " res=" field and @task_valid is handed to
 * xfrm_audit_helper_usrinfo().  Nothing is logged when
 * xfrm_audit_start() returns NULL.
 */
void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
{
	struct audit_buffer *ab = xfrm_audit_start("SPD-add");

	if (!ab)
		return;

	xfrm_audit_helper_usrinfo(task_valid, ab);
	audit_log_format(ab, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, ab);
	audit_log_end(ab);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

P
Paul Moore 已提交
3154
/*
 * Emit an "SPD-delete" audit record for policy @xp.
 *
 * @result is logged as the " res=" field and @task_valid is handed to
 * xfrm_audit_helper_usrinfo().  Nothing is logged when
 * xfrm_audit_start() returns NULL.
 */
void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
			      bool task_valid)
{
	struct audit_buffer *ab = xfrm_audit_start("SPD-delete");

	if (!ab)
		return;

	xfrm_audit_helper_usrinfo(task_valid, ab);
	audit_log_format(ab, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, ab);
	audit_log_end(ab);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif

3170
#ifdef CONFIG_XFRM_MIGRATE
3171 3172
static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
					const struct xfrm_selector *sel_tgt)
3173 3174 3175
{
	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
		if (sel_tgt->family == sel_cmp->family &&
3176 3177 3178 3179
		    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
				    sel_cmp->family) &&
		    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
				    sel_cmp->family) &&
3180 3181
		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
3182
			return true;
3183 3184 3185
		}
	} else {
		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
3186
			return true;
3187 3188
		}
	}
3189
	return false;
3190 3191
}

3192 3193
/*
 * Look up the policy of the given @type whose selector matches @sel for
 * direction @dir in @net.
 *
 * The by-destination hash chain is searched first; the first hit there
 * becomes the candidate and its priority the bound for the inexact list.
 * The inexact list is then walked: the walk stops as soon as a candidate
 * exists and the current entry's priority is not lower than the bound,
 * otherwise the first matching inexact entry replaces the candidate.
 *
 * Returns the policy after calling xfrm_pol_hold() on it, or NULL when
 * nothing matched.  @dir is used as an array index here without being
 * range-checked, so the caller must supply a valid direction.
 */
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type, struct net *net)
{
	struct xfrm_policy *cand, *found = NULL;
	struct hlist_head *head;
	u32 best_prio = ~0U;

	/* The original code carries an unexplained FIXME on this lock. */
	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/

	head = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(cand, head, bydst) {
		if (!xfrm_migrate_selector_match(sel, &cand->selector) ||
		    cand->type != type)
			continue;

		found = cand;
		best_prio = found->priority;
		break;
	}

	head = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(cand, head, bydst) {
		/* nothing from here on can beat the hashed candidate */
		if (found && cand->priority >= best_prio)
			break;

		if (!xfrm_migrate_selector_match(sel, &cand->selector) ||
		    cand->type != type)
			continue;

		found = cand;
		break;
	}

	/*
	 * NOTE(review): found may still be NULL here; this relies on
	 * xfrm_pol_hold() tolerating a NULL argument - confirm.
	 */
	xfrm_pol_hold(found);

	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return found;
}

3228
static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
3229 3230 3231 3232 3233 3234 3235 3236
{
	int match = 0;

	if (t->mode == m->mode && t->id.proto == m->proto &&
	    (m->reqid == 0 || t->reqid == m->reqid)) {
		switch (t->mode) {
		case XFRM_MODE_TUNNEL:
		case XFRM_MODE_BEET:
3237 3238 3239 3240
			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
					    m->old_family) &&
			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
					    m->old_family)) {
3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264
				match = 1;
			}
			break;
		case XFRM_MODE_TRANSPORT:
			/* in case of transport mode, template does not store
			   any IP addresses, hence we just compare mode and
			   protocol */
			match = 1;
			break;
		default:
			break;
		}
	}
	return match;
}

/* update endpoint address(es) of template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
H
Herbert Xu 已提交
3265
	if (unlikely(pol->walk.dead)) {
3266 3267 3268 3269 3270 3271 3272 3273 3274 3275
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
H
Herbert Xu 已提交
3276 3277
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
3278 3279 3280 3281 3282 3283 3284 3285
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
3286
			atomic_inc(&pol->genid);
3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}

3298
static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
3299 3300 3301 3302 3303 3304 3305
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
		return -EINVAL;

	for (i = 0; i < num_migrate; i++) {
3306 3307 3308 3309
		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
				    m[i].old_family) &&
		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
				    m[i].old_family))
3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331
			return -EINVAL;
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
			return -EINVAL;

		/* check if there is any duplicated entry */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family)
				return -EINVAL;
		}
	}

	return 0;
}

3332
/*
 * Migrate the endpoints of a policy and of the states it refers to.
 *
 * @sel, @dir, @type: identify the policy to migrate in @net
 * @m, @num_migrate:  array of old->new endpoint descriptions
 * @k:                passed through unchanged to km_migrate()
 *
 * Stages:
 *  0. validate the request (entry list and policy direction),
 *  1. find the policy (a reference is held until the function returns),
 *  2. for each entry with a matching state, create the migrated state,
 *  3. rewrite the policy templates,
 *  4. drop and delete the old states,
 *  5. announce the migration via km_migrate().
 *
 * Returns 0 on success.  On failure returns a negative errno: -EINVAL for
 * a malformed request, -ENOENT when no policy matches (or it has been
 * deleted), -ENODATA when a state could not be migrated or no template
 * matched.  If stage 2 or 3 fails, the newly created states are deleted
 * again and the old ones are left in place.
 */
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate,
		 struct xfrm_kmaddress *k, struct net *net)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	/*
	 * Stage 0 - sanity checks.  xfrm_migrate_check() bounds
	 * num_migrate to XFRM_MAX_DEPTH, which keeps the x_cur/x_new
	 * indices below in range.
	 */
	err = xfrm_migrate_check(m, num_migrate);
	if (err < 0)
		goto out;

	/*
	 * dir comes straight from the caller and is used by the policy
	 * lookup as an index into the per-direction policy tables; reject
	 * out-of-range values before they can reach an array access.
	 */
	if (dir >= XFRM_POLICY_MAX) {
		err = -EINVAL;
		goto out;
	}

	/* Stage 1 - find policy */
	pol = xfrm_migrate_policy_find(sel, dir, type, net);
	if (pol == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		x = xfrm_migrate_state_find(mp, net);
		if (!x)
			continue;

		x_cur[nx_cur] = x;
		nx_cur++;

		xc = xfrm_state_migrate(x, mp);
		if (!xc) {
			err = -ENODATA;
			goto restore_state;
		}
		x_new[nx_new] = xc;
		nx_new++;
	}

	/* Stage 3 - update policy */
	err = xfrm_policy_migrate(pol, m, num_migrate);
	if (err < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate, k);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	/* undo: release what was looked up, discard what was created */
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
3397
#endif