xfrm_policy.c 75.9 KB
Newer Older
1
/*
L
Linus Torvalds 已提交
2 3 4 5 6 7 8 9 10 11 12
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 * 	Kazunori MIYAZAWA @USAGI
 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 * 		IPv6 support
 * 	Kazunori MIYAZAWA @USAGI
 * 	YOSHIFUJI Hideaki
 * 		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
13
 *
L
Linus Torvalds 已提交
14 15
 */

16
#include <linux/err.h>
L
Linus Torvalds 已提交
17 18 19 20 21 22 23
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
24
#include <linux/netfilter.h>
L
Linus Torvalds 已提交
25
#include <linux/module.h>
26
#include <linux/cache.h>
P
Paul Moore 已提交
27
#include <linux/audit.h>
28
#include <net/dst.h>
29
#include <net/flow.h>
L
Linus Torvalds 已提交
30 31
#include <net/xfrm.h>
#include <net/ip.h>
32 33 34
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
L
Linus Torvalds 已提交
35

36 37
#include "xfrm_hash.h"

38 39 40 41
#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN	100

42 43 44
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
						__read_mostly;
L
Linus Torvalds 已提交
45

46
static struct kmem_cache *xfrm_dst_cache __read_mostly;
L
Linus Torvalds 已提交
47

48
static void xfrm_init_pmtu(struct dst_entry *dst);
49
static int stale_bundle(struct dst_entry *dst);
50
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
51
static void xfrm_policy_queue_process(unsigned long arg);
L
Linus Torvalds 已提交
52

W
Wei Yongjun 已提交
53 54 55
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);

56
static inline bool
57
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
58
{
59 60
	const struct flowi4 *fl4 = &fl->u.ip4;

61 62
	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
63 64 65 66
		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
67 68
}

69
static inline bool
70
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
71
{
72 73 74 75 76 77 78 79
	const struct flowi6 *fl6 = &fl->u.ip6;

	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
80 81
}

82 83
bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
			 unsigned short family)
84 85 86 87 88 89 90
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
91
	return false;
92 93
}

E
Eric Dumazet 已提交
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
/*
 * Look up the per-family policy operations.
 *
 * Returns NULL when @family is out of range or has no registered entry.
 * When a non-NULL pointer is returned the RCU read lock taken here is
 * still held; it is dropped by xfrm_policy_put_afinfo().
 */
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *ops;

	if (unlikely(family >= NPROTO))
		return NULL;

	rcu_read_lock();
	ops = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!ops)) {
		/* Nothing registered: do not leave the read side locked. */
		rcu_read_unlock();
		return NULL;
	}

	return ops;
}

/*
 * Release a reference obtained from xfrm_policy_get_afinfo().
 *
 * Drops the RCU read lock that a successful get left held; @afinfo itself
 * is not touched.
 */
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}

112
static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
113 114
						  const xfrm_address_t *saddr,
						  const xfrm_address_t *daddr,
115 116 117 118 119 120 121 122 123
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

124
	dst = afinfo->dst_lookup(net, tos, saddr, daddr);
125 126 127 128 129 130

	xfrm_policy_put_afinfo(afinfo);

	return dst;
}

131
/*
 * Route lookup for one state of a bundle.
 *
 * By default the lookup uses the state's own addresses (x->props.saddr and
 * x->id.daddr).  If the state type sets XFRM_TYPE_LOCAL_COADDR the source
 * becomes x->coaddr and the destination is @prev_daddr; if it sets
 * XFRM_TYPE_REMOTE_COADDR the source is @prev_saddr and the destination
 * becomes x->coaddr.
 *
 * When the lookup succeeds, the addresses that were actually used are
 * copied into @prev_saddr / @prev_daddr (unless they already are those
 * buffers).  On failure the prev_* buffers are left untouched.
 *
 * Returns the dst entry or an ERR_PTR() from __xfrm_dst_lookup().
 */
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	struct net *net = xs_net(x);
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);

	if (!IS_ERR(dst)) {
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
	}

	return dst;
}

/*
 * Convert a timeout in seconds to jiffies.
 *
 * The result is clamped to MAX_SCHEDULE_TIMEOUT-1 once @secs reaches
 * (MAX_SCHEDULE_TIMEOUT-1)/HZ, so the multiplication below is only done
 * for values under that bound.
 */
static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

static void xfrm_policy_timer(unsigned long data)
{
172
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
173
	unsigned long now = get_seconds();
L
Linus Torvalds 已提交
174 175 176 177 178 179
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

180
	if (unlikely(xp->walk.dead))
L
Linus Torvalds 已提交
181 182
		goto out;

183
	dir = xfrm_policy_id2dir(xp->index);
L
Linus Torvalds 已提交
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
223
		km_policy_expired(xp, dir, 0, 0);
L
Linus Torvalds 已提交
224 225 226 227 228 229 230 231 232 233 234
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
235
	if (!xfrm_policy_delete(xp, dir))
236
		km_policy_expired(xp, dir, 1, 0);
L
Linus Torvalds 已提交
237 238 239
	xfrm_pol_put(xp);
}

240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
/*
 * Flow cache ->get callback.
 *
 * Returns NULL for a policy already marked dead; otherwise takes a policy
 * reference and hands the same cache object back.
 */
static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_policy *policy = container_of(flo, struct xfrm_policy, flo);

	if (unlikely(policy->walk.dead))
		return NULL;

	xfrm_pol_hold(policy);
	return flo;
}

/*
 * Flow cache ->check callback: 1 while the policy is not marked dead,
 * 0 once it is.
 */
static int xfrm_policy_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_policy *policy = container_of(flo, struct xfrm_policy, flo);

	if (policy->walk.dead)
		return 0;

	return 1;
}

/* Flow cache ->delete callback: drop one reference on the owning policy. */
static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_policy *policy = container_of(flo, struct xfrm_policy, flo);

	xfrm_pol_put(policy);
}

/*
 * Flow cache callbacks for policy objects; installed on each policy by
 * xfrm_policy_alloc() through policy->flo.ops.
 */
static const struct flow_cache_ops xfrm_policy_fc_ops = {
	.get = xfrm_policy_flo_get,
	.check = xfrm_policy_flo_check,
	.delete = xfrm_policy_flo_delete,
};
L
Linus Torvalds 已提交
269 270 271 272 273

/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

274
struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
L
Linus Torvalds 已提交
275 276 277
{
	struct xfrm_policy *policy;

278
	policy = kzalloc(sizeof(struct xfrm_policy), gfp);
L
Linus Torvalds 已提交
279 280

	if (policy) {
281
		write_pnet(&policy->xp_net, net);
H
Herbert Xu 已提交
282
		INIT_LIST_HEAD(&policy->walk.all);
283 284
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
L
Linus Torvalds 已提交
285
		rwlock_init(&policy->lock);
286
		atomic_set(&policy->refcnt, 1);
287
		skb_queue_head_init(&policy->polq.hold_queue);
288 289
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
290 291
		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
			    (unsigned long)policy);
292
		policy->flo.ops = &xfrm_policy_fc_ops;
L
Linus Torvalds 已提交
293 294 295 296 297 298 299
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

/* Destroy xfrm_policy: descendant resources must be released to this moment. */

300
void xfrm_policy_destroy(struct xfrm_policy *policy)
L
Linus Torvalds 已提交
301
{
H
Herbert Xu 已提交
302
	BUG_ON(!policy->walk.dead);
L
Linus Torvalds 已提交
303

304
	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
L
Linus Torvalds 已提交
305 306
		BUG();

307
	security_xfrm_policy_free(policy->security);
L
Linus Torvalds 已提交
308 309
	kfree(policy);
}
310
EXPORT_SYMBOL(xfrm_policy_destroy);
L
Linus Torvalds 已提交
311

312 313 314 315
static void xfrm_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

316
	while ((skb = skb_dequeue(list)) != NULL)
317 318 319
		kfree_skb(skb);
}

L
Linus Torvalds 已提交
320 321 322 323 324 325
/* Rule must be locked. Release descentant resources, announce
 * entry dead. The rule must be unlinked from lists to the moment.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
H
Herbert Xu 已提交
326
	policy->walk.dead = 1;
L
Linus Torvalds 已提交
327

328
	atomic_inc(&policy->genid);
L
Linus Torvalds 已提交
329

330 331
	if (del_timer(&policy->polq.hold_timer))
		xfrm_pol_put(policy);
332 333
	xfrm_queue_purge(&policy->polq.hold_queue);

334 335 336 337
	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	xfrm_pol_put(policy);
L
Linus Torvalds 已提交
338 339
}

340 341
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

342
/* Bucket number for policy @index in @net's by-index hash table. */
static inline unsigned int idx_hash(struct net *net, u32 index)
{
	return __idx_hash(index, net->xfrm.policy_idx_hmask);
}

347 348 349
static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
350
{
351
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
352 353 354
	unsigned int hash = __sel_hash(sel, family, hmask);

	return (hash == hmask + 1 ?
355 356
		&net->xfrm.policy_inexact[dir] :
		net->xfrm.policy_bydst[dir].table + hash);
357 358
}

359 360 361 362
static struct hlist_head *policy_hash_direct(struct net *net,
					     const xfrm_address_t *daddr,
					     const xfrm_address_t *saddr,
					     unsigned short family, int dir)
363
{
364
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
365 366
	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);

367
	return net->xfrm.policy_bydst[dir].table + hash;
368 369 370 371 372 373
}

static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
374
	struct hlist_node *tmp, *entry0 = NULL;
375
	struct xfrm_policy *pol;
376
	unsigned int h0 = 0;
377

378
redo:
379
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
380 381 382 383
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
384
		if (!entry0) {
385
			hlist_del(&pol->bydst);
386 387 388 389 390
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
391
			hlist_del(&pol->bydst);
392
			hlist_add_behind(&pol->bydst, entry0);
393
		}
394
		entry0 = &pol->bydst;
395 396 397 398
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
399 400 401 402 403 404 405
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
406
	struct hlist_node *tmp;
407 408
	struct xfrm_policy *pol;

409
	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
410 411 412 413 414 415 416 417 418 419 420 421
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

/*
 * Hash mask for a table twice the size of the one described by @old_hmask
 * (mask = size - 1).  The arithmetic is done in unsigned int.
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	unsigned int doubled_size = (old_hmask + 1) << 1;

	return doubled_size - 1;
}

422
static void xfrm_bydst_resize(struct net *net, int dir)
423
{
424
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
425 426
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
427
	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
428
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
429 430 431 432 433
	int i;

	if (!ndst)
		return;

F
Fan Du 已提交
434
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
435 436 437 438

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

439 440
	net->xfrm.policy_bydst[dir].table = ndst;
	net->xfrm.policy_bydst[dir].hmask = nhashmask;
441

F
Fan Du 已提交
442
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
443

444
	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
445 446
}

447
static void xfrm_byidx_resize(struct net *net, int total)
448
{
449
	unsigned int hmask = net->xfrm.policy_idx_hmask;
450 451
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
452
	struct hlist_head *oidx = net->xfrm.policy_byidx;
453
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
454 455 456 457 458
	int i;

	if (!nidx)
		return;

F
Fan Du 已提交
459
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
460 461 462 463

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

464 465
	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;
466

F
Fan Du 已提交
467
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
468

469
	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
470 471
}

472
static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
473
{
474 475
	unsigned int cnt = net->xfrm.policy_count[dir];
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
476 477 478 479 480 481 482 483 484 485 486

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

487
static inline int xfrm_byidx_should_resize(struct net *net, int total)
488
{
489
	unsigned int hmask = net->xfrm.policy_idx_hmask;
490 491 492 493 494 495 496 497

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

498
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
J
Jamal Hadi Salim 已提交
499
{
F
Fan Du 已提交
500
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
501 502 503 504 505 506 507
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
J
Jamal Hadi Salim 已提交
508
	si->spdhmcnt = xfrm_policy_hashmax;
F
Fan Du 已提交
509
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
J
Jamal Hadi Salim 已提交
510 511
}
EXPORT_SYMBOL(xfrm_spd_getinfo);
512

J
Jamal Hadi Salim 已提交
513
static DEFINE_MUTEX(hash_resize_mutex);
514
static void xfrm_hash_resize(struct work_struct *work)
515
{
516
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
517 518 519 520 521 522
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
523 524
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
525
	}
526 527
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);
528 529 530 531

	mutex_unlock(&hash_resize_mutex);
}

L
Linus Torvalds 已提交
532 533
/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute inpredictability of ordering of rules. This will not pass. */
534
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
L
Linus Torvalds 已提交
535 536 537 538
{
	static u32 idx_generator;

	for (;;) {
539 540 541 542 543
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

544 545 546 547 548 549 550 551
		if (!index) {
			idx = (idx_generator | dir);
			idx_generator += 8;
		} else {
			idx = index;
			index = 0;
		}

L
Linus Torvalds 已提交
552 553
		if (idx == 0)
			idx = 8;
554
		list = net->xfrm.policy_byidx + idx_hash(net, idx);
555
		found = 0;
556
		hlist_for_each_entry(p, list, byidx) {
557 558
			if (p->index == idx) {
				found = 1;
L
Linus Torvalds 已提交
559
				break;
560
			}
L
Linus Torvalds 已提交
561
		}
562
		if (!found)
L
Linus Torvalds 已提交
563 564 565 566
			return idx;
	}
}

567 568 569 570 571 572 573 574 575 576 577 578 579 580 581
static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

582 583 584 585 586 587 588 589 590 591
static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
592 593
	if (del_timer(&pq->hold_timer))
		xfrm_pol_put(old);
594 595 596 597 598 599 600 601 602 603
	spin_unlock_bh(&pq->hold_queue.lock);

	if (skb_queue_empty(&list))
		return;

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
604 605
	if (!mod_timer(&pq->hold_timer, jiffies))
		xfrm_pol_hold(new);
606 607 608
	spin_unlock_bh(&pq->hold_queue.lock);
}

609 610 611 612 613 614 615 616 617 618 619 620 621 622 623
/*
 * Decide whether the mark of the policy being inserted (@policy) collides
 * with that of an existing policy (@pol).
 *
 * True when both mark value and mask are identical, or when @policy's
 * masked mark value matches @pol's mark under @pol's mask and both
 * policies have the same priority.
 */
static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
				   struct xfrm_policy *pol)
{
	bool identical = policy->mark.v == pol->mark.v &&
			 policy->mark.m == pol->mark.m;
	u32 effective;

	if (identical)
		return true;

	effective = policy->mark.v & policy->mark.m;

	return (effective & pol->mark.m) == pol->mark.v &&
	       policy->priority == pol->priority;
}

L
Linus Torvalds 已提交
624 625
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
626
	struct net *net = xp_net(policy);
627 628 629
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
630
	struct hlist_node *newpos;
L
Linus Torvalds 已提交
631

F
Fan Du 已提交
632
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
633
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
634 635
	delpol = NULL;
	newpos = NULL;
636
	hlist_for_each_entry(pol, chain, bydst) {
H
Herbert Xu 已提交
637
		if (pol->type == policy->type &&
638
		    !selector_cmp(&pol->selector, &policy->selector) &&
639
		    xfrm_policy_mark_match(policy, pol) &&
H
Herbert Xu 已提交
640 641
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
L
Linus Torvalds 已提交
642
			if (excl) {
F
Fan Du 已提交
643
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
644 645 646 647 648 649
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
H
Herbert Xu 已提交
650
			newpos = &pol->bydst;
L
Linus Torvalds 已提交
651 652 653 654 655 656
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
657
		hlist_add_behind(&policy->bydst, newpos);
658 659
	else
		hlist_add_head(&policy->bydst, chain);
L
Linus Torvalds 已提交
660
	xfrm_pol_hold(policy);
661
	net->xfrm.policy_count[dir]++;
662
	atomic_inc(&net->xfrm.flow_cache_genid);
F
fan.du 已提交
663 664 665 666 667 668 669

	/* After previous checking, family can either be AF_INET or AF_INET6 */
	if (policy->family == AF_INET)
		rt_genid_bump_ipv4(net);
	else
		rt_genid_bump_ipv6(net);

670 671
	if (delpol) {
		xfrm_policy_requeue(delpol, policy);
W
Wei Yongjun 已提交
672
		__xfrm_policy_unlink(delpol, dir);
673
	}
674
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
675
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
676
	policy->curlft.add_time = get_seconds();
L
Linus Torvalds 已提交
677 678 679
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
680
	list_add(&policy->walk.all, &net->xfrm.policy_all);
F
Fan Du 已提交
681
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
682

683
	if (delpol)
L
Linus Torvalds 已提交
684
		xfrm_policy_kill(delpol);
685 686
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);
687

L
Linus Torvalds 已提交
688 689 690 691
	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

692 693
/*
 * Find a policy by type, mark, selector and security context, optionally
 * deleting it.
 *
 * The first policy on the selector's chain that matches is returned with
 * an extra reference held.  With @delete set, the security module is asked
 * first: if it refuses, *@err carries its error and the policy is returned
 * still linked and not killed; otherwise the policy is unlinked under the
 * lock and killed once the lock is released.
 *
 * *@err is 0 unless the security check failed.  Returns NULL when nothing
 * matches.
 */
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
					  int dir, struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == type &&
		    (mark & pol->mark.m) == pol->mark.v &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
L
Linus Torvalds 已提交
730

731 732
/*
 * Find a policy by index, type and mark, optionally deleting it.
 *
 * If the direction encoded in @id differs from @dir, *@err is set to
 * -ENOENT and NULL is returned without taking the lock.  Otherwise the
 * by-index chain is searched; a match is returned with an extra reference
 * held.  With @delete set, the security module is asked first: on refusal
 * *@err carries its error and the policy is returned still linked;
 * otherwise it is unlinked under the lock and killed after the lock is
 * released.
 *
 * Returns NULL (with *@err == 0) when the chain holds no match.
 */
struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
				     int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = net->xfrm.policy_byidx + idx_hash(net, id);
	ret = NULL;
	hlist_for_each_entry(pol, chain, byidx) {
		if (pol->type == type && pol->index == id &&
		    (mark & pol->mark.m) == pol->mark.v) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);

770 771
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
772
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
L
Linus Torvalds 已提交
773
{
774 775 776 777 778 779
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

780
		hlist_for_each_entry(pol,
781
				     &net->xfrm.policy_inexact[dir], bydst) {
782 783
			if (pol->type != type)
				continue;
784
			err = security_xfrm_policy_delete(pol->security);
785
			if (err) {
786
				xfrm_audit_policy_delete(pol, 0, task_valid);
787 788
				return err;
			}
789
		}
790
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
791
			hlist_for_each_entry(pol,
792
					     net->xfrm.policy_bydst[dir].table + i,
793 794 795
					     bydst) {
				if (pol->type != type)
					continue;
796 797
				err = security_xfrm_policy_delete(
								pol->security);
798
				if (err) {
J
Joy Latten 已提交
799
					xfrm_audit_policy_delete(pol, 0,
800
								 task_valid);
801 802 803 804 805 806 807 808 809
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
810
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
811 812 813 814 815
{
	return 0;
}
#endif

816
/*
 * Remove every policy of @type from @net.
 *
 * The security check runs first, under the policy write lock; if it fails
 * nothing is removed and its error is returned.  Otherwise, for each
 * direction, matching policies are unlinked one at a time from the inexact
 * list and from every hash bucket.  The lock is dropped around the audit
 * record and xfrm_policy_kill() for each policy, so the scan of the
 * current chain restarts from its head after every removal.
 *
 * Returns 0 if at least one policy was removed, -ESRCH if none matched,
 * or the error from the security check.
 */
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
	int dir, err = 0, cnt = 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

	again1:
		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			__xfrm_policy_unlink(pol, dir);
			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
			cnt++;

			xfrm_audit_policy_delete(pol, 1, task_valid);

			xfrm_policy_kill(pol);

			write_lock_bh(&net->xfrm.xfrm_policy_lock);
			goto again1;
		}

		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				__xfrm_policy_unlink(pol, dir);
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				cnt++;

				xfrm_audit_policy_delete(pol, 1, task_valid);
				xfrm_policy_kill(pol);

				write_lock_bh(&net->xfrm.xfrm_policy_lock);
				goto again2;
			}
		}

	}
	if (!cnt)
		err = -ESRCH;
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

875
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
876
		     int (*func)(struct xfrm_policy *, int, int, void*),
L
Linus Torvalds 已提交
877 878
		     void *data)
{
H
Herbert Xu 已提交
879 880
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
881 882 883 884 885
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;
L
Linus Torvalds 已提交
886

H
Herbert Xu 已提交
887
	if (list_empty(&walk->walk.all) && walk->seq != 0)
888 889
		return 0;

F
Fan Du 已提交
890
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
H
Herbert Xu 已提交
891
	if (list_empty(&walk->walk.all))
892
		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
H
Herbert Xu 已提交
893 894
	else
		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
895
	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
H
Herbert Xu 已提交
896
		if (x->dead)
897
			continue;
H
Herbert Xu 已提交
898 899 900 901 902 903 904 905 906
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
907
		}
H
Herbert Xu 已提交
908
		walk->seq++;
L
Linus Torvalds 已提交
909
	}
H
Herbert Xu 已提交
910
	if (walk->seq == 0) {
J
Jamal Hadi Salim 已提交
911 912 913
		error = -ENOENT;
		goto out;
	}
H
Herbert Xu 已提交
914
	list_del_init(&walk->walk.all);
L
Linus Torvalds 已提交
915
out:
F
Fan Du 已提交
916
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
917 918 919 920
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

H
Herbert Xu 已提交
921 922 923 924 925 926 927 928 929
/*
 * Prepare @walk for a fresh iteration over policies of @type.
 *
 * The sequence counter starts at 0 and the walker's list entry is left
 * unlinked and flagged dead.
 */
void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
	walk->type = type;
	walk->seq = 0;
	walk->walk.dead = 1;
	INIT_LIST_HEAD(&walk->walk.all);
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

F
Fan Du 已提交
930
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
H
Herbert Xu 已提交
931 932 933 934
{
	if (list_empty(&walk->walk.all))
		return;

F
Fan Du 已提交
935
	write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
H
Herbert Xu 已提交
936
	list_del(&walk->walk.all);
F
Fan Du 已提交
937
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
H
Herbert Xu 已提交
938 939 940
}
EXPORT_SYMBOL(xfrm_policy_walk_done);

941 942 943 944 945
/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
946 947
static int xfrm_policy_match(const struct xfrm_policy *pol,
			     const struct flowi *fl,
948
			     u8 type, u16 family, int dir)
L
Linus Torvalds 已提交
949
{
950
	const struct xfrm_selector *sel = &pol->selector;
951 952
	int ret = -ESRCH;
	bool match;
L
Linus Torvalds 已提交
953

954
	if (pol->family != family ||
955
	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
956
	    pol->type != type)
957
		return ret;
L
Linus Torvalds 已提交
958

959
	match = xfrm_selector_match(sel, fl, family);
960
	if (match)
961
		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
962
						  dir);
963

964
	return ret;
965
}
L
Linus Torvalds 已提交
966

A
Alexey Dobriyan 已提交
967
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
968
						     const struct flowi *fl,
969 970
						     u16 family, u8 dir)
{
971
	int err;
972
	struct xfrm_policy *pol, *ret;
973
	const xfrm_address_t *daddr, *saddr;
974
	struct hlist_head *chain;
975
	u32 priority = ~0U;
976

977 978 979 980 981
	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

F
Fan Du 已提交
982
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
A
Alexey Dobriyan 已提交
983
	chain = policy_hash_direct(net, daddr, saddr, family, dir);
984
	ret = NULL;
985
	hlist_for_each_entry(pol, chain, bydst) {
986 987 988 989 990 991 992 993 994
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
995
			ret = pol;
996
			priority = ret->priority;
997 998 999
			break;
		}
	}
A
Alexey Dobriyan 已提交
1000
	chain = &net->xfrm.policy_inexact[dir];
1001
	hlist_for_each_entry(pol, chain, bydst) {
1002 1003 1004 1005 1006 1007 1008 1009 1010
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
1011 1012
			ret = pol;
			break;
L
Linus Torvalds 已提交
1013 1014
		}
	}
1015 1016
	if (ret)
		xfrm_pol_hold(ret);
1017
fail:
F
Fan Du 已提交
1018
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
1019

1020
	return ret;
1021 1022
}

1023
static struct xfrm_policy *
1024
__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_policy *pol;

	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (pol != NULL)
		return pol;
#endif
	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}

1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053
static int flow_to_policy_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;

	switch (dir) {
	default:
	case FLOW_DIR_IN:
		return XFRM_POLICY_IN;
	case FLOW_DIR_OUT:
		return XFRM_POLICY_OUT;
	case FLOW_DIR_FWD:
		return XFRM_POLICY_FWD;
	}
}

1054
static struct flow_cache_object *
1055
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
1056
		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
1057 1058
{
	struct xfrm_policy *pol;
1059 1060 1061

	if (old_obj)
		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
1062

1063
	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
1064
	if (IS_ERR_OR_NULL(pol))
1065 1066 1067 1068 1069 1070 1071
		return ERR_CAST(pol);

	/* Resolver returns two references:
	 * one for cache and one for caller of flow_cache_lookup() */
	xfrm_pol_hold(pol);

	return &pol->flo;
L
Linus Torvalds 已提交
1072 1073
}

1074 1075 1076
static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
1088
	}
1089 1090
}

1091 1092
static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
						 const struct flowi *fl)
L
Linus Torvalds 已提交
1093 1094
{
	struct xfrm_policy *pol;
F
Fan Du 已提交
1095
	struct net *net = sock_net(sk);
L
Linus Torvalds 已提交
1096

F
Fan Du 已提交
1097
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1098
	if ((pol = sk->sk_policy[dir]) != NULL) {
1099 1100
		bool match = xfrm_selector_match(&pol->selector, fl,
						 sk->sk_family);
1101
		int err = 0;
1102

1103
		if (match) {
J
Jamal Hadi Salim 已提交
1104 1105 1106 1107
			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
				pol = NULL;
				goto out;
			}
1108
			err = security_xfrm_policy_lookup(pol->security,
1109
						      fl->flowi_secid,
1110
						      policy_to_flow_dir(dir));
1111 1112 1113 1114 1115 1116 1117
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
L
Linus Torvalds 已提交
1118 1119
			pol = NULL;
	}
J
Jamal Hadi Salim 已提交
1120
out:
F
Fan Du 已提交
1121
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1122 1123 1124 1125 1126
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
1127
	struct net *net = xp_net(pol);
1128
	struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
1129
						     pol->family, dir);
1130

1131
	list_add(&pol->walk.all, &net->xfrm.policy_all);
1132
	hlist_add_head(&pol->bydst, chain);
1133
	hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
1134
	net->xfrm.policy_count[dir]++;
L
Linus Torvalds 已提交
1135
	xfrm_pol_hold(pol);
1136

1137 1138
	if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);
L
Linus Torvalds 已提交
1139 1140 1141 1142 1143
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
1144 1145
	struct net *net = xp_net(pol);

1146 1147
	if (hlist_unhashed(&pol->bydst))
		return NULL;
L
Linus Torvalds 已提交
1148

1149
	hlist_del_init(&pol->bydst);
1150
	hlist_del(&pol->byidx);
H
Herbert Xu 已提交
1151
	list_del(&pol->walk.all);
1152
	net->xfrm.policy_count[dir]--;
1153 1154

	return pol;
L
Linus Torvalds 已提交
1155 1156
}

1157
int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
L
Linus Torvalds 已提交
1158
{
F
Fan Du 已提交
1159 1160 1161
	struct net *net = xp_net(pol);

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1162
	pol = __xfrm_policy_unlink(pol, dir);
F
Fan Du 已提交
1163
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1164 1165
	if (pol) {
		xfrm_policy_kill(pol);
1166
		return 0;
L
Linus Torvalds 已提交
1167
	}
1168
	return -ENOENT;
L
Linus Torvalds 已提交
1169
}
1170
EXPORT_SYMBOL(xfrm_policy_delete);
L
Linus Torvalds 已提交
1171 1172 1173

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
1174
	struct net *net = xp_net(pol);
L
Linus Torvalds 已提交
1175 1176
	struct xfrm_policy *old_pol;

1177 1178 1179 1180 1181
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

F
Fan Du 已提交
1182
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1183 1184 1185
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
1186
		pol->curlft.add_time = get_seconds();
1187
		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
L
Linus Torvalds 已提交
1188 1189
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
1190 1191 1192 1193
	if (old_pol) {
		if (pol)
			xfrm_policy_requeue(old_pol, pol);

1194 1195 1196
		/* Unlinking succeeds always. This is the only function
		 * allowed to delete or replace socket policy.
		 */
L
Linus Torvalds 已提交
1197
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1198
	}
F
Fan Du 已提交
1199
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1200 1201 1202 1203 1204 1205 1206

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

1207
static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
L
Linus Torvalds 已提交
1208
{
1209
	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
F
Fan Du 已提交
1210
	struct net *net = xp_net(old);
L
Linus Torvalds 已提交
1211 1212 1213

	if (newp) {
		newp->selector = old->selector;
1214 1215
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
1216 1217 1218
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
L
Linus Torvalds 已提交
1219 1220
		newp->lft = old->lft;
		newp->curlft = old->curlft;
1221
		newp->mark = old->mark;
L
Linus Torvalds 已提交
1222 1223 1224 1225
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
1226
		newp->type = old->type;
L
Linus Torvalds 已提交
1227 1228
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
F
Fan Du 已提交
1229
		write_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1230
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
F
Fan Du 已提交
1231
		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249
		xfrm_pol_put(newp);
	}
	return newp;
}

/*
 * Replace both socket policy slots of @sk (index 0 and 1) with private
 * clones of the policies currently stored there.  Both slots are cleared
 * first, so a slot whose clone fails (or is never reached) stays NULL.
 *
 * Returns 0 on success or -ENOMEM as soon as one clone fails.
 */
int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *orig[2] = { sk->sk_policy[0], sk->sk_policy[1] };
	int dir;

	sk->sk_policy[0] = NULL;
	sk->sk_policy[1] = NULL;

	for (dir = 0; dir < 2; dir++) {
		if (!orig[dir])
			continue;

		sk->sk_policy[dir] = clone_policy(orig[dir], dir);
		if (!sk->sk_policy[dir])
			return -ENOMEM;
	}

	return 0;
}

1250
static int
A
Alexey Dobriyan 已提交
1251
xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
1252 1253 1254 1255 1256 1257 1258
	       unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
A
Alexey Dobriyan 已提交
1259
	err = afinfo->get_saddr(net, local, remote);
1260 1261 1262 1263
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

L
Linus Torvalds 已提交
1264 1265 1266
/* Resolve list of templates for the flow, given policy. */

static int
1267 1268
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
		      struct xfrm_state **xfrm, unsigned short family)
L
Linus Torvalds 已提交
1269
{
A
Alexey Dobriyan 已提交
1270
	struct net *net = xp_net(policy);
L
Linus Torvalds 已提交
1271 1272 1273 1274
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1275
	xfrm_address_t tmp;
L
Linus Torvalds 已提交
1276

1277
	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
L
Linus Torvalds 已提交
1278 1279 1280 1281 1282
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

1283 1284
		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
L
Linus Torvalds 已提交
1285 1286
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
1287 1288
			if (xfrm_addr_any(local, tmpl->encap_family)) {
				error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
1289 1290 1291 1292
				if (error)
					goto fail;
				local = &tmp;
			}
L
Linus Torvalds 已提交
1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
W
Weilong Chen 已提交
1307
		} else if (error == -ESRCH) {
1308
			error = -EAGAIN;
W
Weilong Chen 已提交
1309
		}
L
Linus Torvalds 已提交
1310 1311 1312 1313 1314 1315 1316

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
1317
	for (nx--; nx >= 0; nx--)
L
Linus Torvalds 已提交
1318 1319 1320 1321
		xfrm_state_put(xfrm[nx]);
	return error;
}

1322
static int
1323 1324
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
		  struct xfrm_state **xfrm, unsigned short family)
1325
{
1326 1327
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1328 1329 1330 1331 1332 1333 1334 1335 1336 1337
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}
1338 1339

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1340 1341 1342 1343 1344 1345 1346
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

1347 1348 1349 1350
	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

1351 1352 1353
	return cnx;

 fail:
1354
	for (cnx--; cnx >= 0; cnx--)
1355
		xfrm_state_put(tpp[cnx]);
1356 1357 1358 1359
	return error;

}

L
Linus Torvalds 已提交
1360 1361 1362 1363
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

1364
static inline int xfrm_get_tos(const struct flowi *fl, int family)
1365 1366 1367
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;
L
Linus Torvalds 已提交
1368

1369 1370 1371 1372 1373 1374 1375 1376 1377 1378
	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}

1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389
static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (xdst->route == NULL) {
		/* Dummy bundle - if it has xfrms we were not
		 * able to build bundle as template resolution failed.
		 * It means we need to try again resolving. */
		if (xdst->num_xfrms > 0)
			return NULL;
1390 1391
	} else if (dst->flags & DST_XFRM_QUEUE) {
		return NULL;
1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428
	} else {
		/* Real bundle */
		if (stale_bundle(dst))
			return NULL;
	}

	dst_hold(dst);
	return flo;
}

/*
 * ->check() callback of xfrm_bundle_fc_ops: returns 1 when the bundle has
 * a route and is not stale, 0 otherwise.
 */
static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);

	return xdst->route && !stale_bundle(&xdst->u.dst);
}

/* ->delete() callback of xfrm_bundle_fc_ops: dst_free() the bundle. */
static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);

	dst_free(&xdst->u.dst);
}

/* Flow cache callbacks for bundles; xfrm_alloc_dst() installs this table
 * in xdst->flo.ops of every xfrm_dst it allocates. */
static const struct flow_cache_ops xfrm_bundle_fc_ops = {
	.get = xfrm_bundle_flo_get,
	.check = xfrm_bundle_flo_check,
	.delete = xfrm_bundle_flo_delete,
};

1429
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
L
Linus Torvalds 已提交
1430 1431
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1432
	struct dst_ops *dst_ops;
1433 1434 1435 1436 1437
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

1438 1439 1440 1441
	switch (family) {
	case AF_INET:
		dst_ops = &net->xfrm.xfrm4_dst_ops;
		break;
E
Eric Dumazet 已提交
1442
#if IS_ENABLED(CONFIG_IPV6)
1443 1444 1445 1446 1447 1448 1449
	case AF_INET6:
		dst_ops = &net->xfrm.xfrm6_dst_ops;
		break;
#endif
	default:
		BUG();
	}
1450
	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);
1451

1452
	if (likely(xdst)) {
1453 1454 1455
		struct dst_entry *dst = &xdst->u.dst;

		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
1456
		xdst->flo.ops = &xfrm_bundle_fc_ops;
1457 1458
		if (afinfo->init_dst)
			afinfo->init_dst(net, xdst);
1459
	} else
1460
		xdst = ERR_PTR(-ENOBUFS);
1461

1462 1463
	xfrm_policy_put_afinfo(afinfo);

1464 1465 1466
	return xdst;
}

1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483
/*
 * Run the ->init_path() hook of @dst's address family on @path.
 *
 * Returns the hook's result, or -EINVAL when no afinfo is registered for
 * that family.
 */
static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo;
	int ret;

	afinfo = xfrm_policy_get_afinfo(dst->ops->family);
	if (!afinfo)
		return -EINVAL;

	ret = afinfo->init_path(path, dst, nfheader_len);
	xfrm_policy_put_afinfo(afinfo);

	return ret;
}

H
Herbert Xu 已提交
1484
static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1485
				const struct flowi *fl)
1486 1487 1488 1489 1490 1491
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
L
Linus Torvalds 已提交
1492
		return -EINVAL;
1493

H
Herbert Xu 已提交
1494
	err = afinfo->fill_dst(xdst, dev, fl);
1495

L
Linus Torvalds 已提交
1496
	xfrm_policy_put_afinfo(afinfo);
1497

L
Linus Torvalds 已提交
1498 1499 1500
	return err;
}

1501

1502 1503 1504 1505 1506 1507
/* Allocate chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... Shortly, bundle a bundle.
 */

static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
1508
					    const struct flowi *fl,
1509 1510
					    struct dst_entry *dst)
{
1511
	struct net *net = xp_net(policy);
1512 1513
	unsigned long now = jiffies;
	struct net_device *dev;
1514
	struct xfrm_mode *inner_mode;
1515 1516 1517 1518 1519
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
1520
	int nfheader_len = 0;
1521 1522 1523
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;
1524 1525 1526
	xfrm_address_t saddr, daddr;

	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1527 1528 1529 1530 1531 1532 1533 1534 1535

	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	for (; i < nx; i++) {
1536
		struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1537 1538 1539 1540 1541 1542 1543 1544
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555
		if (xfrm[i]->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(xfrm[i],
							xfrm_af2proto(family));
			if (!inner_mode) {
				err = -EAFNOSUPPORT;
				dst_release(dst);
				goto put_states;
			}
		} else
			inner_mode = xfrm[i]->inner_mode;

1556 1557 1558 1559 1560 1561 1562 1563
		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
1564
		dst_copy_metrics(dst1, dst);
1565 1566 1567

		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
1568 1569
			dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
					      family);
1570 1571 1572 1573 1574 1575 1576
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
1577
		xdst->xfrm_genid = xfrm[i]->genid;
1578

1579
		dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
1580 1581 1582 1583
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
1584
		dst1->output = inner_mode->afinfo->output;
1585 1586 1587 1588 1589

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
1590 1591
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602
		trailer_len += xfrm[i]->props.trailer_len;
	}

	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

1603
	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1604 1605 1606 1607 1608
	xfrm_init_pmtu(dst_prev);

	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

H
Herbert Xu 已提交
1609
		err = xfrm_fill_dst(xdst, dev, fl);
1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}

1632
#ifdef CONFIG_XFRM_SUB_POLICY
/*
 * Copy @size bytes from @src into *@target, allocating the buffer with
 * kmalloc(GFP_ATOMIC) first when *@target is still NULL.
 *
 * Returns 0 on success or -ENOMEM when the allocation fails.
 */
static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
{
	void *buf = *target;

	if (!buf) {
		buf = kmalloc(size, GFP_ATOMIC);
		if (!buf)
			return -ENOMEM;
		*target = buf;
	}

	memcpy(buf, src, size);
	return 0;
}
#endif
1645

1646 1647
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of selector @sel in the
 * bundle's ->partner buffer; without it this is a no-op returning 0.
 */
static int xfrm_dst_update_parent(struct dst_entry *dst,
				  const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->partner);

	return xfrm_dst_alloc_copy(slot, sel, sizeof(*sel));
#else
	return 0;
#endif
}

1658 1659
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of flow @fl in the bundle's
 * ->origin buffer; without it this is a no-op returning 0.
 */
static int xfrm_dst_update_origin(struct dst_entry *dst,
				  const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->origin);

	return xfrm_dst_alloc_copy(slot, fl, sizeof(*fl));
#else
	return 0;
#endif
}
L
Linus Torvalds 已提交
1668

1669
/*
 * Complete the policy set in @pols for flow @fl and count its templates.
 *
 * On entry pols[0] holds the result of a policy lookup (a policy, NULL or
 * an ERR_PTR()) and *@num_pols is 0 or 1.  With CONFIG_XFRM_SUB_POLICY, an
 * allowing non-main pols[0] triggers an additional main policy lookup whose
 * result is stored in pols[1].
 *
 * On success 0 is returned, *@num_pols is the number of valid entries in
 * @pols and *@num_xfrms is the total template count, or -1 when any of the
 * policies does not have action XFRM_POLICY_ALLOW.
 *
 * On failure a negative errno is returned and *@num_pols is set to 0: at
 * that point @pols holds no reference the caller still owns (pols[0] is
 * either an ERR_PTR() or has already been put here), so a caller running
 * xfrm_pols_put(pols, num_pols) on its error path must not touch them.
 */
static int xfrm_expand_policies(const struct flowi *fl, u16 family,
				struct xfrm_policy **pols,
				int *num_pols, int *num_xfrms)
{
	int i;

	if (*num_pols == 0 || !pols[0]) {
		*num_pols = 0;
		*num_xfrms = 0;
		return 0;
	}
	if (IS_ERR(pols[0])) {
		/* pols[0] is not a policy; make sure nobody tries to put it */
		*num_pols = 0;
		return PTR_ERR(pols[0]);
	}

	*num_xfrms = pols[0]->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
						    XFRM_POLICY_TYPE_MAIN,
						    fl, family,
						    XFRM_POLICY_OUT);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				xfrm_pols_put(pols, *num_pols);
				/* the reference on pols[0] is gone now;
				 * prevent a second put by the caller */
				*num_pols = 0;
				return PTR_ERR(pols[1]);
			}
			(*num_pols)++;
			(*num_xfrms) += pols[1]->xfrm_nr;
		}
	}
#endif
	for (i = 0; i < *num_pols; i++) {
		if (pols[i]->action != XFRM_POLICY_ALLOW) {
			*num_xfrms = -1;
			break;
		}
	}

	return 0;

}

/*
 * Resolve the templates of @pols into states and build a bundle on top of
 * @dst_orig.
 *
 * Returns the new bundle, NULL when template resolution yields no states,
 * or an ERR_PTR().  The matching XFRM MIB error counter is bumped for
 * resolution errors other than -EAGAIN, for bundle creation failures and
 * for failures to record the parent selector / origin flow.
 */
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
			       const struct flowi *fl, u16 family,
			       struct dst_entry *dst_orig)
{
	struct net *net = xp_net(pols[0]);
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct xfrm_dst *xdst;
	struct dst_entry *dst;
	int nx;
	int err;

	/* Try to instantiate a bundle */
	nx = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
	if (nx <= 0) {
		if (nx != 0 && nx != -EAGAIN)
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
		/* nx == 0 turns into a NULL return here */
		return ERR_PTR(nx);
	}

	dst = xfrm_bundle_create(pols[0], xfrm, nx, fl, dst_orig);
	if (IS_ERR(dst)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
		return ERR_CAST(dst);
	}

	xdst = (struct xfrm_dst *)dst;
	xdst->num_xfrms = nx;

	if (num_pols > 1)
		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
	else
		err = xfrm_dst_update_origin(dst, fl);
	if (unlikely(err)) {
		dst_free(dst);
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
		return ERR_PTR(err);
	}

	xdst->num_pols = num_pols;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
	xdst->policy_genid = atomic_read(&pols[0]->genid);

	return xdst;
}

1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769
static void xfrm_policy_queue_process(unsigned long arg)
{
	int err = 0;
	struct sk_buff *skb;
	struct sock *sk;
	struct dst_entry *dst;
	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
	struct xfrm_policy_queue *pq = &pol->polq;
	struct flowi fl;
	struct sk_buff_head list;

	spin_lock(&pq->hold_queue.lock);
	skb = skb_peek(&pq->hold_queue);
1770 1771 1772 1773
	if (!skb) {
		spin_unlock(&pq->hold_queue.lock);
		goto out;
	}
1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791
	dst = skb_dst(skb);
	sk = skb->sk;
	xfrm_decode_session(skb, &fl, dst->ops->family);
	spin_unlock(&pq->hold_queue.lock);

	dst_hold(dst->path);
	dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
			  sk, 0);
	if (IS_ERR(dst))
		goto purge_queue;

	if (dst->flags & DST_XFRM_QUEUE) {
		dst_release(dst);

		if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
			goto purge_queue;

		pq->timeout = pq->timeout << 1;
1792 1793 1794
		if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
			xfrm_pol_hold(pol);
	goto out;
1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824
	}

	dst_release(dst);

	__skb_queue_head_init(&list);

	spin_lock(&pq->hold_queue.lock);
	pq->timeout = 0;
	skb_queue_splice_init(&pq->hold_queue, &list);
	spin_unlock(&pq->hold_queue.lock);

	while (!skb_queue_empty(&list)) {
		skb = __skb_dequeue(&list);

		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
		dst_hold(skb_dst(skb)->path);
		dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
				  &fl, skb->sk, 0);
		if (IS_ERR(dst)) {
			kfree_skb(skb);
			continue;
		}

		nf_reset(skb);
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);

		err = dst_output(skb);
	}

1825 1826
out:
	xfrm_pol_put(pol);
1827 1828 1829 1830 1831
	return;

purge_queue:
	pq->timeout = 0;
	xfrm_queue_purge(&pq->hold_queue);
1832
	xfrm_pol_put(pol);
1833 1834
}

1835
static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
1836 1837 1838 1839
{
	unsigned long sched_next;
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
1840 1841
	struct xfrm_policy *pol = xdst->pols[0];
	struct xfrm_policy_queue *pq = &pol->polq;
1842 1843 1844 1845 1846 1847 1848
	const struct sk_buff *fclone = skb + 1;

	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
		     fclone->fclone == SKB_FCLONE_CLONE)) {
		kfree_skb(skb);
		return 0;
	}
1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866

	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
		kfree_skb(skb);
		return -EAGAIN;
	}

	skb_dst_force(skb);

	spin_lock_bh(&pq->hold_queue.lock);

	if (!pq->timeout)
		pq->timeout = XFRM_QUEUE_TMO_MIN;

	sched_next = jiffies + pq->timeout;

	if (del_timer(&pq->hold_timer)) {
		if (time_before(pq->hold_timer.expires, sched_next))
			sched_next = pq->hold_timer.expires;
1867
		xfrm_pol_put(pol);
1868 1869 1870
	}

	__skb_queue_tail(&pq->hold_queue, skb);
1871 1872
	if (!mod_timer(&pq->hold_timer, sched_next))
		xfrm_pol_hold(pol);
1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893

	spin_unlock_bh(&pq->hold_queue.lock);

	return 0;
}

/*
 * Allocate a dummy bundle for a flow whose policies could not (yet) be
 * turned into a real bundle.
 *
 * When sysctl_larval_drop is set or @num_xfrms <= 0 the freshly allocated
 * xfrm_dst is returned as is.  Otherwise it is set up as a queueing bundle
 * on top of @dst: flagged DST_XFRM_QUEUE, with xdst_queue_output() as its
 * output handler and @dst as route, child and path.
 *
 * Returns the bundle or an ERR_PTR().
 */
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
						 struct dst_entry *dst,
						 const struct flowi *fl,
						 int num_xfrms,
						 u16 family)
{
	struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
	struct net_device *dev;
	struct dst_entry *dst1;
	int err;

	if (IS_ERR(xdst))
		return xdst;

	if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0)
		return xdst;

	dst1 = &xdst->u.dst;

	/* one reference for ->route ... */
	dst_hold(dst);
	xdst->route = dst;

	dst_copy_metrics(dst1, dst);

	dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
	dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
	dst1->lastuse = jiffies;

	dst1->input = dst_discard;
	dst1->output = xdst_queue_output;

	/* ... and one taken before ->child and ->path are set */
	dst_hold(dst);
	dst1->child = dst;
	dst1->path = dst;

	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);

	dev = dst->dev;
	err = dev ? xfrm_fill_dst(xdst, dev, fl) : -ENODEV;
	if (err) {
		dst_release(dst1);
		return ERR_PTR(err);
	}

	return xdst;
}

1934
static struct flow_cache_object *
1935
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966
		   struct flow_cache_object *oldflo, void *ctx)
{
	struct dst_entry *dst_orig = (struct dst_entry *)ctx;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct xfrm_dst *xdst, *new_xdst;
	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;

	/* Check if the policies from old bundle are usable */
	xdst = NULL;
	if (oldflo) {
		xdst = container_of(oldflo, struct xfrm_dst, flo);
		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		pol_dead = 0;
		for (i = 0; i < num_pols; i++) {
			pols[i] = xdst->pols[i];
			pol_dead |= pols[i]->walk.dead;
		}
		if (pol_dead) {
			dst_free(&xdst->u.dst);
			xdst = NULL;
			num_pols = 0;
			num_xfrms = 0;
			oldflo = NULL;
		}
	}

	/* Resolve policies to use if we couldn't get them from
	 * previous cache entry */
	if (xdst == NULL) {
		num_pols = 1;
1967 1968
		pols[0] = __xfrm_policy_lookup(net, fl, family,
					       flow_to_policy_dir(dir));
1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto inc_error;
		if (num_pols == 0)
			return NULL;
		if (num_xfrms <= 0)
			goto make_dummy_bundle;
	}

	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
	if (IS_ERR(new_xdst)) {
		err = PTR_ERR(new_xdst);
		if (err != -EAGAIN)
			goto error;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		dst_hold(&xdst->u.dst);
		return oldflo;
1988 1989 1990 1991 1992 1993 1994
	} else if (new_xdst == NULL) {
		num_xfrms = 0;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		xdst->num_xfrms = 0;
		dst_hold(&xdst->u.dst);
		return oldflo;
1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
	}

	/* Kill the previous bundle */
	if (xdst) {
		/* The policies were stolen for newly generated bundle */
		xdst->num_pols = 0;
		dst_free(&xdst->u.dst);
	}

	/* Flow cache does not have reference, it dst_free()'s,
	 * but we do need to return one reference for original caller */
	dst_hold(&new_xdst->u.dst);
	return &new_xdst->flo;

make_dummy_bundle:
	/* We found policies, but there's no bundles to instantiate:
	 * either because the policy blocks, has no transformations or
	 * we could not build template (no xfrm_states).*/
2013
	xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
2014 2015 2016 2017 2018 2019
	if (IS_ERR(xdst)) {
		xfrm_pols_put(pols, num_pols);
		return ERR_CAST(xdst);
	}
	xdst->num_pols = num_pols;
	xdst->num_xfrms = num_xfrms;
2020
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033

	dst_hold(&xdst->u.dst);
	return &xdst->flo;

inc_error:
	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
	if (xdst != NULL)
		dst_free(&xdst->u.dst);
	else
		xfrm_pols_put(pols, num_pols);
	return ERR_PTR(err);
}
L
Linus Torvalds 已提交
2034

2035 2036 2037 2038 2039 2040 2041 2042
/*
 * Return the result of the address family's ->blackhole_route() hook for
 * @dst_orig.  When no afinfo is registered for @family, @dst_orig is
 * released and ERR_PTR(-EINVAL) is returned.
 */
static struct dst_entry *make_blackhole(struct net *net, u16 family,
					struct dst_entry *dst_orig)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_entry *blackhole;

	if (!afinfo) {
		dst_release(dst_orig);
		return ERR_PTR(-EINVAL);
	}

	blackhole = afinfo->blackhole_route(net, dst_orig);
	xfrm_policy_put_afinfo(afinfo);

	return blackhole;
}

L
Linus Torvalds 已提交
2052 2053 2054 2055 2056
/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
2057 2058 2059
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl,
			      struct sock *sk, int flags)
L
Linus Torvalds 已提交
2060
{
2061
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2062 2063
	struct flow_cache_object *flo;
	struct xfrm_dst *xdst;
2064
	struct dst_entry *dst, *route;
2065
	u16 family = dst_orig->ops->family;
2066
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
2067
	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
2068

2069 2070 2071
	dst = NULL;
	xdst = NULL;
	route = NULL;
2072

2073
	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
2074 2075 2076 2077 2078
		num_pols = 1;
		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
2079
			goto dropdst;
2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093

		if (num_pols) {
			if (num_xfrms <= 0) {
				drop_pols = num_pols;
				goto no_transform;
			}

			xdst = xfrm_resolve_and_create_bundle(
					pols, num_pols, fl,
					family, dst_orig);
			if (IS_ERR(xdst)) {
				xfrm_pols_put(pols, num_pols);
				err = PTR_ERR(xdst);
				goto dropdst;
2094 2095 2096 2097
			} else if (xdst == NULL) {
				num_xfrms = 0;
				drop_pols = num_pols;
				goto no_transform;
2098 2099
			}

2100 2101
			dst_hold(&xdst->u.dst);
			xdst->u.dst.flags |= DST_NOCACHE;
2102
			route = xdst->route;
2103
		}
2104
	}
L
Linus Torvalds 已提交
2105

2106
	if (xdst == NULL) {
L
Linus Torvalds 已提交
2107
		/* To accelerate a bit...  */
2108
		if ((dst_orig->flags & DST_NOXFRM) ||
A
Alexey Dobriyan 已提交
2109
		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
2110
			goto nopol;
L
Linus Torvalds 已提交
2111

2112 2113 2114 2115
		flo = flow_cache_lookup(net, fl, family, dir,
					xfrm_bundle_lookup, dst_orig);
		if (flo == NULL)
			goto nopol;
2116
		if (IS_ERR(flo)) {
2117
			err = PTR_ERR(flo);
2118
			goto dropdst;
2119
		}
2120 2121 2122 2123
		xdst = container_of(flo, struct xfrm_dst, flo);

		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
2124
		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137
		route = xdst->route;
	}

	dst = &xdst->u.dst;
	if (route == NULL && num_xfrms > 0) {
		/* The only case when xfrm_bundle_lookup() returns a
		 * bundle with null route, is when the template could
		 * not be resolved. It means policies are there, but
		 * bundle could not be created, since we don't yet
		 * have the xfrm_state's. We need to wait for KM to
		 * negotiate new SA's or bail out with error.*/
		if (net->xfrm.sysctl_larval_drop) {
			dst_release(dst);
2138
			xfrm_pols_put(pols, drop_pols);
2139
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
2140

2141
			return make_blackhole(net, family, dst_orig);
2142 2143
		}

2144
		err = -EAGAIN;
2145 2146 2147

		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
		goto error;
L
Linus Torvalds 已提交
2148 2149
	}

2150 2151
no_transform:
	if (num_pols == 0)
2152
		goto nopol;
L
Linus Torvalds 已提交
2153

2154 2155 2156
	if ((flags & XFRM_LOOKUP_ICMP) &&
	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
		err = -ENOENT;
2157
		goto error;
2158
	}
2159

2160 2161
	for (i = 0; i < num_pols; i++)
		pols[i]->curlft.use_time = get_seconds();
2162

2163
	if (num_xfrms < 0) {
L
Linus Torvalds 已提交
2164
		/* Prohibit the flow */
A
Alexey Dobriyan 已提交
2165
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
2166 2167
		err = -EPERM;
		goto error;
2168 2169 2170 2171 2172 2173
	} else if (num_xfrms > 0) {
		/* Flow transformed */
		dst_release(dst_orig);
	} else {
		/* Flow passes untransformed */
		dst_release(dst);
2174
		dst = dst_orig;
L
Linus Torvalds 已提交
2175
	}
2176 2177
ok:
	xfrm_pols_put(pols, drop_pols);
G
Gao feng 已提交
2178 2179 2180
	if (dst && dst->xfrm &&
	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
		dst->flags |= DST_XFRM_TUNNEL;
2181
	return dst;
L
Linus Torvalds 已提交
2182

2183
nopol:
2184 2185
	if (!(flags & XFRM_LOOKUP_ICMP)) {
		dst = dst_orig;
2186
		goto ok;
2187
	}
2188
	err = -ENOENT;
L
Linus Torvalds 已提交
2189
error:
2190
	dst_release(dst);
2191 2192
dropdst:
	dst_release(dst_orig);
2193
	xfrm_pols_put(pols, drop_pols);
2194
	return ERR_PTR(err);
L
Linus Torvalds 已提交
2195 2196 2197
}
EXPORT_SYMBOL(xfrm_lookup);

2198
static inline int
2199
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
2200 2201 2202 2203 2204 2205 2206 2207
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
2208
	return x->type->reject(x, skb, fl);
2209 2210
}

L
Linus Torvalds 已提交
2211 2212 2213 2214 2215 2216 2217
/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we make this in maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have policy cached at them.
 */

static inline int
2218
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
L
Linus Torvalds 已提交
2219 2220 2221
	      unsigned short family)
{
	if (xfrm_state_kern(x))
2222
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
L
Linus Torvalds 已提交
2223 2224 2225 2226
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
2227
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
2228
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
2229 2230
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
L
Linus Torvalds 已提交
2231 2232
}

2233 2234 2235 2236 2237 2238 2239
/*
 * 0 or more than 0 is returned when validation is succeeded (either bypass
 * because of optional transport mode, or next index of the mathced secpath
 * state with the template.
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
L
Linus Torvalds 已提交
2240
static inline int
2241
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
L
Linus Torvalds 已提交
2242 2243 2244 2245 2246
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
2247
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
L
Linus Torvalds 已提交
2248 2249 2250 2251
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
2252
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
L
Linus Torvalds 已提交
2253
			return ++idx;
2254 2255 2256
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2-idx;
L
Linus Torvalds 已提交
2257
			break;
2258
		}
L
Linus Torvalds 已提交
2259 2260 2261 2262
	}
	return start;
}

2263 2264
int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
L
Linus Torvalds 已提交
2265 2266
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2267
	int err;
L
Linus Torvalds 已提交
2268 2269 2270 2271

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

2272
	afinfo->decode_session(skb, fl, reverse);
2273
	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
L
Linus Torvalds 已提交
2274
	xfrm_policy_put_afinfo(afinfo);
2275
	return err;
L
Linus Torvalds 已提交
2276
}
2277
EXPORT_SYMBOL(__xfrm_decode_session);
L
Linus Torvalds 已提交
2278

2279
static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
L
Linus Torvalds 已提交
2280 2281
{
	for (; k < sp->len; k++) {
2282
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
2283
			*idxp = k;
L
Linus Torvalds 已提交
2284
			return 1;
2285
		}
L
Linus Torvalds 已提交
2286 2287 2288 2289 2290
	}

	return 0;
}

2291
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
L
Linus Torvalds 已提交
2292 2293
			unsigned short family)
{
2294
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
2295
	struct xfrm_policy *pol;
2296 2297 2298 2299
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
2300
	int reverse;
L
Linus Torvalds 已提交
2301
	struct flowi fl;
2302
	u8 fl_dir;
2303
	int xerr_idx = -1;
L
Linus Torvalds 已提交
2304

2305 2306 2307 2308
	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

2309
	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
A
Alexey Dobriyan 已提交
2310
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
L
Linus Torvalds 已提交
2311
		return 0;
2312 2313
	}

2314
	nf_nat_decode_session(skb, &fl, family);
L
Linus Torvalds 已提交
2315 2316 2317 2318 2319

	/* First, check used SA against their selectors. */
	if (skb->sp) {
		int i;

2320
		for (i = skb->sp->len-1; i >= 0; i--) {
2321
			struct xfrm_state *x = skb->sp->xvec[i];
2322
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
A
Alexey Dobriyan 已提交
2323
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
L
Linus Torvalds 已提交
2324
				return 0;
2325
			}
L
Linus Torvalds 已提交
2326 2327 2328 2329
		}
	}

	pol = NULL;
2330
	if (sk && sk->sk_policy[dir]) {
2331
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
2332
		if (IS_ERR(pol)) {
A
Alexey Dobriyan 已提交
2333
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2334
			return 0;
2335
		}
2336
	}
L
Linus Torvalds 已提交
2337

2338 2339 2340 2341 2342 2343 2344 2345 2346 2347
	if (!pol) {
		struct flow_cache_object *flo;

		flo = flow_cache_lookup(net, &fl, family, fl_dir,
					xfrm_policy_lookup, NULL);
		if (IS_ERR_OR_NULL(flo))
			pol = ERR_CAST(flo);
		else
			pol = container_of(flo, struct xfrm_policy, flo);
	}
L
Linus Torvalds 已提交
2348

2349
	if (IS_ERR(pol)) {
A
Alexey Dobriyan 已提交
2350
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2351
		return 0;
2352
	}
2353

2354
	if (!pol) {
2355
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
2356
			xfrm_secpath_reject(xerr_idx, skb, &fl);
A
Alexey Dobriyan 已提交
2357
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
2358 2359 2360 2361
			return 0;
		}
		return 1;
	}
L
Linus Torvalds 已提交
2362

2363
	pol->curlft.use_time = get_seconds();
L
Linus Torvalds 已提交
2364

2365
	pols[0] = pol;
2366
	npols++;
2367 2368
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
2369
		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
2370 2371 2372
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
2373
			if (IS_ERR(pols[1])) {
A
Alexey Dobriyan 已提交
2374
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2375
				return 0;
2376
			}
2377
			pols[1]->curlft.use_time = get_seconds();
2378
			npols++;
2379 2380 2381 2382
		}
	}
#endif

L
Linus Torvalds 已提交
2383 2384 2385
	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
2386
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
2387
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
2388 2389
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
L
Linus Torvalds 已提交
2390 2391 2392 2393 2394
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

2395 2396
		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
2397
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
A
Alexey Dobriyan 已提交
2398
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2399
				goto reject;
2400 2401
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
A
Alexey Dobriyan 已提交
2402
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
2403
				goto reject_error;
2404
			}
2405 2406 2407 2408
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
2409
		if (npols > 1) {
F
Fan Du 已提交
2410
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
2411 2412
			tpp = stp;
		}
2413

L
Linus Torvalds 已提交
2414 2415 2416 2417 2418 2419
		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
2420 2421
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
2422
			if (k < 0) {
2423 2424 2425
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
A
Alexey Dobriyan 已提交
2426
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
L
Linus Torvalds 已提交
2427
				goto reject;
2428
			}
L
Linus Torvalds 已提交
2429 2430
		}

2431
		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
A
Alexey Dobriyan 已提交
2432
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
L
Linus Torvalds 已提交
2433
			goto reject;
2434
		}
L
Linus Torvalds 已提交
2435

2436
		xfrm_pols_put(pols, npols);
L
Linus Torvalds 已提交
2437 2438
		return 1;
	}
A
Alexey Dobriyan 已提交
2439
	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
L
Linus Torvalds 已提交
2440 2441

reject:
2442
	xfrm_secpath_reject(xerr_idx, skb, &fl);
2443 2444
reject_error:
	xfrm_pols_put(pols, npols);
L
Linus Torvalds 已提交
2445 2446 2447 2448 2449 2450
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
2451
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
2452
	struct flowi fl;
E
Eric Dumazet 已提交
2453
	struct dst_entry *dst;
E
Eric Dumazet 已提交
2454
	int res = 1;
L
Linus Torvalds 已提交
2455

2456
	if (xfrm_decode_session(skb, &fl, family) < 0) {
2457
		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
L
Linus Torvalds 已提交
2458
		return 0;
2459
	}
L
Linus Torvalds 已提交
2460

2461
	skb_dst_force(skb);
E
Eric Dumazet 已提交
2462

2463 2464
	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
	if (IS_ERR(dst)) {
E
Eric Dumazet 已提交
2465
		res = 0;
2466 2467
		dst = NULL;
	}
E
Eric Dumazet 已提交
2468 2469
	skb_dst_set(skb, dst);
	return res;
L
Linus Torvalds 已提交
2470 2471 2472
}
EXPORT_SYMBOL(__xfrm_route_forward);

2473 2474
/* Optimize later using cookies and generation ids. */

L
Linus Torvalds 已提交
2475 2476
/* dst_ops->check callback for XFRM dsts.  @cookie is not used.
 *
 * Returns @dst when it is still usable, NULL to force a new lookup.
 */
static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
	 * get validated by dst_ops->check on every use.  We do this
	 * because when a normal route referenced by an XFRM dst is
	 * obsoleted we do not go looking around for all parent
	 * referencing XFRM dsts so that we can invalidate them.  It
	 * is just too much work.  Instead we make the checks here on
	 * every use.  For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
	 * dst which causes its ->obsolete field to be set to
	 * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
	 * this, we want to force a new route lookup.
	 */
	if (dst->obsolete >= 0 || stale_bundle(dst))
		return NULL;

	return dst;
}

/* Returns non-zero when the bundle headed by @dst no longer passes
 * xfrm_bundle_ok().  @dst is cast to the struct xfrm_dst that embeds it.
 */
static int stale_bundle(struct dst_entry *dst)
{
	struct xfrm_dst *bundle = (struct xfrm_dst *)dst;

	return xfrm_bundle_ok(bundle) == 0;
}

H
Herbert Xu 已提交
2509
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
L
Linus Torvalds 已提交
2510 2511
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2512
		dst->dev = dev_net(dev)->loopback_dev;
2513
		dev_hold(dst->dev);
L
Linus Torvalds 已提交
2514 2515 2516
		dev_put(dev);
	}
}
H
Herbert Xu 已提交
2517
EXPORT_SYMBOL(xfrm_dst_ifdown);
L
Linus Torvalds 已提交
2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534

/* dst_ops->link_failure callback for XFRM dsts: intentionally a no-op. */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such a dst must be popped before it reaches the point of failure. */
}

/* dst_ops->negative_advice callback: release an obsolete dst.
 *
 * Returns NULL after releasing @dst when it is marked obsolete; otherwise
 * returns @dst unchanged (which may itself be NULL).
 */
static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (dst && dst->obsolete) {
		dst_release(dst);
		return NULL;
	}

	return dst;
}

2535
/* Flush the flow cache of @net via flow_cache_flush(). */
void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
2540 2541 2542

/* Flush the flow cache of @net via flow_cache_flush_deferred().
 * Installed as the default afinfo->garbage_collect hook by
 * xfrm_policy_register_afinfo().
 */
static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred(net);
}

2546
static void xfrm_init_pmtu(struct dst_entry *dst)
L
Linus Torvalds 已提交
2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

2563
		dst_metric_set(dst, RTAX_MTU, pmtu);
L
Linus Torvalds 已提交
2564 2565 2566 2567 2568 2569 2570
	} while ((dst = dst->next));
}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

2571
static int xfrm_bundle_ok(struct xfrm_dst *first)
L
Linus Torvalds 已提交
2572 2573 2574 2575 2576
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

2577
	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
L
Linus Torvalds 已提交
2578 2579 2580
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

2581 2582 2583
	if (dst->flags & DST_XFRM_QUEUE)
		return 1;

L
Linus Torvalds 已提交
2584 2585 2586 2587 2588 2589 2590
	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
2591 2592
		if (xdst->xfrm_genid != dst->xfrm->genid)
			return 0;
2593 2594
		if (xdst->num_pols > 0 &&
		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2595
			return 0;
2596

L
Linus Torvalds 已提交
2597 2598 2599 2600 2601 2602
		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

2603
		if (!dst_check(xdst->route, xdst->route_cookie))
L
Linus Torvalds 已提交
2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
2624
		dst_metric_set(dst, RTAX_MTU, mtu);
L
Linus Torvalds 已提交
2625 2626 2627 2628

		if (last == first)
			break;

2629
		last = (struct xfrm_dst *)last->u.dst.next;
L
Linus Torvalds 已提交
2630 2631 2632 2633 2634 2635
		last->child_mtu_cached = mtu;
	}

	return 1;
}

2636 2637 2638 2639 2640
/* dst_ops->default_advmss callback: report the advmss metric of the
 * bundle's path dst.
 */
static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
	const struct dst_entry *path = dst->path;

	return dst_metric_advmss(path);
}

2641
static unsigned int xfrm_mtu(const struct dst_entry *dst)
2642
{
2643 2644 2645
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst_mtu(dst->path);
2646 2647
}

2648 2649 2650
static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
2651
{
2652
	return dst->path->ops->neigh_lookup(dst, skb, daddr);
2653 2654
}

L
Linus Torvalds 已提交
2655 2656
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
2657
	struct net *net;
L
Linus Torvalds 已提交
2658 2659 2660 2661 2662
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
E
Eric Dumazet 已提交
2663
	spin_lock(&xfrm_policy_afinfo_lock);
L
Linus Torvalds 已提交
2664 2665 2666 2667 2668 2669 2670 2671
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
2672 2673
		if (likely(dst_ops->default_advmss == NULL))
			dst_ops->default_advmss = xfrm_default_advmss;
2674 2675
		if (likely(dst_ops->mtu == NULL))
			dst_ops->mtu = xfrm_mtu;
L
Linus Torvalds 已提交
2676 2677 2678 2679
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
2680 2681
		if (likely(dst_ops->neigh_lookup == NULL))
			dst_ops->neigh_lookup = xfrm_neigh_lookup;
L
Linus Torvalds 已提交
2682
		if (likely(afinfo->garbage_collect == NULL))
2683
			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
2684
		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
L
Linus Torvalds 已提交
2685
	}
E
Eric Dumazet 已提交
2686
	spin_unlock(&xfrm_policy_afinfo_lock);
2687 2688 2689 2690 2691 2692 2693 2694 2695

	rtnl_lock();
	for_each_net(net) {
		struct dst_ops *xfrm_dst_ops;

		switch (afinfo->family) {
		case AF_INET:
			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
			break;
E
Eric Dumazet 已提交
2696
#if IS_ENABLED(CONFIG_IPV6)
2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707
		case AF_INET6:
			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
			break;
#endif
		default:
			BUG();
		}
		*xfrm_dst_ops = *afinfo->dst_ops;
	}
	rtnl_unlock();

L
Linus Torvalds 已提交
2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718
	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
E
Eric Dumazet 已提交
2719
	spin_lock(&xfrm_policy_afinfo_lock);
L
Linus Torvalds 已提交
2720 2721 2722
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
E
Eric Dumazet 已提交
2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737
		else
			RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
					 NULL);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);
	if (!err) {
		struct dst_ops *dst_ops = afinfo->dst_ops;

		synchronize_rcu();

		dst_ops->kmem_cachep = NULL;
		dst_ops->check = NULL;
		dst_ops->negative_advice = NULL;
		dst_ops->link_failure = NULL;
		afinfo->garbage_collect = NULL;
L
Linus Torvalds 已提交
2738 2739 2740 2741 2742
	}
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

2743 2744 2745 2746
/* Copy the dst_ops of the currently registered AF_INET (and, with
 * CONFIG_IPV6, AF_INET6) afinfo into the per-namespace copies of @net.
 * A family with no registered afinfo is left untouched.
 */
static void __net_init xfrm_dst_ops_init(struct net *net)
{
	struct xfrm_policy_afinfo *registered;

	rcu_read_lock();

	registered = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
	if (registered != NULL)
		net->xfrm.xfrm4_dst_ops = *registered->dst_ops;

#if IS_ENABLED(CONFIG_IPV6)
	registered = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
	if (registered != NULL)
		net->xfrm.xfrm6_dst_ops = *registered->dst_ops;
#endif

	rcu_read_unlock();
}

L
Linus Torvalds 已提交
2759 2760
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
2761
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2762

L
Linus Torvalds 已提交
2763 2764
	switch (event) {
	case NETDEV_DOWN:
2765
		xfrm_garbage_collect(dev_net(dev));
L
Linus Torvalds 已提交
2766 2767 2768 2769 2770
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
A
Alexey Dobriyan 已提交
2771
	.notifier_call	= xfrm_dev_event,
L
Linus Torvalds 已提交
2772 2773
};

2774
#ifdef CONFIG_XFRM_STATISTICS
/* Allocate the per-cpu XFRM MIB counters of @net and create the proc
 * entries through xfrm_proc_init().
 *
 * Returns -ENOMEM when the allocation fails, otherwise the result of
 * xfrm_proc_init(); the counters are freed again when that result is
 * negative.
 */
static int __net_init xfrm_statistics_init(struct net *net)
{
	int err;

	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
	if (net->mib.xfrm_statistics == NULL)
		return -ENOMEM;

	err = xfrm_proc_init(net);
	if (err >= 0)
		return err;

	free_percpu(net->mib.xfrm_statistics);
	return err;
}

/* Undo xfrm_statistics_init(): proc teardown first, then the counters. */
static void xfrm_statistics_fini(struct net *net)
{
	xfrm_proc_fini(net);
	free_percpu(net->mib.xfrm_statistics);
}
#else
/* Statistics support compiled out: nothing to set up. */
static int __net_init xfrm_statistics_init(struct net *net)
{
	return 0;
}

/* Statistics support compiled out: nothing to tear down. */
static void xfrm_statistics_fini(struct net *net)
{
}
#endif

2803
/* Set up the policy database of @net.
 *
 * Allocates the by-index hash and one by-destination hash per direction
 * slot (XFRM_POLICY_MAX * 2 of them), each with 8 buckets, and
 * initialises the policy list and the hash-resize work item.  For the
 * initial namespace it additionally creates the xfrm_dst slab cache and
 * registers the netdevice notifier.
 *
 * Returns 0 on success or -ENOMEM, in which case every hash table
 * allocated so far is freed again.
 */
static int __net_init xfrm_policy_init(struct net *net)
{
	const unsigned int hmask = 8 - 1;
	const unsigned int sz = (hmask + 1) * sizeof(struct hlist_head);
	int dir;

	if (net_eq(net, &init_net))
		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (net->xfrm.policy_byidx == NULL)
		return -ENOMEM;
	net->xfrm.policy_idx_hmask = hmask;

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab = &net->xfrm.policy_bydst[dir];

		net->xfrm.policy_count[dir] = 0;
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);

		htab->table = xfrm_hash_alloc(sz);
		if (htab->table == NULL)
			goto out_bydst;
		htab->hmask = hmask;
	}

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
	if (net_eq(net, &init_net))
		register_netdevice_notifier(&xfrm_dev_notifier);
	return 0;

out_bydst:
	/* Free only the tables of the directions that were set up. */
	while (--dir >= 0)
		xfrm_hash_free(net->xfrm.policy_bydst[dir].table, sz);
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
	return -ENOMEM;
}

static void xfrm_policy_fini(struct net *net)
{
2855
	unsigned int sz;
2856
	int dir;
2857

2858 2859
	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
2860
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
2861
#endif
2862
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
2863

2864
	WARN_ON(!list_empty(&net->xfrm.policy_all));
2865

2866
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2867 2868
		struct xfrm_policy_hash *htab;

2869
		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
2870 2871

		htab = &net->xfrm.policy_bydst[dir];
2872
		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
2873 2874
		WARN_ON(!hlist_empty(htab->table));
		xfrm_hash_free(htab->table, sz);
2875 2876
	}

2877
	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
2878 2879
	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
L
Linus Torvalds 已提交
2880 2881
}

2882 2883 2884 2885
/* Per-namespace initialisation of the XFRM subsystem.
 *
 * Order: per-net locks, statistics, state database, policy database,
 * dst_ops copies, sysctl, flow cache.  On failure the steps that already
 * succeeded are undone in reverse order and the error is returned.
 *
 * Returns 0 on success or the negative error of the failing step.
 */
static int __net_init xfrm_net_init(struct net *net)
{
	int rv;

	/* Initialize the per-net locks first.  The unwind path below runs
	 * xfrm_policy_fini()/xfrm_state_fini(), and policy code in this
	 * file takes net->xfrm.xfrm_policy_lock, so the locks must be
	 * valid before any step that can fail.
	 * NOTE(review): the teardown helpers are defined outside this
	 * section; presumably they take these locks - confirm.
	 */
	spin_lock_init(&net->xfrm.xfrm_state_lock);
	rwlock_init(&net->xfrm.xfrm_policy_lock);
	mutex_init(&net->xfrm.xfrm_cfg_mutex);

	rv = xfrm_statistics_init(net);
	if (rv < 0)
		goto out_statistics;
	rv = xfrm_state_init(net);
	if (rv < 0)
		goto out_state;
	rv = xfrm_policy_init(net);
	if (rv < 0)
		goto out_policy;
	xfrm_dst_ops_init(net);
	rv = xfrm_sysctl_init(net);
	if (rv < 0)
		goto out_sysctl;
	rv = flow_cache_init(net);
	if (rv < 0)
		goto out;

	return 0;

out:
	xfrm_sysctl_fini(net);
out_sysctl:
	xfrm_policy_fini(net);
out_policy:
	xfrm_state_fini(net);
out_state:
	xfrm_statistics_fini(net);
out_statistics:
	return rv;
}

/* Per-namespace teardown: undoes xfrm_net_init() in reverse order
 * (flow cache, sysctl, policy database, state database, statistics).
 */
static void __net_exit xfrm_net_exit(struct net *net)
{
	flow_cache_fini(net);
	xfrm_sysctl_fini(net);
	xfrm_policy_fini(net);
	xfrm_state_fini(net);
	xfrm_statistics_fini(net);
}

/* Per-namespace hooks for the XFRM subsystem; registered with
 * register_pernet_subsys() from xfrm_init().
 */
static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};

L
Linus Torvalds 已提交
2936 2937
/* Subsystem entry point: registers the per-namespace init/exit hooks
 * (xfrm_net_ops) and then calls xfrm_input_init().
 */
void __init xfrm_init(void)
{
	register_pernet_subsys(&xfrm_net_ops);
	xfrm_input_init();
}

J
Joy Latten 已提交
2942
#ifdef CONFIG_AUDITSYSCALL
2943 2944
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
J
Joy Latten 已提交
2945
{
2946 2947 2948 2949
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
J
Joy Latten 已提交
2950
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2951
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
J
Joy Latten 已提交
2952

2953
	switch (sel->family) {
J
Joy Latten 已提交
2954
	case AF_INET:
H
Harvey Harrison 已提交
2955
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
2956 2957 2958
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
H
Harvey Harrison 已提交
2959
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
2960 2961 2962
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
J
Joy Latten 已提交
2963 2964
		break;
	case AF_INET6:
H
Harvey Harrison 已提交
2965
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
2966 2967 2968
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
H
Harvey Harrison 已提交
2969
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
2970 2971 2972
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
J
Joy Latten 已提交
2973 2974 2975 2976
		break;
	}
}

2977
/* Emit an "SPD-add" audit record for policy @xp carrying the user info,
 * the result code and the common policy fields.  Does nothing when
 * xfrm_audit_start() yields no buffer.
 */
void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
{
	struct audit_buffer *ab = xfrm_audit_start("SPD-add");

	if (!ab)
		return;

	xfrm_audit_helper_usrinfo(task_valid, ab);
	audit_log_format(ab, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, ab);
	audit_log_end(ab);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

P
Paul Moore 已提交
2991
void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2992
			      bool task_valid)
J
Joy Latten 已提交
2993 2994 2995
{
	struct audit_buffer *audit_buf;

P
Paul Moore 已提交
2996
	audit_buf = xfrm_audit_start("SPD-delete");
J
Joy Latten 已提交
2997 2998
	if (audit_buf == NULL)
		return;
2999
	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
P
Paul Moore 已提交
3000
	audit_log_format(audit_buf, " res=%u", result);
J
Joy Latten 已提交
3001 3002 3003 3004 3005 3006
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif

3007
#ifdef CONFIG_XFRM_MIGRATE
3008 3009
static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
					const struct xfrm_selector *sel_tgt)
3010 3011 3012
{
	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
		if (sel_tgt->family == sel_cmp->family &&
3013 3014 3015 3016
		    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
				    sel_cmp->family) &&
		    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
				    sel_cmp->family) &&
3017 3018
		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
3019
			return true;
3020 3021 3022
		}
	} else {
		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
3023
			return true;
3024 3025
		}
	}
3026
	return false;
3027 3028
}

3029 3030
/*
 * Look up the policy that a migrate request refers to.
 *
 * The chain returned by policy_hash_direct() is scanned first; the first
 * entry whose selector matches @sel and whose type equals @type is taken
 * and its priority remembered.  The inexact list for @dir is scanned
 * next, and the first matching entry there with a numerically smaller
 * priority value replaces it.
 *
 * Returns the policy with a reference taken via xfrm_pol_hold(), or NULL
 * if nothing matched.  The whole lookup runs under
 * net->xfrm.xfrm_policy_lock held for reading.
 */
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type, struct net *net)
{
	struct xfrm_policy *cand, *found = NULL;
	struct hlist_head *bucket;
	u32 best_prio = ~0U;

	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/

	bucket = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(cand, bucket, bydst) {
		if (!xfrm_migrate_selector_match(sel, &cand->selector) ||
		    cand->type != type)
			continue;

		found = cand;
		best_prio = cand->priority;
		break;
	}

	bucket = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(cand, bucket, bydst) {
		if (!xfrm_migrate_selector_match(sel, &cand->selector) ||
		    cand->type != type ||
		    cand->priority >= best_prio)
			continue;

		found = cand;
		break;
	}

	if (found)
		xfrm_pol_hold(found);

	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return found;
}

3064
static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
3065 3066 3067 3068 3069 3070 3071 3072
{
	int match = 0;

	if (t->mode == m->mode && t->id.proto == m->proto &&
	    (m->reqid == 0 || t->reqid == m->reqid)) {
		switch (t->mode) {
		case XFRM_MODE_TUNNEL:
		case XFRM_MODE_BEET:
3073 3074 3075 3076
			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
					    m->old_family) &&
			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
					    m->old_family)) {
3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100
				match = 1;
			}
			break;
		case XFRM_MODE_TRANSPORT:
			/* in case of transport mode, template does not store
			   any IP addresses, hence we just compare mode and
			   protocol */
			match = 1;
			break;
		default:
			break;
		}
	}
	return match;
}

/* update endpoint address(es) of template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
H
Herbert Xu 已提交
3101
	if (unlikely(pol->walk.dead)) {
3102 3103 3104 3105 3106 3107 3108 3109 3110 3111
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
H
Herbert Xu 已提交
3112 3113
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
3114 3115 3116 3117 3118 3119 3120 3121
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
3122
			atomic_inc(&pol->genid);
3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}

3134
static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
3135 3136 3137 3138 3139 3140 3141
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
		return -EINVAL;

	for (i = 0; i < num_migrate; i++) {
3142 3143 3144 3145
		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
				    m[i].old_family) &&
		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
				    m[i].old_family))
3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167
			return -EINVAL;
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
			return -EINVAL;

		/* check if there is any duplicated entry */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family)
				return -EINVAL;
		}
	}

	return 0;
}

3168
/*
 * Migrate the endpoints of a policy and of the states it uses.
 *
 * @sel, @dir, @type: identify the policy to update
 * @m, @num_migrate:  array of old/new endpoint descriptions
 * @k:                passed through to km_migrate() unchanged
 * @net:              namespace in which policy and states are looked up
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL   @m fails xfrm_migrate_check() or @dir is out of range
 *   -ENOENT   no matching policy, or the policy was deleted meanwhile
 *   -ENODATA  a state could not be migrated, or no template matched
 *
 * On failure after stage 1, the policy reference and the references on
 * the old states are dropped and any newly created states are deleted.
 */
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate,
		 struct xfrm_kmaddress *k, struct net *net)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	/* Stage 0 - sanity checks */
	err = xfrm_migrate_check(m, num_migrate);
	if (err < 0)
		goto out;

	/*
	 * dir is supplied by the caller and is used as an array index
	 * (net->xfrm.policy_inexact[dir]) by xfrm_migrate_policy_find(),
	 * so an out-of-range direction must be rejected before the lookup.
	 */
	if (dir >= XFRM_POLICY_MAX) {
		err = -EINVAL;
		goto out;
	}

	/* Stage 1 - find policy */
	pol = xfrm_migrate_policy_find(sel, dir, type, net);
	if (pol == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		x = xfrm_migrate_state_find(mp, net);
		if (!x)
			continue;

		x_cur[nx_cur] = x;
		nx_cur++;

		xc = xfrm_state_migrate(x, mp);
		if (!xc) {
			err = -ENODATA;
			goto restore_state;
		}
		x_new[nx_new] = xc;
		nx_new++;
	}

	/* Stage 3 - update policy */
	err = xfrm_policy_migrate(pol, m, num_migrate);
	if (err < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate, k);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	/* undo: drop our references and discard the states created so far */
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
#endif