xfrm_policy.c 76.8 KB
Newer Older
1
/*
L
Linus Torvalds 已提交
2 3 4 5 6 7 8 9 10 11 12
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 * 	Kazunori MIYAZAWA @USAGI
 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 * 		IPv6 support
 * 	Kazunori MIYAZAWA @USAGI
 * 	YOSHIFUJI Hideaki
 * 		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
13
 *
L
Linus Torvalds 已提交
14 15
 */

16
#include <linux/err.h>
L
Linus Torvalds 已提交
17 18 19 20 21 22 23
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
24
#include <linux/netfilter.h>
L
Linus Torvalds 已提交
25
#include <linux/module.h>
26
#include <linux/cache.h>
P
Paul Moore 已提交
27
#include <linux/audit.h>
28
#include <net/dst.h>
29
#include <net/flow.h>
L
Linus Torvalds 已提交
30 31
#include <net/xfrm.h>
#include <net/ip.h>
32 33 34
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
L
Linus Torvalds 已提交
35

36 37
#include "xfrm_hash.h"

38 39 40 41
#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN	100

42 43 44
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
						__read_mostly;
L
Linus Torvalds 已提交
45

46
static struct kmem_cache *xfrm_dst_cache __read_mostly;
L
Linus Torvalds 已提交
47

48
static void xfrm_init_pmtu(struct dst_entry *dst);
49
static int stale_bundle(struct dst_entry *dst);
50
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
51
static void xfrm_policy_queue_process(unsigned long arg);
L
Linus Torvalds 已提交
52

W
Wei Yongjun 已提交
53 54 55
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);

56
static inline bool
57
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
58
{
59 60
	const struct flowi4 *fl4 = &fl->u.ip4;

61 62
	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
63 64 65 66
		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
67 68
}

69
static inline bool
70
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
71
{
72 73 74 75 76 77 78 79
	const struct flowi6 *fl6 = &fl->u.ip6;

	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
80 81
}

82 83
bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
			 unsigned short family)
84 85 86 87 88 89 90
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
91
	return false;
92 93
}

E
Eric Dumazet 已提交
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
/*
 * Look up the per-family policy operations.
 *
 * Returns NULL when @family is out of range or has no entry registered.
 * On success the function returns with rcu_read_lock() still held; the
 * caller releases it through xfrm_policy_put_afinfo().
 */
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *ops;

	if (unlikely(family >= NPROTO))
		return NULL;

	rcu_read_lock();
	ops = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!ops)) {
		/* Nothing registered: do not leave the read side locked. */
		rcu_read_unlock();
		return NULL;
	}

	return ops;
}

/*
 * Counterpart of xfrm_policy_get_afinfo(): leaves the RCU read-side
 * section that a successful get left open. @afinfo itself is not used.
 */
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}

112
static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
113 114
						  const xfrm_address_t *saddr,
						  const xfrm_address_t *daddr,
115 116 117 118 119 120 121 122 123
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

124
	dst = afinfo->dst_lookup(net, tos, saddr, daddr);
125 126 127 128 129 130

	xfrm_policy_put_afinfo(afinfo);

	return dst;
}

131
/*
 * Route lookup for state @x.
 *
 * The lookup normally uses the state's own source/destination addresses.
 * If the state type carries XFRM_TYPE_LOCAL_COADDR or
 * XFRM_TYPE_REMOTE_COADDR, x->coaddr and the caller-supplied previous
 * address are substituted as coded below. After a successful lookup the
 * addresses actually used are copied back into @prev_saddr / @prev_daddr.
 */
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	struct net *net = xs_net(x);
	xfrm_address_t *src = &x->props.saddr;
	xfrm_address_t *dest = &x->id.daddr;
	struct dst_entry *route;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		src = x->coaddr;
		dest = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		src = prev_saddr;
		dest = x->coaddr;
	}

	route = __xfrm_dst_lookup(net, tos, src, dest, family);
	if (IS_ERR(route))
		return route;

	/* Skip the copy when the caller's buffer already was the input. */
	if (src != prev_saddr)
		memcpy(prev_saddr, src, sizeof(*prev_saddr));
	if (dest != prev_daddr)
		memcpy(prev_daddr, dest, sizeof(*prev_daddr));

	return route;
}

/*
 * Convert a number of seconds into jiffies, capping the result at
 * MAX_SCHEDULE_TIMEOUT - 1 when the multiplication would reach it.
 */
static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT - 1) / HZ)
		return MAX_SCHEDULE_TIMEOUT - 1;

	return secs * HZ;
}

static void xfrm_policy_timer(unsigned long data)
{
172
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
173
	unsigned long now = get_seconds();
L
Linus Torvalds 已提交
174 175 176 177 178 179
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

180
	if (unlikely(xp->walk.dead))
L
Linus Torvalds 已提交
181 182
		goto out;

183
	dir = xfrm_policy_id2dir(xp->index);
L
Linus Torvalds 已提交
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
223
		km_policy_expired(xp, dir, 0, 0);
L
Linus Torvalds 已提交
224 225 226 227 228 229 230 231 232 233 234
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
235
	if (!xfrm_policy_delete(xp, dir))
236
		km_policy_expired(xp, dir, 1, 0);
L
Linus Torvalds 已提交
237 238 239
	xfrm_pol_put(xp);
}

240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
/*
 * Flow cache ->get callback: returns @flo with an extra reference taken
 * on the enclosing policy, or NULL if that policy is marked dead.
 */
static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_policy *owner = container_of(flo, struct xfrm_policy, flo);

	if (unlikely(owner->walk.dead))
		return NULL;

	xfrm_pol_hold(owner);
	return flo;
}

/*
 * Flow cache ->check callback: returns 1 while the enclosing policy is
 * not marked dead, 0 otherwise.
 */
static int xfrm_policy_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_policy *owner = container_of(flo, struct xfrm_policy, flo);

	if (owner->walk.dead)
		return 0;

	return 1;
}

/*
 * Flow cache ->delete callback: drops one reference on the policy that
 * embeds @flo.
 */
static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_policy *owner = container_of(flo, struct xfrm_policy, flo);

	xfrm_pol_put(owner);
}

/*
 * Flow cache callbacks for the flow_cache_object embedded in a policy;
 * installed into policy->flo.ops by xfrm_policy_alloc().
 */
static const struct flow_cache_ops xfrm_policy_fc_ops = {
	.get = xfrm_policy_flo_get,
	.check = xfrm_policy_flo_check,
	.delete = xfrm_policy_flo_delete,
};
L
Linus Torvalds 已提交
269 270 271 272 273

/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

274
/*
 * Allocate and initialise a zeroed policy for @net using allocation
 * flags @gfp.
 *
 * The new policy starts with a reference count of 1, empty list/hash
 * linkage, an empty hold queue, both timers set up (not armed) and the
 * flow cache callbacks installed. Returns NULL if the allocation fails.
 */
struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (!policy)
		return NULL;

	write_pnet(&policy->xp_net, net);

	/* List and hash linkage. */
	INIT_LIST_HEAD(&policy->walk.all);
	INIT_HLIST_NODE(&policy->bydst);
	INIT_HLIST_NODE(&policy->byidx);

	rwlock_init(&policy->lock);
	atomic_set(&policy->refcnt, 1);
	skb_queue_head_init(&policy->polq.hold_queue);

	/* Both timers receive the policy pointer as their argument. */
	setup_timer(&policy->timer, xfrm_policy_timer,
		    (unsigned long)policy);
	setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
		    (unsigned long)policy);

	policy->flo.ops = &xfrm_policy_fc_ops;

	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

/* Destroy xfrm_policy: descendant resources must have been released by this moment. */

300
void xfrm_policy_destroy(struct xfrm_policy *policy)
L
Linus Torvalds 已提交
301
{
H
Herbert Xu 已提交
302
	BUG_ON(!policy->walk.dead);
L
Linus Torvalds 已提交
303

304
	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
L
Linus Torvalds 已提交
305 306
		BUG();

307
	security_xfrm_policy_free(policy->security);
L
Linus Torvalds 已提交
308 309
	kfree(policy);
}
310
EXPORT_SYMBOL(xfrm_policy_destroy);
L
Linus Torvalds 已提交
311

312 313 314 315
/* Dequeue every buffer from @list and free it with kfree_skb(). */
static void xfrm_queue_purge(struct sk_buff_head *list)
{
	for (;;) {
		struct sk_buff *pkt = skb_dequeue(list);

		if (pkt == NULL)
			break;
		kfree_skb(pkt);
	}
}

L
Linus Torvalds 已提交
320 321 322 323 324 325
/* Rule must be locked. Release descendant resources and announce the
 * entry dead. The rule must have been unlinked from the lists by this moment.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
H
Herbert Xu 已提交
326
	policy->walk.dead = 1;
L
Linus Torvalds 已提交
327

328
	atomic_inc(&policy->genid);
L
Linus Torvalds 已提交
329

330 331
	if (del_timer(&policy->polq.hold_timer))
		xfrm_pol_put(policy);
332 333
	xfrm_queue_purge(&policy->polq.hold_queue);

334 335 336 337
	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	xfrm_pol_put(policy);
L
Linus Torvalds 已提交
338 339
}

340 341
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

342
/* Bucket number for policy @index in the per-net by-index hash table. */
static inline unsigned int idx_hash(struct net *net, u32 index)
{
	return __idx_hash(index, net->xfrm.policy_idx_hmask);
}

347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
/*
 * Fetch the policy hash thresholds for @family in direction @dir.
 *
 * Writes the destination and source prefix-bit thresholds configured in
 * net->xfrm.policy_bydst[dir] to @dbits and @sbits. Families other than
 * AF_INET and AF_INET6 get 0 for both.
 */
static void __get_hash_thresh(struct net *net,
			      unsigned short family, int dir,
			      u8 *dbits, u8 *sbits)
{
	if (family == AF_INET) {
		*dbits = net->xfrm.policy_bydst[dir].dbits4;
		*sbits = net->xfrm.policy_bydst[dir].sbits4;
	} else if (family == AF_INET6) {
		*dbits = net->xfrm.policy_bydst[dir].dbits6;
		*sbits = net->xfrm.policy_bydst[dir].sbits6;
	} else {
		*dbits = 0;
		*sbits = 0;
	}
}

369 370 371
static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
372
{
373
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
374 375 376 377 378 379
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __sel_hash(sel, family, hmask, dbits, sbits);
380 381

	return (hash == hmask + 1 ?
382 383
		&net->xfrm.policy_inexact[dir] :
		net->xfrm.policy_bydst[dir].table + hash);
384 385
}

386 387 388 389
static struct hlist_head *policy_hash_direct(struct net *net,
					     const xfrm_address_t *daddr,
					     const xfrm_address_t *saddr,
					     unsigned short family, int dir)
390
{
391
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
392 393 394 395 396 397
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
398

399
	return net->xfrm.policy_bydst[dir].table + hash;
400 401
}

402 403
static void xfrm_dst_hash_transfer(struct net *net,
				   struct hlist_head *list,
404
				   struct hlist_head *ndsttable,
405 406
				   unsigned int nhashmask,
				   int dir)
407
{
408
	struct hlist_node *tmp, *entry0 = NULL;
409
	struct xfrm_policy *pol;
410
	unsigned int h0 = 0;
411 412
	u8 dbits;
	u8 sbits;
413

414
redo:
415
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
416 417
		unsigned int h;

418
		__get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
419
		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
420
				pol->family, nhashmask, dbits, sbits);
421
		if (!entry0) {
422
			hlist_del(&pol->bydst);
423 424 425 426 427
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
428
			hlist_del(&pol->bydst);
429
			hlist_add_behind(&pol->bydst, entry0);
430
		}
431
		entry0 = &pol->bydst;
432 433 434 435
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
436 437 438 439 440 441 442
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
443
	struct hlist_node *tmp;
444 445
	struct xfrm_policy *pol;

446
	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
447 448 449 450 451 452 453 454 455 456 457 458
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

/*
 * Hash mask for a table twice as large as the one described by
 * @old_hmask (a mask is the table size minus one). The arithmetic is
 * done in unsigned int.
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	unsigned int doubled_size = (old_hmask + 1) << 1;

	return doubled_size - 1;
}

459
static void xfrm_bydst_resize(struct net *net, int dir)
460
{
461
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
462 463
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
464
	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
465
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
466 467 468 469 470
	int i;

	if (!ndst)
		return;

F
Fan Du 已提交
471
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
472 473

	for (i = hmask; i >= 0; i--)
474
		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
475

476 477
	net->xfrm.policy_bydst[dir].table = ndst;
	net->xfrm.policy_bydst[dir].hmask = nhashmask;
478

F
Fan Du 已提交
479
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
480

481
	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
482 483
}

484
static void xfrm_byidx_resize(struct net *net, int total)
485
{
486
	unsigned int hmask = net->xfrm.policy_idx_hmask;
487 488
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
489
	struct hlist_head *oidx = net->xfrm.policy_byidx;
490
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
491 492 493 494 495
	int i;

	if (!nidx)
		return;

F
Fan Du 已提交
496
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
497 498 499 500

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

501 502
	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;
503

F
Fan Du 已提交
504
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
505

506
	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
507 508
}

509
static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
510
{
511 512
	unsigned int cnt = net->xfrm.policy_count[dir];
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
513 514 515 516 517 518 519 520 521 522 523

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

524
static inline int xfrm_byidx_should_resize(struct net *net, int total)
525
{
526
	unsigned int hmask = net->xfrm.policy_idx_hmask;
527 528 529 530 531 532 533 534

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

535
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
J
Jamal Hadi Salim 已提交
536
{
F
Fan Du 已提交
537
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
538 539 540 541 542 543 544
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
J
Jamal Hadi Salim 已提交
545
	si->spdhmcnt = xfrm_policy_hashmax;
F
Fan Du 已提交
546
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
J
Jamal Hadi Salim 已提交
547 548
}
EXPORT_SYMBOL(xfrm_spd_getinfo);
549

J
Jamal Hadi Salim 已提交
550
static DEFINE_MUTEX(hash_resize_mutex);
551
static void xfrm_hash_resize(struct work_struct *work)
552
{
553
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
554 555 556 557 558 559
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
560 561
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
562
	}
563 564
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);
565 566 567 568

	mutex_unlock(&hash_resize_mutex);
}

L
Linus Torvalds 已提交
569 570
/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute unpredictability of ordering of rules. This will not pass. */
571
/*
 * Produce a policy index that is not present in the by-index hash of
 * @net.
 *
 * A non-zero @index is tried first. After that, candidates come from a
 * function-static generator that advances by 8 per candidate and is
 * OR-ed with @dir. A candidate of 0 is replaced by 8. The loop repeats
 * until a candidate is found that no policy in its hash chain uses.
 */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_head *bucket;
		struct xfrm_policy *pol;
		u32 candidate;
		int in_use = 0;

		if (index) {
			/* The caller's suggestion is tried exactly once. */
			candidate = index;
			index = 0;
		} else {
			candidate = (idx_generator | dir);
			idx_generator += 8;
		}

		if (candidate == 0)
			candidate = 8;

		bucket = net->xfrm.policy_byidx + idx_hash(net, candidate);
		hlist_for_each_entry(pol, bucket, byidx) {
			if (pol->index == candidate) {
				in_use = 1;
				break;
			}
		}

		if (!in_use)
			return candidate;
	}
}

604 605 606 607 608 609 610 611 612 613 614 615 616 617 618
/*
 * Compare two selectors as arrays of 32-bit words.
 * Returns 0 when all sizeof(struct xfrm_selector) / sizeof(u32) words
 * are equal, 1 at the first difference.
 */
static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	const u32 *lhs = (const u32 *)s1;
	const u32 *rhs = (const u32 *)s2;
	int remaining = sizeof(struct xfrm_selector) / sizeof(u32);

	while (remaining-- > 0) {
		if (*lhs++ != *rhs++)
			return 1;
	}

	return 0;
}

619 620 621 622 623 624 625 626 627 628
static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
629 630
	if (del_timer(&pq->hold_timer))
		xfrm_pol_put(old);
631 632 633 634 635 636 637 638 639 640
	spin_unlock_bh(&pq->hold_queue.lock);

	if (skb_queue_empty(&list))
		return;

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
641 642
	if (!mod_timer(&pq->hold_timer, jiffies))
		xfrm_pol_hold(new);
643 644 645
	spin_unlock_bh(&pq->hold_queue.lock);
}

646 647 648 649 650 651 652 653 654 655 656 657 658 659 660
/*
 * Decide whether the marks of @policy and @pol match.
 *
 * True when both mark value and mask are identical, or when @policy's
 * masked mark value satisfies @pol's mark (value under @pol's mask
 * equals @pol's value) and both policies have the same priority.
 */
static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
				   struct xfrm_policy *pol)
{
	u32 masked = policy->mark.v & policy->mark.m;
	bool same_mark = policy->mark.v == pol->mark.v &&
			 policy->mark.m == pol->mark.m;

	if (same_mark)
		return true;

	return (masked & pol->mark.m) == pol->mark.v &&
	       policy->priority == pol->priority;
}

L
Linus Torvalds 已提交
661 662
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
663
	struct net *net = xp_net(policy);
664 665 666
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
667
	struct hlist_node *newpos;
L
Linus Torvalds 已提交
668

F
Fan Du 已提交
669
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
670
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
671 672
	delpol = NULL;
	newpos = NULL;
673
	hlist_for_each_entry(pol, chain, bydst) {
H
Herbert Xu 已提交
674
		if (pol->type == policy->type &&
675
		    !selector_cmp(&pol->selector, &policy->selector) &&
676
		    xfrm_policy_mark_match(policy, pol) &&
H
Herbert Xu 已提交
677 678
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
L
Linus Torvalds 已提交
679
			if (excl) {
F
Fan Du 已提交
680
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
681 682 683 684 685 686
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
H
Herbert Xu 已提交
687
			newpos = &pol->bydst;
L
Linus Torvalds 已提交
688 689 690 691 692 693
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
694
		hlist_add_behind(&policy->bydst, newpos);
695 696
	else
		hlist_add_head(&policy->bydst, chain);
L
Linus Torvalds 已提交
697
	xfrm_pol_hold(policy);
698
	net->xfrm.policy_count[dir]++;
699
	atomic_inc(&net->xfrm.flow_cache_genid);
F
fan.du 已提交
700 701 702 703 704 705 706

	/* After previous checking, family can either be AF_INET or AF_INET6 */
	if (policy->family == AF_INET)
		rt_genid_bump_ipv4(net);
	else
		rt_genid_bump_ipv6(net);

707 708
	if (delpol) {
		xfrm_policy_requeue(delpol, policy);
W
Wei Yongjun 已提交
709
		__xfrm_policy_unlink(delpol, dir);
710
	}
711
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
712
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
713
	policy->curlft.add_time = get_seconds();
L
Linus Torvalds 已提交
714 715 716
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
717
	list_add(&policy->walk.all, &net->xfrm.policy_all);
F
Fan Du 已提交
718
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
719

720
	if (delpol)
L
Linus Torvalds 已提交
721
		xfrm_policy_kill(delpol);
722 723
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);
724

L
Linus Torvalds 已提交
725 726 727 728
	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

729 730
/*
 * Find - and optionally delete - the policy in direction @dir that has
 * the given @type, accepts @mark, has a selector equal to @sel and a
 * security context matching @ctx.
 *
 * Returns the policy with an extra reference held, or NULL when nothing
 * matches. With @delete set, the security hook is consulted first: if it
 * refuses, its error is stored in *@err and the (still linked) policy is
 * returned; otherwise the policy is unlinked and killed. *@err is 0 in
 * all other cases.
 */
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
					  int dir, struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *found = NULL;
	struct hlist_head *bucket;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	bucket = policy_hash_bysel(net, sel, sel->family, dir);

	hlist_for_each_entry(pol, bucket, bydst) {
		if (pol->type != type ||
		    (mark & pol->mark.m) != pol->mark.v ||
		    selector_cmp(sel, &pol->selector) ||
		    !xfrm_sec_ctx_match(ctx, pol->security))
			continue;

		xfrm_pol_hold(pol);
		if (delete) {
			*err = security_xfrm_policy_delete(pol->security);
			if (*err) {
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				return pol;
			}
			__xfrm_policy_unlink(pol, dir);
		}
		found = pol;
		break;
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (found && delete)
		xfrm_policy_kill(found);

	return found;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
L
Linus Torvalds 已提交
767

768 769
/*
 * Find - and optionally delete - the policy with index @id, the given
 * @type and a mark that accepts @mark.
 *
 * If @id does not encode direction @dir, NULL is returned with *@err set
 * to -ENOENT. Otherwise the policy is returned with an extra reference
 * held, or NULL when nothing matches. With @delete set, the security hook
 * is consulted first: if it refuses, its error is stored in *@err and the
 * (still linked) policy is returned; otherwise the policy is unlinked and
 * killed. *@err is 0 in all remaining cases.
 */
struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
				     int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *found = NULL;
	struct hlist_head *bucket;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	bucket = net->xfrm.policy_byidx + idx_hash(net, id);

	hlist_for_each_entry(pol, bucket, byidx) {
		if (pol->type != type || pol->index != id ||
		    (mark & pol->mark.m) != pol->mark.v)
			continue;

		xfrm_pol_hold(pol);
		if (delete) {
			*err = security_xfrm_policy_delete(pol->security);
			if (*err) {
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				return pol;
			}
			__xfrm_policy_unlink(pol, dir);
		}
		found = pol;
		break;
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (found && delete)
		xfrm_policy_kill(found);

	return found;
}
EXPORT_SYMBOL(xfrm_policy_byid);

807 808
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
809
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
L
Linus Torvalds 已提交
810
{
811 812 813 814 815 816
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

817
		hlist_for_each_entry(pol,
818
				     &net->xfrm.policy_inexact[dir], bydst) {
819 820
			if (pol->type != type)
				continue;
821
			err = security_xfrm_policy_delete(pol->security);
822
			if (err) {
823
				xfrm_audit_policy_delete(pol, 0, task_valid);
824 825
				return err;
			}
826
		}
827
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
828
			hlist_for_each_entry(pol,
829
					     net->xfrm.policy_bydst[dir].table + i,
830 831 832
					     bydst) {
				if (pol->type != type)
					continue;
833 834
				err = security_xfrm_policy_delete(
								pol->security);
835
				if (err) {
J
Joy Latten 已提交
836
					xfrm_audit_policy_delete(pol, 0,
837
								 task_valid);
838 839 840 841 842 843 844 845 846
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
847
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
848 849 850 851 852
{
	return 0;
}
#endif

853
/*
 * Remove every policy of @type from @net, for each direction below
 * XFRM_POLICY_MAX.
 *
 * The security check runs first; if it fails its error is returned and
 * nothing is removed. Each matching policy is unlinked under the policy
 * write lock, then audited and killed with the lock dropped. Because the
 * lock was released, the scan of that chain restarts from its head
 * afterwards.
 *
 * Returns 0 on success, -ESRCH if no policy was removed.
 */
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
	int dir, err = 0, cnt = 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int rescan;
		int i;

		/* Inexact list. */
		do {
			rescan = 0;
			hlist_for_each_entry(pol,
					     &net->xfrm.policy_inexact[dir],
					     bydst) {
				if (pol->type != type)
					continue;

				__xfrm_policy_unlink(pol, dir);
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				cnt++;

				xfrm_audit_policy_delete(pol, 1, task_valid);

				xfrm_policy_kill(pol);

				write_lock_bh(&net->xfrm.xfrm_policy_lock);
				rescan = 1;
				break;
			}
		} while (rescan);

		/* By-destination buckets, last bucket first. */
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
			do {
				rescan = 0;
				hlist_for_each_entry(pol,
						     net->xfrm.policy_bydst[dir].table + i,
						     bydst) {
					if (pol->type != type)
						continue;

					__xfrm_policy_unlink(pol, dir);
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					cnt++;

					xfrm_audit_policy_delete(pol, 1, task_valid);
					xfrm_policy_kill(pol);

					write_lock_bh(&net->xfrm.xfrm_policy_lock);
					rescan = 1;
					break;
				}
			} while (rescan);
		}
	}

	if (!cnt)
		err = -ESRCH;
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

912
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
913
		     int (*func)(struct xfrm_policy *, int, int, void*),
L
Linus Torvalds 已提交
914 915
		     void *data)
{
H
Herbert Xu 已提交
916 917
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
918 919 920 921 922
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;
L
Linus Torvalds 已提交
923

H
Herbert Xu 已提交
924
	if (list_empty(&walk->walk.all) && walk->seq != 0)
925 926
		return 0;

F
Fan Du 已提交
927
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
H
Herbert Xu 已提交
928
	if (list_empty(&walk->walk.all))
929
		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
H
Herbert Xu 已提交
930 931
	else
		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
932
	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
H
Herbert Xu 已提交
933
		if (x->dead)
934
			continue;
H
Herbert Xu 已提交
935 936 937 938 939 940 941 942 943
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
944
		}
H
Herbert Xu 已提交
945
		walk->seq++;
L
Linus Torvalds 已提交
946
	}
H
Herbert Xu 已提交
947
	if (walk->seq == 0) {
J
Jamal Hadi Salim 已提交
948 949 950
		error = -ENOENT;
		goto out;
	}
H
Herbert Xu 已提交
951
	list_del_init(&walk->walk.all);
L
Linus Torvalds 已提交
952
out:
F
Fan Du 已提交
953
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
954 955 956 957
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

H
Herbert Xu 已提交
958 959 960 961 962 963 964 965 966
/*
 * Prepare @walk for a fresh iteration over policies of @type: sequence
 * number 0, an unlinked list entry, and the entry flagged dead so that
 * list walkers skip it.
 */
void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
	walk->type = type;
	walk->seq = 0;

	INIT_LIST_HEAD(&walk->walk.all);
	walk->walk.dead = 1;
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

F
Fan Du 已提交
967
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
H
Herbert Xu 已提交
968 969 970 971
{
	if (list_empty(&walk->walk.all))
		return;

F
Fan Du 已提交
972
	write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
H
Herbert Xu 已提交
973
	list_del(&walk->walk.all);
F
Fan Du 已提交
974
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
H
Herbert Xu 已提交
975 976 977
}
EXPORT_SYMBOL(xfrm_policy_walk_done);

978 979 980 981 982
/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
983 984
static int xfrm_policy_match(const struct xfrm_policy *pol,
			     const struct flowi *fl,
985
			     u8 type, u16 family, int dir)
L
Linus Torvalds 已提交
986
{
987
	const struct xfrm_selector *sel = &pol->selector;
988 989
	int ret = -ESRCH;
	bool match;
L
Linus Torvalds 已提交
990

991
	if (pol->family != family ||
992
	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
993
	    pol->type != type)
994
		return ret;
L
Linus Torvalds 已提交
995

996
	match = xfrm_selector_match(sel, fl, family);
997
	if (match)
998
		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
999
						  dir);
1000

1001
	return ret;
1002
}
L
Linus Torvalds 已提交
1003

A
Alexey Dobriyan 已提交
1004
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
1005
						     const struct flowi *fl,
1006 1007
						     u16 family, u8 dir)
{
1008
	int err;
1009
	struct xfrm_policy *pol, *ret;
1010
	const xfrm_address_t *daddr, *saddr;
1011
	struct hlist_head *chain;
1012
	u32 priority = ~0U;
1013

1014 1015 1016 1017 1018
	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

F
Fan Du 已提交
1019
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
A
Alexey Dobriyan 已提交
1020
	chain = policy_hash_direct(net, daddr, saddr, family, dir);
1021
	ret = NULL;
1022
	hlist_for_each_entry(pol, chain, bydst) {
1023 1024 1025 1026 1027 1028 1029 1030 1031
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
1032
			ret = pol;
1033
			priority = ret->priority;
1034 1035 1036
			break;
		}
	}
A
Alexey Dobriyan 已提交
1037
	chain = &net->xfrm.policy_inexact[dir];
1038
	hlist_for_each_entry(pol, chain, bydst) {
1039 1040 1041 1042 1043 1044 1045 1046 1047
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
1048 1049
			ret = pol;
			break;
L
Linus Torvalds 已提交
1050 1051
		}
	}
1052 1053
	if (ret)
		xfrm_pol_hold(ret);
1054
fail:
F
Fan Du 已提交
1055
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
1056

1057
	return ret;
1058 1059
}

1060
static struct xfrm_policy *
1061
__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_policy *pol;

	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (pol != NULL)
		return pol;
#endif
	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}

1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090
/*
 * Translate a FLOW_DIR_* value into the corresponding XFRM_POLICY_*
 * value.  When the two sets of constants are numerically identical the
 * input is returned untouched; otherwise unknown directions map to
 * XFRM_POLICY_IN.
 */
static int flow_to_policy_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;

	if (dir == FLOW_DIR_OUT)
		return XFRM_POLICY_OUT;
	if (dir == FLOW_DIR_FWD)
		return XFRM_POLICY_FWD;

	/* FLOW_DIR_IN and anything unrecognised */
	return XFRM_POLICY_IN;
}

1091
static struct flow_cache_object *
1092
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
1093
		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
1094 1095
{
	struct xfrm_policy *pol;
1096 1097 1098

	if (old_obj)
		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
1099

1100
	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
1101
	if (IS_ERR_OR_NULL(pol))
1102 1103 1104 1105 1106 1107 1108
		return ERR_CAST(pol);

	/* Resolver returns two references:
	 * one for cache and one for caller of flow_cache_lookup() */
	xfrm_pol_hold(pol);

	return &pol->flo;
L
Linus Torvalds 已提交
1109 1110
}

1111 1112 1113
/*
 * Translate an XFRM_POLICY_* direction into the corresponding FLOW_DIR_*
 * value.  When the two sets of constants are numerically identical the
 * input is returned untouched; otherwise unknown directions map to
 * FLOW_DIR_IN.
 */
static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}

1128 1129
static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
						 const struct flowi *fl)
L
Linus Torvalds 已提交
1130 1131
{
	struct xfrm_policy *pol;
F
Fan Du 已提交
1132
	struct net *net = sock_net(sk);
L
Linus Torvalds 已提交
1133

F
Fan Du 已提交
1134
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1135
	if ((pol = sk->sk_policy[dir]) != NULL) {
1136 1137
		bool match = xfrm_selector_match(&pol->selector, fl,
						 sk->sk_family);
1138
		int err = 0;
1139

1140
		if (match) {
J
Jamal Hadi Salim 已提交
1141 1142 1143 1144
			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
				pol = NULL;
				goto out;
			}
1145
			err = security_xfrm_policy_lookup(pol->security,
1146
						      fl->flowi_secid,
1147
						      policy_to_flow_dir(dir));
1148 1149 1150 1151 1152 1153 1154
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
L
Linus Torvalds 已提交
1155 1156
			pol = NULL;
	}
J
Jamal Hadi Salim 已提交
1157
out:
F
Fan Du 已提交
1158
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1159 1160 1161 1162 1163
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
1164
	struct net *net = xp_net(pol);
1165
	struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
1166
						     pol->family, dir);
1167

1168
	list_add(&pol->walk.all, &net->xfrm.policy_all);
1169
	hlist_add_head(&pol->bydst, chain);
1170
	hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
1171
	net->xfrm.policy_count[dir]++;
L
Linus Torvalds 已提交
1172
	xfrm_pol_hold(pol);
1173

1174 1175
	if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);
L
Linus Torvalds 已提交
1176 1177 1178 1179 1180
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
1181 1182
	struct net *net = xp_net(pol);

1183 1184
	if (hlist_unhashed(&pol->bydst))
		return NULL;
L
Linus Torvalds 已提交
1185

1186
	hlist_del_init(&pol->bydst);
1187
	hlist_del(&pol->byidx);
H
Herbert Xu 已提交
1188
	list_del(&pol->walk.all);
1189
	net->xfrm.policy_count[dir]--;
1190 1191

	return pol;
L
Linus Torvalds 已提交
1192 1193
}

1194
int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
L
Linus Torvalds 已提交
1195
{
F
Fan Du 已提交
1196 1197 1198
	struct net *net = xp_net(pol);

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1199
	pol = __xfrm_policy_unlink(pol, dir);
F
Fan Du 已提交
1200
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1201 1202
	if (pol) {
		xfrm_policy_kill(pol);
1203
		return 0;
L
Linus Torvalds 已提交
1204
	}
1205
	return -ENOENT;
L
Linus Torvalds 已提交
1206
}
1207
EXPORT_SYMBOL(xfrm_policy_delete);
L
Linus Torvalds 已提交
1208 1209 1210

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
1211
	struct net *net = xp_net(pol);
L
Linus Torvalds 已提交
1212 1213
	struct xfrm_policy *old_pol;

1214 1215 1216 1217 1218
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

F
Fan Du 已提交
1219
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1220 1221 1222
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
1223
		pol->curlft.add_time = get_seconds();
1224
		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
L
Linus Torvalds 已提交
1225 1226
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
1227 1228 1229 1230
	if (old_pol) {
		if (pol)
			xfrm_policy_requeue(old_pol, pol);

1231 1232 1233
		/* Unlinking succeeds always. This is the only function
		 * allowed to delete or replace socket policy.
		 */
L
Linus Torvalds 已提交
1234
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1235
	}
F
Fan Du 已提交
1236
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1237 1238 1239 1240 1241 1242 1243

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

1244
static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
L
Linus Torvalds 已提交
1245
{
1246
	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
F
Fan Du 已提交
1247
	struct net *net = xp_net(old);
L
Linus Torvalds 已提交
1248 1249 1250

	if (newp) {
		newp->selector = old->selector;
1251 1252
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
1253 1254 1255
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
L
Linus Torvalds 已提交
1256 1257
		newp->lft = old->lft;
		newp->curlft = old->curlft;
1258
		newp->mark = old->mark;
L
Linus Torvalds 已提交
1259 1260 1261 1262
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
1263
		newp->type = old->type;
L
Linus Torvalds 已提交
1264 1265
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
F
Fan Du 已提交
1266
		write_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1267
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
F
Fan Du 已提交
1268
		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286
		xfrm_pol_put(newp);
	}
	return newp;
}

/*
 * Replace both per-socket policy slots of @sk with clones of the
 * policies currently stored there.  Both slots are cleared first, so a
 * slot that was empty stays empty.
 *
 * Returns 0 on success or -ENOMEM as soon as a clone fails.
 */
int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *src[2] = { sk->sk_policy[0], sk->sk_policy[1] };
	int slot;

	sk->sk_policy[1] = NULL;
	sk->sk_policy[0] = NULL;

	for (slot = 0; slot < 2; slot++) {
		if (!src[slot])
			continue;

		sk->sk_policy[slot] = clone_policy(src[slot], slot);
		if (sk->sk_policy[slot] == NULL)
			return -ENOMEM;
	}
	return 0;
}

1287
static int
A
Alexey Dobriyan 已提交
1288
xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
1289 1290 1291 1292 1293 1294 1295
	       unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
A
Alexey Dobriyan 已提交
1296
	err = afinfo->get_saddr(net, local, remote);
1297 1298 1299 1300
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

L
Linus Torvalds 已提交
1301 1302 1303
/* Resolve list of templates for the flow, given policy. */

static int
1304 1305
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
		      struct xfrm_state **xfrm, unsigned short family)
L
Linus Torvalds 已提交
1306
{
A
Alexey Dobriyan 已提交
1307
	struct net *net = xp_net(policy);
L
Linus Torvalds 已提交
1308 1309 1310 1311
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1312
	xfrm_address_t tmp;
L
Linus Torvalds 已提交
1313

1314
	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
L
Linus Torvalds 已提交
1315 1316 1317 1318 1319
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

1320 1321
		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
L
Linus Torvalds 已提交
1322 1323
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
1324 1325
			if (xfrm_addr_any(local, tmpl->encap_family)) {
				error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
1326 1327 1328 1329
				if (error)
					goto fail;
				local = &tmp;
			}
L
Linus Torvalds 已提交
1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
W
Weilong Chen 已提交
1344
		} else if (error == -ESRCH) {
1345
			error = -EAGAIN;
W
Weilong Chen 已提交
1346
		}
L
Linus Torvalds 已提交
1347 1348 1349 1350 1351 1352 1353

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
1354
	for (nx--; nx >= 0; nx--)
L
Linus Torvalds 已提交
1355 1356 1357 1358
		xfrm_state_put(xfrm[nx]);
	return error;
}

1359
static int
1360 1361
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
		  struct xfrm_state **xfrm, unsigned short family)
1362
{
1363 1364
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1365 1366 1367 1368 1369 1370 1371 1372 1373 1374
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}
1375 1376

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1377 1378 1379 1380 1381 1382 1383
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

1384 1385 1386 1387
	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

1388 1389 1390
	return cnx;

 fail:
1391
	for (cnx--; cnx >= 0; cnx--)
1392
		xfrm_state_put(tpp[cnx]);
1393 1394 1395 1396
	return error;

}

L
Linus Torvalds 已提交
1397 1398 1399 1400
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

1401
static inline int xfrm_get_tos(const struct flowi *fl, int family)
1402 1403 1404
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;
L
Linus Torvalds 已提交
1405

1406 1407 1408 1409 1410 1411 1412 1413 1414 1415
	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}

1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426
/*
 * flow_cache_ops.get handler for bundles.
 *
 * Returns @flo with a dst reference taken when the cached bundle may be
 * reused, or NULL when it must be resolved again: a dummy bundle that
 * still has transforms pending, a queueing bundle (DST_XFRM_QUEUE), or a
 * real bundle that stale_bundle() reports as stale.
 */
static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (xdst->route == NULL) {
		/* Dummy bundle - if it has xfrms we were not
		 * able to build bundle as template resolution failed.
		 * It means we need to try again resolving. */
		if (xdst->num_xfrms > 0)
			return NULL;
	} else if (dst->flags & DST_XFRM_QUEUE) {
		return NULL;
	} else {
		/* Real bundle */
		if (stale_bundle(dst))
			return NULL;
	}

	dst_hold(dst);
	return flo;
}

/*
 * flow_cache_ops.check handler for bundles.
 *
 * Returns 1 only for a bundle that has a route and is not reported
 * stale by stale_bundle(); 0 otherwise.
 */
static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_dst *bundle = container_of(flo, struct xfrm_dst, flo);

	/* Short-circuit keeps stale_bundle() from running on dummy bundles. */
	return (bundle->route && !stale_bundle(&bundle->u.dst)) ? 1 : 0;
}

/*
 * flow_cache_ops.delete handler for bundles: hand the dst embedded in
 * the bundle that owns @flo to dst_free().
 */
static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_dst *bundle = container_of(flo, struct xfrm_dst, flo);

	dst_free(&bundle->u.dst);
}

/* Flow cache callbacks installed on every bundle by xfrm_alloc_dst(). */
static const struct flow_cache_ops xfrm_bundle_fc_ops = {
	.get = xfrm_bundle_flo_get,
	.check = xfrm_bundle_flo_check,
	.delete = xfrm_bundle_flo_delete,
};

1466
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
L
Linus Torvalds 已提交
1467 1468
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1469
	struct dst_ops *dst_ops;
1470 1471 1472 1473 1474
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

1475 1476 1477 1478
	switch (family) {
	case AF_INET:
		dst_ops = &net->xfrm.xfrm4_dst_ops;
		break;
E
Eric Dumazet 已提交
1479
#if IS_ENABLED(CONFIG_IPV6)
1480 1481 1482 1483 1484 1485 1486
	case AF_INET6:
		dst_ops = &net->xfrm.xfrm6_dst_ops;
		break;
#endif
	default:
		BUG();
	}
1487
	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);
1488

1489
	if (likely(xdst)) {
1490 1491 1492
		struct dst_entry *dst = &xdst->u.dst;

		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
1493
		xdst->flo.ops = &xfrm_bundle_fc_ops;
1494 1495
		if (afinfo->init_dst)
			afinfo->init_dst(net, xdst);
1496
	} else
1497
		xdst = ERR_PTR(-ENOBUFS);
1498

1499 1500
	xfrm_policy_put_afinfo(afinfo);

1501 1502 1503
	return xdst;
}

1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520
static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(dst->ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->init_path(path, dst, nfheader_len);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

H
Herbert Xu 已提交
1521
static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1522
				const struct flowi *fl)
1523 1524 1525 1526 1527 1528
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
L
Linus Torvalds 已提交
1529
		return -EINVAL;
1530

H
Herbert Xu 已提交
1531
	err = afinfo->fill_dst(xdst, dev, fl);
1532

L
Linus Torvalds 已提交
1533
	xfrm_policy_put_afinfo(afinfo);
1534

L
Linus Torvalds 已提交
1535 1536 1537
	return err;
}

1538

1539 1540 1541 1542 1543 1544
/* Allocate chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... Shortly, bundle a bundle.
 */

static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
1545
					    const struct flowi *fl,
1546 1547
					    struct dst_entry *dst)
{
1548
	struct net *net = xp_net(policy);
1549 1550
	unsigned long now = jiffies;
	struct net_device *dev;
1551
	struct xfrm_mode *inner_mode;
1552 1553 1554 1555 1556
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
1557
	int nfheader_len = 0;
1558 1559 1560
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;
1561 1562 1563
	xfrm_address_t saddr, daddr;

	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1564 1565 1566 1567 1568 1569 1570 1571 1572

	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	for (; i < nx; i++) {
1573
		struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1574 1575 1576 1577 1578 1579 1580 1581
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592
		if (xfrm[i]->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(xfrm[i],
							xfrm_af2proto(family));
			if (!inner_mode) {
				err = -EAFNOSUPPORT;
				dst_release(dst);
				goto put_states;
			}
		} else
			inner_mode = xfrm[i]->inner_mode;

1593 1594 1595 1596 1597 1598 1599 1600
		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
1601
		dst_copy_metrics(dst1, dst);
1602 1603 1604

		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
1605 1606
			dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
					      family);
1607 1608 1609 1610 1611 1612 1613
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
1614
		xdst->xfrm_genid = xfrm[i]->genid;
1615

1616
		dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
1617 1618 1619 1620
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
1621
		dst1->output = inner_mode->afinfo->output;
1622 1623 1624 1625 1626

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
1627 1628
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639
		trailer_len += xfrm[i]->props.trailer_len;
	}

	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

1640
	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1641 1642 1643 1644 1645
	xfrm_init_pmtu(dst_prev);

	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

H
Herbert Xu 已提交
1646
		err = xfrm_fill_dst(xdst, dev, fl);
1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}

1669
#ifdef CONFIG_XFRM_SUB_POLICY
/*
 * Copy @size bytes from @src into *@target, allocating *@target
 * atomically first when it is still NULL.
 *
 * Returns 0 on success or -ENOMEM when the allocation fails.
 */
static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
{
	if (!*target) {
		*target = kmalloc(size, GFP_ATOMIC);
		if (!*target)
			return -ENOMEM;
	}

	memcpy(*target, src, size);
	return 0;
}
#endif
1682

1683 1684
/*
 * Record a copy of selector @sel in the bundle's "partner" slot.
 * Without CONFIG_XFRM_SUB_POLICY this does nothing and returns 0.
 *
 * Returns 0 on success or -ENOMEM from xfrm_dst_alloc_copy().
 */
static int xfrm_dst_update_parent(struct dst_entry *dst,
				  const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}

1695 1696
/*
 * Record a copy of flow @fl in the bundle's "origin" slot.
 * Without CONFIG_XFRM_SUB_POLICY this does nothing and returns 0.
 *
 * Returns 0 on success or -ENOMEM from xfrm_dst_alloc_copy().
 */
static int xfrm_dst_update_origin(struct dst_entry *dst,
				  const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}
L
Linus Torvalds 已提交
1705

1706
/*
 * Turn the caller's initial policy lookup result into the final policy
 * set for a flow.
 *
 * @pols:      in: pols[0] is the lookup result (may be NULL or ERR_PTR);
 *             out: with sub-policy support, pols[1] may be filled with
 *             the main policy
 * @num_pols:  in: number of entries the caller filled; out: number of
 *             valid policies in @pols (0 on error)
 * @num_xfrms: out: total template count, or -1 if any policy's action is
 *             not XFRM_POLICY_ALLOW
 *
 * Returns 0 on success or the errno carried by an ERR_PTR policy.
 *
 * On error *num_pols is reset to 0: either pols[0] was never a real
 * policy, or the references have already been dropped here.  Callers
 * that run xfrm_pols_put(pols, num_pols) on their error path therefore
 * neither touch an ERR_PTR nor drop a reference twice.
 */
static int xfrm_expand_policies(const struct flowi *fl, u16 family,
				struct xfrm_policy **pols,
				int *num_pols, int *num_xfrms)
{
	int i;

	if (*num_pols == 0 || !pols[0]) {
		*num_pols = 0;
		*num_xfrms = 0;
		return 0;
	}
	if (IS_ERR(pols[0])) {
		*num_pols = 0;
		return PTR_ERR(pols[0]);
	}

	*num_xfrms = pols[0]->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->action == XFRM_POLICY_ALLOW &&
	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
						    XFRM_POLICY_TYPE_MAIN,
						    fl, family,
						    XFRM_POLICY_OUT);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				xfrm_pols_put(pols, *num_pols);
				*num_pols = 0;
				return PTR_ERR(pols[1]);
			}
			(*num_pols)++;
			(*num_xfrms) += pols[1]->xfrm_nr;
		}
	}
#endif
	for (i = 0; i < *num_pols; i++) {
		if (pols[i]->action != XFRM_POLICY_ALLOW) {
			*num_xfrms = -1;
			break;
		}
	}

	return 0;

}

/*
 * Resolve the templates of @pols into states and build a bundle on top
 * of @dst_orig.
 *
 * Returns the new bundle with its policy array and policy genid filled
 * in.  When template resolution yields no states the result is
 * ERR_PTR(0), i.e. NULL; callers test for that explicitly.  Any other
 * failure returns an ERR_PTR() and bumps the matching XFRM statistic
 * (-EAGAIN from resolution is not counted).
 */
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
			       const struct flowi *fl, u16 family,
			       struct dst_entry *dst_orig)
{
	struct net *net = xp_net(pols[0]);
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst;
	struct xfrm_dst *xdst;
	int err;

	/* Try to instantiate a bundle */
	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
	if (err <= 0) {
		if (err != 0 && err != -EAGAIN)
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
		return ERR_PTR(err);
	}

	/* err now holds the number of resolved states. */
	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
	if (IS_ERR(dst)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
		return ERR_CAST(dst);
	}

	xdst = (struct xfrm_dst *)dst;
	xdst->num_xfrms = err;
	if (num_pols > 1)
		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
	else
		err = xfrm_dst_update_origin(dst, fl);
	if (unlikely(err)) {
		dst_free(dst);
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
		return ERR_PTR(err);
	}

	xdst->num_pols = num_pols;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
	xdst->policy_genid = atomic_read(&pols[0]->genid);

	return xdst;
}

1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806
static void xfrm_policy_queue_process(unsigned long arg)
{
	int err = 0;
	struct sk_buff *skb;
	struct sock *sk;
	struct dst_entry *dst;
	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
	struct xfrm_policy_queue *pq = &pol->polq;
	struct flowi fl;
	struct sk_buff_head list;

	spin_lock(&pq->hold_queue.lock);
	skb = skb_peek(&pq->hold_queue);
1807 1808 1809 1810
	if (!skb) {
		spin_unlock(&pq->hold_queue.lock);
		goto out;
	}
1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828
	dst = skb_dst(skb);
	sk = skb->sk;
	xfrm_decode_session(skb, &fl, dst->ops->family);
	spin_unlock(&pq->hold_queue.lock);

	dst_hold(dst->path);
	dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
			  sk, 0);
	if (IS_ERR(dst))
		goto purge_queue;

	if (dst->flags & DST_XFRM_QUEUE) {
		dst_release(dst);

		if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
			goto purge_queue;

		pq->timeout = pq->timeout << 1;
1829 1830 1831
		if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
			xfrm_pol_hold(pol);
	goto out;
1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861
	}

	dst_release(dst);

	__skb_queue_head_init(&list);

	spin_lock(&pq->hold_queue.lock);
	pq->timeout = 0;
	skb_queue_splice_init(&pq->hold_queue, &list);
	spin_unlock(&pq->hold_queue.lock);

	while (!skb_queue_empty(&list)) {
		skb = __skb_dequeue(&list);

		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
		dst_hold(skb_dst(skb)->path);
		dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
				  &fl, skb->sk, 0);
		if (IS_ERR(dst)) {
			kfree_skb(skb);
			continue;
		}

		nf_reset(skb);
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);

		err = dst_output(skb);
	}

1862 1863
out:
	xfrm_pol_put(pol);
1864 1865 1866 1867 1868
	return;

purge_queue:
	pq->timeout = 0;
	xfrm_queue_purge(&pq->hold_queue);
1869
	xfrm_pol_put(pol);
1870 1871
}

1872
static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
1873 1874 1875 1876
{
	unsigned long sched_next;
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
1877 1878
	struct xfrm_policy *pol = xdst->pols[0];
	struct xfrm_policy_queue *pq = &pol->polq;
1879 1880 1881 1882 1883 1884 1885
	const struct sk_buff *fclone = skb + 1;

	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
		     fclone->fclone == SKB_FCLONE_CLONE)) {
		kfree_skb(skb);
		return 0;
	}
1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903

	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
		kfree_skb(skb);
		return -EAGAIN;
	}

	skb_dst_force(skb);

	spin_lock_bh(&pq->hold_queue.lock);

	if (!pq->timeout)
		pq->timeout = XFRM_QUEUE_TMO_MIN;

	sched_next = jiffies + pq->timeout;

	if (del_timer(&pq->hold_timer)) {
		if (time_before(pq->hold_timer.expires, sched_next))
			sched_next = pq->hold_timer.expires;
1904
		xfrm_pol_put(pol);
1905 1906 1907
	}

	__skb_queue_tail(&pq->hold_queue, skb);
1908 1909
	if (!mod_timer(&pq->hold_timer, sched_next))
		xfrm_pol_hold(pol);
1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930

	spin_unlock_bh(&pq->hold_queue.lock);

	return 0;
}

/*
 * Create a placeholder bundle for a flow whose real bundle cannot be
 * built (yet).
 *
 * When larval drop is enabled for the namespace, or there are no
 * transforms to wait for (@num_xfrms <= 0), a bare xfrm_dst without a
 * route is returned.  Otherwise the bundle is set up as a queueing
 * bundle on top of @dst: it is flagged DST_XFRM_QUEUE and its output
 * handler is xdst_queue_output().
 *
 * Returns the bundle or an ERR_PTR().
 */
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
						 struct dst_entry *dst,
						 const struct flowi *fl,
						 int num_xfrms,
						 u16 family)
{
	int err;
	struct net_device *dev;
	struct dst_entry *dst1;
	struct xfrm_dst *xdst;

	xdst = xfrm_alloc_dst(net, family);
	if (IS_ERR(xdst))
		return xdst;

	if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0)
		return xdst;

	dst1 = &xdst->u.dst;
	/* One reference for xdst->route ... */
	dst_hold(dst);
	xdst->route = dst;

	dst_copy_metrics(dst1, dst);

	dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
	dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
	dst1->lastuse = jiffies;

	dst1->input = dst_discard;
	dst1->output = xdst_queue_output;

	/* ... and a second one taken before storing it as child/path. */
	dst_hold(dst);
	dst1->child = dst;
	dst1->path = dst;

	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	err = xfrm_fill_dst(xdst, dev, fl);
	if (err)
		goto free_dst;

out:
	return xdst;

free_dst:
	dst_release(dst1);
	xdst = ERR_PTR(err);
	goto out;
}

1971
static struct flow_cache_object *
1972
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
		   struct flow_cache_object *oldflo, void *ctx)
{
	struct dst_entry *dst_orig = (struct dst_entry *)ctx;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct xfrm_dst *xdst, *new_xdst;
	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;

	/* Check if the policies from old bundle are usable */
	xdst = NULL;
	if (oldflo) {
		xdst = container_of(oldflo, struct xfrm_dst, flo);
		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		pol_dead = 0;
		for (i = 0; i < num_pols; i++) {
			pols[i] = xdst->pols[i];
			pol_dead |= pols[i]->walk.dead;
		}
		if (pol_dead) {
			dst_free(&xdst->u.dst);
			xdst = NULL;
			num_pols = 0;
			num_xfrms = 0;
			oldflo = NULL;
		}
	}

	/* Resolve policies to use if we couldn't get them from
	 * previous cache entry */
	if (xdst == NULL) {
		num_pols = 1;
2004 2005
		pols[0] = __xfrm_policy_lookup(net, fl, family,
					       flow_to_policy_dir(dir));
2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto inc_error;
		if (num_pols == 0)
			return NULL;
		if (num_xfrms <= 0)
			goto make_dummy_bundle;
	}

	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
	if (IS_ERR(new_xdst)) {
		err = PTR_ERR(new_xdst);
		if (err != -EAGAIN)
			goto error;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		dst_hold(&xdst->u.dst);
		return oldflo;
2025 2026 2027 2028 2029 2030 2031
	} else if (new_xdst == NULL) {
		num_xfrms = 0;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		xdst->num_xfrms = 0;
		dst_hold(&xdst->u.dst);
		return oldflo;
2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049
	}

	/* Kill the previous bundle */
	if (xdst) {
		/* The policies were stolen for newly generated bundle */
		xdst->num_pols = 0;
		dst_free(&xdst->u.dst);
	}

	/* Flow cache does not have reference, it dst_free()'s,
	 * but we do need to return one reference for original caller */
	dst_hold(&new_xdst->u.dst);
	return &new_xdst->flo;

make_dummy_bundle:
	/* We found policies, but there's no bundles to instantiate:
	 * either because the policy blocks, has no transformations or
	 * we could not build template (no xfrm_states).*/
2050
	xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
2051 2052 2053 2054 2055 2056
	if (IS_ERR(xdst)) {
		xfrm_pols_put(pols, num_pols);
		return ERR_CAST(xdst);
	}
	xdst->num_pols = num_pols;
	xdst->num_xfrms = num_xfrms;
2057
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070

	dst_hold(&xdst->u.dst);
	return &xdst->flo;

inc_error:
	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
	if (xdst != NULL)
		dst_free(&xdst->u.dst);
	else
		xfrm_pols_put(pols, num_pols);
	return ERR_PTR(err);
}
L
Linus Torvalds 已提交
2071

2072 2073 2074 2075 2076 2077 2078 2079
/*
 * Ask the address-family backend for a blackhole route derived from
 * @dst_orig.
 *
 * Returns the backend's blackhole_route() result.  When no backend is
 * registered for @family, the reference on @dst_orig is released here
 * and ERR_PTR(-EINVAL) is returned.
 */
static struct dst_entry *make_blackhole(struct net *net, u16 family,
					struct dst_entry *dst_orig)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_entry *ret;

	if (!afinfo) {
		dst_release(dst_orig);
		return ERR_PTR(-EINVAL);
	}

	ret = afinfo->blackhole_route(net, dst_orig);
	xfrm_policy_put_afinfo(afinfo);

	return ret;
}

L
Linus Torvalds 已提交
2089 2090 2091 2092 2093
/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
2094 2095 2096
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl,
			      struct sock *sk, int flags)
L
Linus Torvalds 已提交
2097
{
2098
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2099 2100
	struct flow_cache_object *flo;
	struct xfrm_dst *xdst;
2101
	struct dst_entry *dst, *route;
2102
	u16 family = dst_orig->ops->family;
2103
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
2104
	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
2105

2106 2107 2108
	dst = NULL;
	xdst = NULL;
	route = NULL;
2109

2110
	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
2111 2112 2113 2114 2115
		num_pols = 1;
		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
2116
			goto dropdst;
2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130

		if (num_pols) {
			if (num_xfrms <= 0) {
				drop_pols = num_pols;
				goto no_transform;
			}

			xdst = xfrm_resolve_and_create_bundle(
					pols, num_pols, fl,
					family, dst_orig);
			if (IS_ERR(xdst)) {
				xfrm_pols_put(pols, num_pols);
				err = PTR_ERR(xdst);
				goto dropdst;
2131 2132 2133 2134
			} else if (xdst == NULL) {
				num_xfrms = 0;
				drop_pols = num_pols;
				goto no_transform;
2135 2136
			}

2137 2138
			dst_hold(&xdst->u.dst);
			xdst->u.dst.flags |= DST_NOCACHE;
2139
			route = xdst->route;
2140
		}
2141
	}
L
Linus Torvalds 已提交
2142

2143
	if (xdst == NULL) {
L
Linus Torvalds 已提交
2144
		/* To accelerate a bit...  */
2145
		if ((dst_orig->flags & DST_NOXFRM) ||
A
Alexey Dobriyan 已提交
2146
		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
2147
			goto nopol;
L
Linus Torvalds 已提交
2148

2149 2150 2151 2152
		flo = flow_cache_lookup(net, fl, family, dir,
					xfrm_bundle_lookup, dst_orig);
		if (flo == NULL)
			goto nopol;
2153
		if (IS_ERR(flo)) {
2154
			err = PTR_ERR(flo);
2155
			goto dropdst;
2156
		}
2157 2158 2159 2160
		xdst = container_of(flo, struct xfrm_dst, flo);

		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
2161
		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174
		route = xdst->route;
	}

	dst = &xdst->u.dst;
	if (route == NULL && num_xfrms > 0) {
		/* The only case when xfrm_bundle_lookup() returns a
		 * bundle with null route, is when the template could
		 * not be resolved. It means policies are there, but
		 * bundle could not be created, since we don't yet
		 * have the xfrm_state's. We need to wait for KM to
		 * negotiate new SA's or bail out with error.*/
		if (net->xfrm.sysctl_larval_drop) {
			dst_release(dst);
2175
			xfrm_pols_put(pols, drop_pols);
2176
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
2177

2178
			return make_blackhole(net, family, dst_orig);
2179 2180
		}

2181
		err = -EAGAIN;
2182 2183 2184

		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
		goto error;
L
Linus Torvalds 已提交
2185 2186
	}

2187 2188
no_transform:
	if (num_pols == 0)
2189
		goto nopol;
L
Linus Torvalds 已提交
2190

2191 2192 2193
	if ((flags & XFRM_LOOKUP_ICMP) &&
	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
		err = -ENOENT;
2194
		goto error;
2195
	}
2196

2197 2198
	for (i = 0; i < num_pols; i++)
		pols[i]->curlft.use_time = get_seconds();
2199

2200
	if (num_xfrms < 0) {
L
Linus Torvalds 已提交
2201
		/* Prohibit the flow */
A
Alexey Dobriyan 已提交
2202
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
2203 2204
		err = -EPERM;
		goto error;
2205 2206 2207 2208 2209 2210
	} else if (num_xfrms > 0) {
		/* Flow transformed */
		dst_release(dst_orig);
	} else {
		/* Flow passes untransformed */
		dst_release(dst);
2211
		dst = dst_orig;
L
Linus Torvalds 已提交
2212
	}
2213 2214
ok:
	xfrm_pols_put(pols, drop_pols);
G
Gao feng 已提交
2215 2216 2217
	if (dst && dst->xfrm &&
	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
		dst->flags |= DST_XFRM_TUNNEL;
2218
	return dst;
L
Linus Torvalds 已提交
2219

2220
nopol:
2221 2222
	if (!(flags & XFRM_LOOKUP_ICMP)) {
		dst = dst_orig;
2223
		goto ok;
2224
	}
2225
	err = -ENOENT;
L
Linus Torvalds 已提交
2226
error:
2227
	dst_release(dst);
2228 2229
dropdst:
	dst_release(dst_orig);
2230
	xfrm_pols_put(pols, drop_pols);
2231
	return ERR_PTR(err);
L
Linus Torvalds 已提交
2232 2233 2234
}
EXPORT_SYMBOL(xfrm_lookup);

2235
static inline int
2236
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
2237 2238 2239 2240 2241 2242 2243 2244
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
2245
	return x->type->reject(x, skb, fl);
2246 2247
}

L
Linus Torvalds 已提交
2248 2249 2250 2251 2252 2253 2254
/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we make this in maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have policy cached at them.
 */

static inline int
2255
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
L
Linus Torvalds 已提交
2256 2257 2258
	      unsigned short family)
{
	if (xfrm_state_kern(x))
2259
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
L
Linus Torvalds 已提交
2260 2261 2262 2263
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
2264
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
2265
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
2266 2267
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
L
Linus Torvalds 已提交
2268 2269
}

2270 2271 2272 2273 2274 2275 2276
/*
 * 0 or more than 0 is returned when validation is succeeded (either bypass
 * because of optional transport mode, or next index of the mathced secpath
 * state with the template.
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
L
Linus Torvalds 已提交
2277
static inline int
2278
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
L
Linus Torvalds 已提交
2279 2280 2281 2282 2283
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
2284
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
L
Linus Torvalds 已提交
2285 2286 2287 2288
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
2289
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
L
Linus Torvalds 已提交
2290
			return ++idx;
2291 2292 2293
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2-idx;
L
Linus Torvalds 已提交
2294
			break;
2295
		}
L
Linus Torvalds 已提交
2296 2297 2298 2299
	}
	return start;
}

2300 2301
int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
L
Linus Torvalds 已提交
2302 2303
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2304
	int err;
L
Linus Torvalds 已提交
2305 2306 2307 2308

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

2309
	afinfo->decode_session(skb, fl, reverse);
2310
	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
L
Linus Torvalds 已提交
2311
	xfrm_policy_put_afinfo(afinfo);
2312
	return err;
L
Linus Torvalds 已提交
2313
}
2314
EXPORT_SYMBOL(__xfrm_decode_session);
L
Linus Torvalds 已提交
2315

2316
static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
L
Linus Torvalds 已提交
2317 2318
{
	for (; k < sp->len; k++) {
2319
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
2320
			*idxp = k;
L
Linus Torvalds 已提交
2321
			return 1;
2322
		}
L
Linus Torvalds 已提交
2323 2324 2325 2326 2327
	}

	return 0;
}

2328
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
L
Linus Torvalds 已提交
2329 2330
			unsigned short family)
{
2331
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
2332
	struct xfrm_policy *pol;
2333 2334 2335 2336
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
2337
	int reverse;
L
Linus Torvalds 已提交
2338
	struct flowi fl;
2339
	u8 fl_dir;
2340
	int xerr_idx = -1;
L
Linus Torvalds 已提交
2341

2342 2343 2344 2345
	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

2346
	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
A
Alexey Dobriyan 已提交
2347
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
L
Linus Torvalds 已提交
2348
		return 0;
2349 2350
	}

2351
	nf_nat_decode_session(skb, &fl, family);
L
Linus Torvalds 已提交
2352 2353 2354 2355 2356

	/* First, check used SA against their selectors. */
	if (skb->sp) {
		int i;

2357
		for (i = skb->sp->len-1; i >= 0; i--) {
2358
			struct xfrm_state *x = skb->sp->xvec[i];
2359
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
A
Alexey Dobriyan 已提交
2360
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
L
Linus Torvalds 已提交
2361
				return 0;
2362
			}
L
Linus Torvalds 已提交
2363 2364 2365 2366
		}
	}

	pol = NULL;
2367
	if (sk && sk->sk_policy[dir]) {
2368
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
2369
		if (IS_ERR(pol)) {
A
Alexey Dobriyan 已提交
2370
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2371
			return 0;
2372
		}
2373
	}
L
Linus Torvalds 已提交
2374

2375 2376 2377 2378 2379 2380 2381 2382 2383 2384
	if (!pol) {
		struct flow_cache_object *flo;

		flo = flow_cache_lookup(net, &fl, family, fl_dir,
					xfrm_policy_lookup, NULL);
		if (IS_ERR_OR_NULL(flo))
			pol = ERR_CAST(flo);
		else
			pol = container_of(flo, struct xfrm_policy, flo);
	}
L
Linus Torvalds 已提交
2385

2386
	if (IS_ERR(pol)) {
A
Alexey Dobriyan 已提交
2387
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2388
		return 0;
2389
	}
2390

2391
	if (!pol) {
2392
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
2393
			xfrm_secpath_reject(xerr_idx, skb, &fl);
A
Alexey Dobriyan 已提交
2394
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
2395 2396 2397 2398
			return 0;
		}
		return 1;
	}
L
Linus Torvalds 已提交
2399

2400
	pol->curlft.use_time = get_seconds();
L
Linus Torvalds 已提交
2401

2402
	pols[0] = pol;
2403
	npols++;
2404 2405
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
2406
		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
2407 2408 2409
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
2410
			if (IS_ERR(pols[1])) {
A
Alexey Dobriyan 已提交
2411
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2412
				return 0;
2413
			}
2414
			pols[1]->curlft.use_time = get_seconds();
2415
			npols++;
2416 2417 2418 2419
		}
	}
#endif

L
Linus Torvalds 已提交
2420 2421 2422
	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
2423
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
2424
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
2425 2426
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
L
Linus Torvalds 已提交
2427 2428 2429 2430 2431
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

2432 2433
		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
2434
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
A
Alexey Dobriyan 已提交
2435
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2436
				goto reject;
2437 2438
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
A
Alexey Dobriyan 已提交
2439
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
2440
				goto reject_error;
2441
			}
2442 2443 2444 2445
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
2446
		if (npols > 1) {
F
Fan Du 已提交
2447
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
2448 2449
			tpp = stp;
		}
2450

L
Linus Torvalds 已提交
2451 2452 2453 2454 2455 2456
		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
2457 2458
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
2459
			if (k < 0) {
2460 2461 2462
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
A
Alexey Dobriyan 已提交
2463
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
L
Linus Torvalds 已提交
2464
				goto reject;
2465
			}
L
Linus Torvalds 已提交
2466 2467
		}

2468
		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
A
Alexey Dobriyan 已提交
2469
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
L
Linus Torvalds 已提交
2470
			goto reject;
2471
		}
L
Linus Torvalds 已提交
2472

2473
		xfrm_pols_put(pols, npols);
L
Linus Torvalds 已提交
2474 2475
		return 1;
	}
A
Alexey Dobriyan 已提交
2476
	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
L
Linus Torvalds 已提交
2477 2478

reject:
2479
	xfrm_secpath_reject(xerr_idx, skb, &fl);
2480 2481
reject_error:
	xfrm_pols_put(pols, npols);
L
Linus Torvalds 已提交
2482 2483 2484 2485 2486 2487
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
2488
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
2489
	struct flowi fl;
E
Eric Dumazet 已提交
2490
	struct dst_entry *dst;
E
Eric Dumazet 已提交
2491
	int res = 1;
L
Linus Torvalds 已提交
2492

2493
	if (xfrm_decode_session(skb, &fl, family) < 0) {
2494
		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
L
Linus Torvalds 已提交
2495
		return 0;
2496
	}
L
Linus Torvalds 已提交
2497

2498
	skb_dst_force(skb);
E
Eric Dumazet 已提交
2499

2500 2501
	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
	if (IS_ERR(dst)) {
E
Eric Dumazet 已提交
2502
		res = 0;
2503 2504
		dst = NULL;
	}
E
Eric Dumazet 已提交
2505 2506
	skb_dst_set(skb, dst);
	return res;
L
Linus Torvalds 已提交
2507 2508 2509
}
EXPORT_SYMBOL(__xfrm_route_forward);

2510 2511
/* Optimize later using cookies and generation ids. */

L
Linus Torvalds 已提交
2512 2513
static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete to
	 * DST_OBSOLETE_FORCE_CHK so that every XFRM destination is
	 * validated through dst_ops->check on each use.  This is needed
	 * because, when a normal route referenced by an XFRM dst becomes
	 * obsolete, nobody walks the XFRM dsts that reference it in order
	 * to invalidate them; that would be too much work.  The check is
	 * made here instead.  For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That is what the stale_bundle() check validates.
	 *
	 * When a policy's bundle is pruned, the XFRM dst is dst_free()'d,
	 * which sets its ->obsolete field to DST_OBSOLETE_DEAD.  For such
	 * a pruned dst we want to force a new route lookup.
	 */
	if (dst->obsolete >= 0)
		return NULL;

	if (stale_bundle(dst))
		return NULL;

	return dst;
}

/* Returns 1 when the bundle headed by @dst fails xfrm_bundle_ok(),
 * 0 when it is still usable.
 */
static int stale_bundle(struct dst_entry *dst)
{
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

	return xfrm_bundle_ok(xdst) ? 0 : 1;
}

H
Herbert Xu 已提交
2546
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
L
Linus Torvalds 已提交
2547 2548
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2549
		dst->dev = dev_net(dev)->loopback_dev;
2550
		dev_hold(dst->dev);
L
Linus Torvalds 已提交
2551 2552 2553
		dev_put(dev);
	}
}
H
Herbert Xu 已提交
2554
EXPORT_SYMBOL(xfrm_dst_ifdown);
L
Linus Torvalds 已提交
2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571

/* Default dst_ops->link_failure handler installed by
 * xfrm_policy_register_afinfo(); deliberately does nothing.
 */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such a dst must be popped before it reaches the point of failure. */
}

/* Default dst_ops->negative_advice handler: an obsolete dst is released and
 * NULL is returned; a NULL or non-obsolete dst is returned unchanged.
 */
static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (dst == NULL || !dst->obsolete)
		return dst;

	dst_release(dst);
	return NULL;
}

2572
/* Flush the flow cache of @net via flow_cache_flush(). */
void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
2577 2578 2579

/* Default afinfo->garbage_collect handler: flush the flow cache of @net
 * via flow_cache_flush_deferred().
 */
static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred(net);
}

2583
static void xfrm_init_pmtu(struct dst_entry *dst)
L
Linus Torvalds 已提交
2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

2600
		dst_metric_set(dst, RTAX_MTU, pmtu);
L
Linus Torvalds 已提交
2601 2602 2603 2604 2605 2606 2607
	} while ((dst = dst->next));
}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

2608
static int xfrm_bundle_ok(struct xfrm_dst *first)
L
Linus Torvalds 已提交
2609 2610 2611 2612 2613
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

2614
	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
L
Linus Torvalds 已提交
2615 2616 2617
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

2618 2619 2620
	if (dst->flags & DST_XFRM_QUEUE)
		return 1;

L
Linus Torvalds 已提交
2621 2622 2623 2624 2625 2626 2627
	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
2628 2629
		if (xdst->xfrm_genid != dst->xfrm->genid)
			return 0;
2630 2631
		if (xdst->num_pols > 0 &&
		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2632
			return 0;
2633

L
Linus Torvalds 已提交
2634 2635 2636 2637 2638 2639
		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

2640
		if (!dst_check(xdst->route, xdst->route_cookie))
L
Linus Torvalds 已提交
2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
2661
		dst_metric_set(dst, RTAX_MTU, mtu);
L
Linus Torvalds 已提交
2662 2663 2664 2665

		if (last == first)
			break;

2666
		last = (struct xfrm_dst *)last->u.dst.next;
L
Linus Torvalds 已提交
2667 2668 2669 2670 2671 2672
		last->child_mtu_cached = mtu;
	}

	return 1;
}

2673 2674 2675 2676 2677
/* Default dst_ops->default_advmss handler installed by
 * xfrm_policy_register_afinfo(): report the advmss metric of dst->path.
 */
static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
	return dst_metric_advmss(dst->path);
}

2678
static unsigned int xfrm_mtu(const struct dst_entry *dst)
2679
{
2680 2681 2682
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst_mtu(dst->path);
2683 2684
}

2685 2686 2687
static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
2688
{
2689
	return dst->path->ops->neigh_lookup(dst, skb, daddr);
2690 2691
}

L
Linus Torvalds 已提交
2692 2693
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
2694
	struct net *net;
L
Linus Torvalds 已提交
2695 2696 2697 2698 2699
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
E
Eric Dumazet 已提交
2700
	spin_lock(&xfrm_policy_afinfo_lock);
L
Linus Torvalds 已提交
2701 2702 2703 2704 2705 2706 2707 2708
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
2709 2710
		if (likely(dst_ops->default_advmss == NULL))
			dst_ops->default_advmss = xfrm_default_advmss;
2711 2712
		if (likely(dst_ops->mtu == NULL))
			dst_ops->mtu = xfrm_mtu;
L
Linus Torvalds 已提交
2713 2714 2715 2716
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
2717 2718
		if (likely(dst_ops->neigh_lookup == NULL))
			dst_ops->neigh_lookup = xfrm_neigh_lookup;
L
Linus Torvalds 已提交
2719
		if (likely(afinfo->garbage_collect == NULL))
2720
			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
2721
		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
L
Linus Torvalds 已提交
2722
	}
E
Eric Dumazet 已提交
2723
	spin_unlock(&xfrm_policy_afinfo_lock);
2724 2725 2726 2727 2728 2729 2730 2731 2732

	rtnl_lock();
	for_each_net(net) {
		struct dst_ops *xfrm_dst_ops;

		switch (afinfo->family) {
		case AF_INET:
			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
			break;
E
Eric Dumazet 已提交
2733
#if IS_ENABLED(CONFIG_IPV6)
2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744
		case AF_INET6:
			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
			break;
#endif
		default:
			BUG();
		}
		*xfrm_dst_ops = *afinfo->dst_ops;
	}
	rtnl_unlock();

L
Linus Torvalds 已提交
2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755
	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
E
Eric Dumazet 已提交
2756
	spin_lock(&xfrm_policy_afinfo_lock);
L
Linus Torvalds 已提交
2757 2758 2759
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
E
Eric Dumazet 已提交
2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774
		else
			RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
					 NULL);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);
	if (!err) {
		struct dst_ops *dst_ops = afinfo->dst_ops;

		synchronize_rcu();

		dst_ops->kmem_cachep = NULL;
		dst_ops->check = NULL;
		dst_ops->negative_advice = NULL;
		dst_ops->link_failure = NULL;
		afinfo->garbage_collect = NULL;
L
Linus Torvalds 已提交
2775 2776 2777 2778 2779
	}
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

2780 2781 2782 2783
/* Seed the per-namespace xfrm4/xfrm6 dst_ops of @net from whichever policy
 * afinfos are currently registered.  A family without a registered afinfo
 * leaves the corresponding dst_ops untouched.
 */
static void __net_init xfrm_dst_ops_init(struct net *net)
{
	struct xfrm_policy_afinfo *afinfo4;
#if IS_ENABLED(CONFIG_IPV6)
	struct xfrm_policy_afinfo *afinfo6;
#endif

	rcu_read_lock();

	afinfo4 = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
	if (afinfo4 != NULL)
		net->xfrm.xfrm4_dst_ops = *afinfo4->dst_ops;

#if IS_ENABLED(CONFIG_IPV6)
	afinfo6 = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
	if (afinfo6 != NULL)
		net->xfrm.xfrm6_dst_ops = *afinfo6->dst_ops;
#endif

	rcu_read_unlock();
}

L
Linus Torvalds 已提交
2796 2797
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
2798
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2799

L
Linus Torvalds 已提交
2800 2801
	switch (event) {
	case NETDEV_DOWN:
2802
		xfrm_garbage_collect(dev_net(dev));
L
Linus Torvalds 已提交
2803 2804 2805 2806 2807
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
A
Alexey Dobriyan 已提交
2808
	.notifier_call	= xfrm_dev_event,
L
Linus Torvalds 已提交
2809 2810
};

2811
#ifdef CONFIG_XFRM_STATISTICS
/* Allocate the per-cpu xfrm MIB counters of @net and set up the xfrm proc
 * interface.  Returns -ENOMEM when the allocation fails, otherwise the
 * result of xfrm_proc_init(); the counters are freed again if that result
 * is negative.
 */
static int __net_init xfrm_statistics_init(struct net *net)
{
	int rv;

	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
	if (net->mib.xfrm_statistics == NULL)
		return -ENOMEM;

	rv = xfrm_proc_init(net);
	if (rv >= 0)
		return rv;

	free_percpu(net->mib.xfrm_statistics);
	return rv;
}

/* Undo xfrm_statistics_init(): tear down proc, then free the counters. */
static void xfrm_statistics_fini(struct net *net)
{
	xfrm_proc_fini(net);
	free_percpu(net->mib.xfrm_statistics);
}
#else
/* Statistics are compiled out: nothing to set up. */
static int __net_init xfrm_statistics_init(struct net *net)
{
	return 0;
}

/* Statistics are compiled out: nothing to tear down. */
static void xfrm_statistics_fini(struct net *net)
{
}
#endif

2840
/* Set up the policy tables of namespace @net.
 *
 * Allocates the by-index hash and one by-destination hash per direction
 * slot, each with 8 buckets, and initialises the policy list and the hash
 * resize work.  For init_net only, it additionally creates the xfrm_dst slab
 * cache and registers the netdevice notifier.
 *
 * Returns 0 on success or -ENOMEM after freeing whatever was allocated.
 */
static int __net_init xfrm_policy_init(struct net *net)
{
	unsigned int hmask, sz;
	int dir;

	if (net_eq(net, &init_net))
		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	/* initial table size: 8 buckets */
	hmask = 8 - 1;
	sz = (hmask+1) * sizeof(struct hlist_head);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (net->xfrm.policy_byidx == NULL)
		return -ENOMEM;
	net->xfrm.policy_idx_hmask = hmask;

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab = &net->xfrm.policy_bydst[dir];

		net->xfrm.policy_count[dir] = 0;
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);

		htab->table = xfrm_hash_alloc(sz);
		if (htab->table == NULL)
			goto out_bydst;
		htab->hmask = hmask;
		htab->dbits4 = 32;
		htab->sbits4 = 32;
		htab->dbits6 = 128;
		htab->sbits6 = 128;
	}

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
	if (net_eq(net, &init_net))
		register_netdevice_notifier(&xfrm_dev_notifier);
	return 0;

out_bydst:
	/* Free only the tables allocated before the failing slot. */
	while (--dir >= 0)
		xfrm_hash_free(net->xfrm.policy_bydst[dir].table, sz);
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
	return -ENOMEM;
}

static void xfrm_policy_fini(struct net *net)
{
2896
	unsigned int sz;
2897
	int dir;
2898

2899 2900
	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
2901
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
2902
#endif
2903
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
2904

2905
	WARN_ON(!list_empty(&net->xfrm.policy_all));
2906

2907
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2908 2909
		struct xfrm_policy_hash *htab;

2910
		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
2911 2912

		htab = &net->xfrm.policy_bydst[dir];
2913
		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
2914 2915
		WARN_ON(!hlist_empty(htab->table));
		xfrm_hash_free(htab->table, sz);
2916 2917
	}

2918
	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
2919 2920
	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
L
Linus Torvalds 已提交
2921 2922
}

2923 2924 2925 2926
/* Per-namespace initialisation of the xfrm subsystem.
 *
 * Sets up, in order: the per-net locks, statistics, state tables, policy
 * tables, dst_ops, sysctls and the flow cache.  If a step fails, everything
 * set up before it is torn down in reverse order and the error is returned.
 *
 * Returns 0 on success or the negative error code of the failing step.
 */
static int __net_init xfrm_net_init(struct net *net)
{
	int rv;

	/* Initialize the per-net locks first: the unwind path below runs
	 * xfrm_policy_fini() and xfrm_state_fini(), which must never see
	 * them uninitialised.
	 */
	spin_lock_init(&net->xfrm.xfrm_state_lock);
	rwlock_init(&net->xfrm.xfrm_policy_lock);
	mutex_init(&net->xfrm.xfrm_cfg_mutex);

	rv = xfrm_statistics_init(net);
	if (rv < 0)
		goto out_statistics;
	rv = xfrm_state_init(net);
	if (rv < 0)
		goto out_state;
	rv = xfrm_policy_init(net);
	if (rv < 0)
		goto out_policy;
	xfrm_dst_ops_init(net);
	rv = xfrm_sysctl_init(net);
	if (rv < 0)
		goto out_sysctl;
	rv = flow_cache_init(net);
	if (rv < 0)
		goto out;

	return 0;

out:
	xfrm_sysctl_fini(net);
out_sysctl:
	xfrm_policy_fini(net);
out_policy:
	xfrm_state_fini(net);
out_state:
	xfrm_statistics_fini(net);
out_statistics:
	return rv;
}

/* Per-namespace teardown: undo xfrm_net_init() in reverse order
 * (flow cache, sysctls, policy tables, state tables, statistics).
 */
static void __net_exit xfrm_net_exit(struct net *net)
{
	flow_cache_fini(net);
	xfrm_sysctl_fini(net);
	xfrm_policy_fini(net);
	xfrm_state_fini(net);
	xfrm_statistics_fini(net);
}

/* Per-namespace init/exit hooks of the xfrm subsystem; registered from
 * xfrm_init() via register_pernet_subsys().
 */
static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};

L
Linus Torvalds 已提交
2977 2978
/* Boot-time entry point: register the per-namespace operations, then
 * initialise the xfrm input path.
 */
void __init xfrm_init(void)
{
	register_pernet_subsys(&xfrm_net_ops);
	xfrm_input_init();
}

J
Joy Latten 已提交
2983
#ifdef CONFIG_AUDITSYSCALL
2984 2985
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
J
Joy Latten 已提交
2986
{
2987 2988 2989 2990
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
J
Joy Latten 已提交
2991
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2992
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
J
Joy Latten 已提交
2993

2994
	switch (sel->family) {
J
Joy Latten 已提交
2995
	case AF_INET:
H
Harvey Harrison 已提交
2996
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
2997 2998 2999
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
H
Harvey Harrison 已提交
3000
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
3001 3002 3003
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
J
Joy Latten 已提交
3004 3005
		break;
	case AF_INET6:
H
Harvey Harrison 已提交
3006
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
3007 3008 3009
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
H
Harvey Harrison 已提交
3010
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
3011 3012 3013
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
J
Joy Latten 已提交
3014 3015 3016 3017
		break;
	}
}

3018
/* Emit an "SPD-add" audit record for policy @xp carrying @result.
 * Nothing is logged when no audit buffer can be obtained.
 */
void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
{
	struct audit_buffer *ab = xfrm_audit_start("SPD-add");

	if (!ab)
		return;

	xfrm_audit_helper_usrinfo(task_valid, ab);
	audit_log_format(ab, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, ab);
	audit_log_end(ab);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

P
Paul Moore 已提交
3032
void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
3033
			      bool task_valid)
J
Joy Latten 已提交
3034 3035 3036
{
	struct audit_buffer *audit_buf;

P
Paul Moore 已提交
3037
	audit_buf = xfrm_audit_start("SPD-delete");
J
Joy Latten 已提交
3038 3039
	if (audit_buf == NULL)
		return;
3040
	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
P
Paul Moore 已提交
3041
	audit_log_format(audit_buf, " res=%u", result);
J
Joy Latten 已提交
3042 3043 3044 3045 3046 3047
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif

3048
#ifdef CONFIG_XFRM_MIGRATE
3049 3050
static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
					const struct xfrm_selector *sel_tgt)
3051 3052 3053
{
	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
		if (sel_tgt->family == sel_cmp->family &&
3054 3055 3056 3057
		    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
				    sel_cmp->family) &&
		    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
				    sel_cmp->family) &&
3058 3059
		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
3060
			return true;
3061 3062 3063
		}
	} else {
		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
3064
			return true;
3065 3066
		}
	}
3067
	return false;
3068 3069
}

3070 3071
/*
 * Locate the policy that a migrate request for @sel refers to.
 *
 * The hash chain returned by policy_hash_direct() is searched first; the
 * first entry of the requested @type whose selector matches is remembered
 * together with its priority.  The per-direction inexact list is searched
 * next, and its first matching entry replaces the earlier result only when
 * its priority value is numerically lower.
 *
 * Returns the chosen policy after taking a reference with xfrm_pol_hold(),
 * or NULL when nothing matched.  Both walks happen with xfrm_policy_lock
 * read-held.
 */
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type, struct net *net)
{
	struct xfrm_policy *cand, *found = NULL;
	struct hlist_head *head;
	u32 best_prio = ~0U;

	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/

	head = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(cand, head, bydst) {
		if (!xfrm_migrate_selector_match(sel, &cand->selector))
			continue;
		if (cand->type != type)
			continue;

		found = cand;
		best_prio = cand->priority;
		break;
	}

	head = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(cand, head, bydst) {
		if (!xfrm_migrate_selector_match(sel, &cand->selector))
			continue;
		if (cand->type != type || cand->priority >= best_prio)
			continue;

		found = cand;
		break;
	}

	if (found)
		xfrm_pol_hold(found);

	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return found;
}

3105
static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
3106 3107 3108 3109 3110 3111 3112 3113
{
	int match = 0;

	if (t->mode == m->mode && t->id.proto == m->proto &&
	    (m->reqid == 0 || t->reqid == m->reqid)) {
		switch (t->mode) {
		case XFRM_MODE_TUNNEL:
		case XFRM_MODE_BEET:
3114 3115 3116 3117
			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
					    m->old_family) &&
			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
					    m->old_family)) {
3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141
				match = 1;
			}
			break;
		case XFRM_MODE_TRANSPORT:
			/* in case of transport mode, template does not store
			   any IP addresses, hence we just compare mode and
			   protocol */
			match = 1;
			break;
		default:
			break;
		}
	}
	return match;
}

/* update endpoint address(es) of template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
H
Herbert Xu 已提交
3142
	if (unlikely(pol->walk.dead)) {
3143 3144 3145 3146 3147 3148 3149 3150 3151 3152
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
H
Herbert Xu 已提交
3153 3154
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
3155 3156 3157 3158 3159 3160 3161 3162
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
3163
			atomic_inc(&pol->genid);
3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}

3175
static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
3176 3177 3178 3179 3180 3181 3182
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
		return -EINVAL;

	for (i = 0; i < num_migrate; i++) {
3183 3184 3185 3186
		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
				    m[i].old_family) &&
		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
				    m[i].old_family))
3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208
			return -EINVAL;
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
			return -EINVAL;

		/* check if there is any duplicated entry */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family)
				return -EINVAL;
		}
	}

	return 0;
}

3209
/*
 * Migrate the endpoints of a policy and of the states it refers to.
 *
 * @sel:         selector identifying the policy to migrate
 * @dir:         policy direction; must be below XFRM_POLICY_MAX
 * @type:        policy type the policy must have
 * @m:           array of migrate entries (old/new endpoint pairs)
 * @num_migrate: number of entries in @m (1..XFRM_MAX_DEPTH)
 * @k:           passed through unchanged to km_migrate()
 * @net:         network namespace to operate in
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL   @m failed xfrm_migrate_check(), or @dir is out of range
 *   -ENOENT   no matching policy (or it was deleted meanwhile)
 *   -ENODATA  xfrm_state_migrate() failed for a state, or no template of
 *             the policy matched any entry
 *
 * On failure after stage 2 has started, the references on the old states
 * are dropped and the states created so far are deleted again.
 * The return value of km_migrate() is not examined.
 */
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate,
		 struct xfrm_kmaddress *k, struct net *net)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	/* Stage 0 - sanity checks */
	err = xfrm_migrate_check(m, num_migrate);
	if (err < 0)
		goto out;

	/*
	 * @dir comes from the caller and is used by
	 * xfrm_migrate_policy_find() to pick a hash table and to index
	 * net->xfrm.policy_inexact[]; an out-of-range value would access
	 * memory outside those per-direction tables.
	 */
	if (dir >= XFRM_POLICY_MAX) {
		err = -EINVAL;
		goto out;
	}

	/* Stage 1 - find policy */
	pol = xfrm_migrate_policy_find(sel, dir, type, net);
	if (pol == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		x = xfrm_migrate_state_find(mp, net);
		if (!x)
			continue;

		/*
		 * num_migrate <= XFRM_MAX_DEPTH was verified in stage 0 and
		 * each iteration stores at most one entry per array, so
		 * x_cur[] and x_new[] cannot overflow.
		 */
		x_cur[nx_cur] = x;
		nx_cur++;

		xc = xfrm_state_migrate(x, mp);
		if (!xc) {
			err = -ENODATA;
			goto restore_state;
		}
		x_new[nx_new] = xc;
		nx_new++;
	}

	/* Stage 3 - update policy */
	err = xfrm_policy_migrate(pol, m, num_migrate);
	if (err < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate, k);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
3274
#endif