xfrm_policy.c 80.0 KB
Newer Older
1
/*
L
Linus Torvalds 已提交
2 3 4 5 6 7 8 9 10 11 12
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 * 	Kazunori MIYAZAWA @USAGI
 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 * 		IPv6 support
 * 	Kazunori MIYAZAWA @USAGI
 * 	YOSHIFUJI Hideaki
 * 		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
13
 *
L
Linus Torvalds 已提交
14 15
 */

16
#include <linux/err.h>
L
Linus Torvalds 已提交
17 18 19 20 21 22 23
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
24
#include <linux/netfilter.h>
L
Linus Torvalds 已提交
25
#include <linux/module.h>
26
#include <linux/cache.h>
P
Paul Moore 已提交
27
#include <linux/audit.h>
28
#include <net/dst.h>
29
#include <net/flow.h>
L
Linus Torvalds 已提交
30 31
#include <net/xfrm.h>
#include <net/ip.h>
32 33 34
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
L
Linus Torvalds 已提交
35

36 37
#include "xfrm_hash.h"

38 39 40 41
#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN	100

42 43 44 45 46
struct xfrm_flo {
	struct dst_entry *dst_orig;
	u8 flags;
};

47 48 49
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
						__read_mostly;
L
Linus Torvalds 已提交
50

51
static struct kmem_cache *xfrm_dst_cache __read_mostly;
L
Linus Torvalds 已提交
52

53
static void xfrm_init_pmtu(struct dst_entry *dst);
54
static int stale_bundle(struct dst_entry *dst);
55
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
56
static void xfrm_policy_queue_process(unsigned long arg);
L
Linus Torvalds 已提交
57

58
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
W
Wei Yongjun 已提交
59 60 61
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);

62
static inline bool
63
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
64
{
65 66
	const struct flowi4 *fl4 = &fl->u.ip4;

67 68
	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
69 70 71 72
		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
73 74
}

75
static inline bool
76
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
77
{
78 79 80 81 82 83 84 85
	const struct flowi6 *fl6 = &fl->u.ip6;

	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
86 87
}

88 89
bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
			 unsigned short family)
90 91 92 93 94 95 96
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
97
	return false;
98 99
}

E
Eric Dumazet 已提交
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
/*
 * Fetch the registered policy afinfo for @family.
 *
 * Returns NULL if @family is out of range or nothing is registered for it.
 * When a non-NULL pointer is returned, the RCU read-side critical section
 * entered here is still open; the caller must end it with
 * xfrm_policy_put_afinfo(). On the NULL paths no RCU section is left open.
 */
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *ops;

	if (unlikely(family >= NPROTO))
		return NULL;

	rcu_read_lock();
	ops = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(ops == NULL)) {
		/* Nothing registered: do not leave the read section open. */
		rcu_read_unlock();
		return NULL;
	}

	return ops;
}

/*
 * Counterpart of xfrm_policy_get_afinfo(): ends the RCU read-side critical
 * section that a successful get left open. @afinfo itself is not touched.
 */
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}

D
David Ahern 已提交
118 119
static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
						  int tos, int oif,
120 121
						  const xfrm_address_t *saddr,
						  const xfrm_address_t *daddr,
122 123 124 125 126 127 128 129 130
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

D
David Ahern 已提交
131
	dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
132 133 134 135 136 137

	xfrm_policy_put_afinfo(afinfo);

	return dst;
}

D
David Ahern 已提交
138 139
/*
 * Look up the dst for state @x.
 *
 * By default the lookup uses the state's own addresses (props.saddr and
 * id.daddr). If the state type carries XFRM_TYPE_LOCAL_COADDR, the source
 * becomes x->coaddr and the destination is taken from @prev_daddr; if it
 * carries XFRM_TYPE_REMOTE_COADDR, the source is taken from @prev_saddr
 * and the destination becomes x->coaddr.
 *
 * On a successful lookup (result is not an ERR_PTR) the addresses actually
 * used are copied back into @prev_saddr / @prev_daddr, unless the pointer
 * used already is the caller's buffer.
 */
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
						int tos, int oif,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	struct net *net = xs_net(x);
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);

	if (!IS_ERR(dst)) {
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
	}

	return dst;
}

/*
 * Convert a number of seconds to jiffies (secs * HZ), clamped to
 * MAX_SCHEDULE_TIMEOUT - 1 so that the multiplication cannot exceed that
 * bound.
 */
static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

static void xfrm_policy_timer(unsigned long data)
{
180
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
181
	unsigned long now = get_seconds();
L
Linus Torvalds 已提交
182 183 184 185 186 187
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

188
	if (unlikely(xp->walk.dead))
L
Linus Torvalds 已提交
189 190
		goto out;

191
	dir = xfrm_policy_id2dir(xp->index);
L
Linus Torvalds 已提交
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
231
		km_policy_expired(xp, dir, 0, 0);
L
Linus Torvalds 已提交
232 233 234 235 236 237 238 239 240 241 242
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
243
	if (!xfrm_policy_delete(xp, dir))
244
		km_policy_expired(xp, dir, 1, 0);
L
Linus Torvalds 已提交
245 246 247
	xfrm_pol_put(xp);
}

248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
/*
 * flow_cache_ops.get hook: take a reference on the policy embedding @flo.
 *
 * Returns @flo with the policy's reference count raised, or NULL (without
 * taking a reference) if the policy is already marked dead.
 */
static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_policy *xp = container_of(flo, struct xfrm_policy, flo);

	if (unlikely(xp->walk.dead))
		return NULL;

	xfrm_pol_hold(xp);
	return flo;
}

/*
 * flow_cache_ops.check hook: returns 1 while the policy embedding @flo is
 * not marked dead, 0 once it is.
 */
static int xfrm_policy_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_policy *xp = container_of(flo, struct xfrm_policy, flo);

	return xp->walk.dead ? 0 : 1;
}

/*
 * flow_cache_ops.delete hook: drop one reference on the policy that
 * embeds @flo.
 */
static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_policy *xp = container_of(flo, struct xfrm_policy, flo);

	xfrm_pol_put(xp);
}

/* Flow-cache callbacks for policies; installed by xfrm_policy_alloc(). */
static const struct flow_cache_ops xfrm_policy_fc_ops = {
	.get	= xfrm_policy_flo_get,		/* take a ref unless dead */
	.check	= xfrm_policy_flo_check,	/* still alive? */
	.delete	= xfrm_policy_flo_delete,	/* drop a ref */
};
L
Linus Torvalds 已提交
277 278 279 280 281

/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

282
struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
L
Linus Torvalds 已提交
283 284 285
{
	struct xfrm_policy *policy;

286
	policy = kzalloc(sizeof(struct xfrm_policy), gfp);
L
Linus Torvalds 已提交
287 288

	if (policy) {
289
		write_pnet(&policy->xp_net, net);
H
Herbert Xu 已提交
290
		INIT_LIST_HEAD(&policy->walk.all);
291 292
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
L
Linus Torvalds 已提交
293
		rwlock_init(&policy->lock);
294
		atomic_set(&policy->refcnt, 1);
295
		skb_queue_head_init(&policy->polq.hold_queue);
296 297
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
298 299
		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
			    (unsigned long)policy);
300
		policy->flo.ops = &xfrm_policy_fc_ops;
L
Linus Torvalds 已提交
301 302 303 304 305
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

306 307 308 309 310 311 312 313
/*
 * RCU callback queued by xfrm_policy_destroy(): releases the policy's
 * security context and then frees the policy that embeds @head.
 */
static void xfrm_policy_destroy_rcu(struct rcu_head *head)
{
	struct xfrm_policy *xp;

	xp = container_of(head, struct xfrm_policy, rcu);
	security_xfrm_policy_free(xp->security);
	kfree(xp);
}

L
Linus Torvalds 已提交
314 315
/* Destroy xfrm_policy: descendant resources must be released to this moment. */

316
void xfrm_policy_destroy(struct xfrm_policy *policy)
L
Linus Torvalds 已提交
317
{
H
Herbert Xu 已提交
318
	BUG_ON(!policy->walk.dead);
L
Linus Torvalds 已提交
319

320
	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
L
Linus Torvalds 已提交
321 322
		BUG();

323
	call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
L
Linus Torvalds 已提交
324
}
325
EXPORT_SYMBOL(xfrm_policy_destroy);
L
Linus Torvalds 已提交
326 327 328 329 330 331 332

/* Rule must be locked. Release descentant resources, announce
 * entry dead. The rule must be unlinked from lists to the moment.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
H
Herbert Xu 已提交
333
	policy->walk.dead = 1;
L
Linus Torvalds 已提交
334

335
	atomic_inc(&policy->genid);
L
Linus Torvalds 已提交
336

337 338
	if (del_timer(&policy->polq.hold_timer))
		xfrm_pol_put(policy);
339
	skb_queue_purge(&policy->polq.hold_queue);
340

341 342 343 344
	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	xfrm_pol_put(policy);
L
Linus Torvalds 已提交
345 346
}

347 348
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

349
/*
 * Bucket number for policy @index in @net's by-index table, computed by
 * __idx_hash() with the table's current mask.
 */
static inline unsigned int idx_hash(struct net *net, u32 index)
{
	return __idx_hash(index, net->xfrm.policy_idx_hmask);
}

354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375
/*
 * Report the hash thresholds configured for direction @dir.
 *
 * Stores the destination/source bit counts for @family into *@dbits and
 * *@sbits: the *4 values for AF_INET, the *6 values for AF_INET6, and
 * zero for any other family.
 */
static void __get_hash_thresh(struct net *net,
			      unsigned short family, int dir,
			      u8 *dbits, u8 *sbits)
{
	if (family == AF_INET) {
		*dbits = net->xfrm.policy_bydst[dir].dbits4;
		*sbits = net->xfrm.policy_bydst[dir].sbits4;
		return;
	}

	if (family == AF_INET6) {
		*dbits = net->xfrm.policy_bydst[dir].dbits6;
		*sbits = net->xfrm.policy_bydst[dir].sbits6;
		return;
	}

	/* Unknown family: no prefix bits take part in hashing. */
	*dbits = 0;
	*sbits = 0;
}

376 377 378
static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
379
{
380
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
381 382 383 384 385 386
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __sel_hash(sel, family, hmask, dbits, sbits);
387

388 389 390 391 392
	if (hash == hmask + 1)
		return &net->xfrm.policy_inexact[dir];

	return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
		     lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
393 394
}

395 396 397 398
static struct hlist_head *policy_hash_direct(struct net *net,
					     const xfrm_address_t *daddr,
					     const xfrm_address_t *saddr,
					     unsigned short family, int dir)
399
{
400
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
401 402 403 404 405 406
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
407

408 409
	return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
		     lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
410 411
}

412 413
static void xfrm_dst_hash_transfer(struct net *net,
				   struct hlist_head *list,
414
				   struct hlist_head *ndsttable,
415 416
				   unsigned int nhashmask,
				   int dir)
417
{
418
	struct hlist_node *tmp, *entry0 = NULL;
419
	struct xfrm_policy *pol;
420
	unsigned int h0 = 0;
421 422
	u8 dbits;
	u8 sbits;
423

424
redo:
425
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
426 427
		unsigned int h;

428
		__get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
429
		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
430
				pol->family, nhashmask, dbits, sbits);
431
		if (!entry0) {
432 433
			hlist_del_rcu(&pol->bydst);
			hlist_add_head_rcu(&pol->bydst, ndsttable + h);
434 435 436 437
			h0 = h;
		} else {
			if (h != h0)
				continue;
438 439
			hlist_del_rcu(&pol->bydst);
			hlist_add_behind_rcu(&pol->bydst, entry0);
440
		}
441
		entry0 = &pol->bydst;
442 443 444 445
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
446 447 448 449 450 451 452
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
453
	struct hlist_node *tmp;
454 455
	struct xfrm_policy *pol;

456
	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
457 458 459 460 461 462 463 464 465 466 467 468
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

/*
 * Mask for a table twice the size of the one described by @old_hmask:
 * shift the old mask left by one bit and set the lowest bit. The
 * arithmetic is done in unsigned int and only then widened.
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	unsigned int grown = (old_hmask << 1) | 1U;

	return grown;
}

469
static void xfrm_bydst_resize(struct net *net, int dir)
470
{
471
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
472 473
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
474
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
475
	struct hlist_head *odst;
476 477 478 479 480
	int i;

	if (!ndst)
		return;

F
Fan Du 已提交
481
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
482

483 484 485
	odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
				lockdep_is_held(&net->xfrm.xfrm_policy_lock));

486
	for (i = hmask; i >= 0; i--)
487
		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
488

489
	rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
490
	net->xfrm.policy_bydst[dir].hmask = nhashmask;
491

F
Fan Du 已提交
492
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
493

494 495
	synchronize_rcu();

496
	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
497 498
}

499
static void xfrm_byidx_resize(struct net *net, int total)
500
{
501
	unsigned int hmask = net->xfrm.policy_idx_hmask;
502 503
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
504
	struct hlist_head *oidx = net->xfrm.policy_byidx;
505
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
506 507 508 509 510
	int i;

	if (!nidx)
		return;

F
Fan Du 已提交
511
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
512 513 514 515

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

516 517
	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;
518

F
Fan Du 已提交
519
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
520

521
	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
522 523
}

524
static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
525
{
526 527
	unsigned int cnt = net->xfrm.policy_count[dir];
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
528 529 530 531 532 533 534 535 536 537 538

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

539
static inline int xfrm_byidx_should_resize(struct net *net, int total)
540
{
541
	unsigned int hmask = net->xfrm.policy_idx_hmask;
542 543 544 545 546 547 548 549

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

550
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
J
Jamal Hadi Salim 已提交
551
{
F
Fan Du 已提交
552
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
553 554 555 556 557 558 559
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
J
Jamal Hadi Salim 已提交
560
	si->spdhmcnt = xfrm_policy_hashmax;
F
Fan Du 已提交
561
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
J
Jamal Hadi Salim 已提交
562 563
}
EXPORT_SYMBOL(xfrm_spd_getinfo);
564

J
Jamal Hadi Salim 已提交
565
static DEFINE_MUTEX(hash_resize_mutex);
566
static void xfrm_hash_resize(struct work_struct *work)
567
{
568
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
569 570 571 572 573
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
H
Herbert Xu 已提交
574
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
575 576
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
577
	}
578 579
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);
580 581 582 583

	mutex_unlock(&hash_resize_mutex);
}

584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613
/*
 * Work handler (net->xfrm.policy_hthresh.work) that rebuilds the bydst and
 * inexact tables after the prefix-length thresholds changed.
 *
 * Serialised against resizing by hash_resize_mutex. The new thresholds are
 * sampled under the policy_hthresh seqlock; then, under xfrm_policy_lock,
 * all bydst buckets and inexact lists of every direction below
 * XFRM_POLICY_MAX are emptied, the per-direction thresholds are updated
 * (for OUT the destination is the remote side, for IN/FWD it is the local
 * side), and every policy on policy_all is re-inserted in creation order,
 * keeping each chain sorted by ascending priority.
 */
static void xfrm_hash_rebuild(struct work_struct *work)
{
	struct net *net = container_of(work, struct net,
				       xfrm.policy_hthresh.work);
	unsigned int hmask;
	struct xfrm_policy *pol;
	struct xfrm_policy *policy;
	struct hlist_head *chain;
	struct hlist_head *odst;
	struct hlist_node *newpos;
	int i;
	int dir;
	unsigned seq;
	u8 lbits4, rbits4, lbits6, rbits6;

	mutex_lock(&hash_resize_mutex);

	/* read selector prefixlen thresholds */
	do {
		seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);

		lbits4 = net->xfrm.policy_hthresh.lbits4;
		rbits4 = net->xfrm.policy_hthresh.rbits4;
		lbits6 = net->xfrm.policy_hthresh.lbits6;
		rbits6 = net->xfrm.policy_hthresh.rbits6;
	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	/* reset the bydst and inexact table in all directions */
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
		hmask = net->xfrm.policy_bydst[dir].hmask;
		odst = net->xfrm.policy_bydst[dir].table;
		for (i = hmask; i >= 0; i--)
			INIT_HLIST_HEAD(odst + i);
		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
			/* dir out => dst = remote, src = local */
			net->xfrm.policy_bydst[dir].dbits4 = rbits4;
			net->xfrm.policy_bydst[dir].sbits4 = lbits4;
			net->xfrm.policy_bydst[dir].dbits6 = rbits6;
			net->xfrm.policy_bydst[dir].sbits6 = lbits6;
		} else {
			/* dir in/fwd => dst = local, src = remote */
			net->xfrm.policy_bydst[dir].dbits4 = lbits4;
			net->xfrm.policy_bydst[dir].sbits4 = rbits4;
			net->xfrm.policy_bydst[dir].dbits6 = lbits6;
			net->xfrm.policy_bydst[dir].sbits6 = rbits6;
		}
	}

	/* re-insert all policies by order of creation */
	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
		/*
		 * policy_all also carries the cursors of in-progress walks
		 * (xfrm_policy_walk() parks them there with dead == 1). Those
		 * are bare walk entries, not policies, so only walk.dead may
		 * be inspected before deciding to skip them.
		 */
		if (policy->walk.dead)
			continue;

		/*
		 * Only directions below XFRM_POLICY_MAX were reset above and
		 * are indexed in policy_bydst/policy_inexact here; anything
		 * else (presumably per-socket policies, which are counted at
		 * dir + XFRM_POLICY_MAX) must not be hashed.
		 */
		dir = xfrm_policy_id2dir(policy->index);
		if (dir >= XFRM_POLICY_MAX)
			continue;

		newpos = NULL;
		chain = policy_hash_bysel(net, &policy->selector,
					  policy->family, dir);
		hlist_for_each_entry(pol, chain, bydst) {
			if (policy->priority >= pol->priority)
				newpos = &pol->bydst;
			else
				break;
		}
		if (newpos)
			hlist_add_behind(&policy->bydst, newpos);
		else
			hlist_add_head(&policy->bydst, chain);
	}

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	mutex_unlock(&hash_resize_mutex);
}

/*
 * Request an asynchronous rebuild of @net's policy hash tables by
 * scheduling the policy_hthresh work item (handled by xfrm_hash_rebuild).
 */
void xfrm_policy_hash_rebuild(struct net *net)
{
	schedule_work(&net->xfrm.policy_hthresh.work);
}
EXPORT_SYMBOL(xfrm_policy_hash_rebuild);

L
Linus Torvalds 已提交
664 665
/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute inpredictability of ordering of rules. This will not pass. */
666
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
L
Linus Torvalds 已提交
667 668 669 670
{
	static u32 idx_generator;

	for (;;) {
671 672 673 674 675
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

676 677 678 679 680 681 682 683
		if (!index) {
			idx = (idx_generator | dir);
			idx_generator += 8;
		} else {
			idx = index;
			index = 0;
		}

L
Linus Torvalds 已提交
684 685
		if (idx == 0)
			idx = 8;
686
		list = net->xfrm.policy_byidx + idx_hash(net, idx);
687
		found = 0;
688
		hlist_for_each_entry(p, list, byidx) {
689 690
			if (p->index == idx) {
				found = 1;
L
Linus Torvalds 已提交
691
				break;
692
			}
L
Linus Torvalds 已提交
693
		}
694
		if (!found)
L
Linus Torvalds 已提交
695 696 697 698
			return idx;
	}
}

699 700 701 702 703 704 705 706 707 708 709 710 711 712 713
static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

714 715 716 717 718 719
static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

720 721 722
	if (skb_queue_empty(&pq->hold_queue))
		return;

723 724 725 726
	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
727 728
	if (del_timer(&pq->hold_timer))
		xfrm_pol_put(old);
729 730 731 732 733 734 735
	spin_unlock_bh(&pq->hold_queue.lock);

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
736 737
	if (!mod_timer(&pq->hold_timer, jiffies))
		xfrm_pol_hold(new);
738 739 740
	spin_unlock_bh(&pq->hold_queue.lock);
}

741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
/*
 * Decide whether the marks of new policy @policy and existing policy @pol
 * collide.
 *
 * True when both mark value and mask are identical, or when @policy's
 * masked mark value falls under @pol's mark (value under @pol's mask
 * equals @pol's value) and both policies have the same priority.
 */
static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
				   struct xfrm_policy *pol)
{
	u32 masked = policy->mark.v & policy->mark.m;
	bool same_mark = policy->mark.v == pol->mark.v &&
			 policy->mark.m == pol->mark.m;

	if (same_mark)
		return true;

	return (masked & pol->mark.m) == pol->mark.v &&
	       policy->priority == pol->priority;
}

L
Linus Torvalds 已提交
756 757
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
758
	struct net *net = xp_net(policy);
759 760 761
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
762
	struct hlist_node *newpos;
L
Linus Torvalds 已提交
763

F
Fan Du 已提交
764
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
765
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
766 767
	delpol = NULL;
	newpos = NULL;
768
	hlist_for_each_entry(pol, chain, bydst) {
H
Herbert Xu 已提交
769
		if (pol->type == policy->type &&
770
		    !selector_cmp(&pol->selector, &policy->selector) &&
771
		    xfrm_policy_mark_match(policy, pol) &&
H
Herbert Xu 已提交
772 773
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
L
Linus Torvalds 已提交
774
			if (excl) {
F
Fan Du 已提交
775
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
776 777 778 779 780 781
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
H
Herbert Xu 已提交
782
			newpos = &pol->bydst;
L
Linus Torvalds 已提交
783 784 785 786 787 788
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
789
		hlist_add_behind(&policy->bydst, newpos);
790 791
	else
		hlist_add_head(&policy->bydst, chain);
792
	__xfrm_policy_link(policy, dir);
793
	atomic_inc(&net->xfrm.flow_cache_genid);
F
fan.du 已提交
794 795 796 797 798 799 800

	/* After previous checking, family can either be AF_INET or AF_INET6 */
	if (policy->family == AF_INET)
		rt_genid_bump_ipv4(net);
	else
		rt_genid_bump_ipv6(net);

801 802
	if (delpol) {
		xfrm_policy_requeue(delpol, policy);
W
Wei Yongjun 已提交
803
		__xfrm_policy_unlink(delpol, dir);
804
	}
805
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
806
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
807
	policy->curlft.add_time = get_seconds();
L
Linus Torvalds 已提交
808 809 810
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
F
Fan Du 已提交
811
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
812

813
	if (delpol)
L
Linus Torvalds 已提交
814
		xfrm_policy_kill(delpol);
815 816
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);
817

L
Linus Torvalds 已提交
818 819 820 821
	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

822 823
/*
 * Find a policy by selector and security context, optionally deleting it.
 *
 * The chain for (@sel, @sel->family, @dir) is searched for a policy of
 * @type whose mark accepts @mark, whose selector equals @sel and whose
 * security context matches @ctx. A reference is taken on the match.
 *
 * With @delete set, security_xfrm_policy_delete() is consulted first: if
 * it fails, *@err carries its error and the policy is returned still
 * linked (and still referenced). Otherwise the policy is unlinked under
 * the lock and killed after the lock is released.
 *
 * Returns the referenced policy or NULL if none matched; *@err is 0 unless
 * the security hook refused the deletion.
 */
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
					  int dir, struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == type &&
		    (mark & pol->mark.m) == pol->mark.v &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
L
Linus Torvalds 已提交
860

861 862
/*
 * Find a policy by index, optionally deleting it.
 *
 * If the direction encoded in @id differs from @dir, NULL is returned with
 * *@err set to -ENOENT. Otherwise the by-index chain is searched for a
 * policy of @type with index @id whose mark accepts @mark, and a reference
 * is taken on the match.
 *
 * With @delete set, security_xfrm_policy_delete() is consulted first: if
 * it fails, *@err carries its error and the policy is returned still
 * linked (and still referenced). Otherwise the policy is unlinked under
 * the lock and killed after the lock is released.
 *
 * Returns the referenced policy, or NULL if none matched (in which case
 * *@err is 0 unless the direction check failed).
 */
struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
				     int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = net->xfrm.policy_byidx + idx_hash(net, id);
	ret = NULL;
	hlist_for_each_entry(pol, chain, byidx) {
		if (pol->type == type && pol->index == id &&
		    (mark & pol->mark.m) == pol->mark.v) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);

900 901
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
902
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
L
Linus Torvalds 已提交
903
{
904 905 906 907 908 909
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

910
		hlist_for_each_entry(pol,
911
				     &net->xfrm.policy_inexact[dir], bydst) {
912 913
			if (pol->type != type)
				continue;
914
			err = security_xfrm_policy_delete(pol->security);
915
			if (err) {
916
				xfrm_audit_policy_delete(pol, 0, task_valid);
917 918
				return err;
			}
919
		}
920
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
921
			hlist_for_each_entry(pol,
922
					     net->xfrm.policy_bydst[dir].table + i,
923 924 925
					     bydst) {
				if (pol->type != type)
					continue;
926 927
				err = security_xfrm_policy_delete(
								pol->security);
928
				if (err) {
J
Joy Latten 已提交
929
					xfrm_audit_policy_delete(pol, 0,
930
								 task_valid);
931 932 933 934 935 936 937 938 939
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
940
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
941 942 943 944 945
{
	return 0;
}
#endif

946
/*
 * Remove every policy of @type from @net.
 *
 * First the security pre-flight check runs under xfrm_policy_lock; if it
 * refuses, its error is returned and nothing is removed. Then, for each
 * direction, policies of @type are unlinked one at a time from the inexact
 * list and from every bydst bucket. The lock is dropped around the audit
 * record and xfrm_policy_kill() for each policy, so the scan of the
 * current list restarts from its head after every removal.
 *
 * Returns 0 if at least one policy was removed, -ESRCH if none was found,
 * or the error from the security check.
 */
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
	int dir, err = 0, cnt = 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

	again1:
		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			__xfrm_policy_unlink(pol, dir);
			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
			cnt++;

			xfrm_audit_policy_delete(pol, 1, task_valid);

			xfrm_policy_kill(pol);

			write_lock_bh(&net->xfrm.xfrm_policy_lock);
			/* List may have changed while unlocked: rescan. */
			goto again1;
		}

		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				__xfrm_policy_unlink(pol, dir);
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				cnt++;

				xfrm_audit_policy_delete(pol, 1, task_valid);
				xfrm_policy_kill(pol);

				write_lock_bh(&net->xfrm.xfrm_policy_lock);
				/* Bucket may have changed while unlocked. */
				goto again2;
			}
		}

	}
	if (!cnt)
		err = -ESRCH;
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

1005
/*
 * Resumable iteration over net->xfrm.policy_all.
 *
 * @func is invoked for every live entry whose type matches walk->type
 * (or for all of them when walk->type is XFRM_POLICY_TYPE_ANY), with the
 * direction derived from the policy index, the running sequence number
 * and @data.
 *
 * If @func returns non-zero the walker's own list entry is parked
 * immediately before the entry that failed, the error is returned, and
 * a later call resumes from that entry.
 *
 * Returns 0 when the walk completed (or had completed earlier), -EINVAL
 * for an invalid walk->type, -ENOENT if no entry was ever delivered
 * (walk->seq still 0), or the error returned by @func.
 */
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
1006
		     int (*func)(struct xfrm_policy *, int, int, void*),
L
Linus Torvalds 已提交
1007 1008
		     void *data)
{
H
Herbert Xu 已提交
1009 1010
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
1011 1012 1013 1014 1015
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;
L
Linus Torvalds 已提交
1016

H
Herbert Xu 已提交
1017
	/* Walker not parked in the list but entries were already delivered:
	 * a previous call finished the walk, nothing more to do.
	 */
	if (list_empty(&walk->walk.all) && walk->seq != 0)
1018 1019
		return 0;

F
Fan Du 已提交
1020
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
H
Herbert Xu 已提交
1021
	/* Fresh walk starts at the list head; a resumed walk starts at the
	 * entry following the parked walker.
	 */
	if (list_empty(&walk->walk.all))
1022
		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
H
Herbert Xu 已提交
1023
	else
1024 1025 1026
		x = list_first_entry(&walk->walk.all,
				     struct xfrm_policy_walk_entry, all);

1027
	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
H
Herbert Xu 已提交
1028
		/* Entries flagged dead are skipped; xfrm_policy_walk_init()
		 * sets this flag on walker placeholders.
		 */
		if (x->dead)
1029
			continue;
H
Herbert Xu 已提交
1030 1031 1032 1033 1034 1035 1036 1037 1038
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			/* Park the walker just before x so the next call
			 * retries x first.
			 */
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
1039
		}
H
Herbert Xu 已提交
1040
		walk->seq++;
L
Linus Torvalds 已提交
1041
	}
H
Herbert Xu 已提交
1042
	if (walk->seq == 0) {
J
Jamal Hadi Salim 已提交
1043 1044 1045
		error = -ENOENT;
		goto out;
	}
H
Herbert Xu 已提交
1046
	/* Walk complete: take the walker off the list (no-op if it was
	 * never parked, since its list head is self-linked).
	 */
	list_del_init(&walk->walk.all);
L
Linus Torvalds 已提交
1047
out:
F
Fan Du 已提交
1048
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1049 1050 1051 1052
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

H
Herbert Xu 已提交
1053 1054 1055 1056 1057 1058 1059 1060 1061
/*
 * Prepare @walk for use with xfrm_policy_walk(): no entries delivered
 * yet (seq == 0), filter on @type, and an unlinked list entry.
 */
void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
	walk->seq = 0;
	walk->type = type;

	/* The embedded entry is flagged dead; xfrm_policy_walk() skips
	 * entries carrying this flag.
	 */
	walk->walk.dead = 1;
	INIT_LIST_HEAD(&walk->walk.all);
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

F
Fan Du 已提交
1062
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
H
Herbert Xu 已提交
1063 1064 1065 1066
{
	if (list_empty(&walk->walk.all))
		return;

F
Fan Du 已提交
1067
	write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
H
Herbert Xu 已提交
1068
	list_del(&walk->walk.all);
F
Fan Du 已提交
1069
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
H
Herbert Xu 已提交
1070 1071 1072
}
EXPORT_SYMBOL(xfrm_policy_walk_done);

1073 1074 1075 1076 1077
/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
1078 1079
static int xfrm_policy_match(const struct xfrm_policy *pol,
			     const struct flowi *fl,
1080
			     u8 type, u16 family, int dir)
L
Linus Torvalds 已提交
1081
{
1082
	const struct xfrm_selector *sel = &pol->selector;
1083 1084
	int ret = -ESRCH;
	bool match;
L
Linus Torvalds 已提交
1085

1086
	if (pol->family != family ||
1087
	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
1088
	    pol->type != type)
1089
		return ret;
L
Linus Torvalds 已提交
1090

1091
	match = xfrm_selector_match(sel, fl, family);
1092
	if (match)
1093
		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
1094
						  dir);
1095

1096
	return ret;
1097
}
L
Linus Torvalds 已提交
1098

A
Alexey Dobriyan 已提交
1099
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
1100
						     const struct flowi *fl,
1101 1102
						     u16 family, u8 dir)
{
1103
	int err;
1104
	struct xfrm_policy *pol, *ret;
1105
	const xfrm_address_t *daddr, *saddr;
1106
	struct hlist_head *chain;
1107
	u32 priority = ~0U;
1108

1109 1110 1111 1112 1113
	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

F
Fan Du 已提交
1114
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
A
Alexey Dobriyan 已提交
1115
	chain = policy_hash_direct(net, daddr, saddr, family, dir);
1116
	ret = NULL;
1117
	hlist_for_each_entry_rcu(pol, chain, bydst) {
1118 1119 1120 1121 1122 1123 1124 1125 1126
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
1127
			ret = pol;
1128
			priority = ret->priority;
1129 1130 1131
			break;
		}
	}
A
Alexey Dobriyan 已提交
1132
	chain = &net->xfrm.policy_inexact[dir];
1133
	hlist_for_each_entry_rcu(pol, chain, bydst) {
1134 1135 1136
		if ((pol->priority >= priority) && ret)
			break;

1137 1138 1139 1140 1141 1142 1143 1144
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
1145
		} else {
1146 1147
			ret = pol;
			break;
L
Linus Torvalds 已提交
1148 1149
		}
	}
1150 1151

	xfrm_pol_hold(ret);
1152
fail:
F
Fan Du 已提交
1153
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
1154

1155
	return ret;
1156 1157
}

1158
static struct xfrm_policy *
1159
__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_policy *pol;

	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (pol != NULL)
		return pol;
#endif
	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}

1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188
/*
 * Translate a FLOW_DIR_* value into the corresponding XFRM_POLICY_*
 * value.  Unknown values map to XFRM_POLICY_IN.
 */
static int flow_to_policy_dir(int dir)
{
	/* Identical numbering on both sides: nothing to translate. */
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;

	if (dir == FLOW_DIR_OUT)
		return XFRM_POLICY_OUT;
	if (dir == FLOW_DIR_FWD)
		return XFRM_POLICY_FWD;

	/* FLOW_DIR_IN and anything unrecognised */
	return XFRM_POLICY_IN;
}

1189
static struct flow_cache_object *
1190
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
1191
		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
1192 1193
{
	struct xfrm_policy *pol;
1194 1195 1196

	if (old_obj)
		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
1197

1198
	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
1199
	if (IS_ERR_OR_NULL(pol))
1200 1201 1202 1203 1204 1205 1206
		return ERR_CAST(pol);

	/* Resolver returns two references:
	 * one for cache and one for caller of flow_cache_lookup() */
	xfrm_pol_hold(pol);

	return &pol->flo;
L
Linus Torvalds 已提交
1207 1208
}

1209 1210 1211
/*
 * Translate an XFRM_POLICY_* direction into the corresponding
 * FLOW_DIR_* value.  Unknown values map to FLOW_DIR_IN.
 */
static inline int policy_to_flow_dir(int dir)
{
	/* Identical numbering on both sides: nothing to translate. */
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;

	if (dir == XFRM_POLICY_OUT)
		return FLOW_DIR_OUT;
	if (dir == XFRM_POLICY_FWD)
		return FLOW_DIR_FWD;

	/* XFRM_POLICY_IN and anything unrecognised */
	return FLOW_DIR_IN;
}

1226
static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
1227
						 const struct flowi *fl)
L
Linus Torvalds 已提交
1228 1229
{
	struct xfrm_policy *pol;
F
Fan Du 已提交
1230
	struct net *net = sock_net(sk);
L
Linus Torvalds 已提交
1231

1232
	rcu_read_lock();
F
Fan Du 已提交
1233
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
1234 1235
	pol = rcu_dereference(sk->sk_policy[dir]);
	if (pol != NULL) {
1236 1237
		bool match = xfrm_selector_match(&pol->selector, fl,
						 sk->sk_family);
1238
		int err = 0;
1239

1240
		if (match) {
J
Jamal Hadi Salim 已提交
1241 1242 1243 1244
			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
				pol = NULL;
				goto out;
			}
1245
			err = security_xfrm_policy_lookup(pol->security,
1246
						      fl->flowi_secid,
1247
						      policy_to_flow_dir(dir));
1248 1249 1250 1251 1252 1253 1254
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
L
Linus Torvalds 已提交
1255 1256
			pol = NULL;
	}
J
Jamal Hadi Salim 已提交
1257
out:
F
Fan Du 已提交
1258
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
1259
	rcu_read_unlock();
L
Linus Torvalds 已提交
1260 1261 1262 1263 1264
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
1265
	struct net *net = xp_net(pol);
1266

1267 1268
	list_add(&pol->walk.all, &net->xfrm.policy_all);
	net->xfrm.policy_count[dir]++;
L
Linus Torvalds 已提交
1269 1270 1271 1272 1273 1274
	xfrm_pol_hold(pol);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
1275 1276
	struct net *net = xp_net(pol);

H
Herbert Xu 已提交
1277
	if (list_empty(&pol->walk.all))
1278
		return NULL;
L
Linus Torvalds 已提交
1279

H
Herbert Xu 已提交
1280 1281
	/* Socket policies are not hashed. */
	if (!hlist_unhashed(&pol->bydst)) {
1282
		hlist_del_rcu(&pol->bydst);
H
Herbert Xu 已提交
1283 1284 1285 1286
		hlist_del(&pol->byidx);
	}

	list_del_init(&pol->walk.all);
1287
	net->xfrm.policy_count[dir]--;
1288 1289

	return pol;
L
Linus Torvalds 已提交
1290 1291
}

H
Herbert Xu 已提交
1292 1293 1294 1295 1296 1297 1298 1299 1300 1301
/* Link a per-socket policy; its counter slot is XFRM_POLICY_MAX + dir. */
static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
{
	int sk_slot = XFRM_POLICY_MAX + dir;

	__xfrm_policy_link(pol, sk_slot);
}

/* Unlink a per-socket policy; its counter slot is XFRM_POLICY_MAX + dir. */
static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
{
	int sk_slot = XFRM_POLICY_MAX + dir;

	__xfrm_policy_unlink(pol, sk_slot);
}

1302
int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
L
Linus Torvalds 已提交
1303
{
F
Fan Du 已提交
1304 1305 1306
	struct net *net = xp_net(pol);

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1307
	pol = __xfrm_policy_unlink(pol, dir);
F
Fan Du 已提交
1308
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1309 1310
	if (pol) {
		xfrm_policy_kill(pol);
1311
		return 0;
L
Linus Torvalds 已提交
1312
	}
1313
	return -ENOENT;
L
Linus Torvalds 已提交
1314
}
1315
EXPORT_SYMBOL(xfrm_policy_delete);
L
Linus Torvalds 已提交
1316 1317 1318

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
1319
	struct net *net = xp_net(pol);
L
Linus Torvalds 已提交
1320 1321
	struct xfrm_policy *old_pol;

1322 1323 1324 1325 1326
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

F
Fan Du 已提交
1327
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
1328 1329
	old_pol = rcu_dereference_protected(sk->sk_policy[dir],
				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
L
Linus Torvalds 已提交
1330
	if (pol) {
1331
		pol->curlft.add_time = get_seconds();
1332
		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
H
Herbert Xu 已提交
1333
		xfrm_sk_policy_link(pol, dir);
L
Linus Torvalds 已提交
1334
	}
1335
	rcu_assign_pointer(sk->sk_policy[dir], pol);
1336 1337 1338 1339
	if (old_pol) {
		if (pol)
			xfrm_policy_requeue(old_pol, pol);

1340 1341 1342
		/* Unlinking succeeds always. This is the only function
		 * allowed to delete or replace socket policy.
		 */
H
Herbert Xu 已提交
1343
		xfrm_sk_policy_unlink(old_pol, dir);
1344
	}
F
Fan Du 已提交
1345
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1346 1347 1348 1349 1350 1351 1352

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

1353
static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
L
Linus Torvalds 已提交
1354
{
1355
	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
F
Fan Du 已提交
1356
	struct net *net = xp_net(old);
L
Linus Torvalds 已提交
1357 1358 1359

	if (newp) {
		newp->selector = old->selector;
1360 1361
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
1362 1363 1364
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
L
Linus Torvalds 已提交
1365 1366
		newp->lft = old->lft;
		newp->curlft = old->curlft;
1367
		newp->mark = old->mark;
L
Linus Torvalds 已提交
1368 1369 1370 1371
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
1372
		newp->type = old->type;
L
Linus Torvalds 已提交
1373 1374
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
F
Fan Du 已提交
1375
		write_lock_bh(&net->xfrm.xfrm_policy_lock);
H
Herbert Xu 已提交
1376
		xfrm_sk_policy_link(newp, dir);
F
Fan Du 已提交
1377
		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
L
Linus Torvalds 已提交
1378 1379 1380 1381 1382
		xfrm_pol_put(newp);
	}
	return newp;
}

1383
int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
L
Linus Torvalds 已提交
1384
{
1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402
	const struct xfrm_policy *p;
	struct xfrm_policy *np;
	int i, ret = 0;

	rcu_read_lock();
	for (i = 0; i < 2; i++) {
		p = rcu_dereference(osk->sk_policy[i]);
		if (p) {
			np = clone_policy(p, i);
			if (unlikely(!np)) {
				ret = -ENOMEM;
				break;
			}
			rcu_assign_pointer(sk->sk_policy[i], np);
		}
	}
	rcu_read_unlock();
	return ret;
L
Linus Torvalds 已提交
1403 1404
}

1405
static int
D
David Ahern 已提交
1406 1407
xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
	       xfrm_address_t *remote, unsigned short family)
1408 1409 1410 1411 1412 1413
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
D
David Ahern 已提交
1414
	err = afinfo->get_saddr(net, oif, local, remote);
1415 1416 1417 1418
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

L
Linus Torvalds 已提交
1419 1420 1421
/* Resolve list of templates for the flow, given policy.
 *
 * For every template of @policy a matching state is looked up with
 * xfrm_state_find() and, if valid, stored in @xfrm.  Templates marked
 * optional may fail to resolve and are skipped.
 *
 * Returns the number of states stored in @xfrm, or a negative errno;
 * on error every state stored so far has been released again.
 */

static int
1422 1423
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
		      struct xfrm_state **xfrm, unsigned short family)
L
Linus Torvalds 已提交
1424
{
A
Alexey Dobriyan 已提交
1425
	struct net *net = xp_net(policy);
L
Linus Torvalds 已提交
1426 1427 1428 1429
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1430
	xfrm_address_t tmp;
L
Linus Torvalds 已提交
1431

1432
	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
L
Linus Torvalds 已提交
1433 1434 1435 1436 1437
		struct xfrm_state *x;
		/* Default endpoints: those of the previously resolved
		 * template, or of the flow for the first one.
		 */
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

1438 1439
		/* Tunnel and BEET templates carry their own endpoints. */
		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
L
Linus Torvalds 已提交
1440 1441
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
1442
			/* Wildcard local address: let the address family
			 * pick one for this remote.
			 */
			if (xfrm_addr_any(local, tmpl->encap_family)) {
D
David Ahern 已提交
1443 1444 1445
				error = xfrm_get_saddr(net, fl->flowi_oif,
						       &tmp, remote,
						       tmpl->encap_family);
1446 1447 1448 1449
				if (error)
					goto fail;
				local = &tmp;
			}
L
Linus Torvalds 已提交
1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			/* The next template resolves relative to these
			 * endpoints.
			 */
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			/* A state exists but is not usable: map its
			 * condition to an errno and drop our reference.
			 */
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
W
Weilong Chen 已提交
1464
		} else if (error == -ESRCH) {
1465
			error = -EAGAIN;
W
Weilong Chen 已提交
1466
		}
L
Linus Torvalds 已提交
1467 1468 1469 1470 1471 1472 1473

		/* An unresolved optional template is simply skipped. */
		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
1474
	/* Release, in reverse order, every state collected so far. */
	for (nx--; nx >= 0; nx--)
L
Linus Torvalds 已提交
1475 1476 1477 1478
		xfrm_state_put(xfrm[nx]);
	return error;
}

1479
static int
1480 1481
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
		  struct xfrm_state **xfrm, unsigned short family)
1482
{
1483 1484
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1485 1486 1487 1488 1489 1490 1491 1492 1493 1494
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}
1495 1496

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1497 1498 1499 1500 1501 1502 1503
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

1504 1505 1506 1507
	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

1508 1509 1510
	return cnx;

 fail:
1511
	for (cnx--; cnx >= 0; cnx--)
1512
		xfrm_state_put(tpp[cnx]);
1513 1514 1515 1516
	return error;

}

L
Linus Torvalds 已提交
1517 1518 1519 1520
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

1521
static inline int xfrm_get_tos(const struct flowi *fl, int family)
1522 1523 1524
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;
L
Linus Torvalds 已提交
1525

1526 1527 1528 1529 1530 1531 1532 1533 1534 1535
	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}

1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546
/*
 * Flow cache ->get() callback for bundles.
 *
 * Returns @flo with an extra dst reference if the cached bundle may be
 * reused, or NULL if it has to be resolved again:
 *  - a dummy bundle (no route) that has transforms pending,
 *  - a bundle flagged DST_XFRM_QUEUE,
 *  - a real bundle that stale_bundle() reports as stale.
 */
static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (!xdst->route) {
		/* Dummy bundle - if it has xfrms we were not
		 * able to build bundle as template resolution failed.
		 * It means we need to try again resolving. */
		if (xdst->num_xfrms > 0)
			return NULL;
	} else if ((dst->flags & DST_XFRM_QUEUE) || stale_bundle(dst)) {
		/* Queueing bundle, or a real bundle gone stale */
		return NULL;
	}

	dst_hold(dst);
	return flo;
}

static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (!xdst->route)
		return 0;
	if (stale_bundle(dst))
		return 0;

	return 1;
}

/* Flow cache ->delete() callback: free the dst embedded in the bundle. */
static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);

	dst_free(&xdst->u.dst);
}

/*
 * Flow cache callbacks for bundle objects; installed into xdst->flo.ops
 * by xfrm_alloc_dst().
 */
static const struct flow_cache_ops xfrm_bundle_fc_ops = {
	.get = xfrm_bundle_flo_get,
	.check = xfrm_bundle_flo_check,
	.delete = xfrm_bundle_flo_delete,
};

1586
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
L
Linus Torvalds 已提交
1587 1588
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1589
	struct dst_ops *dst_ops;
1590 1591 1592 1593 1594
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

1595 1596 1597 1598
	switch (family) {
	case AF_INET:
		dst_ops = &net->xfrm.xfrm4_dst_ops;
		break;
E
Eric Dumazet 已提交
1599
#if IS_ENABLED(CONFIG_IPV6)
1600 1601 1602 1603 1604 1605 1606
	case AF_INET6:
		dst_ops = &net->xfrm.xfrm6_dst_ops;
		break;
#endif
	default:
		BUG();
	}
1607
	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);
1608

1609
	if (likely(xdst)) {
1610 1611 1612
		struct dst_entry *dst = &xdst->u.dst;

		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
1613
		xdst->flo.ops = &xfrm_bundle_fc_ops;
1614
	} else
1615
		xdst = ERR_PTR(-ENOBUFS);
1616

1617 1618
	xfrm_policy_put_afinfo(afinfo);

1619 1620 1621
	return xdst;
}

1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638
/*
 * Run the address-family specific ->init_path() hook for @path.
 * The family is taken from @dst's ops.  Returns the hook's result, or
 * -EINVAL if no afinfo is registered for that family.
 */
static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo;
	int rc;

	afinfo = xfrm_policy_get_afinfo(dst->ops->family);
	if (!afinfo)
		return -EINVAL;

	rc = afinfo->init_path(path, dst, nfheader_len);
	xfrm_policy_put_afinfo(afinfo);

	return rc;
}

H
Herbert Xu 已提交
1639
static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1640
				const struct flowi *fl)
1641 1642 1643 1644 1645 1646
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
L
Linus Torvalds 已提交
1647
		return -EINVAL;
1648

H
Herbert Xu 已提交
1649
	err = afinfo->fill_dst(xdst, dev, fl);
1650

L
Linus Torvalds 已提交
1651
	xfrm_policy_put_afinfo(afinfo);
1652

L
Linus Torvalds 已提交
1653 1654 1655
	return err;
}

1656

1657 1658 1659 1660 1661 1662
/* Allocate chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... Shortly, bundle a bundle.
 */

/*
 * Build one xfrm_dst per state in @xfrm[0..nx-1] and chain them through
 * ->child, ending at the route that the last state sends packets to.
 *
 * @dst is the route of the original flow; states not in transport mode
 * replace it with the result of xfrm_dst_lookup() for the remaining
 * chain.
 *
 * Returns the head of the chain (dst0) or an ERR_PTR().  On error the
 * states from index i onward are released and a partially built chain
 * is freed through dst0.
 */
static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
1663
					    const struct flowi *fl,
1664 1665
					    struct dst_entry *dst)
{
1666
	struct net *net = xp_net(policy);
1667 1668
	unsigned long now = jiffies;
	struct net_device *dev;
1669
	struct xfrm_mode *inner_mode;
1670 1671 1672 1673 1674
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
1675
	int nfheader_len = 0;
1676 1677 1678
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;
1679 1680 1681
	xfrm_address_t saddr, daddr;

	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1682 1683 1684 1685 1686 1687 1688 1689 1690

	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	for (; i < nx; i++) {
1691
		struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1692 1693 1694 1695 1696 1697 1698 1699
		/* Only address arithmetic; not dereferenced before the
		 * IS_ERR() check below.
		 */
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710
		/* States without a selector family derive the inner mode
		 * from the current family.
		 */
		if (xfrm[i]->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(xfrm[i],
							xfrm_af2proto(family));
			if (!inner_mode) {
				err = -EAFNOSUPPORT;
				dst_release(dst);
				goto put_states;
			}
		} else
			inner_mode = xfrm[i]->inner_mode;

1711 1712 1713 1714 1715 1716 1717 1718
		/* First entry becomes the head; later ones hang off the
		 * previous entry's ->child.
		 */
		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
1719
		dst_copy_metrics(dst1, dst);
1720 1721 1722

		/* Non-transport modes change the addresses, so the route
		 * for the rest of the chain is looked up afresh.
		 */
		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
D
David Ahern 已提交
1723 1724
			dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
					      &saddr, &daddr, family);
1725 1726 1727 1728 1729 1730 1731
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
1732
		xdst->xfrm_genid = xfrm[i]->genid;
1733

1734
		dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
1735 1736 1737 1738
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
1739
		dst1->output = inner_mode->afinfo->output;
1740 1741 1742 1743 1744

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
1745 1746
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757
		trailer_len += xfrm[i]->props.trailer_len;
	}

	/* Terminate the chain with the final route. */
	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

1758
	/* NOTE(review): the return value of xfrm_init_path() is ignored here. */
	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1759 1760 1761 1762 1763
	xfrm_init_pmtu(dst_prev);

	/* Second pass from the head: fill in each entry and give it the
	 * header/trailer room still needed from that point on.
	 */
	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

H
Herbert Xu 已提交
1764
		err = xfrm_fill_dst(xdst, dev, fl);
1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	/* Release the states from index i to the end. */
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}

1787
#ifdef CONFIG_XFRM_SUB_POLICY
1788
static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
1789 1790 1791 1792 1793 1794
{
	if (!*target) {
		*target = kmalloc(size, GFP_ATOMIC);
		if (!*target)
			return -ENOMEM;
	}
1795

1796 1797 1798
	memcpy(*target, src, size);
	return 0;
}
1799
#endif
1800

1801 1802
/*
 * With CONFIG_XFRM_SUB_POLICY: store a copy of selector @sel in the
 * bundle's ->partner buffer (allocated on first use).  Returns 0 or
 * -ENOMEM.  Without sub-policy support this does nothing and returns 0.
 */
static int xfrm_dst_update_parent(struct dst_entry *dst,
				  const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->partner);

	return xfrm_dst_alloc_copy(slot, sel, sizeof(*sel));
#else
	return 0;
#endif
}

1813 1814
/*
 * With CONFIG_XFRM_SUB_POLICY: store a copy of flow @fl in the bundle's
 * ->origin buffer (allocated on first use).  Returns 0 or -ENOMEM.
 * Without sub-policy support this does nothing and returns 0.
 */
static int xfrm_dst_update_origin(struct dst_entry *dst,
				  const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->origin);

	return xfrm_dst_alloc_copy(slot, fl, sizeof(*fl));
#else
	return 0;
#endif
}
L
Linus Torvalds 已提交
1823

1824
/*
 * Complete the policy array @pols for flow @fl and count the transforms.
 *
 * On entry pols[0] holds the policy found by the caller (possibly NULL
 * or an ERR_PTR) and *@num_pols its count.  With CONFIG_XFRM_SUB_POLICY
 * an allowing, non-main pols[0] causes the main policy to be looked up
 * into pols[1].
 *
 * On return *@num_xfrms is the sum of the templates of all policies,
 * or -1 if any policy's action is not XFRM_POLICY_ALLOW.
 *
 * Returns 0 on success (including "no policy": both counters set to 0)
 * or the error encoded in pols[0] / pols[1].
 */
static int xfrm_expand_policies(const struct flowi *fl, u16 family,
1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851
				struct xfrm_policy **pols,
				int *num_pols, int *num_xfrms)
{
	int i;

	if (*num_pols == 0 || !pols[0]) {
		*num_pols = 0;
		*num_xfrms = 0;
		return 0;
	}
	/* NOTE(review): *num_pols is left unchanged on this error path
	 * while pols[0] is an ERR_PTR -- confirm callers do not put pols[]
	 * after a failure.
	 */
	if (IS_ERR(pols[0]))
		return PTR_ERR(pols[0]);

	*num_xfrms = pols[0]->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
						    XFRM_POLICY_TYPE_MAIN,
						    fl, family,
						    XFRM_POLICY_OUT);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				/* NOTE(review): the policies are put here but
				 * *num_pols keeps its value -- confirm callers
				 * do not put them a second time.
				 */
				xfrm_pols_put(pols, *num_pols);
				return PTR_ERR(pols[1]);
			}
1852
			(*num_pols)++;
1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869
			(*num_xfrms) += pols[1]->xfrm_nr;
		}
	}
#endif
	/* Any non-allowing policy makes the transform count meaningless. */
	for (i = 0; i < *num_pols; i++) {
		if (pols[i]->action != XFRM_POLICY_ALLOW) {
			*num_xfrms = -1;
			break;
		}
	}

	return 0;

}

/*
 * Resolve the templates of @pols into states and build a bundle on top
 * of @dst_orig.
 *
 * Returns the new bundle; ERR_PTR(err) when template resolution
 * returned err <= 0 (which is NULL for err == 0); or an ERR_PTR() when
 * bundle creation or storing the parent selector / origin flow failed.
 * The matching XFRM MIB counter is bumped on each failure, except for
 * resolution results 0 and -EAGAIN.
 */
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
			       const struct flowi *fl, u16 family,
			       struct dst_entry *dst_orig)
{
	struct net *net = xp_net(pols[0]);
	struct xfrm_state *states[XFRM_MAX_DEPTH];
	struct dst_entry *bundle;
	struct xfrm_dst *xdst;
	int nx;
	int err;

	/* Try to instantiate a bundle */
	nx = xfrm_tmpl_resolve(pols, num_pols, fl, states, family);
	if (nx <= 0) {
		if (nx < 0 && nx != -EAGAIN)
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
		return ERR_PTR(nx);
	}

	bundle = xfrm_bundle_create(pols[0], states, nx, fl, dst_orig);
	if (IS_ERR(bundle)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
		return ERR_CAST(bundle);
	}

	xdst = (struct xfrm_dst *)bundle;
	xdst->num_xfrms = nx;

	err = (num_pols > 1) ?
		xfrm_dst_update_parent(bundle, &pols[1]->selector) :
		xfrm_dst_update_origin(bundle, fl);
	if (unlikely(err)) {
		dst_free(bundle);
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
		return ERR_PTR(err);
	}

	/* Record which policies, at which generation, produced this bundle. */
	xdst->num_pols = num_pols;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
	xdst->policy_genid = atomic_read(&pols[0]->genid);

	return xdst;
}

1912 1913 1914 1915 1916 1917
/*
 * Timer callback for a policy's hold queue; @arg is the xfrm_policy.
 *
 * The first queued skb is used to probe whether the flow resolves to a
 * real bundle now.  If the result is still a queueing bundle, the
 * timeout is doubled and the timer re-armed, or the queue is purged
 * once XFRM_QUEUE_TMO_MAX is reached.  Otherwise every queued skb is
 * looked up again and sent with dst_output().
 *
 * Every exit path ends with xfrm_pol_put(pol).
 */
static void xfrm_policy_queue_process(unsigned long arg)
{
	struct sk_buff *skb;
	struct sock *sk;
	struct dst_entry *dst;
	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
1918
	struct net *net = xp_net(pol);
1919 1920 1921 1922 1923 1924
	struct xfrm_policy_queue *pq = &pol->polq;
	struct flowi fl;
	struct sk_buff_head list;

	spin_lock(&pq->hold_queue.lock);
	skb = skb_peek(&pq->hold_queue);
1925 1926 1927 1928
	if (!skb) {
		spin_unlock(&pq->hold_queue.lock);
		goto out;
	}
1929 1930 1931 1932 1933 1934
	dst = skb_dst(skb);
	sk = skb->sk;
	xfrm_decode_session(skb, &fl, dst->ops->family);
	spin_unlock(&pq->hold_queue.lock);

	/* Probe lookup on the path of the first packet's dst. */
	dst_hold(dst->path);
1935
	dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
1936 1937 1938 1939 1940 1941 1942 1943 1944 1945
	if (IS_ERR(dst))
		goto purge_queue;

	if (dst->flags & DST_XFRM_QUEUE) {
		/* Still unresolved: back off exponentially up to the cap. */
		dst_release(dst);

		if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
			goto purge_queue;

		pq->timeout = pq->timeout << 1;
1946 1947 1948
		/* mod_timer() returning 0 means the timer was not pending;
		 * the newly armed timer gets its own policy reference.
		 */
		if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
			xfrm_pol_hold(pol);
	goto out;
1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964
	}

	dst_release(dst);

	__skb_queue_head_init(&list);

	/* Move the whole queue to a private list so it can be drained
	 * without holding the queue lock.
	 */
	spin_lock(&pq->hold_queue.lock);
	pq->timeout = 0;
	skb_queue_splice_init(&pq->hold_queue, &list);
	spin_unlock(&pq->hold_queue.lock);

	while (!skb_queue_empty(&list)) {
		skb = __skb_dequeue(&list);

		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
		dst_hold(skb_dst(skb)->path);
1965
		dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
1966 1967 1968 1969 1970 1971 1972 1973 1974
		if (IS_ERR(dst)) {
			kfree_skb(skb);
			continue;
		}

		/* Replace the skb's old dst with the resolved one. */
		nf_reset(skb);
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);

1975
		dst_output(net, skb->sk, skb);
1976 1977
	}

1978 1979
out:
	xfrm_pol_put(pol);
1980 1981 1982 1983
	return;

purge_queue:
	/* Give up: reset the backoff and drop everything queued. */
	pq->timeout = 0;
1984
	skb_queue_purge(&pq->hold_queue);
1985
	xfrm_pol_put(pol);
1986 1987
}

E
Eric W. Biederman 已提交
1988
/*
 * Output handler installed on queueing dummy bundles: park @skb on the
 * hold queue of the bundle's first policy and (re)arm the hold timer.
 *
 * Returns 0 when the skb was queued or dropped because
 * skb_fclone_busy() was true, -EAGAIN when the skb was dropped because
 * the queue already holds more than XFRM_MAX_QUEUE_LEN packets.
 */
static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1989 1990 1991 1992
{
	unsigned long sched_next;
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
1993 1994
	struct xfrm_policy *pol = xdst->pols[0];
	struct xfrm_policy_queue *pq = &pol->polq;
1995

1996
	if (unlikely(skb_fclone_busy(sk, skb))) {
1997 1998 1999
		kfree_skb(skb);
		return 0;
	}
2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017

	/* Queue length is read here without taking the queue lock. */
	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
		kfree_skb(skb);
		return -EAGAIN;
	}

	skb_dst_force(skb);

	spin_lock_bh(&pq->hold_queue.lock);

	/* A zero timeout means no backoff is in progress: start at the minimum. */
	if (!pq->timeout)
		pq->timeout = XFRM_QUEUE_TMO_MIN;

	sched_next = jiffies + pq->timeout;

	/* If a timer was pending, keep its earlier expiry and drop the
	 * policy reference it was holding.
	 */
	if (del_timer(&pq->hold_timer)) {
		if (time_before(pq->hold_timer.expires, sched_next))
			sched_next = pq->hold_timer.expires;
2018
		xfrm_pol_put(pol);
2019 2020 2021
	}

	__skb_queue_tail(&pq->hold_queue, skb);
2022 2023
	/* mod_timer() returning 0 means the timer was not pending; the
	 * newly armed timer gets its own policy reference.
	 */
	if (!mod_timer(&pq->hold_timer, sched_next))
		xfrm_pol_hold(pol);
2024 2025 2026 2027 2028 2029 2030

	spin_unlock_bh(&pq->hold_queue.lock);

	return 0;
}

/*
 * Allocate a dummy bundle for a flow whose policies could not be turned
 * into a real bundle.
 *
 * The plain, unrouted xfrm_dst is returned as is unless the caller
 * asked for queueing (XFRM_LOOKUP_QUEUE), larval drop is disabled and
 * there are transforms to wait for.  In that case the bundle is
 * attached to xflo->dst_orig, flagged DST_XFRM_QUEUE and given
 * xdst_queue_output() as its output handler.
 *
 * Returns the bundle or an ERR_PTR().
 */
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
2031
						 struct xfrm_flo *xflo,
2032 2033 2034 2035 2036 2037
						 const struct flowi *fl,
						 int num_xfrms,
						 u16 family)
{
	int err;
	struct net_device *dev;
2038
	struct dst_entry *dst;
2039 2040 2041 2042 2043 2044 2045
	struct dst_entry *dst1;
	struct xfrm_dst *xdst;

	xdst = xfrm_alloc_dst(net, family);
	if (IS_ERR(xdst))
		return xdst;

2046 2047 2048
	/* No queueing wanted or possible: hand back the bare bundle. */
	if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
	    net->xfrm.sysctl_larval_drop ||
	    num_xfrms <= 0)
2049 2050
		return xdst;

2051
	dst = xflo->dst_orig;
2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088
	dst1 = &xdst->u.dst;
	/* One reference for xdst->route ... */
	dst_hold(dst);
	xdst->route = dst;

	dst_copy_metrics(dst1, dst);

	dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
	dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
	dst1->lastuse = jiffies;

	dst1->input = dst_discard;
	dst1->output = xdst_queue_output;

	/* ... and a second one taken before setting ->child and ->path. */
	dst_hold(dst);
	dst1->child = dst;
	dst1->path = dst;

	/* NOTE(review): the return value of xfrm_init_path() is ignored here. */
	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	err = xfrm_fill_dst(xdst, dev, fl);
	if (err)
		goto free_dst;

out:
	return xdst;

free_dst:
	dst_release(dst1);
	xdst = ERR_PTR(err);
	goto out;
}

2089
static struct flow_cache_object *
2090
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
2091 2092
		   struct flow_cache_object *oldflo, void *ctx)
{
2093
	struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct xfrm_dst *xdst, *new_xdst;
	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;

	/* Check if the policies from old bundle are usable */
	xdst = NULL;
	if (oldflo) {
		xdst = container_of(oldflo, struct xfrm_dst, flo);
		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		pol_dead = 0;
		for (i = 0; i < num_pols; i++) {
			pols[i] = xdst->pols[i];
			pol_dead |= pols[i]->walk.dead;
		}
		if (pol_dead) {
			dst_free(&xdst->u.dst);
			xdst = NULL;
			num_pols = 0;
			num_xfrms = 0;
			oldflo = NULL;
		}
	}

	/* Resolve policies to use if we couldn't get them from
	 * previous cache entry */
	if (xdst == NULL) {
		num_pols = 1;
2122 2123
		pols[0] = __xfrm_policy_lookup(net, fl, family,
					       flow_to_policy_dir(dir));
2124 2125 2126 2127 2128 2129 2130 2131 2132 2133
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto inc_error;
		if (num_pols == 0)
			return NULL;
		if (num_xfrms <= 0)
			goto make_dummy_bundle;
	}

2134 2135
	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
						  xflo->dst_orig);
2136 2137 2138 2139 2140 2141 2142 2143
	if (IS_ERR(new_xdst)) {
		err = PTR_ERR(new_xdst);
		if (err != -EAGAIN)
			goto error;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		dst_hold(&xdst->u.dst);
		return oldflo;
2144 2145 2146 2147 2148 2149 2150
	} else if (new_xdst == NULL) {
		num_xfrms = 0;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		xdst->num_xfrms = 0;
		dst_hold(&xdst->u.dst);
		return oldflo;
2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168
	}

	/* Kill the previous bundle */
	if (xdst) {
		/* The policies were stolen for newly generated bundle */
		xdst->num_pols = 0;
		dst_free(&xdst->u.dst);
	}

	/* Flow cache does not have reference, it dst_free()'s,
	 * but we do need to return one reference for original caller */
	dst_hold(&new_xdst->u.dst);
	return &new_xdst->flo;

make_dummy_bundle:
	/* We found policies, but there's no bundles to instantiate:
	 * either because the policy blocks, has no transformations or
	 * we could not build template (no xfrm_states).*/
2169
	xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
2170 2171 2172 2173 2174 2175
	if (IS_ERR(xdst)) {
		xfrm_pols_put(pols, num_pols);
		return ERR_CAST(xdst);
	}
	xdst->num_pols = num_pols;
	xdst->num_xfrms = num_xfrms;
2176
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189

	dst_hold(&xdst->u.dst);
	return &xdst->flo;

inc_error:
	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
	if (xdst != NULL)
		dst_free(&xdst->u.dst);
	else
		xfrm_pols_put(pols, num_pols);
	return ERR_PTR(err);
}
L
Linus Torvalds 已提交
2190

2191 2192 2193 2194 2195 2196 2197 2198
/*
 * Ask the address family's afinfo for a blackhole route built from
 * @dst_orig.  If no afinfo is registered for @family, @dst_orig is
 * released here and ERR_PTR(-EINVAL) is returned.
 */
static struct dst_entry *make_blackhole(struct net *net, u16 family,
					struct dst_entry *dst_orig)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_entry *blackhole;

	if (!afinfo) {
		dst_release(dst_orig);
		return ERR_PTR(-EINVAL);
	}

	blackhole = afinfo->blackhole_route(net, dst_orig);
	xfrm_policy_put_afinfo(afinfo);

	return blackhole;
}

L
Linus Torvalds 已提交
2208 2209 2210 2211 2212
/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
2213 2214
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl,
2215
			      const struct sock *sk, int flags)
L
Linus Torvalds 已提交
2216
{
2217
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2218 2219
	struct flow_cache_object *flo;
	struct xfrm_dst *xdst;
2220
	struct dst_entry *dst, *route;
2221
	u16 family = dst_orig->ops->family;
2222
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
2223
	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
2224

2225 2226 2227
	dst = NULL;
	xdst = NULL;
	route = NULL;
2228

E
Eric Dumazet 已提交
2229
	sk = sk_const_to_full_sk(sk);
2230
	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
2231 2232 2233 2234 2235
		num_pols = 1;
		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
2236
			goto dropdst;
2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250

		if (num_pols) {
			if (num_xfrms <= 0) {
				drop_pols = num_pols;
				goto no_transform;
			}

			xdst = xfrm_resolve_and_create_bundle(
					pols, num_pols, fl,
					family, dst_orig);
			if (IS_ERR(xdst)) {
				xfrm_pols_put(pols, num_pols);
				err = PTR_ERR(xdst);
				goto dropdst;
2251 2252 2253 2254
			} else if (xdst == NULL) {
				num_xfrms = 0;
				drop_pols = num_pols;
				goto no_transform;
2255 2256
			}

2257 2258
			dst_hold(&xdst->u.dst);
			xdst->u.dst.flags |= DST_NOCACHE;
2259
			route = xdst->route;
2260
		}
2261
	}
L
Linus Torvalds 已提交
2262

2263
	if (xdst == NULL) {
2264 2265 2266 2267 2268
		struct xfrm_flo xflo;

		xflo.dst_orig = dst_orig;
		xflo.flags = flags;

L
Linus Torvalds 已提交
2269
		/* To accelerate a bit...  */
2270
		if ((dst_orig->flags & DST_NOXFRM) ||
A
Alexey Dobriyan 已提交
2271
		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
2272
			goto nopol;
L
Linus Torvalds 已提交
2273

2274
		flo = flow_cache_lookup(net, fl, family, dir,
2275
					xfrm_bundle_lookup, &xflo);
2276 2277
		if (flo == NULL)
			goto nopol;
2278
		if (IS_ERR(flo)) {
2279
			err = PTR_ERR(flo);
2280
			goto dropdst;
2281
		}
2282 2283 2284 2285
		xdst = container_of(flo, struct xfrm_dst, flo);

		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
2286
		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299
		route = xdst->route;
	}

	dst = &xdst->u.dst;
	if (route == NULL && num_xfrms > 0) {
		/* The only case when xfrm_bundle_lookup() returns a
		 * bundle with null route, is when the template could
		 * not be resolved. It means policies are there, but
		 * bundle could not be created, since we don't yet
		 * have the xfrm_state's. We need to wait for KM to
		 * negotiate new SA's or bail out with error.*/
		if (net->xfrm.sysctl_larval_drop) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
2300 2301
			err = -EREMOTE;
			goto error;
2302 2303
		}

2304
		err = -EAGAIN;
2305 2306 2307

		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
		goto error;
L
Linus Torvalds 已提交
2308 2309
	}

2310 2311
no_transform:
	if (num_pols == 0)
2312
		goto nopol;
L
Linus Torvalds 已提交
2313

2314 2315 2316
	if ((flags & XFRM_LOOKUP_ICMP) &&
	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
		err = -ENOENT;
2317
		goto error;
2318
	}
2319

2320 2321
	for (i = 0; i < num_pols; i++)
		pols[i]->curlft.use_time = get_seconds();
2322

2323
	if (num_xfrms < 0) {
L
Linus Torvalds 已提交
2324
		/* Prohibit the flow */
A
Alexey Dobriyan 已提交
2325
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
2326 2327
		err = -EPERM;
		goto error;
2328 2329 2330 2331 2332 2333
	} else if (num_xfrms > 0) {
		/* Flow transformed */
		dst_release(dst_orig);
	} else {
		/* Flow passes untransformed */
		dst_release(dst);
2334
		dst = dst_orig;
L
Linus Torvalds 已提交
2335
	}
2336 2337
ok:
	xfrm_pols_put(pols, drop_pols);
G
Gao feng 已提交
2338 2339 2340
	if (dst && dst->xfrm &&
	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
		dst->flags |= DST_XFRM_TUNNEL;
2341
	return dst;
L
Linus Torvalds 已提交
2342

2343
nopol:
2344 2345
	if (!(flags & XFRM_LOOKUP_ICMP)) {
		dst = dst_orig;
2346
		goto ok;
2347
	}
2348
	err = -ENOENT;
L
Linus Torvalds 已提交
2349
error:
2350
	dst_release(dst);
2351
dropdst:
2352 2353
	if (!(flags & XFRM_LOOKUP_KEEP_DST_REF))
		dst_release(dst_orig);
2354
	xfrm_pols_put(pols, drop_pols);
2355
	return ERR_PTR(err);
L
Linus Torvalds 已提交
2356 2357 2358
}
EXPORT_SYMBOL(xfrm_lookup);

2359 2360 2361 2362 2363
/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
 * Otherwise we may send out blackholed packets.
 */
struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
				    const struct flowi *fl,
2364
				    const struct sock *sk, int flags)
2365
{
2366
	struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
2367 2368
					    flags | XFRM_LOOKUP_QUEUE |
					    XFRM_LOOKUP_KEEP_DST_REF);
2369 2370 2371 2372 2373 2374 2375 2376

	if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
		return make_blackhole(net, dst_orig->ops->family, dst_orig);

	return dst;
}
EXPORT_SYMBOL(xfrm_lookup_route);

2377
static inline int
2378
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
2379 2380 2381 2382 2383 2384 2385 2386
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
2387
	return x->type->reject(x, skb, fl);
2388 2389
}

L
Linus Torvalds 已提交
2390 2391 2392 2393 2394 2395 2396
/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we make this in maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have policy cached at them.
 */

static inline int
2397
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
L
Linus Torvalds 已提交
2398 2399 2400
	      unsigned short family)
{
	if (xfrm_state_kern(x))
2401
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
L
Linus Torvalds 已提交
2402 2403 2404 2405
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
2406
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
2407
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
2408 2409
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
L
Linus Torvalds 已提交
2410 2411
}

2412 2413 2414 2415 2416 2417 2418
/*
 * 0 or more than 0 is returned when validation is succeeded (either bypass
 * because of optional transport mode, or next index of the mathced secpath
 * state with the template.
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
L
Linus Torvalds 已提交
2419
static inline int
2420
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
L
Linus Torvalds 已提交
2421 2422 2423 2424 2425
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
2426
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
L
Linus Torvalds 已提交
2427 2428 2429 2430
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
2431
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
L
Linus Torvalds 已提交
2432
			return ++idx;
2433 2434 2435
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2-idx;
L
Linus Torvalds 已提交
2436
			break;
2437
		}
L
Linus Torvalds 已提交
2438 2439 2440 2441
	}
	return start;
}

2442 2443
int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
L
Linus Torvalds 已提交
2444 2445
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2446
	int err;
L
Linus Torvalds 已提交
2447 2448 2449 2450

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

2451
	afinfo->decode_session(skb, fl, reverse);
2452
	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
L
Linus Torvalds 已提交
2453
	xfrm_policy_put_afinfo(afinfo);
2454
	return err;
L
Linus Torvalds 已提交
2455
}
2456
EXPORT_SYMBOL(__xfrm_decode_session);
L
Linus Torvalds 已提交
2457

2458
static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
L
Linus Torvalds 已提交
2459 2460
{
	for (; k < sp->len; k++) {
2461
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
2462
			*idxp = k;
L
Linus Torvalds 已提交
2463
			return 1;
2464
		}
L
Linus Torvalds 已提交
2465 2466 2467 2468 2469
	}

	return 0;
}

2470
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
L
Linus Torvalds 已提交
2471 2472
			unsigned short family)
{
2473
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
2474
	struct xfrm_policy *pol;
2475 2476 2477 2478
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
2479
	int reverse;
L
Linus Torvalds 已提交
2480
	struct flowi fl;
2481
	u8 fl_dir;
2482
	int xerr_idx = -1;
L
Linus Torvalds 已提交
2483

2484 2485 2486 2487
	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

2488
	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
A
Alexey Dobriyan 已提交
2489
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
L
Linus Torvalds 已提交
2490
		return 0;
2491 2492
	}

2493
	nf_nat_decode_session(skb, &fl, family);
L
Linus Torvalds 已提交
2494 2495 2496 2497 2498

	/* First, check used SA against their selectors. */
	if (skb->sp) {
		int i;

2499
		for (i = skb->sp->len-1; i >= 0; i--) {
2500
			struct xfrm_state *x = skb->sp->xvec[i];
2501
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
A
Alexey Dobriyan 已提交
2502
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
L
Linus Torvalds 已提交
2503
				return 0;
2504
			}
L
Linus Torvalds 已提交
2505 2506 2507 2508
		}
	}

	pol = NULL;
E
Eric Dumazet 已提交
2509
	sk = sk_to_full_sk(sk);
2510
	if (sk && sk->sk_policy[dir]) {
2511
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
2512
		if (IS_ERR(pol)) {
A
Alexey Dobriyan 已提交
2513
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2514
			return 0;
2515
		}
2516
	}
L
Linus Torvalds 已提交
2517

2518 2519 2520 2521 2522 2523 2524 2525 2526 2527
	if (!pol) {
		struct flow_cache_object *flo;

		flo = flow_cache_lookup(net, &fl, family, fl_dir,
					xfrm_policy_lookup, NULL);
		if (IS_ERR_OR_NULL(flo))
			pol = ERR_CAST(flo);
		else
			pol = container_of(flo, struct xfrm_policy, flo);
	}
L
Linus Torvalds 已提交
2528

2529
	if (IS_ERR(pol)) {
A
Alexey Dobriyan 已提交
2530
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2531
		return 0;
2532
	}
2533

2534
	if (!pol) {
2535
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
2536
			xfrm_secpath_reject(xerr_idx, skb, &fl);
A
Alexey Dobriyan 已提交
2537
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
2538 2539 2540 2541
			return 0;
		}
		return 1;
	}
L
Linus Torvalds 已提交
2542

2543
	pol->curlft.use_time = get_seconds();
L
Linus Torvalds 已提交
2544

2545
	pols[0] = pol;
2546
	npols++;
2547 2548
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
2549
		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
2550 2551 2552
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
2553
			if (IS_ERR(pols[1])) {
A
Alexey Dobriyan 已提交
2554
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2555
				return 0;
2556
			}
2557
			pols[1]->curlft.use_time = get_seconds();
2558
			npols++;
2559 2560 2561 2562
		}
	}
#endif

L
Linus Torvalds 已提交
2563 2564 2565
	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
2566
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
2567
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
2568 2569
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
L
Linus Torvalds 已提交
2570 2571 2572 2573 2574
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

2575 2576
		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
2577
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
A
Alexey Dobriyan 已提交
2578
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2579
				goto reject;
2580 2581
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
A
Alexey Dobriyan 已提交
2582
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
2583
				goto reject_error;
2584
			}
2585 2586 2587 2588
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
2589
		if (npols > 1) {
F
Fan Du 已提交
2590
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
2591 2592
			tpp = stp;
		}
2593

L
Linus Torvalds 已提交
2594 2595 2596 2597 2598 2599
		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
2600 2601
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
2602
			if (k < 0) {
2603 2604 2605
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
A
Alexey Dobriyan 已提交
2606
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
L
Linus Torvalds 已提交
2607
				goto reject;
2608
			}
L
Linus Torvalds 已提交
2609 2610
		}

2611
		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
A
Alexey Dobriyan 已提交
2612
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
L
Linus Torvalds 已提交
2613
			goto reject;
2614
		}
L
Linus Torvalds 已提交
2615

2616
		xfrm_pols_put(pols, npols);
L
Linus Torvalds 已提交
2617 2618
		return 1;
	}
A
Alexey Dobriyan 已提交
2619
	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
L
Linus Torvalds 已提交
2620 2621

reject:
2622
	xfrm_secpath_reject(xerr_idx, skb, &fl);
2623 2624
reject_error:
	xfrm_pols_put(pols, npols);
L
Linus Torvalds 已提交
2625 2626 2627 2628 2629 2630
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
2631
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
2632
	struct flowi fl;
E
Eric Dumazet 已提交
2633
	struct dst_entry *dst;
E
Eric Dumazet 已提交
2634
	int res = 1;
L
Linus Torvalds 已提交
2635

2636
	if (xfrm_decode_session(skb, &fl, family) < 0) {
2637
		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
L
Linus Torvalds 已提交
2638
		return 0;
2639
	}
L
Linus Torvalds 已提交
2640

2641
	skb_dst_force(skb);
E
Eric Dumazet 已提交
2642

2643
	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
2644
	if (IS_ERR(dst)) {
E
Eric Dumazet 已提交
2645
		res = 0;
2646 2647
		dst = NULL;
	}
E
Eric Dumazet 已提交
2648 2649
	skb_dst_set(skb, dst);
	return res;
L
Linus Torvalds 已提交
2650 2651 2652
}
EXPORT_SYMBOL(__xfrm_route_forward);

2653 2654
/* Optimize later using cookies and generation ids. */

L
Linus Torvalds 已提交
2655 2656
static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
2657
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2658 2659 2660 2661 2662 2663 2664
	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
	 * get validated by dst_ops->check on every use.  We do this
	 * because when a normal route referenced by an XFRM dst is
	 * obsoleted we do not go looking around for all parent
	 * referencing XFRM dsts so that we can invalidate them.  It
	 * is just too much work.  Instead we make the checks here on
	 * every use.  For example:
2665 2666 2667 2668 2669 2670 2671 2672 2673
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
2674 2675 2676
	 * dst which causes it's ->obsolete field to be set to
	 * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
	 * this, we want to force a new route lookup.
2677
	 */
2678 2679 2680
	if (dst->obsolete < 0 && !stale_bundle(dst))
		return dst;

L
Linus Torvalds 已提交
2681 2682 2683 2684 2685
	return NULL;
}

/* Non-zero when the bundle headed by @dst fails xfrm_bundle_ok(). */
static int stale_bundle(struct dst_entry *dst)
{
	struct xfrm_dst *bundle = (struct xfrm_dst *)dst;

	return !xfrm_bundle_ok(bundle);
}

H
Herbert Xu 已提交
2689
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
L
Linus Torvalds 已提交
2690 2691
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2692
		dst->dev = dev_net(dev)->loopback_dev;
2693
		dev_hold(dst->dev);
L
Linus Torvalds 已提交
2694 2695 2696
		dev_put(dev);
	}
}
H
Herbert Xu 已提交
2697
EXPORT_SYMBOL(xfrm_dst_ifdown);
L
Linus Torvalds 已提交
2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714

/*
 * link_failure hook installed into an afinfo's dst_ops by
 * xfrm_policy_register_afinfo() when none is provided.  Deliberately a
 * no-op.
 */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such a dst must be popped before it reaches the point of failure. */
}

/*
 * negative_advice hook: an obsolete dst is released and NULL returned
 * so the caller looks up a new route; anything else (including NULL)
 * is handed back unchanged.
 */
static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (!dst || !dst->obsolete)
		return dst;

	dst_release(dst);
	return NULL;
}

2715
/* Drop cached bundles for @net by flushing its flow cache. */
void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
2720 2721 2722

/*
 * Same purpose as xfrm_garbage_collect() but uses the deferred flow
 * cache flush; this is the default afinfo->garbage_collect hook.
 */
static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred(net);
}

2726
static void xfrm_init_pmtu(struct dst_entry *dst)
L
Linus Torvalds 已提交
2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

2743
		dst_metric_set(dst, RTAX_MTU, pmtu);
L
Linus Torvalds 已提交
2744 2745 2746 2747 2748 2749 2750
	} while ((dst = dst->next));
}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

2751
static int xfrm_bundle_ok(struct xfrm_dst *first)
L
Linus Torvalds 已提交
2752 2753 2754 2755 2756
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

2757
	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
L
Linus Torvalds 已提交
2758 2759 2760
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

2761 2762 2763
	if (dst->flags & DST_XFRM_QUEUE)
		return 1;

L
Linus Torvalds 已提交
2764 2765 2766 2767 2768 2769 2770
	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
2771 2772
		if (xdst->xfrm_genid != dst->xfrm->genid)
			return 0;
2773 2774
		if (xdst->num_pols > 0 &&
		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2775
			return 0;
2776

L
Linus Torvalds 已提交
2777 2778 2779 2780 2781 2782
		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

2783
		if (!dst_check(xdst->route, xdst->route_cookie))
L
Linus Torvalds 已提交
2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
2804
		dst_metric_set(dst, RTAX_MTU, mtu);
L
Linus Torvalds 已提交
2805 2806 2807 2808

		if (last == first)
			break;

2809
		last = (struct xfrm_dst *)last->u.dst.next;
L
Linus Torvalds 已提交
2810 2811 2812 2813 2814 2815
		last->child_mtu_cached = mtu;
	}

	return 1;
}

2816 2817 2818 2819 2820
/* default_advmss hook: report the advmss metric of the bundle's path. */
static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
	const struct dst_entry *path = dst->path;

	return dst_metric_advmss(path);
}

2821
static unsigned int xfrm_mtu(const struct dst_entry *dst)
2822
{
2823 2824 2825
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst_mtu(dst->path);
2826 2827
}

2828 2829 2830
static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
2831
{
2832
	return dst->path->ops->neigh_lookup(dst, skb, daddr);
2833 2834
}

L
Linus Torvalds 已提交
2835 2836 2837 2838 2839 2840 2841
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
E
Eric Dumazet 已提交
2842
	spin_lock(&xfrm_policy_afinfo_lock);
L
Linus Torvalds 已提交
2843
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2844
		err = -EEXIST;
L
Linus Torvalds 已提交
2845 2846 2847 2848 2849 2850
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
2851 2852
		if (likely(dst_ops->default_advmss == NULL))
			dst_ops->default_advmss = xfrm_default_advmss;
2853 2854
		if (likely(dst_ops->mtu == NULL))
			dst_ops->mtu = xfrm_mtu;
L
Linus Torvalds 已提交
2855 2856 2857 2858
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
2859 2860
		if (likely(dst_ops->neigh_lookup == NULL))
			dst_ops->neigh_lookup = xfrm_neigh_lookup;
L
Linus Torvalds 已提交
2861
		if (likely(afinfo->garbage_collect == NULL))
2862
			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
2863
		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
L
Linus Torvalds 已提交
2864
	}
E
Eric Dumazet 已提交
2865
	spin_unlock(&xfrm_policy_afinfo_lock);
2866

L
Linus Torvalds 已提交
2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877
	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
E
Eric Dumazet 已提交
2878
	spin_lock(&xfrm_policy_afinfo_lock);
L
Linus Torvalds 已提交
2879 2880 2881
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
E
Eric Dumazet 已提交
2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896
		else
			RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
					 NULL);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);
	if (!err) {
		struct dst_ops *dst_ops = afinfo->dst_ops;

		synchronize_rcu();

		dst_ops->kmem_cachep = NULL;
		dst_ops->check = NULL;
		dst_ops->negative_advice = NULL;
		dst_ops->link_failure = NULL;
		afinfo->garbage_collect = NULL;
L
Linus Torvalds 已提交
2897 2898 2899 2900 2901 2902 2903
	}
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
2904
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2905

L
Linus Torvalds 已提交
2906 2907
	switch (event) {
	case NETDEV_DOWN:
2908
		xfrm_garbage_collect(dev_net(dev));
L
Linus Torvalds 已提交
2909 2910 2911 2912 2913
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
A
Alexey Dobriyan 已提交
2914
	.notifier_call	= xfrm_dev_event,
L
Linus Torvalds 已提交
2915 2916
};

2917
#ifdef CONFIG_XFRM_STATISTICS
/*
 * Allocate the per-cpu xfrm MIB counters for @net and set up the proc
 * interface.  Returns 0 on success, -ENOMEM when the counters cannot
 * be allocated, or the negative error from xfrm_proc_init() (in which
 * case the counters are freed again).
 */
static int __net_init xfrm_statistics_init(struct net *net)
{
	int rv;

	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
	if (!net->mib.xfrm_statistics)
		return -ENOMEM;

	rv = xfrm_proc_init(net);
	if (rv >= 0)
		return rv;

	free_percpu(net->mib.xfrm_statistics);
	return rv;
}

/* Tear down the proc interface and free the per-cpu MIB counters. */
static void xfrm_statistics_fini(struct net *net)
{
	xfrm_proc_fini(net);
	free_percpu(net->mib.xfrm_statistics);
}
#else
/* Statistics are compiled out: nothing to set up. */
static int __net_init xfrm_statistics_init(struct net *net)
{
	return 0;
}

/* Statistics are compiled out: nothing to tear down. */
static void xfrm_statistics_fini(struct net *net)
{
}
#endif

2946
/*
 * Per-namespace policy setup: allocate the by-index and per-direction
 * by-destination hash tables (8 buckets each), reset the policy
 * counters and hash thresholds, and initialise the work items.  For
 * the initial namespace the xfrm_dst slab cache is created and the
 * netdevice notifier registered as well.
 *
 * Returns 0 on success or -ENOMEM, in which case every hash table
 * allocated so far has been freed.
 */
static int __net_init xfrm_policy_init(struct net *net)
{
	const unsigned int hmask = 8 - 1;
	const unsigned int sz = (hmask + 1) * sizeof(struct hlist_head);
	struct xfrm_policy_hash *htab;
	int dir;

	if (net_eq(net, &init_net))
		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (!net->xfrm.policy_byidx)
		return -ENOMEM;
	net->xfrm.policy_idx_hmask = hmask;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		net->xfrm.policy_count[dir] = 0;
		net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);

		htab = &net->xfrm.policy_bydst[dir];
		htab->table = xfrm_hash_alloc(sz);
		if (!htab->table)
			goto out_bydst;
		htab->hmask = hmask;
		htab->dbits4 = 32;
		htab->sbits4 = 32;
		htab->dbits6 = 128;
		htab->sbits6 = 128;
	}

	net->xfrm.policy_hthresh.lbits4 = 32;
	net->xfrm.policy_hthresh.rbits4 = 32;
	net->xfrm.policy_hthresh.lbits6 = 128;
	net->xfrm.policy_hthresh.rbits6 = 128;

	seqlock_init(&net->xfrm.policy_hthresh.lock);

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
	INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);

	if (net_eq(net, &init_net))
		register_netdevice_notifier(&xfrm_dev_notifier);

	return 0;

out_bydst:
	/* Free the tables of the directions that were set up already. */
	while (--dir >= 0) {
		htab = &net->xfrm.policy_bydst[dir];
		xfrm_hash_free(htab->table, sz);
	}
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
	return -ENOMEM;
}

static void xfrm_policy_fini(struct net *net)
{
3010
	unsigned int sz;
3011
	int dir;
3012

3013 3014
	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
3015
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
3016
#endif
3017
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
3018

3019
	WARN_ON(!list_empty(&net->xfrm.policy_all));
3020

H
Herbert Xu 已提交
3021
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
3022 3023
		struct xfrm_policy_hash *htab;

3024
		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
3025 3026

		htab = &net->xfrm.policy_bydst[dir];
3027
		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
3028 3029
		WARN_ON(!hlist_empty(htab->table));
		xfrm_hash_free(htab->table, sz);
3030 3031
	}

3032
	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
3033 3034
	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
L
Linus Torvalds 已提交
3035 3036
}

3037 3038 3039 3040
/*
 * Per-namespace xfrm initialisation.
 *
 * Sets up statistics, state, policy, sysctl and the flow cache in that
 * order, unwinding in reverse on failure.  Returns 0 on success or the
 * negative error of the step that failed.
 */
static int __net_init xfrm_net_init(struct net *net)
{
	int rv;

	/* Initialize the per-net locks first: the error unwinding below
	 * calls xfrm_policy_fini() (which flushes policies) and
	 * xfrm_state_fini(), and those must never run against
	 * uninitialised locks.
	 */
	spin_lock_init(&net->xfrm.xfrm_state_lock);
	rwlock_init(&net->xfrm.xfrm_policy_lock);
	mutex_init(&net->xfrm.xfrm_cfg_mutex);

	rv = xfrm_statistics_init(net);
	if (rv < 0)
		goto out_statistics;
	rv = xfrm_state_init(net);
	if (rv < 0)
		goto out_state;
	rv = xfrm_policy_init(net);
	if (rv < 0)
		goto out_policy;
	rv = xfrm_sysctl_init(net);
	if (rv < 0)
		goto out_sysctl;
	rv = flow_cache_init(net);
	if (rv < 0)
		goto out;

	return 0;

out:
	xfrm_sysctl_fini(net);
out_sysctl:
	xfrm_policy_fini(net);
out_policy:
	xfrm_state_fini(net);
out_state:
	xfrm_statistics_fini(net);
out_statistics:
	return rv;
}

/*
 * Per-namespace teardown of the xfrm subsystem.
 *
 * Undoes xfrm_net_init() in the reverse of its setup order: flow cache,
 * sysctl, policy database, state database, statistics.
 */
static void __net_exit xfrm_net_exit(struct net *net)
{
	flow_cache_fini(net);
	xfrm_sysctl_fini(net);

	xfrm_policy_fini(net);
	xfrm_state_fini(net);

	xfrm_statistics_fini(net);
}

/*
 * Per-network-namespace callbacks for xfrm: xfrm_net_init() runs as the
 * .init hook and xfrm_net_exit() as the .exit hook.  Registered from
 * xfrm_init() via register_pernet_subsys().
 */
static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};

L
Linus Torvalds 已提交
3090 3091
/*
 * Boot-time entry point of the xfrm subsystem: registers the per-namespace
 * operations and then initialises the input path.
 */
void __init xfrm_init(void)
{
	/*
	 * This function cannot report an error to its caller, so at least
	 * make a failed registration visible instead of dropping the return
	 * value on the floor.  Initialisation continues either way, exactly
	 * as before.
	 */
	WARN_ON(register_pernet_subsys(&xfrm_net_ops));
	xfrm_input_init();
}

J
Joy Latten 已提交
3096
#ifdef CONFIG_AUDITSYSCALL
3097 3098
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
J
Joy Latten 已提交
3099
{
3100 3101 3102 3103
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
J
Joy Latten 已提交
3104
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
3105
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
J
Joy Latten 已提交
3106

3107
	switch (sel->family) {
J
Joy Latten 已提交
3108
	case AF_INET:
H
Harvey Harrison 已提交
3109
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
3110 3111 3112
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
H
Harvey Harrison 已提交
3113
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
3114 3115 3116
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
J
Joy Latten 已提交
3117 3118
		break;
	case AF_INET6:
H
Harvey Harrison 已提交
3119
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
3120 3121 3122
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
H
Harvey Harrison 已提交
3123
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
3124 3125 3126
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
J
Joy Latten 已提交
3127 3128 3129 3130
		break;
	}
}

3131
/*
 * Emit an "SPD-add" audit record for policy @xp.
 * @result is logged in the "res" field; @task_valid is handed to
 * xfrm_audit_helper_usrinfo().  Does nothing if no audit buffer could be
 * started.
 */
void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
{
	struct audit_buffer *ab = xfrm_audit_start("SPD-add");

	if (!ab)
		return;

	xfrm_audit_helper_usrinfo(task_valid, ab);
	audit_log_format(ab, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, ab);
	audit_log_end(ab);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

P
Paul Moore 已提交
3145
void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
3146
			      bool task_valid)
J
Joy Latten 已提交
3147 3148 3149
{
	struct audit_buffer *audit_buf;

P
Paul Moore 已提交
3150
	audit_buf = xfrm_audit_start("SPD-delete");
J
Joy Latten 已提交
3151 3152
	if (audit_buf == NULL)
		return;
3153
	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
P
Paul Moore 已提交
3154
	audit_log_format(audit_buf, " res=%u", result);
J
Joy Latten 已提交
3155 3156 3157 3158 3159 3160
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif

3161
#ifdef CONFIG_XFRM_MIGRATE
3162 3163
static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
					const struct xfrm_selector *sel_tgt)
3164 3165 3166
{
	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
		if (sel_tgt->family == sel_cmp->family &&
3167 3168 3169 3170
		    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
				    sel_cmp->family) &&
		    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
				    sel_cmp->family) &&
3171 3172
		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
3173
			return true;
3174 3175 3176
		}
	} else {
		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
3177
			return true;
3178 3179
		}
	}
3180
	return false;
3181 3182
}

3183 3184
/*
 * Look up the policy of type @type in direction @dir whose selector matches
 * @sel (see xfrm_migrate_selector_match()).
 *
 * The exact-match hash chain is searched first; the inexact list for @dir is
 * then scanned, stopping as soon as an entry's priority value is not below
 * that of a policy already found.  The result is passed to xfrm_pol_hold()
 * before the policy lock is dropped.  Returns NULL when nothing matches.
 *
 * @dir indexes net->xfrm.policy_inexact[] and is not range-checked here.
 */
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type, struct net *net)
{
	struct xfrm_policy *cand, *found = NULL;
	struct hlist_head *bucket;
	u32 best_prio = ~0U;

	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/

	/* Pass 1: exact-match hash chain. */
	bucket = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(cand, bucket, bydst) {
		if (!xfrm_migrate_selector_match(sel, &cand->selector) ||
		    cand->type != type)
			continue;

		found = cand;
		best_prio = found->priority;
		break;
	}

	/* Pass 2: inexact list, which may override the pass 1 result. */
	bucket = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(cand, bucket, bydst) {
		if (found && cand->priority >= best_prio)
			break;

		if (!xfrm_migrate_selector_match(sel, &cand->selector) ||
		    cand->type != type)
			continue;

		found = cand;
		break;
	}

	xfrm_pol_hold(found);

	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return found;
}

3219
static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
3220 3221 3222 3223 3224 3225 3226 3227
{
	int match = 0;

	if (t->mode == m->mode && t->id.proto == m->proto &&
	    (m->reqid == 0 || t->reqid == m->reqid)) {
		switch (t->mode) {
		case XFRM_MODE_TUNNEL:
		case XFRM_MODE_BEET:
3228 3229 3230 3231
			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
					    m->old_family) &&
			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
					    m->old_family)) {
3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255
				match = 1;
			}
			break;
		case XFRM_MODE_TRANSPORT:
			/* in case of transport mode, template does not store
			   any IP addresses, hence we just compare mode and
			   protocol */
			match = 1;
			break;
		default:
			break;
		}
	}
	return match;
}

/* update endpoint address(es) of template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
H
Herbert Xu 已提交
3256
	if (unlikely(pol->walk.dead)) {
3257 3258 3259 3260 3261 3262 3263 3264 3265 3266
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
H
Herbert Xu 已提交
3267 3268
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
3269 3270 3271 3272 3273 3274 3275 3276
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
3277
			atomic_inc(&pol->genid);
3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}

3289
static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
3290 3291 3292 3293 3294 3295 3296
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
		return -EINVAL;

	for (i = 0; i < num_migrate; i++) {
3297 3298 3299 3300
		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
				    m[i].old_family) &&
		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
				    m[i].old_family))
3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322
			return -EINVAL;
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
			return -EINVAL;

		/* check if there is any duplicated entry */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family)
				return -EINVAL;
		}
	}

	return 0;
}

3323
/*
 * xfrm_migrate - move the endpoints of a policy and of its matching states.
 * @sel, @dir, @type: identify the policy to migrate
 * @m, @num_migrate:  the old/new endpoint descriptions
 * @k:                passed through unchanged to km_migrate()
 * @net:              network namespace to operate in
 *
 * Returns 0 on success.  Returns -EINVAL for an invalid request (see
 * xfrm_migrate_check()) or an out-of-range @dir, -ENOENT when no policy
 * matches, -ENODATA when a state could not be migrated, or the error from
 * xfrm_policy_migrate().  On failure after stage 2 has started, the newly
 * created states are deleted again.
 */
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate,
		 struct xfrm_kmaddress *k, struct net *net)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	/* Stage 0 - sanity checks */
	err = xfrm_migrate_check(m, num_migrate);
	if (err < 0)
		goto out;

	/*
	 * @dir is used as an index into the per-direction policy tables
	 * during the lookup below; reject values outside the valid range
	 * instead of reading past the end of those arrays.
	 */
	if (dir >= XFRM_POLICY_MAX) {
		err = -EINVAL;
		goto out;
	}

	/* Stage 1 - find policy */
	pol = xfrm_migrate_policy_find(sel, dir, type, net);
	if (pol == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		if ((x = xfrm_migrate_state_find(mp, net))) {
			x_cur[nx_cur] = x;
			nx_cur++;
			if ((xc = xfrm_state_migrate(x, mp))) {
				x_new[nx_new] = xc;
				nx_new++;
			} else {
				err = -ENODATA;
				goto restore_state;
			}
		}
	}

	/* Stage 3 - update policy */
	err = xfrm_policy_migrate(pol, m, num_migrate);
	if (err < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate, k);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	/* Undo: release the policy and old states, delete the new states. */
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
3388
#endif