/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
#include <linux/mm.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>

#define NF_CONNTRACK_VERSION	"0.5.0"

DEFINE_SPINLOCK(nf_conntrack_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_lock);

/* nf_conntrack_standalone needs this */
atomic_t nf_conntrack_count = ATOMIC_INIT(0);
EXPORT_SYMBOL_GPL(nf_conntrack_count);

unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);

int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);

struct hlist_head *nf_conntrack_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash);

struct nf_conn nf_conntrack_untracked __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_untracked);

unsigned int nf_ct_log_invalid __read_mostly;
HLIST_HEAD(unconfirmed);
static int nf_conntrack_vmalloc __read_mostly;
static struct kmem_cache *nf_conntrack_cachep __read_mostly;

DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);

static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;

static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
				  unsigned int size, unsigned int rnd)
{
	unsigned int n;
	u_int32_t h;

	/* The direction must be ignored, so we hash everything up to the
	 * destination ports (which is a multiple of 4) and treat the last
	 * three bytes manually.
	 */
	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
	h = jhash2((u32 *)tuple, n,
		   rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
			  tuple->dst.protonum));

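	/* Map the 32-bit hash onto [0, size) with a multiply and shift
	 * instead of a modulo; size does not have to be a power of two. */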
	return ((u64)h * size) >> 32;
}

static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
{
	return __hash_conntrack(tuple, nf_conntrack_htable_size,
				nf_conntrack_hash_rnd);
}

int
nf_ct_get_tuple(const struct sk_buff *skb,
		unsigned int nhoff,
		unsigned int dataoff,
		u_int16_t l3num,
		u_int8_t protonum,
		struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_l3proto *l3proto,
		const struct nf_conntrack_l4proto *l4proto)
{
	NF_CT_TUPLE_U_BLANK(tuple);

	tuple->src.l3num = l3num;
	if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
		return 0;

	tuple->dst.protonum = protonum;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return l4proto->pkt_to_tuple(skb, dataoff, tuple);
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuple);

int nf_ct_get_tuplepr(const struct sk_buff *skb,
		      unsigned int nhoff,
		      u_int16_t l3num,
		      struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	unsigned int protoff;
	u_int8_t protonum;
	int ret;

	rcu_read_lock();

	l3proto = __nf_ct_l3proto_find(l3num);
	ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
	if (ret != NF_ACCEPT) {
		rcu_read_unlock();
		return 0;
	}

	l4proto = __nf_ct_l4proto_find(l3num, protonum);

	ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple,
			      l3proto, l4proto);

	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
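
/*
 * Illustrative example (not part of the original file): an IPv4 caller
 * that wants the conntrack entry for a packet could do
 *
 *	struct nf_conntrack_tuple tuple;
 *	struct nf_conntrack_tuple_hash *h = NULL;
 *
 *	if (nf_ct_get_tuplepr(skb, skb_network_offset(skb), PF_INET, &tuple))
 *		h = nf_conntrack_find_get(&tuple);
 *
 * nf_conntrack_find_get() takes a reference on success, which must be
 * dropped with nf_ct_put(nf_ct_tuplehash_to_ctrack(h)) when done.
 */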

int
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
		   const struct nf_conntrack_tuple *orig,
		   const struct nf_conntrack_l3proto *l3proto,
		   const struct nf_conntrack_l4proto *l4proto)
{
	NF_CT_TUPLE_U_BLANK(inverse);

	inverse->src.l3num = orig->src.l3num;
	if (l3proto->invert_tuple(inverse, orig) == 0)
		return 0;

	inverse->dst.dir = !orig->dst.dir;

	inverse->dst.protonum = orig->dst.protonum;
	return l4proto->invert_tuple(inverse, orig);
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);

static void
clean_from_lists(struct nf_conn *ct)
{
	pr_debug("clean_from_lists(%p)\n", ct);
	hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
	hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode);

	/* Destroy all pending expectations */
	nf_ct_remove_expectations(ct);
}

static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = (struct nf_conn *)nfct;
	struct nf_conntrack_l4proto *l4proto;

	pr_debug("destroy_conntrack(%p)\n", ct);
	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
	NF_CT_ASSERT(!timer_pending(&ct->timeout));

	nf_conntrack_event(IPCT_DESTROY, ct);
	set_bit(IPS_DYING_BIT, &ct->status);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to nf_conntrack_lock!!! -HW */
	rcu_read_lock();
	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
	if (l4proto && l4proto->destroy)
		l4proto->destroy(ct);

	nf_ct_ext_destroy(ct);

	rcu_read_unlock();

	spin_lock_bh(&nf_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too. */
	nf_ct_remove_expectations(ct);

	/* We overload first tuple to link into unconfirmed list. */
	if (!nf_ct_is_confirmed(ct)) {
		BUG_ON(hlist_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode));
		hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
	}

	NF_CT_STAT_INC(delete);
	spin_unlock_bh(&nf_conntrack_lock);

	if (ct->master)
		nf_ct_put(ct->master);

	pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
	nf_conntrack_free(ct);
}

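/* Timer callback run when a conntrack's timeout expires; it is also called
 * directly (after del_timer()) by early_drop() and nf_ct_iterate_cleanup(). */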
static void death_by_timeout(unsigned long ul_conntrack)
{
	struct nf_conn *ct = (void *)ul_conntrack;
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_helper *helper;

	if (help) {
		rcu_read_lock();
		helper = rcu_dereference(help->helper);
		if (helper && helper->destroy)
			helper->destroy(ct);
		rcu_read_unlock();
	}

	spin_lock_bh(&nf_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
	NF_CT_STAT_INC(delete_list);
	clean_from_lists(ct);
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_put(ct);
}

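/* Lockless lookup: walks the RCU-protected hash chain for the tuple and
 * returns the entry without taking a reference.  Use nf_conntrack_find_get()
 * below when the conntrack must not disappear under the caller. */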
struct nf_conntrack_tuple_hash *
__nf_conntrack_find(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_tuple_hash *h;
	struct hlist_node *n;
	unsigned int hash = hash_conntrack(tuple);

	/* Disable BHs the entire time since we normally need to disable them
	 * at least once for the stats anyway.
	 */
	local_bh_disable();
	hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) {
		if (nf_ct_tuple_equal(tuple, &h->tuple)) {
			NF_CT_STAT_INC(found);
			local_bh_enable();
			return h;
		}
		NF_CT_STAT_INC(searched);
	}
	local_bh_enable();

	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_find);

/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	rcu_read_lock();
	h = __nf_conntrack_find(tuple);
	if (h) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
			h = NULL;
	}
	rcu_read_unlock();

	return h;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);

static void __nf_conntrack_hash_insert(struct nf_conn *ct,
				       unsigned int hash,
				       unsigned int repl_hash)
{
	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
			   &nf_conntrack_hash[hash]);
	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode,
			   &nf_conntrack_hash[repl_hash]);
}

void nf_conntrack_hash_insert(struct nf_conn *ct)
{
	unsigned int hash, repl_hash;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	spin_lock_bh(&nf_conntrack_lock);
	__nf_conntrack_hash_insert(ct, hash, repl_hash);
	spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);

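/* Confirmation runs once the first packet of a connection has made it
 * through the hooks: the conntrack leaves the unconfirmed list, enters the
 * hash table, gains an extra reference and gets its timeout timer armed. */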
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
	unsigned int hash, repl_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct hlist_node *n;
	enum ip_conntrack_info ctinfo;

	ct = nf_ct_get(skb, &ctinfo);

	/* ipt_REJECT uses nf_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	   confirmed us. */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
	pr_debug("Confirming conntrack %p\n", ct);

	spin_lock_bh(&nf_conntrack_lock);

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode)
		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				      &h->tuple))
			goto out;
	hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode)
		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				      &h->tuple))
			goto out;

	/* Remove from unconfirmed list */
	hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);

	__nf_conntrack_hash_insert(ct, hash, repl_hash);
	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout.expires += jiffies;
	add_timer(&ct->timeout);
	atomic_inc(&ct->ct_general.use);
	set_bit(IPS_CONFIRMED_BIT, &ct->status);
	NF_CT_STAT_INC(insert);
	spin_unlock_bh(&nf_conntrack_lock);
	help = nfct_help(ct);
	if (help && help->helper)
		nf_conntrack_event_cache(IPCT_HELPER, skb);
#ifdef CONFIG_NF_NAT_NEEDED
	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
		nf_conntrack_event_cache(IPCT_NATINFO, skb);
#endif
	nf_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, skb);
	return NF_ACCEPT;

out:
	NF_CT_STAT_INC(insert_failed);
	spin_unlock_bh(&nf_conntrack_lock);
	return NF_DROP;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);

/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
			 const struct nf_conn *ignored_conntrack)
{
	struct nf_conntrack_tuple_hash *h;
	struct hlist_node *n;
	unsigned int hash = hash_conntrack(tuple);

	/* Disable BHs the entire time since we need to disable them at
	 * least once for the stats anyway.
	 */
	rcu_read_lock_bh();
	hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) {
		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
		    nf_ct_tuple_equal(tuple, &h->tuple)) {
			NF_CT_STAT_INC(found);
			rcu_read_unlock_bh();
			return 1;
		}
		NF_CT_STAT_INC(searched);
	}
	rcu_read_unlock_bh();

	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);

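/* When the table is full, nf_conntrack_alloc() calls early_drop() to make
 * room: starting at the new entry's bucket it walks consecutive hash chains
 * until it has seen roughly NF_CT_EVICTION_RANGE entries, and evicts one
 * that is not yet ASSURED. */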
#define NF_CT_EVICTION_RANGE	8

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static noinline int early_drop(unsigned int hash)
{
	/* Use oldest entry, which is roughly LRU */
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct = NULL, *tmp;
	struct hlist_node *n;
	unsigned int i, cnt = 0;
	int dropped = 0;

	rcu_read_lock();
	for (i = 0; i < nf_conntrack_htable_size; i++) {
		hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
					 hnode) {
			tmp = nf_ct_tuplehash_to_ctrack(h);
			if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
				ct = tmp;
			cnt++;
		}

		if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
			ct = NULL;
		if (ct || cnt >= NF_CT_EVICTION_RANGE)
			break;
		hash = (hash + 1) % nf_conntrack_htable_size;
	}
	rcu_read_unlock();

	if (!ct)
		return dropped;

	if (del_timer(&ct->timeout)) {
		death_by_timeout((unsigned long)ct);
		dropped = 1;
		NF_CT_STAT_INC_ATOMIC(early_drop);
	}
	nf_ct_put(ct);
	return dropped;
}

struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
				   const struct nf_conntrack_tuple *repl)
{
	struct nf_conn *ct = NULL;

	if (unlikely(!nf_conntrack_hash_rnd_initted)) {
		get_random_bytes(&nf_conntrack_hash_rnd, 4);
		nf_conntrack_hash_rnd_initted = 1;
	}

	/* We don't want any race condition at early drop stage */
	atomic_inc(&nf_conntrack_count);

	if (nf_conntrack_max &&
	    unlikely(atomic_read(&nf_conntrack_count) > nf_conntrack_max)) {
		unsigned int hash = hash_conntrack(orig);
		if (!early_drop(hash)) {
			atomic_dec(&nf_conntrack_count);
			if (net_ratelimit())
				printk(KERN_WARNING
				       "nf_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	ct = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC);
	if (ct == NULL) {
		pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
		atomic_dec(&nf_conntrack_count);
		return ERR_PTR(-ENOMEM);
	}

	atomic_set(&ct->ct_general.use, 1);
	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	/* Don't set timer yet: wait for confirmation */
	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
	INIT_RCU_HEAD(&ct->rcu);

	return ct;
}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
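
/* Conntracks are freed via RCU so that lockless readers still walking the
 * hash chains never dereference freed memory. */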
static void nf_conntrack_free_rcu(struct rcu_head *head)
{
	struct nf_conn *ct = container_of(head, struct nf_conn, rcu);

	nf_ct_ext_free(ct);
	kmem_cache_free(nf_conntrack_cachep, ct);
	atomic_dec(&nf_conntrack_count);
}

void nf_conntrack_free(struct nf_conn *ct)
{
	call_rcu(&ct->rcu, nf_conntrack_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_conntrack_free);

/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct nf_conntrack_tuple_hash *
init_conntrack(const struct nf_conntrack_tuple *tuple,
	       struct nf_conntrack_l3proto *l3proto,
	       struct nf_conntrack_l4proto *l4proto,
	       struct sk_buff *skb,
	       unsigned int dataoff)
{
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct nf_conntrack_tuple repl_tuple;
	struct nf_conntrack_expect *exp;

	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
		pr_debug("Can't invert tuple.\n");
		return NULL;
	}

	ct = nf_conntrack_alloc(tuple, &repl_tuple);
	if (ct == NULL || IS_ERR(ct)) {
		pr_debug("Can't allocate conntrack.\n");
		return (struct nf_conntrack_tuple_hash *)ct;
	}

	if (!l4proto->new(ct, skb, dataoff)) {
		nf_conntrack_free(ct);
		pr_debug("init conntrack: can't track with proto module\n");
		return NULL;
	}

	spin_lock_bh(&nf_conntrack_lock);
	exp = nf_ct_find_expectation(tuple);
	if (exp) {
		pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
			 ct, exp);
		/* Welcome, Mr. Bond.  We've been expecting you... */
		__set_bit(IPS_EXPECTED_BIT, &ct->status);
		ct->master = exp->master;
		if (exp->helper) {
			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
			if (help)
				rcu_assign_pointer(help->helper, exp->helper);
		}

#ifdef CONFIG_NF_CONNTRACK_MARK
		ct->mark = exp->master->mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
		ct->secmark = exp->master->secmark;
#endif
		nf_conntrack_get(&ct->master->ct_general);
		NF_CT_STAT_INC(expect_new);
	} else {
		struct nf_conntrack_helper *helper;

		helper = __nf_ct_helper_find(&repl_tuple);
		if (helper) {
			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
			if (help)
				rcu_assign_pointer(help->helper, helper);
		}
		NF_CT_STAT_INC(new);
	}

	/* Overload tuple linked list to put us in unconfirmed list. */
	hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed);

	spin_unlock_bh(&nf_conntrack_lock);

	if (exp) {
		if (exp->expectfn)
			exp->expectfn(ct, exp);
		nf_ct_expect_put(exp);
	}

	return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
}

/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct nf_conn *
resolve_normal_ct(struct sk_buff *skb,
		  unsigned int dataoff,
		  u_int16_t l3num,
		  u_int8_t protonum,
		  struct nf_conntrack_l3proto *l3proto,
		  struct nf_conntrack_l4proto *l4proto,
		  int *set_reply,
		  enum ip_conntrack_info *ctinfo)
{
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
			     dataoff, l3num, protonum, &tuple, l3proto,
			     l4proto)) {
		pr_debug("resolve_normal_ct: Can't get tuple\n");
		return NULL;
	}

	/* look for tuple match */
	h = nf_conntrack_find_get(&tuple);
	if (!h) {
		h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}
	ct = nf_ct_tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			pr_debug("nf_conntrack_in: related packet for %p\n",
				 ct);
			*ctinfo = IP_CT_RELATED;
		} else {
			pr_debug("nf_conntrack_in: new packet for %p\n", ct);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &ct->ct_general;
	skb->nfctinfo = *ctinfo;
	return ct;
}

unsigned int
nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	unsigned int dataoff;
	u_int8_t protonum;
	int set_reply = 0;
	int ret;

	/* Previously seen (loopback or untracked)?  Ignore. */
	if (skb->nfct) {
		NF_CT_STAT_INC_ATOMIC(ignore);
		return NF_ACCEPT;
	}

	/* rcu_read_lock()ed by nf_hook_slow */
	l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
				   &dataoff, &protonum);
	if (ret <= 0) {
		pr_debug("not prepared to track yet or error occurred\n");
		NF_CT_STAT_INC_ATOMIC(error);
		NF_CT_STAT_INC_ATOMIC(invalid);
		return -ret;
	}

	l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum);

	/* It may be a special packet, error, unclean...
	 * inverse of the return code tells the netfilter
	 * core what to do with the packet. */
	if (l4proto->error != NULL &&
	    (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
		NF_CT_STAT_INC_ATOMIC(error);
		NF_CT_STAT_INC_ATOMIC(invalid);
		return -ret;
	}

	ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto,
			       &set_reply, &ctinfo);
	if (!ct) {
		/* Not valid part of a connection */
		NF_CT_STAT_INC_ATOMIC(invalid);
		return NF_ACCEPT;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		NF_CT_STAT_INC_ATOMIC(drop);
		return NF_DROP;
	}

	NF_CT_ASSERT(skb->nfct);

	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
	if (ret < 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		pr_debug("nf_conntrack_in: Can't track with proto module\n");
		nf_conntrack_put(skb->nfct);
		skb->nfct = NULL;
		NF_CT_STAT_INC_ATOMIC(invalid);
		return -ret;
	}

	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		nf_conntrack_event_cache(IPCT_STATUS, skb);

	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_in);
740 741 742 743

int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
			 const struct nf_conntrack_tuple *orig)
{
744 745 746 747 748 749 750 751 752
	int ret;

	rcu_read_lock();
	ret = nf_ct_invert_tuple(inverse, orig,
				 __nf_ct_l3proto_find(orig->src.l3num),
				 __nf_ct_l4proto_find(orig->src.l3num,
						      orig->dst.protonum));
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);

/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
   implicitly racy: see __nf_conntrack_confirm */
void nf_conntrack_alter_reply(struct nf_conn *ct,
			      const struct nf_conntrack_tuple *newreply)
{
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_helper *helper;

	/* Should be unconfirmed, so not in hash table yet */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));

	pr_debug("Altering reply tuple of %p to ", ct);
	NF_CT_DUMP_TUPLE(newreply);

	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	if (ct->master || (help && help->expecting != 0))
		return;

	rcu_read_lock();
	helper = __nf_ct_helper_find(newreply);
	if (helper == NULL) {
		if (help)
			rcu_assign_pointer(help->helper, NULL);
		goto out;
	}

	if (help == NULL) {
		help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
		if (help == NULL)
			goto out;
	} else {
		memset(&help->help, 0, sizeof(help->help));
	}

	rcu_assign_pointer(help->helper, helper);
out:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);

/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  const struct sk_buff *skb,
			  unsigned long extra_jiffies,
			  int do_acct)
{
	int event = 0;

	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
	NF_CT_ASSERT(skb);

	spin_lock_bh(&nf_conntrack_lock);

	/* Only update if this is not a fixed timeout */
	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
		goto acct;

	/* If not in hash table, timer will not be active yet */
	if (!nf_ct_is_confirmed(ct)) {
		ct->timeout.expires = extra_jiffies;
		event = IPCT_REFRESH;
	} else {
		unsigned long newtime = jiffies + extra_jiffies;

		/* Only update the timeout if the new timeout is at least
		   HZ jiffies from the old timeout. Need del_timer for race
		   avoidance (may already be dying). */
		if (newtime - ct->timeout.expires >= HZ
		    && del_timer(&ct->timeout)) {
			ct->timeout.expires = newtime;
			add_timer(&ct->timeout);
			event = IPCT_REFRESH;
		}
	}

acct:
#ifdef CONFIG_NF_CT_ACCT
	if (do_acct) {
		ct->counters[CTINFO2DIR(ctinfo)].packets++;
		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
			skb->len - skb_network_offset(skb);

		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
			event |= IPCT_COUNTER_FILLING;
	}
#endif

	spin_unlock_bh(&nf_conntrack_lock);

	/* must be unlocked when calling event cache */
	if (event)
		nf_conntrack_event_cache(event, skb);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);

#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/mutex.h>

/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
			       const struct nf_conntrack_tuple *tuple)
{
	NLA_PUT_BE16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port);
	NLA_PUT_BE16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port);
	return 0;

nla_put_failure:
	return -1;
}
EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);

const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
	[CTA_PROTO_SRC_PORT]  = { .type = NLA_U16 },
	[CTA_PROTO_DST_PORT]  = { .type = NLA_U16 },
};
EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);

int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
			       struct nf_conntrack_tuple *t)
{
	if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
		return -EINVAL;

	t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
	t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
#endif

/* Used by ipt_REJECT and ip6t_REJECT. */
static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	/* This ICMP is in reverse direction to the packet which caused it */
	ct = nf_ct_get(skb, &ctinfo);
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach to new skbuff, and increment count */
	nskb->nfct = &ct->ct_general;
	nskb->nfctinfo = ctinfo;
	nf_conntrack_get(nskb->nfct);
}

/* Bring out ya dead! */
static struct nf_conn *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
		void *data, unsigned int *bucket)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct hlist_node *n;

	spin_lock_bh(&nf_conntrack_lock);
	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
		hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) {
			ct = nf_ct_tuplehash_to_ctrack(h);
			if (iter(ct, data))
				goto found;
		}
	}
	hlist_for_each_entry(h, n, &unconfirmed, hnode) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (iter(ct, data))
			set_bit(IPS_DYING_BIT, &ct->status);
	}
	spin_unlock_bh(&nf_conntrack_lock);
	return NULL;
found:
	atomic_inc(&ct->ct_general.use);
	spin_unlock_bh(&nf_conntrack_lock);
	return ct;
}

void
nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
{
	struct nf_conn *ct;
	unsigned int bucket = 0;

	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
		/* Time to push up daisies... */
		if (del_timer(&ct->timeout))
			death_by_timeout((unsigned long)ct);
		/* ... else the timer will get him soon. */

		nf_ct_put(ct);
	}
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);

static int kill_all(struct nf_conn *i, void *data)
{
	return 1;
}

void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size)
{
	if (vmalloced)
		vfree(hash);
	else
		free_pages((unsigned long)hash,
			   get_order(sizeof(struct hlist_head) * size));
}
EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);

void nf_conntrack_flush(void)
{
	nf_ct_iterate_cleanup(kill_all, NULL);
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush);

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void nf_conntrack_cleanup(void)
{
	rcu_assign_pointer(ip_ct_attach, NULL);

	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

	nf_ct_event_cache_flush();
 i_see_dead_people:
	nf_conntrack_flush();
	if (atomic_read(&nf_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}
	/* wait until all references to nf_conntrack_untracked are dropped */
	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
		schedule();

	rcu_assign_pointer(nf_ct_destroy, NULL);

	kmem_cache_destroy(nf_conntrack_cachep);
	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
			     nf_conntrack_htable_size);

	nf_conntrack_proto_fini();
	nf_conntrack_helper_fini();
	nf_conntrack_expect_fini();
}

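/* Try to allocate the hash table from physically contiguous pages first;
 * for large tables this can fail, in which case we fall back to vmalloc(). */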
struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
{
	struct hlist_head *hash;
	unsigned int size, i;

	*vmalloced = 0;

	size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head));
	hash = (void*)__get_free_pages(GFP_KERNEL|__GFP_NOWARN,
				       get_order(sizeof(struct hlist_head)
						 * size));
	if (!hash) {
		*vmalloced = 1;
		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
		hash = vmalloc(sizeof(struct hlist_head) * size);
	}

	if (hash)
		for (i = 0; i < size; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);

int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, vmalloced, old_vmalloced;
	unsigned int hashsize, old_size;
	int rnd;
	struct hlist_head *hash, *old_hash;
	struct nf_conntrack_tuple_hash *h;

	/* On boot, we can set this without any fancy locking. */
	if (!nf_conntrack_htable_size)
		return param_set_uint(val, kp);

	hashsize = simple_strtoul(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced);
	if (!hash)
		return -ENOMEM;

	/* We have to rehash for the new table anyway, so we also can
	 * use a new random seed */
	get_random_bytes(&rnd, 4);

	/* Lookups in the old hash might happen in parallel, which means we
	 * might get false negatives during connection lookup. New connections
	 * created because of a false negative won't make it into the hash
	 * though since that required taking the lock.
	 */
	spin_lock_bh(&nf_conntrack_lock);
	for (i = 0; i < nf_conntrack_htable_size; i++) {
		while (!hlist_empty(&nf_conntrack_hash[i])) {
			h = hlist_entry(nf_conntrack_hash[i].first,
					struct nf_conntrack_tuple_hash, hnode);
			hlist_del_rcu(&h->hnode);
			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
			hlist_add_head(&h->hnode, &hash[bucket]);
		}
	}
	old_size = nf_conntrack_htable_size;
	old_vmalloced = nf_conntrack_vmalloc;
	old_hash = nf_conntrack_hash;

	nf_conntrack_htable_size = hashsize;
	nf_conntrack_vmalloc = vmalloced;
	nf_conntrack_hash = hash;
	nf_conntrack_hash_rnd = rnd;
	spin_unlock_bh(&nf_conntrack_lock);

	nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);

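/* hashsize is a writable (0600) module parameter, so the table can also be
 * resized at runtime via /sys/module/nf_conntrack/parameters/hashsize. */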
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
		  &nf_conntrack_htable_size, 0600);

int __init nf_conntrack_init(void)
{
	int max_factor = 8;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
	if (!nf_conntrack_htable_size) {
		nf_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct hlist_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 16384;
		if (nf_conntrack_htable_size < 32)
			nf_conntrack_htable_size = 32;

		/* Use a max. factor of four by default to get the same max as
		 * with the old struct list_heads. When a table size is given
		 * we use the old value of 8 to avoid reducing the max.
		 * entries. */
		max_factor = 4;
	}
	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
						  &nf_conntrack_vmalloc);
	if (!nf_conntrack_hash) {
		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
		goto err_out;
	}

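	/* Worked example: when the table size is auto-sized above
	 * (max_factor == 4), a machine with >= 1GB of RAM gets 16384
	 * buckets, so nf_conntrack_max becomes 4 * 16384 = 65536. */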
	nf_conntrack_max = max_factor * nf_conntrack_htable_size;

	printk("nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
						sizeof(struct nf_conn),
						0, 0, NULL);
	if (!nf_conntrack_cachep) {
		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
		goto err_free_hash;
	}

	ret = nf_conntrack_proto_init();
	if (ret < 0)
		goto err_free_conntrack_slab;

	ret = nf_conntrack_expect_init();
	if (ret < 0)
		goto out_fini_proto;

	ret = nf_conntrack_helper_init();
	if (ret < 0)
		goto out_fini_expect;

	/* For use by REJECT target */
	rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
	rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);

	/* Set up fake conntrack:
	    - to never be deleted, not in any hashes */
	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

	return ret;

out_fini_expect:
	nf_conntrack_expect_fini();
out_fini_proto:
	nf_conntrack_proto_fini();
err_free_conntrack_slab:
	kmem_cache_destroy(nf_conntrack_cachep);
err_free_hash:
	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
			     nf_conntrack_htable_size);
err_out:
	return -ENOMEM;
}