提交 8073e960 编写于 作者: F Florian Westphal 提交者: Pablo Neira Ayuso

netfilter: nat: use keyed locks

no need to serialize on a single lock, we can partition the table and
add/delete in parallel to different slots.
This restores one of the advantages that got lost with the rhlist
revert.

Cc: Ivan Babrou <ibobrik@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
上级 e1bf1687
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
#include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h> #include <linux/netfilter/nf_nat.h>
static DEFINE_SPINLOCK(nf_nat_lock); static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
static DEFINE_MUTEX(nf_nat_proto_mutex); static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
...@@ -425,13 +425,15 @@ nf_nat_setup_info(struct nf_conn *ct, ...@@ -425,13 +425,15 @@ nf_nat_setup_info(struct nf_conn *ct,
if (maniptype == NF_NAT_MANIP_SRC) { if (maniptype == NF_NAT_MANIP_SRC) {
unsigned int srchash; unsigned int srchash;
spinlock_t *lock;
srchash = hash_by_src(net, srchash = hash_by_src(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
spin_lock_bh(&nf_nat_lock); lock = &nf_nat_locks[srchash % ARRAY_SIZE(nf_nat_locks)];
spin_lock_bh(lock);
hlist_add_head_rcu(&ct->nat_bysource, hlist_add_head_rcu(&ct->nat_bysource,
&nf_nat_bysource[srchash]); &nf_nat_bysource[srchash]);
spin_unlock_bh(&nf_nat_lock); spin_unlock_bh(lock);
} }
/* It's done. */ /* It's done. */
...@@ -525,6 +527,16 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data) ...@@ -525,6 +527,16 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
return i->status & IPS_NAT_MASK ? 1 : 0; return i->status & IPS_NAT_MASK ? 1 : 0;
} }
/* Unlink @ct from its nat_bysource hash chain.
 *
 * The hash is recomputed from the original-direction tuple and reduced
 * modulo the size of nf_nat_locks to pick the lock, the same way the
 * insertion path in nf_nat_setup_info() picks it, so adding and deleting
 * the same entry take the same lock.
 */
static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
	unsigned int hash, slot;

	hash = hash_by_src(nf_ct_net(ct),
			   &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	slot = hash % ARRAY_SIZE(nf_nat_locks);

	spin_lock_bh(&nf_nat_locks[slot]);
	hlist_del_rcu(&ct->nat_bysource);
	spin_unlock_bh(&nf_nat_locks[slot]);
}
static int nf_nat_proto_clean(struct nf_conn *ct, void *data) static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{ {
if (nf_nat_proto_remove(ct, data)) if (nf_nat_proto_remove(ct, data))
...@@ -540,9 +552,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) ...@@ -540,9 +552,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
* will delete entry from already-freed table. * will delete entry from already-freed table.
*/ */
clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
spin_lock_bh(&nf_nat_lock); __nf_nat_cleanup_conntrack(ct);
hlist_del_rcu(&ct->nat_bysource);
spin_unlock_bh(&nf_nat_lock);
/* don't delete conntrack. Although that would make things a lot /* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod. * simpler, we'd end up flushing all conntracks on nat rmmod.
...@@ -670,11 +680,8 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister); ...@@ -670,11 +680,8 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
/* No one using conntrack by the time this called. */ /* No one using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct) static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{ {
if (ct->status & IPS_SRC_NAT_DONE) { if (ct->status & IPS_SRC_NAT_DONE)
spin_lock_bh(&nf_nat_lock); __nf_nat_cleanup_conntrack(ct);
hlist_del_rcu(&ct->nat_bysource);
spin_unlock_bh(&nf_nat_lock);
}
} }
static struct nf_ct_ext_type nat_extend __read_mostly = { static struct nf_ct_ext_type nat_extend __read_mostly = {
...@@ -796,10 +803,12 @@ static struct nf_ct_helper_expectfn follow_master_nat = { ...@@ -796,10 +803,12 @@ static struct nf_ct_helper_expectfn follow_master_nat = {
static int __init nf_nat_init(void) static int __init nf_nat_init(void)
{ {
int ret; int ret, i;
/* Leave them the same for the moment. */ /* Leave them the same for the moment. */
nf_nat_htable_size = nf_conntrack_htable_size; nf_nat_htable_size = nf_conntrack_htable_size;
if (nf_nat_htable_size < ARRAY_SIZE(nf_nat_locks))
nf_nat_htable_size = ARRAY_SIZE(nf_nat_locks);
nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0); nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
if (!nf_nat_bysource) if (!nf_nat_bysource)
...@@ -812,6 +821,9 @@ static int __init nf_nat_init(void) ...@@ -812,6 +821,9 @@ static int __init nf_nat_init(void)
return ret; return ret;
} }
for (i = 0; i < ARRAY_SIZE(nf_nat_locks); i++)
spin_lock_init(&nf_nat_locks[i]);
nf_ct_helper_expectfn_register(&follow_master_nat); nf_ct_helper_expectfn_register(&follow_master_nat);
BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册