提交 13ee6ac5 编写于 作者: S Stephen Hemminger 提交者: Pablo Neira Ayuso

netfilter: fix race in conntrack between dump_table and destroy

The netlink interface to dump the connection tracking table has a race
when entries are deleted at the same time. A customer reported a crash
and the backtrace showed thatctnetlink_dump_table was running while a
conntrack entry was being destroyed.
(see https://bugzilla.vyatta.com/show_bug.cgi?id=6402).

According to RCU documentation, when using hlist_nulls the reader
must handle the case of seeing a deleted entry and not proceed
further down the linked list.  The old code would continue
which caused the scan to walk into the free list.

This patch uses locking (rather than RCU) for this operation which
is guaranteed safe, and no longer requires getting reference while
doing dump operation.
Signed-off-by: NStephen Hemminger <shemminger@vyatta.com>
Signed-off-by: NPablo Neira Ayuso <pablo@netfilter.org>
上级 83723d60
...@@ -645,25 +645,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -645,25 +645,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family; u_int8_t l3proto = nfmsg->nfgen_family;
rcu_read_lock(); spin_lock_bh(&nf_conntrack_lock);
last = (struct nf_conn *)cb->args[1]; last = (struct nf_conn *)cb->args[1];
for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
restart: restart:
hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[cb->args[0]], hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
hnnode) { hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue; continue;
ct = nf_ct_tuplehash_to_ctrack(h); ct = nf_ct_tuplehash_to_ctrack(h);
if (!atomic_inc_not_zero(&ct->ct_general.use))
continue;
/* Dump entries of a given L3 protocol number. /* Dump entries of a given L3 protocol number.
* If it is not specified, ie. l3proto == 0, * If it is not specified, ie. l3proto == 0,
* then dump everything. */ * then dump everything. */
if (l3proto && nf_ct_l3num(ct) != l3proto) if (l3proto && nf_ct_l3num(ct) != l3proto)
goto releasect; continue;
if (cb->args[1]) { if (cb->args[1]) {
if (ct != last) if (ct != last)
goto releasect; continue;
cb->args[1] = 0; cb->args[1] = 0;
} }
if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
...@@ -681,8 +679,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -681,8 +679,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
if (acct) if (acct)
memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]));
} }
releasect:
nf_ct_put(ct);
} }
if (cb->args[1]) { if (cb->args[1]) {
cb->args[1] = 0; cb->args[1] = 0;
...@@ -690,7 +686,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -690,7 +686,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
} }
} }
out: out:
rcu_read_unlock(); spin_unlock_bh(&nf_conntrack_lock);
if (last) if (last)
nf_ct_put(last); nf_ct_put(last);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册