提交 addf9b90 编写于 作者: F Florian Westphal 提交者: David S. Miller

net: rtnetlink: use rcu to free rtnl message handlers

rtnetlink is littered with READ_ONCE() because we can have read accesses
while another cpu can write to the structure we're reading by
(un)registering doit or dumpit handlers.

This patch changes this so that (un)registering cpu allocates a new
structure and then publishes it via rcu_assign_pointer, i.e. once
another cpu can see such pointer no modifications will occur anymore.

based on initial patch from Peter Zijlstra.

Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: NFlorian Westphal <fw@strlen.de>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 9753c21f
...@@ -63,6 +63,7 @@ struct rtnl_link { ...@@ -63,6 +63,7 @@ struct rtnl_link {
rtnl_doit_func doit; rtnl_doit_func doit;
rtnl_dumpit_func dumpit; rtnl_dumpit_func dumpit;
unsigned int flags; unsigned int flags;
struct rcu_head rcu;
}; };
static DEFINE_MUTEX(rtnl_mutex); static DEFINE_MUTEX(rtnl_mutex);
...@@ -127,7 +128,7 @@ bool lockdep_rtnl_is_held(void) ...@@ -127,7 +128,7 @@ bool lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held); EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */ #endif /* #ifdef CONFIG_PROVE_LOCKING */
static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static struct rtnl_link __rcu **rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1]; static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype) static inline int rtm_msgindex(int msgtype)
...@@ -144,6 +145,20 @@ static inline int rtm_msgindex(int msgtype) ...@@ -144,6 +145,20 @@ static inline int rtm_msgindex(int msgtype)
return msgindex; return msgindex;
} }
static struct rtnl_link *rtnl_get_link(int protocol, int msgtype)
{
struct rtnl_link **tab;
if (protocol >= ARRAY_SIZE(rtnl_msg_handlers))
protocol = PF_UNSPEC;
tab = rcu_dereference_rtnl(rtnl_msg_handlers[protocol]);
if (!tab)
tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]);
return tab[msgtype];
}
/** /**
* __rtnl_register - Register a rtnetlink message type * __rtnl_register - Register a rtnetlink message type
* @protocol: Protocol family or PF_UNSPEC * @protocol: Protocol family or PF_UNSPEC
...@@ -166,28 +181,52 @@ int __rtnl_register(int protocol, int msgtype, ...@@ -166,28 +181,52 @@ int __rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit, rtnl_doit_func doit, rtnl_dumpit_func dumpit,
unsigned int flags) unsigned int flags)
{ {
struct rtnl_link *tab; struct rtnl_link **tab, *link, *old;
int msgindex; int msgindex;
int ret = -ENOBUFS;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype); msgindex = rtm_msgindex(msgtype);
tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]); rtnl_lock();
tab = rtnl_msg_handlers[protocol];
if (tab == NULL) { if (tab == NULL) {
tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL); tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL);
if (tab == NULL) if (!tab)
return -ENOBUFS; goto unlock;
/* ensures we see the 0 stores */
rcu_assign_pointer(rtnl_msg_handlers[protocol], tab); rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
} }
old = rtnl_dereference(tab[msgindex]);
if (old) {
link = kmemdup(old, sizeof(*old), GFP_KERNEL);
if (!link)
goto unlock;
} else {
link = kzalloc(sizeof(*link), GFP_KERNEL);
if (!link)
goto unlock;
}
WARN_ON(doit && link->doit && link->doit != doit);
if (doit) if (doit)
tab[msgindex].doit = doit; link->doit = doit;
WARN_ON(dumpit && link->dumpit && link->dumpit != dumpit);
if (dumpit) if (dumpit)
tab[msgindex].dumpit = dumpit; link->dumpit = dumpit;
tab[msgindex].flags |= flags;
return 0; link->flags |= flags;
/* publish protocol:msgtype */
rcu_assign_pointer(tab[msgindex], link);
ret = 0;
if (old)
kfree_rcu(old, rcu);
unlock:
rtnl_unlock();
return ret;
} }
EXPORT_SYMBOL_GPL(__rtnl_register); EXPORT_SYMBOL_GPL(__rtnl_register);
...@@ -220,24 +259,25 @@ EXPORT_SYMBOL_GPL(rtnl_register); ...@@ -220,24 +259,25 @@ EXPORT_SYMBOL_GPL(rtnl_register);
*/ */
int rtnl_unregister(int protocol, int msgtype) int rtnl_unregister(int protocol, int msgtype)
{ {
struct rtnl_link *handlers; struct rtnl_link **tab, *link;
int msgindex; int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype); msgindex = rtm_msgindex(msgtype);
rtnl_lock(); rtnl_lock();
handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); tab = rtnl_dereference(rtnl_msg_handlers[protocol]);
if (!handlers) { if (!tab) {
rtnl_unlock(); rtnl_unlock();
return -ENOENT; return -ENOENT;
} }
handlers[msgindex].doit = NULL; link = tab[msgindex];
handlers[msgindex].dumpit = NULL; rcu_assign_pointer(tab[msgindex], NULL);
handlers[msgindex].flags = 0;
rtnl_unlock(); rtnl_unlock();
kfree_rcu(link, rcu);
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(rtnl_unregister); EXPORT_SYMBOL_GPL(rtnl_unregister);
...@@ -251,20 +291,29 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); ...@@ -251,20 +291,29 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
*/ */
void rtnl_unregister_all(int protocol) void rtnl_unregister_all(int protocol)
{ {
struct rtnl_link *handlers; struct rtnl_link **tab, *link;
int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
rtnl_lock(); rtnl_lock();
handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); tab = rtnl_msg_handlers[protocol];
RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL); RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) {
link = tab[msgindex];
if (!link)
continue;
rcu_assign_pointer(tab[msgindex], NULL);
kfree_rcu(link, rcu);
}
rtnl_unlock(); rtnl_unlock();
synchronize_net(); synchronize_net();
while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1) while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1)
schedule(); schedule();
kfree(handlers); kfree(tab);
} }
EXPORT_SYMBOL_GPL(rtnl_unregister_all); EXPORT_SYMBOL_GPL(rtnl_unregister_all);
...@@ -2973,18 +3022,26 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -2973,18 +3022,26 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = 1; s_idx = 1;
for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
struct rtnl_link **tab;
int type = cb->nlh->nlmsg_type-RTM_BASE; int type = cb->nlh->nlmsg_type-RTM_BASE;
struct rtnl_link *handlers; struct rtnl_link *link;
rtnl_dumpit_func dumpit; rtnl_dumpit_func dumpit;
if (idx < s_idx || idx == PF_PACKET) if (idx < s_idx || idx == PF_PACKET)
continue; continue;
handlers = rtnl_dereference(rtnl_msg_handlers[idx]); if (type < 0 || type >= RTM_NR_MSGTYPES)
if (!handlers)
continue; continue;
dumpit = READ_ONCE(handlers[type].dumpit); tab = rcu_dereference_rtnl(rtnl_msg_handlers[idx]);
if (!tab)
continue;
link = tab[type];
if (!link)
continue;
dumpit = link->dumpit;
if (!dumpit) if (!dumpit)
continue; continue;
...@@ -4314,7 +4371,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -4314,7 +4371,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
struct net *net = sock_net(skb->sk); struct net *net = sock_net(skb->sk);
struct rtnl_link *handlers; struct rtnl_link *link;
int err = -EOPNOTSUPP; int err = -EOPNOTSUPP;
rtnl_doit_func doit; rtnl_doit_func doit;
unsigned int flags; unsigned int flags;
...@@ -4338,32 +4395,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -4338,32 +4395,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM; return -EPERM;
if (family >= ARRAY_SIZE(rtnl_msg_handlers))
family = PF_UNSPEC;
rcu_read_lock(); rcu_read_lock();
handlers = rcu_dereference(rtnl_msg_handlers[family]);
if (!handlers) {
family = PF_UNSPEC;
handlers = rcu_dereference(rtnl_msg_handlers[family]);
}
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
struct sock *rtnl; struct sock *rtnl;
rtnl_dumpit_func dumpit; rtnl_dumpit_func dumpit;
u16 min_dump_alloc = 0; u16 min_dump_alloc = 0;
dumpit = READ_ONCE(handlers[type].dumpit); link = rtnl_get_link(family, type);
if (!dumpit) { if (!link || !link->dumpit) {
family = PF_UNSPEC; family = PF_UNSPEC;
handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]); link = rtnl_get_link(family, type);
if (!handlers) if (!link || !link->dumpit)
goto err_unlock;
dumpit = READ_ONCE(handlers[type].dumpit);
if (!dumpit)
goto err_unlock; goto err_unlock;
} }
dumpit = link->dumpit;
refcount_inc(&rtnl_msg_handlers_ref[family]); refcount_inc(&rtnl_msg_handlers_ref[family]);
...@@ -4384,33 +4429,36 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -4384,33 +4429,36 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
return err; return err;
} }
doit = READ_ONCE(handlers[type].doit); link = rtnl_get_link(family, type);
if (!doit) { if (!link || !link->doit) {
family = PF_UNSPEC; family = PF_UNSPEC;
handlers = rcu_dereference(rtnl_msg_handlers[family]); link = rtnl_get_link(PF_UNSPEC, type);
if (!link || !link->doit)
goto out_unlock;
} }
flags = READ_ONCE(handlers[type].flags); flags = link->flags;
if (flags & RTNL_FLAG_DOIT_UNLOCKED) { if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
refcount_inc(&rtnl_msg_handlers_ref[family]); refcount_inc(&rtnl_msg_handlers_ref[family]);
doit = READ_ONCE(handlers[type].doit); doit = link->doit;
rcu_read_unlock(); rcu_read_unlock();
if (doit) if (doit)
err = doit(skb, nlh, extack); err = doit(skb, nlh, extack);
refcount_dec(&rtnl_msg_handlers_ref[family]); refcount_dec(&rtnl_msg_handlers_ref[family]);
return err; return err;
} }
rcu_read_unlock(); rcu_read_unlock();
rtnl_lock(); rtnl_lock();
handlers = rtnl_dereference(rtnl_msg_handlers[family]); link = rtnl_get_link(family, type);
if (handlers) { if (link && link->doit)
doit = READ_ONCE(handlers[type].doit); err = link->doit(skb, nlh, extack);
if (doit)
err = doit(skb, nlh, extack);
}
rtnl_unlock(); rtnl_unlock();
return err;
out_unlock:
rcu_read_unlock();
return err; return err;
err_unlock: err_unlock:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册