提交 75c28bb3 编写于 作者: P Parav Pandit 提交者: Xie XiuQi

RDMA/core: Sync unregistration with netlink commands

mainline inclusion
from mainline-5.0-rc1
commit 01b67117
category: bugfix
bugzilla: 6593
CVE: NA

--------------------------

While an RDMA device is being removed, a netlink resource-info query can race
with the device removal, as shown below:

      CPU-0                                  CPU-1
    --------                               --------
    rdma_nl_rcv_msg()
       nldev_res_get_cq_dumpit()
          mutex_lock(device_lock);
          get device reference
          mutex_unlock(device_lock);        [..]
                                            ib_unregister_device()
                                            /* Valid reference to
                                             * device->dev exists.
                                             */
                                             ib_dealloc_device()

          [..]
          provider->fill_res_entry();

Even though the device object is not freed, fill_res_entry() can get called on
a device which doesn't have a driver anymore. The kernel core device reference
count is not sufficient, as it only keeps the structure valid and
doesn't guarantee that the driver is still loaded.

A similar race can occur between device renaming and device removal, where
device_rename() tries to rename an unregistered device. This is fine
for devices of a class which is not net namespace aware, but it is
incorrect for the net namespace aware class coming in a subsequent series.  If a
class is net namespace aware, then the call trace in [1] below is observed in
the above situation.

Therefore, to avoid the race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.

[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160

Fixes: da5c8507 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jing xiangfeng <jingxiangfeng@huawei.com>
Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
上级 13bcb2e5
...@@ -295,6 +295,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, ...@@ -295,6 +295,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
#endif #endif
struct ib_device *ib_device_get_by_index(u32 ifindex); struct ib_device *ib_device_get_by_index(u32 ifindex);
void ib_device_put(struct ib_device *device);
/* RDMA device netlink */ /* RDMA device netlink */
void nldev_init(void); void nldev_init(void);
void nldev_exit(void); void nldev_exit(void);
......
...@@ -143,7 +143,8 @@ static struct ib_device *__ib_device_get_by_index(u32 index) ...@@ -143,7 +143,8 @@ static struct ib_device *__ib_device_get_by_index(u32 index)
} }
/* /*
* Caller is responsible to return refrerence count by calling put_device() * Caller must perform ib_device_put() to return the device reference count
* when ib_device_get_by_index() returns valid device pointer.
*/ */
struct ib_device *ib_device_get_by_index(u32 index) struct ib_device *ib_device_get_by_index(u32 index)
{ {
...@@ -151,13 +152,21 @@ struct ib_device *ib_device_get_by_index(u32 index) ...@@ -151,13 +152,21 @@ struct ib_device *ib_device_get_by_index(u32 index)
down_read(&lists_rwsem); down_read(&lists_rwsem);
device = __ib_device_get_by_index(index); device = __ib_device_get_by_index(index);
if (device) if (device) {
get_device(&device->dev); /* Do not return a device if unregistration has started. */
if (!refcount_inc_not_zero(&device->refcount))
device = NULL;
}
up_read(&lists_rwsem); up_read(&lists_rwsem);
return device; return device;
} }
/*
 * ib_device_put - drop one reference on device->refcount.
 * @device: device whose reference is being returned.
 *
 * Counterpart of ib_device_get_by_index(), which takes a reference with
 * refcount_inc_not_zero(). When this call drops the count to zero it
 * signals device->unreg_completion.
 *
 * ib_alloc_device() starts the count at 1; ib_unregister_device() drops
 * that reference through this function and then blocks in
 * wait_for_completion() on unreg_completion, so the completion fires once
 * every outstanding reference has been returned.
 */
void ib_device_put(struct ib_device *device)
{
if (refcount_dec_and_test(&device->refcount))
complete(&device->unreg_completion);
}
static struct ib_device *__ib_device_get_by_name(const char *name) static struct ib_device *__ib_device_get_by_name(const char *name)
{ {
struct ib_device *device; struct ib_device *device;
...@@ -272,6 +281,8 @@ struct ib_device *ib_alloc_device(size_t size) ...@@ -272,6 +281,8 @@ struct ib_device *ib_alloc_device(size_t size)
spin_lock_init(&device->client_data_lock); spin_lock_init(&device->client_data_lock);
INIT_LIST_HEAD(&device->client_data_list); INIT_LIST_HEAD(&device->client_data_list);
INIT_LIST_HEAD(&device->port_list); INIT_LIST_HEAD(&device->port_list);
refcount_set(&device->refcount, 1);
init_completion(&device->unreg_completion);
return device; return device;
} }
...@@ -581,6 +592,13 @@ void ib_unregister_device(struct ib_device *device) ...@@ -581,6 +592,13 @@ void ib_unregister_device(struct ib_device *device)
struct ib_client_data *context, *tmp; struct ib_client_data *context, *tmp;
unsigned long flags; unsigned long flags;
/*
* Wait for all netlink command callers to finish working on the
* device.
*/
ib_device_put(device);
wait_for_completion(&device->unreg_completion);
mutex_lock(&device_mutex); mutex_lock(&device_mutex);
down_write(&lists_rwsem); down_write(&lists_rwsem);
......
...@@ -631,13 +631,13 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -631,13 +631,13 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh); nlmsg_end(msg, nlh);
put_device(&device->dev); ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free: err_free:
nlmsg_free(msg); nlmsg_free(msg);
err: err:
put_device(&device->dev); ib_device_put(device);
return err; return err;
} }
...@@ -721,14 +721,14 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -721,14 +721,14 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_free; goto err_free;
nlmsg_end(msg, nlh); nlmsg_end(msg, nlh);
put_device(&device->dev); ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free: err_free:
nlmsg_free(msg); nlmsg_free(msg);
err: err:
put_device(&device->dev); ib_device_put(device);
return err; return err;
} }
...@@ -785,7 +785,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, ...@@ -785,7 +785,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
} }
out: out:
put_device(&device->dev); ib_device_put(device);
cb->args[0] = idx; cb->args[0] = idx;
return skb->len; return skb->len;
} }
...@@ -824,13 +824,13 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -824,13 +824,13 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_free; goto err_free;
nlmsg_end(msg, nlh); nlmsg_end(msg, nlh);
put_device(&device->dev); ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free: err_free:
nlmsg_free(msg); nlmsg_free(msg);
err: err:
put_device(&device->dev); ib_device_put(device);
return ret; return ret;
} }
...@@ -1023,7 +1023,7 @@ next: idx++; ...@@ -1023,7 +1023,7 @@ next: idx++;
if (!filled) if (!filled)
goto err; goto err;
put_device(&device->dev); ib_device_put(device);
return skb->len; return skb->len;
res_err: res_err:
...@@ -1034,7 +1034,7 @@ next: idx++; ...@@ -1034,7 +1034,7 @@ next: idx++;
nlmsg_cancel(skb, nlh); nlmsg_cancel(skb, nlh);
err_index: err_index:
put_device(&device->dev); ib_device_put(device);
return ret; return ret;
} }
......
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
#include <linux/string.h> #include <linux/string.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/refcount.h>
#include <linux/if_link.h> #include <linux/if_link.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/mmu_notifier.h> #include <linux/mmu_notifier.h>
...@@ -2588,6 +2588,12 @@ struct ib_device { ...@@ -2588,6 +2588,12 @@ struct ib_device {
const struct uverbs_object_tree_def *const *driver_specs; const struct uverbs_object_tree_def *const *driver_specs;
enum rdma_driver_id driver_id; enum rdma_driver_id driver_id;
/*
* Provides synchronization between device unregistration and netlink
* commands on a device. To be used only by core.
*/
refcount_t refcount;
struct completion unreg_completion;
}; };
struct ib_client { struct ib_client {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册