提交 35d4a0b6 编写于 作者: Y Yishai Hadas 提交者: Doug Ledford

IB/uverbs: Fix race between ib_uverbs_open and remove_one

Fixes: 2a72f212 ("IB/uverbs: Remove dev_table")

Before this commit there was a device look-up table that was protected
by a spin_lock used by ib_uverbs_open and by ib_uverbs_remove_one. When
it was dropped and container_of was used instead, it enabled the race
with remove_one as dev might be freed just after:
dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev) but
before the kref_get.

In addition, this buggy patch added some dead code as
container_of(x,y,z) can never be NULL and so dev can never be NULL.
As a result the comment above ib_uverbs_open saying "the open method
will either immediately run -ENXIO" is wrong as it can never happen.

The solution follows Jason Gunthorpe suggestion from below URL:
https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg25692.html

cdev will hold a kref on the parent (the containing structure,
ib_uverbs_device) and only when that kref is released it is
guaranteed that open will never be called again.

In addition, fixes the active count scheme to use an atomic
not a kref to prevent WARN_ON as pointed by above comment
from Jason.
Signed-off-by: NYishai Hadas <yishaih@mellanox.com>
Signed-off-by: NShachar Raindel <raindel@mellanox.com>
Reviewed-by: NJason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: NDoug Ledford <dledford@redhat.com>
上级 03c40442
...@@ -85,7 +85,7 @@ ...@@ -85,7 +85,7 @@
*/ */
struct ib_uverbs_device { struct ib_uverbs_device {
struct kref ref; atomic_t refcount;
int num_comp_vectors; int num_comp_vectors;
struct completion comp; struct completion comp;
struct device *dev; struct device *dev;
...@@ -94,6 +94,7 @@ struct ib_uverbs_device { ...@@ -94,6 +94,7 @@ struct ib_uverbs_device {
struct cdev cdev; struct cdev cdev;
struct rb_root xrcd_tree; struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex; struct mutex xrcd_tree_mutex;
struct kobject kobj;
}; };
struct ib_uverbs_event_file { struct ib_uverbs_event_file {
......
...@@ -130,14 +130,18 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, ...@@ -130,14 +130,18 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
static void ib_uverbs_release_dev(struct kref *ref) static void ib_uverbs_release_dev(struct kobject *kobj)
{ {
struct ib_uverbs_device *dev = struct ib_uverbs_device *dev =
container_of(ref, struct ib_uverbs_device, ref); container_of(kobj, struct ib_uverbs_device, kobj);
complete(&dev->comp); kfree(dev);
} }
static struct kobj_type ib_uverbs_dev_ktype = {
.release = ib_uverbs_release_dev,
};
static void ib_uverbs_release_event_file(struct kref *ref) static void ib_uverbs_release_event_file(struct kref *ref)
{ {
struct ib_uverbs_event_file *file = struct ib_uverbs_event_file *file =
...@@ -303,13 +307,19 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, ...@@ -303,13 +307,19 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
return context->device->dealloc_ucontext(context); return context->device->dealloc_ucontext(context);
} }
static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
complete(&dev->comp);
}
static void ib_uverbs_release_file(struct kref *ref) static void ib_uverbs_release_file(struct kref *ref)
{ {
struct ib_uverbs_file *file = struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref); container_of(ref, struct ib_uverbs_file, ref);
module_put(file->device->ib_dev->owner); module_put(file->device->ib_dev->owner);
kref_put(&file->device->ref, ib_uverbs_release_dev); if (atomic_dec_and_test(&file->device->refcount))
ib_uverbs_comp_dev(file->device);
kfree(file); kfree(file);
} }
...@@ -775,9 +785,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) ...@@ -775,9 +785,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
int ret; int ret;
dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
if (dev) if (!atomic_inc_not_zero(&dev->refcount))
kref_get(&dev->ref);
else
return -ENXIO; return -ENXIO;
if (!try_module_get(dev->ib_dev->owner)) { if (!try_module_get(dev->ib_dev->owner)) {
...@@ -798,6 +806,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) ...@@ -798,6 +806,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
mutex_init(&file->mutex); mutex_init(&file->mutex);
filp->private_data = file; filp->private_data = file;
kobject_get(&dev->kobj);
return nonseekable_open(inode, filp); return nonseekable_open(inode, filp);
...@@ -805,13 +814,16 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) ...@@ -805,13 +814,16 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
module_put(dev->ib_dev->owner); module_put(dev->ib_dev->owner);
err: err:
kref_put(&dev->ref, ib_uverbs_release_dev); if (atomic_dec_and_test(&dev->refcount))
ib_uverbs_comp_dev(dev);
return ret; return ret;
} }
static int ib_uverbs_close(struct inode *inode, struct file *filp) static int ib_uverbs_close(struct inode *inode, struct file *filp)
{ {
struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_file *file = filp->private_data;
struct ib_uverbs_device *dev = file->device;
ib_uverbs_cleanup_ucontext(file, file->ucontext); ib_uverbs_cleanup_ucontext(file, file->ucontext);
...@@ -819,6 +831,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) ...@@ -819,6 +831,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
kref_put(&file->ref, ib_uverbs_release_file); kref_put(&file->ref, ib_uverbs_release_file);
kobject_put(&dev->kobj);
return 0; return 0;
} }
...@@ -914,10 +927,11 @@ static void ib_uverbs_add_one(struct ib_device *device) ...@@ -914,10 +927,11 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (!uverbs_dev) if (!uverbs_dev)
return; return;
kref_init(&uverbs_dev->ref); atomic_set(&uverbs_dev->refcount, 1);
init_completion(&uverbs_dev->comp); init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT; uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex); mutex_init(&uverbs_dev->xrcd_tree_mutex);
kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
spin_lock(&map_lock); spin_lock(&map_lock);
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
...@@ -944,6 +958,7 @@ static void ib_uverbs_add_one(struct ib_device *device) ...@@ -944,6 +958,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
cdev_init(&uverbs_dev->cdev, NULL); cdev_init(&uverbs_dev->cdev, NULL);
uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.owner = THIS_MODULE;
uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
if (cdev_add(&uverbs_dev->cdev, base, 1)) if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev; goto err_cdev;
...@@ -974,9 +989,10 @@ static void ib_uverbs_add_one(struct ib_device *device) ...@@ -974,9 +989,10 @@ static void ib_uverbs_add_one(struct ib_device *device)
clear_bit(devnum, overflow_map); clear_bit(devnum, overflow_map);
err: err:
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); if (atomic_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp); wait_for_completion(&uverbs_dev->comp);
kfree(uverbs_dev); kobject_put(&uverbs_dev->kobj);
return; return;
} }
...@@ -996,9 +1012,10 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) ...@@ -996,9 +1012,10 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
else else
clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); if (atomic_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp); wait_for_completion(&uverbs_dev->comp);
kfree(uverbs_dev); kobject_put(&uverbs_dev->kobj);
} }
static char *uverbs_devnode(struct device *dev, umode_t *mode) static char *uverbs_devnode(struct device *dev, umode_t *mode)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册