提交 ee190ab7 编写于 作者: J Jason Gunthorpe

IB/ipoib: Get rid of the sysfs_mutex

This mutex was introduced to deal with the deadlock formed by calling
unregister_netdev from within the sysfs callback of a netdev.

Now that we have priv_destructor and needs_free_netdev we can switch
to the more targeted solution of running the unregister from a
work queue. This avoids the deadlock and gets rid of the mutex.

The next patch in the series needs this mutex eliminated to create
atomicity of unregisteration.
Signed-off-by: NJason Gunthorpe <jgg@mellanox.com>
Signed-off-by: NLeon Romanovsky <leonro@mellanox.com>
上级 9f49a5b5
...@@ -332,7 +332,6 @@ struct ipoib_dev_priv { ...@@ -332,7 +332,6 @@ struct ipoib_dev_priv {
struct rw_semaphore vlan_rwsem; struct rw_semaphore vlan_rwsem;
struct mutex mcast_mutex; struct mutex mcast_mutex;
struct mutex sysfs_mutex;
struct rb_root path_tree; struct rb_root path_tree;
struct list_head path_list; struct list_head path_list;
......
...@@ -1517,19 +1517,13 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ...@@ -1517,19 +1517,13 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
{ {
struct net_device *dev = to_net_dev(d); struct net_device *dev = to_net_dev(d);
int ret; int ret;
struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!mutex_trylock(&priv->sysfs_mutex))
return restart_syscall();
if (!rtnl_trylock()) { if (!rtnl_trylock()) {
mutex_unlock(&priv->sysfs_mutex);
return restart_syscall(); return restart_syscall();
} }
if (dev->reg_state != NETREG_REGISTERED) { if (dev->reg_state != NETREG_REGISTERED) {
rtnl_unlock(); rtnl_unlock();
mutex_unlock(&priv->sysfs_mutex);
return -EPERM; return -EPERM;
} }
...@@ -1541,7 +1535,6 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ...@@ -1541,7 +1535,6 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
*/ */
if (ret != -EBUSY) if (ret != -EBUSY)
rtnl_unlock(); rtnl_unlock();
mutex_unlock(&priv->sysfs_mutex);
return (!ret || ret == -EBUSY) ? count : ret; return (!ret || ret == -EBUSY) ? count : ret;
} }
......
...@@ -2079,7 +2079,6 @@ static void ipoib_build_priv(struct net_device *dev) ...@@ -2079,7 +2079,6 @@ static void ipoib_build_priv(struct net_device *dev)
spin_lock_init(&priv->lock); spin_lock_init(&priv->lock);
init_rwsem(&priv->vlan_rwsem); init_rwsem(&priv->vlan_rwsem);
mutex_init(&priv->mcast_mutex); mutex_init(&priv->mcast_mutex);
mutex_init(&priv->sysfs_mutex);
INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->path_list);
INIT_LIST_HEAD(&priv->child_intfs); INIT_LIST_HEAD(&priv->child_intfs);
...@@ -2476,10 +2475,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) ...@@ -2476,10 +2475,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
list_for_each_entry_safe(priv, tmp, dev_list, list) { list_for_each_entry_safe(priv, tmp, dev_list, list) {
ipoib_parent_unregister_pre(priv->dev); ipoib_parent_unregister_pre(priv->dev);
/* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */
mutex_lock(&priv->sysfs_mutex);
unregister_netdev(priv->dev); unregister_netdev(priv->dev);
mutex_unlock(&priv->sysfs_mutex);
} }
kfree(dev_list); kfree(dev_list);
...@@ -2527,8 +2523,7 @@ static int __init ipoib_init_module(void) ...@@ -2527,8 +2523,7 @@ static int __init ipoib_init_module(void)
* its private workqueue, and we only queue up flush events * its private workqueue, and we only queue up flush events
* on our global flush workqueue. This avoids the deadlocks. * on our global flush workqueue. This avoids the deadlocks.
*/ */
ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", 0);
WQ_MEM_RECLAIM);
if (!ipoib_workqueue) { if (!ipoib_workqueue) {
ret = -ENOMEM; ret = -ENOMEM;
goto err_fs; goto err_fs;
......
...@@ -125,23 +125,16 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ...@@ -125,23 +125,16 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
snprintf(intf_name, sizeof(intf_name), "%s.%04x", snprintf(intf_name, sizeof(intf_name), "%s.%04x",
ppriv->dev->name, pkey); ppriv->dev->name, pkey);
if (!mutex_trylock(&ppriv->sysfs_mutex)) if (!rtnl_trylock())
return restart_syscall(); return restart_syscall();
if (!rtnl_trylock()) {
mutex_unlock(&ppriv->sysfs_mutex);
return restart_syscall();
}
if (pdev->reg_state != NETREG_REGISTERED) { if (pdev->reg_state != NETREG_REGISTERED) {
rtnl_unlock(); rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
return -EPERM; return -EPERM;
} }
if (!down_write_trylock(&ppriv->vlan_rwsem)) { if (!down_write_trylock(&ppriv->vlan_rwsem)) {
rtnl_unlock(); rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
return restart_syscall(); return restart_syscall();
} }
...@@ -178,58 +171,95 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ...@@ -178,58 +171,95 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
out: out:
up_write(&ppriv->vlan_rwsem); up_write(&ppriv->vlan_rwsem);
rtnl_unlock(); rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
return result; return result;
} }
struct ipoib_vlan_delete_work {
struct work_struct work;
struct net_device *dev;
};
/*
* sysfs callbacks of a netdevice cannot obtain the rtnl lock as
* unregister_netdev ultimately deletes the sysfs files while holding the rtnl
* lock. This deadlocks the system.
*
* A callback can use rtnl_trylock to avoid the deadlock but it cannot call
* unregister_netdev as that internally takes and releases the rtnl_lock. So
* instead we find the netdev to unregister and then do the actual unregister
* from the global work queue where we can obtain the rtnl_lock safely.
*/
static void ipoib_vlan_delete_task(struct work_struct *work)
{
struct ipoib_vlan_delete_work *pwork =
container_of(work, struct ipoib_vlan_delete_work, work);
struct net_device *dev = pwork->dev;
rtnl_lock();
/* Unregistering tasks can race with another task or parent removal */
if (dev->reg_state == NETREG_REGISTERED) {
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
down_write(&ppriv->vlan_rwsem);
list_del(&priv->list);
up_write(&ppriv->vlan_rwsem);
ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
unregister_netdevice(dev);
}
rtnl_unlock();
kfree(pwork);
}
int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
{ {
struct ipoib_dev_priv *ppriv, *priv, *tpriv; struct ipoib_dev_priv *ppriv, *priv, *tpriv;
struct net_device *dev = NULL; int rc;
if (!capable(CAP_NET_ADMIN)) if (!capable(CAP_NET_ADMIN))
return -EPERM; return -EPERM;
ppriv = ipoib_priv(pdev); if (!rtnl_trylock())
if (!mutex_trylock(&ppriv->sysfs_mutex))
return restart_syscall(); return restart_syscall();
if (!rtnl_trylock()) {
mutex_unlock(&ppriv->sysfs_mutex);
return restart_syscall();
}
if (pdev->reg_state != NETREG_REGISTERED) { if (pdev->reg_state != NETREG_REGISTERED) {
rtnl_unlock(); rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
return -EPERM; return -EPERM;
} }
if (!down_write_trylock(&ppriv->vlan_rwsem)) { ppriv = ipoib_priv(pdev);
rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
return restart_syscall();
}
rc = -ENODEV;
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
if (priv->pkey == pkey && if (priv->pkey == pkey &&
priv->child_type == IPOIB_LEGACY_CHILD) { priv->child_type == IPOIB_LEGACY_CHILD) {
list_del(&priv->list); struct ipoib_vlan_delete_work *work;
dev = priv->dev;
work = kmalloc(sizeof(*work), GFP_KERNEL);
if (!work) {
rc = -ENOMEM;
goto out;
}
down_write(&ppriv->vlan_rwsem);
list_del_init(&priv->list);
up_write(&ppriv->vlan_rwsem);
work->dev = priv->dev;
INIT_WORK(&work->work, ipoib_vlan_delete_task);
queue_work(ipoib_workqueue, &work->work);
rc = 0;
break; break;
} }
} }
up_write(&ppriv->vlan_rwsem);
if (dev) {
ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
unregister_netdevice(dev);
}
out:
rtnl_unlock(); rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
return (dev) ? 0 : -ENODEV; return rc;
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册