Commit 1c0453d6 authored by Doug Ledford

IB/ipoib: drop mcast_mutex usage

We needed the mcast_mutex when we had to prevent the join completion
callback from having the value it stored in mcast->mc overwritten
by a delayed return from ib_sa_join_multicast.  By storing the return
of ib_sa_join_multicast in an intermediate variable, we prevent a
delayed return from ib_sa_join_multicast overwriting the valid
contents of mcast->mc, and we no longer need a mutex to force the
join callback to run after the return of ib_sa_join_multicast.  This
allows us to do away with the mutex entirely and protect our critical
sections with a just a spinlock instead.  This is highly desirable
as there were some places where we couldn't use a mutex because the
code was not allowed to sleep, and so we were currently using a mix
of mutex and spinlock to protect what we needed to protect.  Now we
only have a spin lock and the locking complexity is greatly reduced.
Signed-off-by: Doug Ledford <dledford@redhat.com>
Parent d2fe937c
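The race the message describes is easiest to see in a stand-alone sketch. The following is a simplified userspace analogue, not the kernel code itself: the names fake_join, struct mcast_ctx and the pthread spinlock are illustrative stand-ins for ib_sa_join_multicast, struct ipoib_mcast and priv->lock. The point is the pattern: the completion callback may run before the registering call returns, so the caller keeps the returned handle in a local variable, and the callback publishes it into the shared field under the lock before clearing the BUSY flag.

	/* Userspace sketch of the intermediate-variable pattern (illustrative only). */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct mcast_ctx {
		pthread_spinlock_t lock;
		void *mc;	/* published handle, protected by lock          */
		bool busy;	/* analogue of IPOIB_MCAST_FLAG_BUSY            */
	};

	/* Completion callback: publish the handle before clearing busy. */
	static void join_complete(struct mcast_ctx *ctx, void *handle, int status)
	{
		pthread_spin_lock(&ctx->lock);
		ctx->mc = status ? NULL : handle;
		ctx->busy = false;	/* readers check busy before touching mc */
		pthread_spin_unlock(&ctx->lock);
	}

	/* Stand-in for the registering call: it may invoke the completion
	 * callback before the caller ever sees the return value. */
	static void *fake_join(struct mcast_ctx *ctx, int simulate_status)
	{
		void *handle = malloc(1);

		join_complete(ctx, handle, simulate_status);
		return handle;
	}

	int main(void)
	{
		struct mcast_ctx ctx = { .mc = NULL, .busy = true };
		void *multicast;	/* intermediate variable for the return */

		pthread_spin_init(&ctx.lock, PTHREAD_PROCESS_PRIVATE);

		/* Because the return value lands in a local, a callback that
		 * has already run (and set ctx.mc) is never overwritten here. */
		multicast = fake_join(&ctx, 0);

		pthread_spin_lock(&ctx.lock);
		printf("published %p (returned %p), busy=%d\n",
		       ctx.mc, multicast, ctx.busy);
		pthread_spin_unlock(&ctx.lock);

		free(multicast);
		pthread_spin_destroy(&ctx.lock);
		return 0;
	}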
@@ -55,8 +55,6 @@ MODULE_PARM_DESC(mcast_debug_level,
 		 "Enable multicast debug tracing if > 0");
 #endif
 
-static DEFINE_MUTEX(mcast_mutex);
-
 struct ipoib_mcast_iter {
 	struct net_device *dev;
 	union ib_gid       mgid;
@@ -67,7 +65,7 @@ struct ipoib_mcast_iter {
 };
 
 /*
- * This should be called with the mcast_mutex held
+ * This should be called with the priv->lock held
 */
 static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
 					       struct ipoib_mcast *mcast,
@@ -352,16 +350,6 @@ static int ipoib_mcast_join_complete(int status,
 			   "sendonly " : "",
 			   mcast->mcmember.mgid.raw, status);
 
-	/*
-	 * We have to take the mutex to force mcast_join to
-	 * return from ib_sa_multicast_join and set mcast->mc to a
-	 * valid value. Otherwise we were racing with ourselves in
-	 * that we might fail here, but get a valid return from
-	 * ib_sa_multicast_join after we had cleared mcast->mc here,
-	 * resulting in mis-matched joins and leaves and a deadlock
-	 */
-	mutex_lock(&mcast_mutex);
-
 	/* We trap for port events ourselves. */
 	if (status == -ENETRESET) {
 		status = 0;
@@ -383,8 +371,10 @@ static int ipoib_mcast_join_complete(int status,
 		 * send out all of the non-broadcast joins
 		 */
 		if (mcast == priv->broadcast) {
+			spin_lock_irq(&priv->lock);
 			queue_work(priv->wq, &priv->carrier_on_task);
 			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+			goto out_locked;
 		}
 	} else {
 		if (mcast->logcount++ < 20) {
@@ -417,16 +407,28 @@ static int ipoib_mcast_join_complete(int status,
 				dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
 			}
 			netif_tx_unlock_bh(dev);
-		} else
+		} else {
+			spin_lock_irq(&priv->lock);
 			/* Requeue this join task with a backoff delay */
 			__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
+			goto out_locked;
+		}
 	}
 
 out:
-	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	spin_lock_irq(&priv->lock);
+out_locked:
+	/*
+	 * Make sure to set mcast->mc before we clear the busy flag to avoid
+	 * racing with code that checks for BUSY before checking mcast->mc
+	 */
 	if (status)
 		mcast->mc = NULL;
+	else
+		mcast->mc = multicast;
+	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	spin_unlock_irq(&priv->lock);
 	complete(&mcast->done);
-	mutex_unlock(&mcast_mutex);
 	return status;
 }
@@ -434,6 +436,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 			     int create)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_sa_multicast *multicast;
 	struct ib_sa_mcmember_rec rec = {
 		.join_state = 1
 	};
@@ -475,18 +478,19 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 		rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
 	}
 
-	mutex_lock(&mcast_mutex);
-	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
+	multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
 					 &rec, comp_mask, GFP_KERNEL,
 					 ipoib_mcast_join_complete, mcast);
-	if (IS_ERR(mcast->mc)) {
-		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-		ret = PTR_ERR(mcast->mc);
+	if (IS_ERR(multicast)) {
+		ret = PTR_ERR(multicast);
 		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
+		spin_lock_irq(&priv->lock);
+		/* Requeue this join task with a backoff delay */
 		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
+		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+		spin_unlock_irq(&priv->lock);
 		complete(&mcast->done);
 	}
-	mutex_unlock(&mcast_mutex);
 }
 
 void ipoib_mcast_join_task(struct work_struct *work)
@@ -515,15 +519,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
 	else
 		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
 
-	/*
-	 * We have to hold the mutex to keep from racing with the join
-	 * completion threads on setting flags on mcasts, and we have
-	 * to hold the priv->lock because dev_flush will remove entries
-	 * out from underneath us, so at a minimum we need the lock
-	 * through the time that we do the for_each loop of the mcast
-	 * list or else dev_flush can make us oops.
-	 */
-	mutex_lock(&mcast_mutex);
 	spin_lock_irq(&priv->lock);
 	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
 		goto out;
@@ -584,9 +579,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
 			else
 				create = 1;
 			spin_unlock_irq(&priv->lock);
-			mutex_unlock(&mcast_mutex);
 			ipoib_mcast_join(dev, mcast, create);
-			mutex_lock(&mcast_mutex);
 			spin_lock_irq(&priv->lock);
 		} else if (!delay_until ||
 			   time_before(mcast->delay_until, delay_until))
@@ -608,7 +601,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
 		set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 	}
 	spin_unlock_irq(&priv->lock);
-	mutex_unlock(&mcast_mutex);
 	if (mcast)
 		ipoib_mcast_join(dev, mcast, create);
 }
@@ -616,13 +608,14 @@ void ipoib_mcast_join_task(struct work_struct *work)
 int ipoib_mcast_start_thread(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned long flags;
 
 	ipoib_dbg_mcast(priv, "starting multicast thread\n");
 
-	mutex_lock(&mcast_mutex);
+	spin_lock_irqsave(&priv->lock, flags);
 	set_bit(IPOIB_MCAST_RUN, &priv->flags);
 	__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
-	mutex_unlock(&mcast_mutex);
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return 0;
 }
@@ -630,13 +623,14 @@ int ipoib_mcast_start_thread(struct net_device *dev)
 int ipoib_mcast_stop_thread(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned long flags;
 
 	ipoib_dbg_mcast(priv, "stopping multicast thread\n");
 
-	mutex_lock(&mcast_mutex);
+	spin_lock_irqsave(&priv->lock, flags);
 	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
 	cancel_delayed_work(&priv->mcast_task);
-	mutex_unlock(&mcast_mutex);
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	flush_workqueue(priv->wq);
......