Commit 6efaf10f authored by Petr Mladek, committed by Doug Ledford

IB/rdmavt: Avoid queuing work into a destroyed cq kthread worker

The memory barrier is not enough to protect against queuing work into
a destroyed cq kthread worker. Just imagine the following situation:

CPU1				CPU2

rvt_cq_enter()
  worker =  cq->rdi->worker;

				rvt_cq_exit()
				  rdi->worker = NULL;
				  smp_wmb();
				  kthread_flush_worker(worker);
				  kthread_stop(worker->task);
				  kfree(worker);

				  // nothing queued yet =>
				  // nothing flushed and
				  // happily stopped and freed

  if (likely(worker)) {
     // true => read before CPU2 acted
     cq->notify = RVT_CQ_NONE;
     cq->triggered++;
     kthread_queue_work(worker, &cq->comptask);

  BANG: worker has been flushed/stopped/freed in the meantime.

This patch solves the problem by protecting the critical sections with
rdi->n_cqs_lock. This lock is not much contended and is a reasonable
fit for this purpose.

One catch is that rvt_cq_enter() might be called from IRQ context.
Therefore we must always take the lock with IRQs disabled to avoid
a possible deadlock.
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Parent 66431b0e
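To make the deadlock mentioned in the last paragraph of the message concrete: if a lock can be taken from IRQ context, any process-context holder must disable IRQs, or an interrupt arriving on the same CPU can spin on a lock that its own CPU already holds. A minimal illustrative sketch (hypothetical names, not code from this patch):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);	/* hypothetical lock */

/* Runs in process context with IRQs still enabled. */
static void process_context_path(void)
{
	spin_lock(&example_lock);
	/* ...an interrupt may fire on this CPU right here... */
	spin_unlock(&example_lock);
}

/* Runs from an interrupt handler on the same CPU. */
static void irq_context_path(void)
{
	/*
	 * If the interrupt arrived while process_context_path() held
	 * example_lock, this spins forever: the lock holder cannot
	 * run again until the handler returns => deadlock.
	 */
	spin_lock(&example_lock);
	spin_unlock(&example_lock);
}

Taking the lock with spin_lock_irq()/spin_lock_irqsave() in the process-context path closes this window, which is why the patch takes rdi->n_cqs_lock with IRQs disabled everywhere it might race with an interrupt-context caller.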
@@ -119,18 +119,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
 	if (cq->notify == IB_CQ_NEXT_COMP ||
 	    (cq->notify == IB_CQ_SOLICITED &&
 	     (solicited || entry->status != IB_WC_SUCCESS))) {
-		struct kthread_worker *worker;
 		/*
 		 * This will cause send_complete() to be called in
 		 * another thread.
 		 */
-		smp_read_barrier_depends(); /* see rvt_cq_exit */
-		worker = cq->rdi->worker;
-		if (likely(worker)) {
+		spin_lock(&cq->rdi->n_cqs_lock);
+		if (likely(cq->rdi->worker)) {
 			cq->notify = RVT_CQ_NONE;
 			cq->triggered++;
-			kthread_queue_work(worker, &cq->comptask);
+			kthread_queue_work(cq->rdi->worker, &cq->comptask);
 		}
+		spin_unlock(&cq->rdi->n_cqs_lock);
 	}
 
 	spin_unlock_irqrestore(&cq->lock, flags);
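Design note: plain spin_lock() is enough for n_cqs_lock inside rvt_cq_enter() because cq->lock was taken with spin_lock_irqsave() earlier in the function (hence the spin_unlock_irqrestore() above), so IRQs are already disabled on this path; the call sites in the hunks below run with IRQs enabled and therefore use the _irq variants.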
@@ -240,15 +239,15 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
 		}
 	}
 
-	spin_lock(&rdi->n_cqs_lock);
+	spin_lock_irq(&rdi->n_cqs_lock);
 	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
-		spin_unlock(&rdi->n_cqs_lock);
+		spin_unlock_irq(&rdi->n_cqs_lock);
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_ip;
 	}
 
 	rdi->n_cqs_allocated++;
-	spin_unlock(&rdi->n_cqs_lock);
+	spin_unlock_irq(&rdi->n_cqs_lock);
 
 	if (cq->ip) {
 		spin_lock_irq(&rdi->pending_lock);
@@ -296,9 +295,9 @@ int rvt_destroy_cq(struct ib_cq *ibcq)
 	struct rvt_dev_info *rdi = cq->rdi;
 
 	kthread_flush_work(&cq->comptask);
-	spin_lock(&rdi->n_cqs_lock);
+	spin_lock_irq(&rdi->n_cqs_lock);
 	rdi->n_cqs_allocated--;
-	spin_unlock(&rdi->n_cqs_lock);
+	spin_unlock_irq(&rdi->n_cqs_lock);
 	if (cq->ip)
 		kref_put(&cq->ip->ref, rvt_release_mmap_info);
 	else
@@ -541,12 +540,15 @@ void rvt_cq_exit(struct rvt_dev_info *rdi)
 {
 	struct kthread_worker *worker;
 
-	worker = rdi->worker;
-	if (!worker)
+	/* block future queuing from send_complete() */
+	spin_lock_irq(&rdi->n_cqs_lock);
+	if (!rdi->worker) {
+		spin_unlock_irq(&rdi->n_cqs_lock);
 		return;
-	/* blocks future queuing from send_complete() */
+	}
+	worker = rdi->worker;
 	rdi->worker = NULL;
-	smp_wmb(); /* See rdi_cq_enter */
+	spin_unlock_irq(&rdi->n_cqs_lock);
 	kthread_flush_worker(worker);
 	kthread_stop(worker->task);
 	kfree(worker);
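Design note on rvt_cq_exit(): clearing rdi->worker under n_cqs_lock guarantees that rvt_cq_enter() can no longer queue new work once the lock is dropped, while the flush/stop/free sequence runs outside the lock because kthread_flush_worker() sleeps, which is not allowed while holding a spinlock.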