提交 fcf38cdf 编写于 作者: O Omar Sandoval 提交者: Jens Axboe

kyber: fix another domain token wait queue hang

Commit 8cf46660 ("kyber: fix hang on domain token wait queue") fixed
a hang caused by leaving wait entries on the domain token wait queue
after the __sbitmap_queue_get() retry succeeded, making that wait entry
a "dud" which won't in turn wake more entries up. However, we can also
get a dud entry if kyber_get_domain_token() fails once but is then
called again and succeeds. This can happen if the hardware queue is
rerun for some other reason, or, more likely, kyber_dispatch_request()
tries the same domain twice.

The fix is to remove our entry from the wait queue whenever we
successfully get a token. The only complication is that we might be on
one of many wait queues in the struct sbitmap_queue, but that's easily
fixed by remembering which wait queue we were put on.

While we're here, only initialize the wait queue entry once instead of
on every wait, and use spin_lock_irq() instead of spin_lock_irqsave(),
since this is always called from process context with irqs enabled.
Signed-off-by: NOmar Sandoval <osandov@fb.com>
Signed-off-by: NJens Axboe <axboe@kernel.dk>
上级 ae64f9bd
...@@ -100,9 +100,13 @@ struct kyber_hctx_data { ...@@ -100,9 +100,13 @@ struct kyber_hctx_data {
unsigned int cur_domain; unsigned int cur_domain;
unsigned int batching; unsigned int batching;
wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS]; wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
atomic_t wait_index[KYBER_NUM_DOMAINS]; atomic_t wait_index[KYBER_NUM_DOMAINS];
}; };
static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
void *key);
static int rq_sched_domain(const struct request *rq) static int rq_sched_domain(const struct request *rq)
{ {
unsigned int op = rq->cmd_flags; unsigned int op = rq->cmd_flags;
...@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) ...@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
for (i = 0; i < KYBER_NUM_DOMAINS; i++) { for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
INIT_LIST_HEAD(&khd->rqs[i]); INIT_LIST_HEAD(&khd->rqs[i]);
init_waitqueue_func_entry(&khd->domain_wait[i],
kyber_domain_wake);
khd->domain_wait[i].private = hctx;
INIT_LIST_HEAD(&khd->domain_wait[i].entry); INIT_LIST_HEAD(&khd->domain_wait[i].entry);
atomic_set(&khd->wait_index[i], 0); atomic_set(&khd->wait_index[i], 0);
} }
...@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd, ...@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
int nr; int nr;
nr = __sbitmap_queue_get(domain_tokens); nr = __sbitmap_queue_get(domain_tokens);
if (nr >= 0)
return nr;
/* /*
* If we failed to get a domain token, make sure the hardware queue is * If we failed to get a domain token, make sure the hardware queue is
* run when one becomes available. Note that this is serialized on * run when one becomes available. Note that this is serialized on
* khd->lock, but we still need to be careful about the waker. * khd->lock, but we still need to be careful about the waker.
*/ */
if (list_empty_careful(&wait->entry)) { if (nr < 0 && list_empty_careful(&wait->entry)) {
init_waitqueue_func_entry(wait, kyber_domain_wake);
wait->private = hctx;
ws = sbq_wait_ptr(domain_tokens, ws = sbq_wait_ptr(domain_tokens,
&khd->wait_index[sched_domain]); &khd->wait_index[sched_domain]);
khd->domain_ws[sched_domain] = ws;
add_wait_queue(&ws->wait, wait); add_wait_queue(&ws->wait, wait);
/* /*
* Try again in case a token was freed before we got on the wait * Try again in case a token was freed before we got on the wait
* queue. The waker may have already removed the entry from the * queue.
* wait queue, but list_del_init() is okay with that.
*/ */
nr = __sbitmap_queue_get(domain_tokens); nr = __sbitmap_queue_get(domain_tokens);
if (nr >= 0) { }
unsigned long flags;
spin_lock_irqsave(&ws->wait.lock, flags); /*
list_del_init(&wait->entry); * If we got a token while we were on the wait queue, remove ourselves
spin_unlock_irqrestore(&ws->wait.lock, flags); * from the wait queue to ensure that all wake ups make forward
} * progress. It's possible that the waker already deleted the entry
* between the !list_empty_careful() check and us grabbing the lock, but
* list_del_init() is okay with that.
*/
if (nr >= 0 && !list_empty_careful(&wait->entry)) {
ws = khd->domain_ws[sched_domain];
spin_lock_irq(&ws->wait.lock);
list_del_init(&wait->entry);
spin_unlock_irq(&ws->wait.lock);
} }
return nr; return nr;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册