Commit 326d641b authored by Yu Kuai, committed by Zheng Zengkai

blk-mq: decrease pending_queues when it expires

hulk inclusion
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/I4S8DW

---------------------------

If pending_queues is increased once, it is only decreased again when
nr_active drops to zero. This leads to under-utilization of the host
tags: while pending_queues is non-zero, the tags available to a queue
are capped at max(host tags / active_queues, 4) rather than the number
of tags the queue actually needs.

Fix this by attaching an expiration time to each increase of
pending_queues and decreasing the count when it expires. As long as
tag allocation no longer fails, pending_queues then decays back to
zero and the queue can again use the whole host tag space.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent 19ac7109
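Before the diff, a quick illustration of the cap described in the commit message may help. This is a minimal sketch, not the kernel's actual helper: the function name fair_share_depth and the standalone-C framing are assumptions, and it only models the max(host tags / active_queues, 4) rule stated above.

/*
 * Sketch of the fair-share cap: while sharing is in effect
 * (active_queues > 0), a queue may use at most
 * max(host_tags / active_queues, 4) tags; once nothing is
 * sharing, it may use the whole host tag space.
 */
static unsigned int fair_share_depth(unsigned int host_tags,
                                     unsigned int active_queues)
{
        unsigned int depth;

        if (!active_queues)
                return host_tags;

        depth = host_tags / active_queues;
        return depth > 4 ? depth : 4;
}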
block/blk-mq-debugfs.c:

@@ -224,6 +224,19 @@ static int queue_tag_set_show(void *data, struct seq_file *m)
 	return 0;
 }
 
+static int queue_dtag_wait_time_show(void *data, struct seq_file *m)
+{
+	struct request_queue *q = data;
+	unsigned int time = 0;
+
+	if (test_bit(QUEUE_FLAG_HCTX_WAIT, &q->queue_flags))
+		time = jiffies_to_msecs(jiffies - READ_ONCE(q->dtag_wait_time));
+
+	seq_printf(m, "%u\n", time);
+
+	return 0;
+}
+
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
 	{ "poll_stat", 0400, queue_poll_stat_show },
 	{ "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
@@ -232,6 +245,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
 	{ "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
 	{ "zone_wlock", 0400, queue_zone_wlock_show, NULL },
 	{ "tag_set", 0400, queue_tag_set_show, NULL },
+	{ "dtag_wait_time_ms", 0400, queue_dtag_wait_time_show, NULL },
 	{ },
 };
@@ -651,6 +665,20 @@ static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
 	return 0;
 }
 
+static int hctx_dtag_wait_time_show(void *data, struct seq_file *m)
+{
+	struct blk_mq_hw_ctx *hctx = data;
+	unsigned int time = 0;
+
+	if (test_bit(BLK_MQ_S_DTAG_WAIT, &hctx->state))
+		time = jiffies_to_msecs(jiffies -
+					READ_ONCE(hctx->dtag_wait_time));
+
+	seq_printf(m, "%u\n", time);
+
+	return 0;
+}
+
 #define CTX_RQ_SEQ_OPS(name, type) \
 static void *ctx_##name##_rq_list_start(struct seq_file *m, loff_t *pos) \
 	__acquires(&ctx->lock) \
@@ -821,6 +849,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 	{"active", 0400, hctx_active_show},
 	{"dispatch_busy", 0400, hctx_dispatch_busy_show},
 	{"type", 0400, hctx_type_show},
+	{"dtag_wait_time_ms", 0400, hctx_dtag_wait_time_show},
 	{},
 };
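With debugfs mounted, the two new read-only attributes should appear alongside the existing block debugfs entries, e.g. /sys/kernel/debug/block/<disk>/dtag_wait_time_ms for the queue-level value and /sys/kernel/debug/block/<disk>/hctxN/dtag_wait_time_ms per hardware queue (paths assume the standard block debugfs layout). A reading of 0 means the pending flag is not set; otherwise the value is the elapsed milliseconds since dtag_wait_time was last stamped.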
block/blk-mq-tag.c:

@@ -15,6 +15,8 @@
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
+#define BLK_MQ_DTAG_WAIT_EXPIRE	(5 * HZ)
+
 /*
  * If a previously inactive queue goes active, bump the active user count.
  * We need to do this before try to allocate driver tag, then even if fail
@@ -80,29 +82,53 @@ void __blk_mq_dtag_busy(struct blk_mq_hw_ctx *hctx)
 		struct blk_mq_tag_set *set = q->tag_set;
 
 		if (!test_bit(QUEUE_FLAG_HCTX_WAIT, &q->queue_flags) &&
-		    !test_and_set_bit(QUEUE_FLAG_HCTX_WAIT, &q->queue_flags))
+		    !test_and_set_bit(QUEUE_FLAG_HCTX_WAIT, &q->queue_flags)) {
+			WRITE_ONCE(q->dtag_wait_time, jiffies);
 			atomic_inc(&set->pending_queues_shared_sbitmap);
+		}
 	} else {
 		if (!test_bit(BLK_MQ_S_DTAG_WAIT, &hctx->state) &&
-		    !test_and_set_bit(BLK_MQ_S_DTAG_WAIT, &hctx->state))
+		    !test_and_set_bit(BLK_MQ_S_DTAG_WAIT, &hctx->state)) {
+			WRITE_ONCE(hctx->dtag_wait_time, jiffies);
 			atomic_inc(&hctx->tags->pending_queues);
+		}
 	}
 }
 
-void __blk_mq_dtag_idle(struct blk_mq_hw_ctx *hctx)
+void __blk_mq_dtag_idle(struct blk_mq_hw_ctx *hctx, bool force)
 {
 	struct blk_mq_tags *tags = hctx->tags;
 	struct request_queue *q = hctx->queue;
 	struct blk_mq_tag_set *set = q->tag_set;
 
 	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
+		if (!test_bit(QUEUE_FLAG_HCTX_WAIT, &q->queue_flags))
+			return;
+
+		if (!force && time_before(jiffies,
+					  READ_ONCE(q->dtag_wait_time) +
+					  BLK_MQ_DTAG_WAIT_EXPIRE))
+			return;
+
 		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_WAIT,
 					&q->queue_flags))
 			return;
+
+		WRITE_ONCE(q->dtag_wait_time, jiffies);
 		atomic_dec(&set->pending_queues_shared_sbitmap);
 	} else {
+		if (!test_bit(BLK_MQ_S_DTAG_WAIT, &hctx->state))
+			return;
+
+		if (!force && time_before(jiffies,
+					  READ_ONCE(hctx->dtag_wait_time) +
+					  BLK_MQ_DTAG_WAIT_EXPIRE))
+			return;
+
 		if (!test_and_clear_bit(BLK_MQ_S_DTAG_WAIT, &hctx->state))
 			return;
+
+		WRITE_ONCE(hctx->dtag_wait_time, jiffies);
 		atomic_dec(&tags->pending_queues);
 	}
 }
@@ -206,6 +232,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 	sbitmap_finish_wait(bt, ws, &wait);
 
 found_tag:
+	if (!data->q->elevator)
+		blk_mq_dtag_idle(data->hctx, false);
 	/*
	 * Give up this allocation if the hctx is inactive. The caller will
	 * retry on an active hctx.
	 */
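The heart of the fix is the gate added to __blk_mq_dtag_idle() above: a non-forced caller may only clear the pending state once BLK_MQ_DTAG_WAIT_EXPIRE (5 * HZ, i.e. five seconds) has elapsed since the state was stamped. A condensed user-space model of that gate is sketched below; the names mirror the patch, HZ_EXAMPLE stands in for the kernel's HZ, and the wraparound-safe time_before() is replaced by plain arithmetic for illustration.

#include <stdbool.h>

#define HZ_EXAMPLE		250	/* stand-in for the kernel's HZ */
#define DTAG_WAIT_EXPIRE	(5 * HZ_EXAMPLE)

/*
 * Forced callers (the hctx is known idle or being torn down) always
 * clear the pending state; non-forced callers must first wait out
 * the expiry window that opened when the state was stamped.
 */
static bool may_clear_pending(unsigned long now, unsigned long stamped,
			      bool force)
{
	return force || now - stamped >= DTAG_WAIT_EXPIRE;
}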
block/blk-mq-tag.h:

@@ -82,7 +82,7 @@ enum {
 extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx);
 extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx);
 extern void __blk_mq_dtag_busy(struct blk_mq_hw_ctx *hctx);
-extern void __blk_mq_dtag_idle(struct blk_mq_hw_ctx *hctx);
+extern void __blk_mq_dtag_idle(struct blk_mq_hw_ctx *hctx, bool force);
 
 static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
@@ -109,12 +109,12 @@ static inline void blk_mq_dtag_busy(struct blk_mq_hw_ctx *hctx)
 	__blk_mq_dtag_busy(hctx);
 }
 
-static inline void blk_mq_dtag_idle(struct blk_mq_hw_ctx *hctx)
+static inline void blk_mq_dtag_idle(struct blk_mq_hw_ctx *hctx, bool force)
 {
 	if (!(mq_unfair_dtag && (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)))
 		return;
 
-	__blk_mq_dtag_idle(hctx);
+	__blk_mq_dtag_idle(hctx, force);
 }
 
 static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
block/blk-mq.c:

@@ -542,7 +542,7 @@ void blk_mq_free_request(struct request *rq)
 		__blk_mq_dec_active_requests(hctx);
 		if (mq_unfair_dtag && !__blk_mq_active_requests(hctx)) {
 			blk_mq_tag_idle(hctx);
-			blk_mq_dtag_idle(hctx);
+			blk_mq_dtag_idle(hctx, true);
 		}
 	}
@@ -1013,7 +1013,7 @@ static void blk_mq_timeout_work(struct work_struct *work)
 		/* the hctx may be unmapped, so check it here */
 		if (blk_mq_hw_queue_mapped(hctx)) {
 			blk_mq_tag_idle(hctx);
-			blk_mq_dtag_idle(hctx);
+			blk_mq_dtag_idle(hctx, true);
 		}
 	}
 }
@@ -1124,6 +1124,7 @@ static bool __blk_mq_get_driver_tag(struct request *rq)
 		return false;
 	}
 
+	blk_mq_dtag_idle(rq->mq_hctx, false);
 	rq->tag = tag + tag_offset;
 	return true;
 }
@@ -2725,7 +2726,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 	if (blk_mq_hw_queue_mapped(hctx)) {
 		blk_mq_tag_idle(hctx);
-		blk_mq_dtag_idle(hctx);
+		blk_mq_dtag_idle(hctx, true);
 	}
 
 	blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
@@ -2825,6 +2826,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
 	INIT_LIST_HEAD(&hctx->dispatch);
 	hctx->queue = q;
 	hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED;
+	hctx->dtag_wait_time = jiffies;
 
 	INIT_LIST_HEAD(&hctx->hctx_list);
@@ -3047,7 +3049,7 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 			hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
 		} else {
 			blk_mq_tag_idle(hctx);
-			blk_mq_dtag_idle(hctx);
+			blk_mq_dtag_idle(hctx, true);
 			hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
 		}
 	}
@@ -3375,6 +3377,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	spin_lock_init(&q->requeue_lock);
 
 	q->nr_requests = set->queue_depth;
+	q->dtag_wait_time = jiffies;
 
 	/*
	 * Default to classic polling
	 */
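Note how the call sites above divide: the teardown and fully-idle paths (blk_mq_free_request once no requests are active, blk_mq_timeout_work, blk_mq_exit_hctx, queue_set_hctx_shared) pass force=true and clear the pending state unconditionally, while the allocation fast paths (__blk_mq_get_driver_tag here, blk_mq_get_tag in blk-mq-tag.c) pass force=false so the state can only decay through the five-second expiry.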
include/linux/blk-mq.h:

@@ -172,6 +172,12 @@ struct blk_mq_hw_ctx {
 	 */
 	struct list_head	hctx_list;
 
+	/**
+	 * @dtag_wait_time: record when hardware queue is pending, specifically
+	 * when BLK_MQ_S_DTAG_WAIT is set in state.
+	 */
+	unsigned long		dtag_wait_time;
+
 	KABI_RESERVE(1)
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
include/linux/blkdev.h:

@@ -602,6 +602,7 @@ struct request_queue {
 #define BLK_MAX_WRITE_HINTS	5
 	u64			write_hints[BLK_MAX_WRITE_HINTS];
 
+	unsigned long		dtag_wait_time;
 	KABI_RESERVE(1)
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)