提交 24391c0d 编写于 作者: S Shaohua Li 提交者: Jens Axboe

blk-mq: add tag allocation policy

This is the blk-mq part of the tag allocation policy support. The default
allocation policy is unchanged (though it is not a strict FIFO). The new
policy is round-robin, intended for libata. It is a best-effort
implementation: if multiple tasks are competing, the tags returned will be
mixed (which is unavoidable even with !mq, as requests from different tasks
can be mixed in the queue).

Cc: Jens Axboe <axboe@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
上级 ee1b6f7a
...@@ -140,7 +140,8 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, ...@@ -140,7 +140,8 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
return atomic_read(&hctx->nr_active) < depth; return atomic_read(&hctx->nr_active) < depth;
} }
static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag) static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag,
bool nowrap)
{ {
int tag, org_last_tag = last_tag; int tag, org_last_tag = last_tag;
...@@ -152,7 +153,7 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag) ...@@ -152,7 +153,7 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
* offset to 0 in a failure case, so start from 0 to * offset to 0 in a failure case, so start from 0 to
* exhaust the map. * exhaust the map.
*/ */
if (org_last_tag && last_tag) { if (org_last_tag && last_tag && !nowrap) {
last_tag = org_last_tag = 0; last_tag = org_last_tag = 0;
continue; continue;
} }
...@@ -170,6 +171,8 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag) ...@@ -170,6 +171,8 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
return tag; return tag;
} }
/*
 * True when the tag map pointed to by @tags uses the round-robin allocation
 * policy (its alloc_policy field equals BLK_TAG_ALLOC_RR). The argument is
 * parenthesized so that any pointer expression, not just a plain identifier,
 * is dereferenced as a whole.
 */
#define BT_ALLOC_RR(tags) ((tags)->alloc_policy == BLK_TAG_ALLOC_RR)
/* /*
* Straight forward bitmap tag implementation, where each bit is a tag * Straight forward bitmap tag implementation, where each bit is a tag
* (cleared == free, and set == busy). The small twist is using per-cpu * (cleared == free, and set == busy). The small twist is using per-cpu
...@@ -182,7 +185,7 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag) ...@@ -182,7 +185,7 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
* until the map is exhausted. * until the map is exhausted.
*/ */
static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt, static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
unsigned int *tag_cache) unsigned int *tag_cache, struct blk_mq_tags *tags)
{ {
unsigned int last_tag, org_last_tag; unsigned int last_tag, org_last_tag;
int index, i, tag; int index, i, tag;
...@@ -194,7 +197,8 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt, ...@@ -194,7 +197,8 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
index = TAG_TO_INDEX(bt, last_tag); index = TAG_TO_INDEX(bt, last_tag);
for (i = 0; i < bt->map_nr; i++) { for (i = 0; i < bt->map_nr; i++) {
tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag)); tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag),
BT_ALLOC_RR(tags));
if (tag != -1) { if (tag != -1) {
tag += (index << bt->bits_per_word); tag += (index << bt->bits_per_word);
goto done; goto done;
...@@ -221,7 +225,7 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt, ...@@ -221,7 +225,7 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
* up using the specific cached tag. * up using the specific cached tag.
*/ */
done: done:
if (tag == org_last_tag) { if (tag == org_last_tag || unlikely(BT_ALLOC_RR(tags))) {
last_tag = tag + 1; last_tag = tag + 1;
if (last_tag >= bt->depth - 1) if (last_tag >= bt->depth - 1)
last_tag = 0; last_tag = 0;
...@@ -250,13 +254,13 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt, ...@@ -250,13 +254,13 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
static int bt_get(struct blk_mq_alloc_data *data, static int bt_get(struct blk_mq_alloc_data *data,
struct blk_mq_bitmap_tags *bt, struct blk_mq_bitmap_tags *bt,
struct blk_mq_hw_ctx *hctx, struct blk_mq_hw_ctx *hctx,
unsigned int *last_tag) unsigned int *last_tag, struct blk_mq_tags *tags)
{ {
struct bt_wait_state *bs; struct bt_wait_state *bs;
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
int tag; int tag;
tag = __bt_get(hctx, bt, last_tag); tag = __bt_get(hctx, bt, last_tag, tags);
if (tag != -1) if (tag != -1)
return tag; return tag;
...@@ -267,7 +271,7 @@ static int bt_get(struct blk_mq_alloc_data *data, ...@@ -267,7 +271,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
do { do {
prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
tag = __bt_get(hctx, bt, last_tag); tag = __bt_get(hctx, bt, last_tag, tags);
if (tag != -1) if (tag != -1)
break; break;
...@@ -282,7 +286,7 @@ static int bt_get(struct blk_mq_alloc_data *data, ...@@ -282,7 +286,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
* Retry tag allocation after running the hardware queue, * Retry tag allocation after running the hardware queue,
* as running the queue may also have found completions. * as running the queue may also have found completions.
*/ */
tag = __bt_get(hctx, bt, last_tag); tag = __bt_get(hctx, bt, last_tag, tags);
if (tag != -1) if (tag != -1)
break; break;
...@@ -313,7 +317,7 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data) ...@@ -313,7 +317,7 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
int tag; int tag;
tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx, tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
&data->ctx->last_tag); &data->ctx->last_tag, data->hctx->tags);
if (tag >= 0) if (tag >= 0)
return tag + data->hctx->tags->nr_reserved_tags; return tag + data->hctx->tags->nr_reserved_tags;
...@@ -329,7 +333,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data) ...@@ -329,7 +333,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
return BLK_MQ_TAG_FAIL; return BLK_MQ_TAG_FAIL;
} }
tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero); tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero,
data->hctx->tags);
if (tag < 0) if (tag < 0)
return BLK_MQ_TAG_FAIL; return BLK_MQ_TAG_FAIL;
...@@ -401,6 +406,7 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, ...@@ -401,6 +406,7 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
BUG_ON(real_tag >= tags->nr_tags); BUG_ON(real_tag >= tags->nr_tags);
bt_clear_tag(&tags->bitmap_tags, real_tag); bt_clear_tag(&tags->bitmap_tags, real_tag);
if (likely(tags->alloc_policy == BLK_TAG_ALLOC_FIFO))
*last_tag = real_tag; *last_tag = real_tag;
} else { } else {
BUG_ON(tag >= tags->nr_reserved_tags); BUG_ON(tag >= tags->nr_reserved_tags);
...@@ -538,10 +544,12 @@ static void bt_free(struct blk_mq_bitmap_tags *bt) ...@@ -538,10 +544,12 @@ static void bt_free(struct blk_mq_bitmap_tags *bt)
} }
static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
int node) int node, int alloc_policy)
{ {
unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
tags->alloc_policy = alloc_policy;
if (bt_alloc(&tags->bitmap_tags, depth, node, false)) if (bt_alloc(&tags->bitmap_tags, depth, node, false))
goto enomem; goto enomem;
if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true)) if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
...@@ -555,7 +563,8 @@ static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, ...@@ -555,7 +563,8 @@ static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
} }
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
unsigned int reserved_tags, int node) unsigned int reserved_tags,
int node, int alloc_policy)
{ {
struct blk_mq_tags *tags; struct blk_mq_tags *tags;
...@@ -571,7 +580,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, ...@@ -571,7 +580,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
tags->nr_tags = total_tags; tags->nr_tags = total_tags;
tags->nr_reserved_tags = reserved_tags; tags->nr_reserved_tags = reserved_tags;
return blk_mq_init_bitmap_tags(tags, node); return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
} }
void blk_mq_free_tags(struct blk_mq_tags *tags) void blk_mq_free_tags(struct blk_mq_tags *tags)
......
...@@ -42,10 +42,12 @@ struct blk_mq_tags { ...@@ -42,10 +42,12 @@ struct blk_mq_tags {
struct request **rqs; struct request **rqs;
struct list_head page_list; struct list_head page_list;
int alloc_policy;
}; };
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node); extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
extern void blk_mq_free_tags(struct blk_mq_tags *tags); extern void blk_mq_free_tags(struct blk_mq_tags *tags);
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
......
...@@ -1374,7 +1374,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, ...@@ -1374,7 +1374,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
size_t rq_size, left; size_t rq_size, left;
tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
set->numa_node); set->numa_node,
BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
if (!tags) if (!tags)
return NULL; return NULL;
......
...@@ -2188,6 +2188,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) ...@@ -2188,6 +2188,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
shost->tag_set.cmd_size = cmd_size; shost->tag_set.cmd_size = cmd_size;
shost->tag_set.numa_node = NUMA_NO_NODE; shost->tag_set.numa_node = NUMA_NO_NODE;
shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
shost->tag_set.flags |=
BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
shost->tag_set.driver_data = shost; shost->tag_set.driver_data = shost;
return blk_mq_alloc_tag_set(&shost->tag_set); return blk_mq_alloc_tag_set(&shost->tag_set);
......
...@@ -147,6 +147,8 @@ enum { ...@@ -147,6 +147,8 @@ enum {
BLK_MQ_F_SG_MERGE = 1 << 2, BLK_MQ_F_SG_MERGE = 1 << 2,
BLK_MQ_F_SYSFS_UP = 1 << 3, BLK_MQ_F_SYSFS_UP = 1 << 3,
BLK_MQ_F_DEFER_ISSUE = 1 << 4, BLK_MQ_F_DEFER_ISSUE = 1 << 4,
BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
BLK_MQ_F_ALLOC_POLICY_BITS = 1,
BLK_MQ_S_STOPPED = 0, BLK_MQ_S_STOPPED = 0,
BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_TAG_ACTIVE = 1,
...@@ -155,6 +157,12 @@ enum { ...@@ -155,6 +157,12 @@ enum {
BLK_MQ_CPU_WORK_BATCH = 8, BLK_MQ_CPU_WORK_BATCH = 8,
}; };
/*
 * Extract the tag allocation policy from a tag-set flags word: shift the
 * flags right by BLK_MQ_F_ALLOC_POLICY_START_BIT and keep the low
 * BLK_MQ_F_ALLOC_POLICY_BITS bits. The argument is parenthesized so that a
 * compound expression such as (a | b) is shifted as a whole rather than
 * having the shift bind to its last operand only.
 */
#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
	(((flags) >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
/*
 * Encode a tag allocation policy into its position in a tag-set flags word:
 * mask the policy to BLK_MQ_F_ALLOC_POLICY_BITS bits and shift it left by
 * BLK_MQ_F_ALLOC_POLICY_START_BIT. The argument is parenthesized so that a
 * compound expression such as (a | b) is masked as a whole rather than
 * having the mask bind to its last operand only.
 */
#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
	(((policy) & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
void blk_mq_finish_init(struct request_queue *q); void blk_mq_finish_init(struct request_queue *q);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册