提交 f70ced09 编写于 作者: M Ming Lei 提交者: Jens Axboe

blk-mq: support per-dispatch_queue flush machinery

This patch runs a separate flush machinery for each blk-mq
dispatch queue, so that:

- the existing init_request and exit_request callbacks can
cover the flush request too, which fixes the buggy approach of
initializing the flush request's pdu by copying

- flush performance improves when multiple hw queues are in use

In a fio sync write test over virtio-blk (4 hw queues, ioengine=sync,
iodepth=64, numjobs=4, bs=4K), throughput increased significantly in
my test environment:
	- throughput: +70% in case of virtio-blk over null_blk
	- throughput: +30% in case of virtio-blk over SSD image

The multi-virtqueue feature isn't merged into QEMU yet; patches for
the feature can be found in the tree below:

	git://kernel.ubuntu.com/ming/qemu.git  	v2.1.0-mq.4

Simply passing 'num_queues=4 vectors=5' should be enough to
enable the multi-queue (quad-queue) feature for QEMU virtio-blk.
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
上级 e97c293c
...@@ -704,7 +704,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, ...@@ -704,7 +704,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
if (!q) if (!q)
return NULL; return NULL;
q->fq = blk_alloc_flush_queue(q); q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
if (!q->fq) if (!q->fq)
return NULL; return NULL;
......
...@@ -305,8 +305,15 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) ...@@ -305,8 +305,15 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
fq->flush_pending_idx ^= 1; fq->flush_pending_idx ^= 1;
blk_rq_init(q, flush_rq); blk_rq_init(q, flush_rq);
if (q->mq_ops)
blk_mq_clone_flush_request(flush_rq, first_rq); /*
* Borrow tag from the first request since they can't
* be in flight at the same time.
*/
if (q->mq_ops) {
flush_rq->mq_ctx = first_rq->mq_ctx;
flush_rq->tag = first_rq->tag;
}
flush_rq->cmd_type = REQ_TYPE_FS; flush_rq->cmd_type = REQ_TYPE_FS;
flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
...@@ -480,22 +487,22 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, ...@@ -480,22 +487,22 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
} }
EXPORT_SYMBOL(blkdev_issue_flush); EXPORT_SYMBOL(blkdev_issue_flush);
struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q) struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
int node, int cmd_size)
{ {
struct blk_flush_queue *fq; struct blk_flush_queue *fq;
int rq_sz = sizeof(struct request); int rq_sz = sizeof(struct request);
fq = kzalloc(sizeof(*fq), GFP_KERNEL); fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
if (!fq) if (!fq)
goto fail; goto fail;
if (q->mq_ops) { if (q->mq_ops) {
spin_lock_init(&fq->mq_flush_lock); spin_lock_init(&fq->mq_flush_lock);
rq_sz = round_up(rq_sz + q->tag_set->cmd_size, rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
cache_line_size());
} }
fq->flush_rq = kzalloc(rq_sz, GFP_KERNEL); fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
if (!fq->flush_rq) if (!fq->flush_rq)
goto fail_rq; goto fail_rq;
......
...@@ -281,26 +281,6 @@ void blk_mq_free_request(struct request *rq) ...@@ -281,26 +281,6 @@ void blk_mq_free_request(struct request *rq)
__blk_mq_free_request(hctx, ctx, rq); __blk_mq_free_request(hctx, ctx, rq);
} }
/*
* Clone all relevant state from a request that has been put on hold in
* the flush state machine into the preallocated flush request that hangs
* off the request queue.
*
* For a driver the flush request should be invisible, that's why we are
* impersonating the original request here.
*/
void blk_mq_clone_flush_request(struct request *flush_rq,
struct request *orig_rq)
{
struct blk_mq_hw_ctx *hctx =
orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu);
flush_rq->mq_ctx = orig_rq->mq_ctx;
flush_rq->tag = orig_rq->tag;
memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
hctx->cmd_size);
}
inline void __blk_mq_end_request(struct request *rq, int error) inline void __blk_mq_end_request(struct request *rq, int error)
{ {
blk_account_io_done(rq); blk_account_io_done(rq);
...@@ -1516,12 +1496,20 @@ static void blk_mq_exit_hctx(struct request_queue *q, ...@@ -1516,12 +1496,20 @@ static void blk_mq_exit_hctx(struct request_queue *q,
struct blk_mq_tag_set *set, struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{ {
unsigned flush_start_tag = set->queue_depth;
blk_mq_tag_idle(hctx); blk_mq_tag_idle(hctx);
if (set->ops->exit_request)
set->ops->exit_request(set->driver_data,
hctx->fq->flush_rq, hctx_idx,
flush_start_tag + hctx_idx);
if (set->ops->exit_hctx) if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx); set->ops->exit_hctx(hctx, hctx_idx);
blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
blk_free_flush_queue(hctx->fq);
kfree(hctx->ctxs); kfree(hctx->ctxs);
blk_mq_free_bitmap(&hctx->ctx_map); blk_mq_free_bitmap(&hctx->ctx_map);
} }
...@@ -1556,6 +1544,7 @@ static int blk_mq_init_hctx(struct request_queue *q, ...@@ -1556,6 +1544,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
{ {
int node; int node;
unsigned flush_start_tag = set->queue_depth;
node = hctx->numa_node; node = hctx->numa_node;
if (node == NUMA_NO_NODE) if (node == NUMA_NO_NODE)
...@@ -1594,8 +1583,23 @@ static int blk_mq_init_hctx(struct request_queue *q, ...@@ -1594,8 +1583,23 @@ static int blk_mq_init_hctx(struct request_queue *q,
set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
goto free_bitmap; goto free_bitmap;
hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
if (!hctx->fq)
goto exit_hctx;
if (set->ops->init_request &&
set->ops->init_request(set->driver_data,
hctx->fq->flush_rq, hctx_idx,
flush_start_tag + hctx_idx, node))
goto free_fq;
return 0; return 0;
free_fq:
kfree(hctx->fq);
exit_hctx:
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
free_bitmap: free_bitmap:
blk_mq_free_bitmap(&hctx->ctx_map); blk_mq_free_bitmap(&hctx->ctx_map);
free_ctxs: free_ctxs:
...@@ -1862,16 +1866,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) ...@@ -1862,16 +1866,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
blk_mq_add_queue_tag_set(set, q); blk_mq_add_queue_tag_set(set, q);
q->fq = blk_alloc_flush_queue(q);
if (!q->fq)
goto err_hw_queues;
blk_mq_map_swqueue(q); blk_mq_map_swqueue(q);
return q; return q;
err_hw_queues:
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
err_hw: err_hw:
blk_cleanup_queue(q); blk_cleanup_queue(q);
err_hctxs: err_hctxs:
......
...@@ -517,10 +517,10 @@ static void blk_release_queue(struct kobject *kobj) ...@@ -517,10 +517,10 @@ static void blk_release_queue(struct kobject *kobj)
if (q->queue_tags) if (q->queue_tags)
__blk_queue_free_tags(q); __blk_queue_free_tags(q);
blk_free_flush_queue(q->fq);
if (q->mq_ops) if (q->mq_ops)
blk_mq_free_queue(q); blk_mq_free_queue(q);
else
blk_free_flush_queue(q->fq);
blk_trace_shutdown(q); blk_trace_shutdown(q);
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
#define BLK_INTERNAL_H #define BLK_INTERNAL_H
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/blk-mq.h>
#include "blk-mq.h"
/* Amount of time in which a process may batch requests */ /* Amount of time in which a process may batch requests */
#define BLK_BATCH_TIME (HZ/50UL) #define BLK_BATCH_TIME (HZ/50UL)
...@@ -31,7 +33,14 @@ extern struct ida blk_queue_ida; ...@@ -31,7 +33,14 @@ extern struct ida blk_queue_ida;
static inline struct blk_flush_queue *blk_get_flush_queue( static inline struct blk_flush_queue *blk_get_flush_queue(
struct request_queue *q, struct blk_mq_ctx *ctx) struct request_queue *q, struct blk_mq_ctx *ctx)
{ {
return q->fq; struct blk_mq_hw_ctx *hctx;
if (!q->mq_ops)
return q->fq;
hctx = q->mq_ops->map_queue(q, ctx->cpu);
return hctx->fq;
} }
static inline void __blk_get_queue(struct request_queue *q) static inline void __blk_get_queue(struct request_queue *q)
...@@ -39,8 +48,9 @@ static inline void __blk_get_queue(struct request_queue *q) ...@@ -39,8 +48,9 @@ static inline void __blk_get_queue(struct request_queue *q)
kobject_get(&q->kobj); kobject_get(&q->kobj);
} }
struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q); struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
void blk_free_flush_queue(struct blk_flush_queue *fq); int node, int cmd_size);
void blk_free_flush_queue(struct blk_flush_queue *q);
int blk_init_rl(struct request_list *rl, struct request_queue *q, int blk_init_rl(struct request_list *rl, struct request_queue *q,
gfp_t gfp_mask); gfp_t gfp_mask);
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
struct blk_mq_tags; struct blk_mq_tags;
struct blk_flush_queue;
struct blk_mq_cpu_notifier { struct blk_mq_cpu_notifier {
struct list_head list; struct list_head list;
...@@ -34,6 +35,7 @@ struct blk_mq_hw_ctx { ...@@ -34,6 +35,7 @@ struct blk_mq_hw_ctx {
struct request_queue *queue; struct request_queue *queue;
unsigned int queue_num; unsigned int queue_num;
struct blk_flush_queue *fq;
void *driver_data; void *driver_data;
...@@ -119,6 +121,10 @@ struct blk_mq_ops { ...@@ -119,6 +121,10 @@ struct blk_mq_ops {
/* /*
* Called for every command allocated by the block layer to allow * Called for every command allocated by the block layer to allow
* the driver to set up driver specific data. * the driver to set up driver specific data.
*
* Tag greater than or equal to queue_depth is for setting up
* flush request.
*
* Ditto for exit/teardown. * Ditto for exit/teardown.
*/ */
init_request_fn *init_request; init_request_fn *init_request;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册