Commit 5d4ba74a authored by Jens Axboe, committed by Joseph Qi

io_uring: improve poll completion performance

to #26323588

commit e94f141bd248ebdadcb7351f1e70b31cee5add53 upstream.

For busy IORING_OP_POLL_ADD workloads, we can have enough contention
on the completion lock that we fail the inline completion path quite
often as we fail the trylock on that lock. Add a list for deferred
completions that we can use in that case. This helps reduce the number
of async offloads we have to do, as if we get multiple completions in
a row, we'll piggy back on to the poll_llist instead of having to queue
our own offload.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Acked-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Parent 5e8e604e
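The scheme the commit message describes can be sketched outside the kernel. Below is a minimal, hypothetical user-space C analogue (not part of this patch; names such as try_complete, defer_request, flush_deferred and flush_needed are invented for illustration): a completion is finished inline when nothing is already deferred and a trylock on the completion lock succeeds; otherwise the request is pushed onto a lock-free stack, and only the push that finds the stack empty requests a flush, so a burst of contended completions is drained with a single lock acquisition.

/*
 * deferred_completions.c - user-space sketch of the deferred-completion
 * pattern. Build with: cc -pthread deferred_completions.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct request {
	int result;
	struct request *next;			/* plays the role of llist_node */
};

static pthread_mutex_t completion_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic(struct request *) deferred;	/* plays the role of ctx->poll_llist */
static atomic_bool flush_needed;		/* stands in for queuing io_poll_flush() */

static void complete_locked(struct request *req)
{
	/* caller holds completion_lock, as io_poll_complete() requires */
	printf("completed request, result=%d\n", req->result);
}

/* Lock-free push; returns true if the stack was empty (like llist_add()). */
static bool defer_request(struct request *req)
{
	struct request *old = atomic_load(&deferred);

	do {
		req->next = old;
	} while (!atomic_compare_exchange_weak(&deferred, &old, req));

	return old == NULL;
}

/* Grab the whole stack at once and drain it under one lock acquisition,
 * mirroring llist_del_all() followed by __io_poll_flush(). */
static void flush_deferred(void)
{
	struct request *req = atomic_exchange(&deferred, NULL);

	pthread_mutex_lock(&completion_lock);
	for (; req; req = req->next)
		complete_locked(req);
	pthread_mutex_unlock(&completion_lock);
}

static void try_complete(struct request *req)
{
	/* Fast path: nothing deferred yet and the lock is uncontended. */
	if (atomic_load(&deferred) == NULL &&
	    pthread_mutex_trylock(&completion_lock) == 0) {
		complete_locked(req);
		pthread_mutex_unlock(&completion_lock);
		return;
	}

	/* Contended: defer. Only the first pusher asks for the flush;
	 * later requests piggyback on the list and skip the offload. */
	if (defer_request(req))
		atomic_store(&flush_needed, true);
}

int main(void)
{
	struct request a = { .result = 1 }, b = { .result = 2 }, c = { .result = 3 };

	try_complete(&a);			/* lock free: completed inline */

	pthread_mutex_lock(&completion_lock);	/* simulate contention */
	try_complete(&b);			/* trylock fails: deferred, flush requested */
	try_complete(&c);			/* list already non-empty: just piggybacks */
	pthread_mutex_unlock(&completion_lock);

	if (atomic_load(&flush_needed))
		flush_deferred();		/* one drain completes b and c together */
	return 0;
}

In the patch itself the flush is not run synchronously: the first deferred request has req->work.func pointed at io_poll_flush() and is handed to io_queue_async_work(), which is what the flush_needed flag stands in for here, and llist_add()'s return value provides the "was the list empty" test.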
@@ -286,7 +286,8 @@ struct io_ring_ctx {
 
 	struct {
 		spinlock_t		completion_lock;
-		bool			poll_multi_file;
+		struct llist_head	poll_llist;
+
 		/*
 		 * ->poll_list is protected by the ctx->uring_lock for
 		 * io_uring instances that don't use IORING_SETUP_SQPOLL.
@@ -296,6 +297,7 @@ struct io_ring_ctx {
 		struct list_head	poll_list;
 		struct hlist_head	*cancel_hash;
 		unsigned		cancel_hash_bits;
+		bool			poll_multi_file;
 
 		spinlock_t		inflight_lock;
 		struct list_head	inflight_list;
@@ -453,7 +455,14 @@ struct io_kiocb {
 	};
 
 	struct io_async_ctx		*io;
-	struct file			*ring_file;
+	union {
+		/*
+		 * ring_file is only used in the submission path, and
+		 * llist_node is only used for poll deferred completions
+		 */
+		struct file		*ring_file;
+		struct llist_node	llist_node;
+	};
 	int				ring_fd;
 	bool				has_user;
 	bool				in_async;
@@ -725,6 +734,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->wait);
 	spin_lock_init(&ctx->completion_lock);
+	init_llist_head(&ctx->poll_llist);
 	INIT_LIST_HEAD(&ctx->poll_list);
 	INIT_LIST_HEAD(&ctx->defer_list);
 	INIT_LIST_HEAD(&ctx->timeout_list);
@@ -1320,6 +1330,20 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
 	return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
 }
 
+static inline bool io_req_multi_free(struct io_kiocb *req)
+{
+	/*
+	 * If we're not using fixed files, we have to pair the completion part
+	 * with the file put. Use regular completions for those, only batch
+	 * free for fixed file and non-linked commands.
+	 */
+	if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) == REQ_F_FIXED_FILE)
+	    && !io_is_fallback_req(req) && !req->io)
+		return true;
+
+	return false;
+}
+
 /*
  * Find and free completed poll iocbs
  */
@@ -1339,14 +1363,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		(*nr_events)++;
 
 		if (refcount_dec_and_test(&req->refs)) {
-			/* If we're not using fixed files, we have to pair the
-			 * completion part with the file put. Use regular
-			 * completions for those, only batch free for fixed
-			 * file and non-linked commands.
-			 */
-			if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
-			    REQ_F_FIXED_FILE) && !io_is_fallback_req(req) &&
-			    !req->io) {
+			if (io_req_multi_free(req)) {
 				reqs[to_free++] = req;
 				if (to_free == ARRAY_SIZE(reqs))
 					io_free_req_many(ctx, reqs, &to_free);
@@ -3078,6 +3095,44 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
 		io_wq_assign_next(workptr, nxt);
 }
 
+static void __io_poll_flush(struct io_ring_ctx *ctx, struct llist_node *nodes)
+{
+	void *reqs[IO_IOPOLL_BATCH];
+	struct io_kiocb *req, *tmp;
+	int to_free = 0;
+
+	spin_lock_irq(&ctx->completion_lock);
+	llist_for_each_entry_safe(req, tmp, nodes, llist_node) {
+		hash_del(&req->hash_node);
+		io_poll_complete(req, req->result, 0);
+
+		if (refcount_dec_and_test(&req->refs)) {
+			if (io_req_multi_free(req)) {
+				reqs[to_free++] = req;
+				if (to_free == ARRAY_SIZE(reqs))
+					io_free_req_many(ctx, reqs, &to_free);
+			} else {
+				req->flags |= REQ_F_COMP_LOCKED;
+				io_free_req(req);
+			}
+		}
+	}
+	spin_unlock_irq(&ctx->completion_lock);
+
+	io_cqring_ev_posted(ctx);
+	io_free_req_many(ctx, reqs, &to_free);
+}
+
+static void io_poll_flush(struct io_wq_work **workptr)
+{
+	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+	struct llist_node *nodes;
+
+	nodes = llist_del_all(&req->ctx->poll_llist);
+	if (nodes)
+		__io_poll_flush(req->ctx, nodes);
+}
+
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 			void *key)
 {
@@ -3085,7 +3140,6 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
 	struct io_ring_ctx *ctx = req->ctx;
 	__poll_t mask = key_to_poll(key);
-	unsigned long flags;
 
 	/* for instances that support it check for an event match first: */
 	if (mask && !(mask & poll->events))
@@ -3099,17 +3153,31 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	 * If we have a link timeout we're going to need the completion_lock
 	 * for finalizing the request, mark us as having grabbed that already.
	 */
-	if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
-		hash_del(&req->hash_node);
-		io_poll_complete(req, mask, 0);
-		req->flags |= REQ_F_COMP_LOCKED;
-		io_put_req(req);
-		spin_unlock_irqrestore(&ctx->completion_lock, flags);
+	if (mask) {
+		unsigned long flags;
 
-		io_cqring_ev_posted(ctx);
-	} else {
-		io_queue_async_work(req);
+		if (llist_empty(&ctx->poll_llist) &&
+		    spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+			hash_del(&req->hash_node);
+			io_poll_complete(req, mask, 0);
+			req->flags |= REQ_F_COMP_LOCKED;
+			io_put_req(req);
+			spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+			io_cqring_ev_posted(ctx);
+			req = NULL;
+		} else {
+			req->result = mask;
+			req->llist_node.next = NULL;
+			/* if the list wasn't empty, we're done */
+			if (!llist_add(&req->llist_node, &ctx->poll_llist))
+				req = NULL;
+			else
+				req->work.func = io_poll_flush;
+		}
 	}
+	if (req)
+		io_queue_async_work(req);
 
 	return 1;
 }