From 183e3851c4bb193c6e69afb04141cbc636f0876c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 28 Sep 2019 11:36:45 -0600 Subject: [PATCH] io_uring: run dependent links inline if possible commit ba816ad61fdf31f59f423a773b00bfa2ed38243a upstream. Currently any dependent link is executed from a new workqueue context, which means that we'll be doing a context switch per link in the chain. If we are running the completion of the current request from our async workqueue and find that the next request is a link, then run it directly from the workqueue context instead of forcing another switch. This improves the performance of linked SQEs, and reduces the CPU overhead. Reviewed-by: Jackie Liu Signed-off-by: Jens Axboe Signed-off-by: Joseph Qi Reviewed-by: Xiaoguang Wang --- fs/io_uring.c | 160 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 113 insertions(+), 47 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 4b1bb6ccac76..83123b76cfb1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -665,7 +665,7 @@ static void __io_free_req(struct io_kiocb *req) kmem_cache_free(req_cachep, req); } -static void io_req_link_next(struct io_kiocb *req) +static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr) { struct io_kiocb *nxt; @@ -684,8 +684,16 @@ static void io_req_link_next(struct io_kiocb *req) } nxt->flags |= REQ_F_LINK_DONE; - INIT_WORK(&nxt->work, io_sq_wq_submit_work); - io_queue_async_work(req->ctx, nxt); + /* + * If we're in async work, we can continue processing the chain + * in this context instead of having to queue up new async work. + */ + if (nxtptr && current_work()) { + *nxtptr = nxt; + } else { + INIT_WORK(&nxt->work, io_sq_wq_submit_work); + io_queue_async_work(req->ctx, nxt); + } } } @@ -705,7 +713,7 @@ static void io_fail_links(struct io_kiocb *req) } } -static void io_free_req(struct io_kiocb *req) +static void io_free_req(struct io_kiocb *req, struct io_kiocb **nxt) { /* * If LINK is set, we have dependent requests in this chain. If we @@ -717,16 +725,39 @@ static void io_free_req(struct io_kiocb *req) if (req->flags & REQ_F_FAIL_LINK) io_fail_links(req); else - io_req_link_next(req); + io_req_link_next(req, nxt); } __io_free_req(req); } -static void io_put_req(struct io_kiocb *req) +/* + * Drop reference to request, return next in chain (if there is one) if this + * was the last reference to this request. + */ +static struct io_kiocb *io_put_req_find_next(struct io_kiocb *req) { + struct io_kiocb *nxt = NULL; + if (refcount_dec_and_test(&req->refs)) - io_free_req(req); + io_free_req(req, &nxt); + + return nxt; +} + +static void io_put_req(struct io_kiocb *req, struct io_kiocb **nxtptr) +{ + struct io_kiocb *nxt; + + nxt = io_put_req_find_next(req); + if (nxt) { + if (nxtptr) { + *nxtptr = nxt; + } else { + INIT_WORK(&nxt->work, io_sq_wq_submit_work); + io_queue_async_work(nxt->ctx, nxt); + } + } } static unsigned io_cqring_events(struct io_rings *rings) @@ -774,7 +805,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, if (to_free == ARRAY_SIZE(reqs)) io_free_req_many(ctx, reqs, &to_free); } else { - io_free_req(req); + io_free_req(req, NULL); } } } @@ -946,7 +977,7 @@ static void kiocb_end_write(struct io_kiocb *req) file_end_write(req->file); } -static void io_complete_rw(struct kiocb *kiocb, long res, long res2) +static void io_complete_rw_common(struct kiocb *kiocb, long res) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); @@ -956,7 +987,22 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) if ((req->flags & REQ_F_LINK) && res != req->result) req->flags |= REQ_F_FAIL_LINK; io_cqring_add_event(req->ctx, req->user_data, res); - io_put_req(req); +} + +static void io_complete_rw(struct kiocb *kiocb, long res, long res2) +{ + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); + + io_complete_rw_common(kiocb, res); + io_put_req(req, NULL); +} + +static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res) +{ + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); + + io_complete_rw_common(kiocb, res); + return io_put_req_find_next(req); } static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) @@ -1152,6 +1198,15 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) } } +static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt, + bool in_async) +{ + if (in_async && ret >= 0 && nxt && kiocb->ki_complete == io_complete_rw) + *nxt = __io_complete_rw(kiocb, ret); + else + io_rw_done(kiocb, ret); +} + static int io_import_fixed(struct io_ring_ctx *ctx, int rw, const struct io_uring_sqe *sqe, struct iov_iter *iter) @@ -1368,7 +1423,7 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb, } static int io_read(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock) + struct io_kiocb **nxt, bool force_nonblock) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw; @@ -1417,7 +1472,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, ret2 = -EAGAIN; /* Catch -EAGAIN return for forced non-blocking submission */ if (!force_nonblock || ret2 != -EAGAIN) { - io_rw_done(kiocb, ret2); + kiocb_done(kiocb, ret2, nxt, s->needs_lock); } else { /* * If ->needs_lock is true, we're already in async @@ -1433,7 +1488,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, } static int io_write(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock) + struct io_kiocb **nxt, bool force_nonblock) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw; @@ -1491,7 +1546,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, else ret2 = loop_rw_iter(WRITE, file, kiocb, &iter); if (!force_nonblock || ret2 != -EAGAIN) { - io_rw_done(kiocb, ret2); + kiocb_done(kiocb, ret2, nxt, s->needs_lock); } else { /* * If ->needs_lock is true, we're already in async @@ -1519,7 +1574,7 @@ static int io_nop(struct io_kiocb *req, u64 user_data) return -EINVAL; io_cqring_add_event(ctx, user_data, err); - io_put_req(req); + io_put_req(req, NULL); return 0; } @@ -1539,7 +1594,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) } static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, - bool force_nonblock) + struct io_kiocb **nxt, bool force_nonblock) { loff_t sqe_off = READ_ONCE(sqe->off); loff_t sqe_len = READ_ONCE(sqe->len); @@ -1566,7 +1621,7 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (ret < 0 && (req->flags & REQ_F_LINK)) req->flags |= REQ_F_FAIL_LINK; io_cqring_add_event(req->ctx, sqe->user_data, ret); - io_put_req(req); + io_put_req(req, nxt); return 0; } @@ -1588,6 +1643,7 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_sync_file_range(struct io_kiocb *req, const struct io_uring_sqe *sqe, + struct io_kiocb **nxt, bool force_nonblock) { loff_t sqe_off; @@ -1612,13 +1668,13 @@ static int io_sync_file_range(struct io_kiocb *req, if (ret < 0 && (req->flags & REQ_F_LINK)) req->flags |= REQ_F_FAIL_LINK; io_cqring_add_event(req->ctx, sqe->user_data, ret); - io_put_req(req); + io_put_req(req, nxt); return 0; } #if defined(CONFIG_NET) static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, - bool force_nonblock, + struct io_kiocb **nxt, bool force_nonblock, long (*fn)(struct socket *, struct user_msghdr __user *, unsigned int)) { @@ -1648,26 +1704,28 @@ static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, } io_cqring_add_event(req->ctx, sqe->user_data, ret); - io_put_req(req); + io_put_req(req, nxt); return 0; } #endif static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, - bool force_nonblock) + struct io_kiocb **nxt, bool force_nonblock) { #if defined(CONFIG_NET) - return io_send_recvmsg(req, sqe, force_nonblock, __sys_sendmsg_sock); + return io_send_recvmsg(req, sqe, nxt, force_nonblock, + __sys_sendmsg_sock); #else return -EOPNOTSUPP; #endif } static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, - bool force_nonblock) + struct io_kiocb **nxt, bool force_nonblock) { #if defined(CONFIG_NET) - return io_send_recvmsg(req, sqe, force_nonblock, __sys_recvmsg_sock); + return io_send_recvmsg(req, sqe, nxt, force_nonblock, + __sys_recvmsg_sock); #else return -EOPNOTSUPP; #endif @@ -1727,7 +1785,7 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe) spin_unlock_irq(&ctx->completion_lock); io_cqring_add_event(req->ctx, sqe->user_data, ret); - io_put_req(req); + io_put_req(req, NULL); return 0; } @@ -1768,7 +1826,7 @@ static void io_poll_complete_work(struct work_struct *work) spin_unlock_irq(&ctx->completion_lock); io_cqring_ev_posted(ctx); - io_put_req(req); + io_put_req(req, NULL); } static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, @@ -1793,7 +1851,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); - io_put_req(req); + io_put_req(req, NULL); } else { io_queue_async_work(ctx, req); } @@ -1885,7 +1943,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (mask) { io_cqring_ev_posted(ctx); - io_put_req(req); + io_put_req(req, NULL); } return ipt.error; } @@ -1918,7 +1976,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) io_cqring_ev_posted(ctx); - io_put_req(req); + io_put_req(req, NULL); return HRTIMER_NORESTART; } @@ -2027,7 +2085,8 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req, } static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, - const struct sqe_submit *s, bool force_nonblock) + const struct sqe_submit *s, struct io_kiocb **nxt, + bool force_nonblock) { int ret, opcode; @@ -2044,21 +2103,21 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, case IORING_OP_READV: if (unlikely(s->sqe->buf_index)) return -EINVAL; - ret = io_read(req, s, force_nonblock); + ret = io_read(req, s, nxt, force_nonblock); break; case IORING_OP_WRITEV: if (unlikely(s->sqe->buf_index)) return -EINVAL; - ret = io_write(req, s, force_nonblock); + ret = io_write(req, s, nxt, force_nonblock); break; case IORING_OP_READ_FIXED: - ret = io_read(req, s, force_nonblock); + ret = io_read(req, s, nxt, force_nonblock); break; case IORING_OP_WRITE_FIXED: - ret = io_write(req, s, force_nonblock); + ret = io_write(req, s, nxt, force_nonblock); break; case IORING_OP_FSYNC: - ret = io_fsync(req, s->sqe, force_nonblock); + ret = io_fsync(req, s->sqe, nxt, force_nonblock); break; case IORING_OP_POLL_ADD: ret = io_poll_add(req, s->sqe); @@ -2067,13 +2126,13 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, ret = io_poll_remove(req, s->sqe); break; case IORING_OP_SYNC_FILE_RANGE: - ret = io_sync_file_range(req, s->sqe, force_nonblock); + ret = io_sync_file_range(req, s->sqe, nxt, force_nonblock); break; case IORING_OP_SENDMSG: - ret = io_sendmsg(req, s->sqe, force_nonblock); + ret = io_sendmsg(req, s->sqe, nxt, force_nonblock); break; case IORING_OP_RECVMSG: - ret = io_recvmsg(req, s->sqe, force_nonblock); + ret = io_recvmsg(req, s->sqe, nxt, force_nonblock); break; case IORING_OP_TIMEOUT: ret = io_timeout(req, s->sqe); @@ -2140,6 +2199,7 @@ static void io_sq_wq_submit_work(struct work_struct *work) struct sqe_submit *s = &req->submit; const struct io_uring_sqe *sqe = s->sqe; unsigned int flags = req->flags; + struct io_kiocb *nxt = NULL; /* Ensure we clear previously set non-block flag */ req->rw.ki_flags &= ~IOCB_NOWAIT; @@ -2160,7 +2220,7 @@ static void io_sq_wq_submit_work(struct work_struct *work) s->has_user = cur_mm != NULL; s->needs_lock = true; do { - ret = __io_submit_sqe(ctx, req, s, false); + ret = __io_submit_sqe(ctx, req, s, &nxt, false); /* * We can get EAGAIN for polled IO even though * we're forcing a sync submission from here, @@ -2174,16 +2234,22 @@ static void io_sq_wq_submit_work(struct work_struct *work) } /* drop submission reference */ - io_put_req(req); + io_put_req(req, NULL); if (ret) { io_cqring_add_event(ctx, sqe->user_data, ret); - io_put_req(req); + io_put_req(req, NULL); } /* async context always use a copy of the sqe */ kfree(sqe); + /* if a dependent link is ready, do that as the next one */ + if (!ret && nxt) { + req = nxt; + continue; + } + /* req from defer and link list needn't decrease async cnt */ if (flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE)) goto out; @@ -2330,7 +2396,7 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, { int ret; - ret = __io_submit_sqe(ctx, req, s, true); + ret = __io_submit_sqe(ctx, req, s, NULL, true); /* * We async punt it if the file wasn't marked NOWAIT, or if the file @@ -2363,14 +2429,14 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, } /* drop submission reference */ - io_put_req(req); + io_put_req(req, NULL); /* and drop final reference, if we failed */ if (ret) { io_cqring_add_event(ctx, req->user_data, ret); if (req->flags & REQ_F_LINK) req->flags |= REQ_F_FAIL_LINK; - io_put_req(req); + io_put_req(req, NULL); } return ret; @@ -2384,7 +2450,7 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, ret = io_req_defer(ctx, req, s->sqe); if (ret) { if (ret != -EIOCBQUEUED) { - io_free_req(req); + io_free_req(req, NULL); io_cqring_add_event(ctx, s->sqe->user_data, ret); } return 0; @@ -2411,7 +2477,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req, ret = io_req_defer(ctx, req, s->sqe); if (ret) { if (ret != -EIOCBQUEUED) { - io_free_req(req); + io_free_req(req, NULL); __io_free_req(shadow); io_cqring_add_event(ctx, s->sqe->user_data, ret); return 0; @@ -2459,7 +2525,7 @@ static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, ret = io_req_set_file(ctx, s, state, req); if (unlikely(ret)) { err_req: - io_free_req(req); + io_free_req(req, NULL); err: io_cqring_add_event(ctx, s->sqe->user_data, ret); return; -- GitLab