提交 6412618c 编写于 作者: J Jens Axboe 提交者: Joseph Qi

io_uring: run dependent links inline if possible

commit ba816ad61fdf31f59f423a773b00bfa2ed38243a upstream.

Currently any dependent link is executed from a new workqueue context,
which means that we'll be doing a context switch per link in the chain.
If we are running the completion of the current request from our async
workqueue and find that the next request is a link, then run it directly
from the workqueue context instead of forcing another switch.

This improves the performance of linked SQEs, and reduces the CPU
overhead.
Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
上级 bb4d087e
......@@ -665,7 +665,7 @@ static void __io_free_req(struct io_kiocb *req)
kmem_cache_free(req_cachep, req);
}
static void io_req_link_next(struct io_kiocb *req)
static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
{
struct io_kiocb *nxt;
......@@ -684,8 +684,16 @@ static void io_req_link_next(struct io_kiocb *req)
}
nxt->flags |= REQ_F_LINK_DONE;
INIT_WORK(&nxt->work, io_sq_wq_submit_work);
io_queue_async_work(req->ctx, nxt);
/*
* If we're in async work, we can continue processing the chain
* in this context instead of having to queue up new async work.
*/
if (nxtptr && current_work()) {
*nxtptr = nxt;
} else {
INIT_WORK(&nxt->work, io_sq_wq_submit_work);
io_queue_async_work(req->ctx, nxt);
}
}
}
......@@ -705,7 +713,7 @@ static void io_fail_links(struct io_kiocb *req)
}
}
static void io_free_req(struct io_kiocb *req)
static void io_free_req(struct io_kiocb *req, struct io_kiocb **nxt)
{
/*
* If LINK is set, we have dependent requests in this chain. If we
......@@ -717,16 +725,39 @@ static void io_free_req(struct io_kiocb *req)
if (req->flags & REQ_F_FAIL_LINK)
io_fail_links(req);
else
io_req_link_next(req);
io_req_link_next(req, nxt);
}
__io_free_req(req);
}
static void io_put_req(struct io_kiocb *req)
/*
* Drop reference to request, return next in chain (if there is one) if this
* was the last reference to this request.
*/
static struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
{
struct io_kiocb *nxt = NULL;
if (refcount_dec_and_test(&req->refs))
io_free_req(req);
io_free_req(req, &nxt);
return nxt;
}
/*
 * Drop a reference to @req through io_put_req_find_next(). If that call
 * returns a next request, either hand it back to the caller through
 * @nxtptr (when @nxtptr is non-NULL) or queue it as async work.
 */
static void io_put_req(struct io_kiocb *req, struct io_kiocb **nxtptr)
{
struct io_kiocb *nxt;
nxt = io_put_req_find_next(req);
if (nxt) {
if (nxtptr) {
/* Caller wants the next request: return it instead of queueing. */
*nxtptr = nxt;
} else {
/*
 * No place to return it: set io_sq_wq_submit_work as the work
 * handler and queue the request on its own ctx.
 */
INIT_WORK(&nxt->work, io_sq_wq_submit_work);
io_queue_async_work(nxt->ctx, nxt);
}
}
}
static unsigned io_cqring_events(struct io_rings *rings)
......@@ -774,7 +805,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
if (to_free == ARRAY_SIZE(reqs))
io_free_req_many(ctx, reqs, &to_free);
} else {
io_free_req(req);
io_free_req(req, NULL);
}
}
}
......@@ -946,7 +977,7 @@ static void kiocb_end_write(struct io_kiocb *req)
file_end_write(req->file);
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
static void io_complete_rw_common(struct kiocb *kiocb, long res)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
......@@ -956,7 +987,22 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
if ((req->flags & REQ_F_LINK) && res != req->result)
req->flags |= REQ_F_FAIL_LINK;
io_cqring_add_event(req->ctx, req->user_data, res);
io_put_req(req);
}
/*
 * Read/write completion handler (kiocb_done() compares kiocb->ki_complete
 * against this function). Runs the shared handling in
 * io_complete_rw_common() and then drops a reference on the owning request.
 *
 * NULL is passed as the next-request pointer, so a next request returned
 * by the put is queued as async work by io_put_req() rather than returned
 * to this caller. @res2 is not used.
 */
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
/* The kiocb is embedded in the request as its 'rw' member. */
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
io_complete_rw_common(kiocb, res);
io_put_req(req, NULL);
}
/*
 * Variant of io_complete_rw() that hands the next request back to the
 * caller: runs io_complete_rw_common() and returns whatever
 * io_put_req_find_next() returns, which is NULL when there is no next
 * request to run.
 */
static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
{
/* The kiocb is embedded in the request as its 'rw' member. */
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
io_complete_rw_common(kiocb, res);
return io_put_req_find_next(req);
}
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
......@@ -1152,6 +1198,15 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
}
}
/*
 * Finish a read/write kiocb with result @ret.
 *
 * When @in_async is set, @ret is non-negative, @nxt is non-NULL and the
 * kiocb's completion handler is io_complete_rw, complete it directly via
 * __io_complete_rw() and store the returned next request in *@nxt.
 * In every other case fall back to io_rw_done().
 *
 * The callers visible in this patch pass s->needs_lock as @in_async.
 */
static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt,
bool in_async)
{
if (in_async && ret >= 0 && nxt && kiocb->ki_complete == io_complete_rw)
*nxt = __io_complete_rw(kiocb, ret);
else
io_rw_done(kiocb, ret);
}
static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
const struct io_uring_sqe *sqe,
struct iov_iter *iter)
......@@ -1368,7 +1423,7 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
}
static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
bool force_nonblock)
struct io_kiocb **nxt, bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
......@@ -1417,7 +1472,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
ret2 = -EAGAIN;
/* Catch -EAGAIN return for forced non-blocking submission */
if (!force_nonblock || ret2 != -EAGAIN) {
io_rw_done(kiocb, ret2);
kiocb_done(kiocb, ret2, nxt, s->needs_lock);
} else {
/*
* If ->needs_lock is true, we're already in async
......@@ -1433,7 +1488,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
}
static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
bool force_nonblock)
struct io_kiocb **nxt, bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
......@@ -1491,7 +1546,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
else
ret2 = loop_rw_iter(WRITE, file, kiocb, &iter);
if (!force_nonblock || ret2 != -EAGAIN) {
io_rw_done(kiocb, ret2);
kiocb_done(kiocb, ret2, nxt, s->needs_lock);
} else {
/*
* If ->needs_lock is true, we're already in async
......@@ -1519,7 +1574,7 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
return -EINVAL;
io_cqring_add_event(ctx, user_data, err);
io_put_req(req);
io_put_req(req, NULL);
return 0;
}
......@@ -1539,7 +1594,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}
static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
bool force_nonblock)
struct io_kiocb **nxt, bool force_nonblock)
{
loff_t sqe_off = READ_ONCE(sqe->off);
loff_t sqe_len = READ_ONCE(sqe->len);
......@@ -1566,7 +1621,7 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret < 0 && (req->flags & REQ_F_LINK))
req->flags |= REQ_F_FAIL_LINK;
io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
io_put_req(req, nxt);
return 0;
}
......@@ -1588,6 +1643,7 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
static int io_sync_file_range(struct io_kiocb *req,
const struct io_uring_sqe *sqe,
struct io_kiocb **nxt,
bool force_nonblock)
{
loff_t sqe_off;
......@@ -1612,13 +1668,13 @@ static int io_sync_file_range(struct io_kiocb *req,
if (ret < 0 && (req->flags & REQ_F_LINK))
req->flags |= REQ_F_FAIL_LINK;
io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
io_put_req(req, nxt);
return 0;
}
#if defined(CONFIG_NET)
static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
bool force_nonblock,
struct io_kiocb **nxt, bool force_nonblock,
long (*fn)(struct socket *, struct user_msghdr __user *,
unsigned int))
{
......@@ -1648,26 +1704,28 @@ static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
io_put_req(req, nxt);
return 0;
}
#endif
static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
bool force_nonblock)
struct io_kiocb **nxt, bool force_nonblock)
{
#if defined(CONFIG_NET)
return io_send_recvmsg(req, sqe, force_nonblock, __sys_sendmsg_sock);
return io_send_recvmsg(req, sqe, nxt, force_nonblock,
__sys_sendmsg_sock);
#else
return -EOPNOTSUPP;
#endif
}
static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
bool force_nonblock)
struct io_kiocb **nxt, bool force_nonblock)
{
#if defined(CONFIG_NET)
return io_send_recvmsg(req, sqe, force_nonblock, __sys_recvmsg_sock);
return io_send_recvmsg(req, sqe, nxt, force_nonblock,
__sys_recvmsg_sock);
#else
return -EOPNOTSUPP;
#endif
......@@ -1727,7 +1785,7 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
spin_unlock_irq(&ctx->completion_lock);
io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
io_put_req(req, NULL);
return 0;
}
......@@ -1768,7 +1826,7 @@ static void io_poll_complete_work(struct work_struct *work)
spin_unlock_irq(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
io_put_req(req);
io_put_req(req, NULL);
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
......@@ -1793,7 +1851,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
io_put_req(req);
io_put_req(req, NULL);
} else {
io_queue_async_work(ctx, req);
}
......@@ -1885,7 +1943,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (mask) {
io_cqring_ev_posted(ctx);
io_put_req(req);
io_put_req(req, NULL);
}
return ipt.error;
}
......@@ -1918,7 +1976,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
io_cqring_ev_posted(ctx);
io_put_req(req);
io_put_req(req, NULL);
return HRTIMER_NORESTART;
}
......@@ -2027,7 +2085,8 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct sqe_submit *s, bool force_nonblock)
const struct sqe_submit *s, struct io_kiocb **nxt,
bool force_nonblock)
{
int ret, opcode;
......@@ -2044,21 +2103,21 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
case IORING_OP_READV:
if (unlikely(s->sqe->buf_index))
return -EINVAL;
ret = io_read(req, s, force_nonblock);
ret = io_read(req, s, nxt, force_nonblock);
break;
case IORING_OP_WRITEV:
if (unlikely(s->sqe->buf_index))
return -EINVAL;
ret = io_write(req, s, force_nonblock);
ret = io_write(req, s, nxt, force_nonblock);
break;
case IORING_OP_READ_FIXED:
ret = io_read(req, s, force_nonblock);
ret = io_read(req, s, nxt, force_nonblock);
break;
case IORING_OP_WRITE_FIXED:
ret = io_write(req, s, force_nonblock);
ret = io_write(req, s, nxt, force_nonblock);
break;
case IORING_OP_FSYNC:
ret = io_fsync(req, s->sqe, force_nonblock);
ret = io_fsync(req, s->sqe, nxt, force_nonblock);
break;
case IORING_OP_POLL_ADD:
ret = io_poll_add(req, s->sqe);
......@@ -2067,13 +2126,13 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
ret = io_poll_remove(req, s->sqe);
break;
case IORING_OP_SYNC_FILE_RANGE:
ret = io_sync_file_range(req, s->sqe, force_nonblock);
ret = io_sync_file_range(req, s->sqe, nxt, force_nonblock);
break;
case IORING_OP_SENDMSG:
ret = io_sendmsg(req, s->sqe, force_nonblock);
ret = io_sendmsg(req, s->sqe, nxt, force_nonblock);
break;
case IORING_OP_RECVMSG:
ret = io_recvmsg(req, s->sqe, force_nonblock);
ret = io_recvmsg(req, s->sqe, nxt, force_nonblock);
break;
case IORING_OP_TIMEOUT:
ret = io_timeout(req, s->sqe);
......@@ -2140,6 +2199,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
struct sqe_submit *s = &req->submit;
const struct io_uring_sqe *sqe = s->sqe;
unsigned int flags = req->flags;
struct io_kiocb *nxt = NULL;
/* Ensure we clear previously set non-block flag */
req->rw.ki_flags &= ~IOCB_NOWAIT;
......@@ -2160,7 +2220,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
s->has_user = cur_mm != NULL;
s->needs_lock = true;
do {
ret = __io_submit_sqe(ctx, req, s, false);
ret = __io_submit_sqe(ctx, req, s, &nxt, false);
/*
* We can get EAGAIN for polled IO even though
* we're forcing a sync submission from here,
......@@ -2174,16 +2234,22 @@ static void io_sq_wq_submit_work(struct work_struct *work)
}
/* drop submission reference */
io_put_req(req);
io_put_req(req, NULL);
if (ret) {
io_cqring_add_event(ctx, sqe->user_data, ret);
io_put_req(req);
io_put_req(req, NULL);
}
/* async context always use a copy of the sqe */
kfree(sqe);
/* if a dependent link is ready, do that as the next one */
if (!ret && nxt) {
req = nxt;
continue;
}
/* req from defer and link list needn't decrease async cnt */
if (flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE))
goto out;
......@@ -2330,7 +2396,7 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
{
int ret;
ret = __io_submit_sqe(ctx, req, s, true);
ret = __io_submit_sqe(ctx, req, s, NULL, true);
/*
* We async punt it if the file wasn't marked NOWAIT, or if the file
......@@ -2363,14 +2429,14 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
/* drop submission reference */
io_put_req(req);
io_put_req(req, NULL);
/* and drop final reference, if we failed */
if (ret) {
io_cqring_add_event(ctx, req->user_data, ret);
if (req->flags & REQ_F_LINK)
req->flags |= REQ_F_FAIL_LINK;
io_put_req(req);
io_put_req(req, NULL);
}
return ret;
......@@ -2384,7 +2450,7 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
ret = io_req_defer(ctx, req, s->sqe);
if (ret) {
if (ret != -EIOCBQUEUED) {
io_free_req(req);
io_free_req(req, NULL);
io_cqring_add_event(ctx, s->sqe->user_data, ret);
}
return 0;
......@@ -2411,7 +2477,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
ret = io_req_defer(ctx, req, s->sqe);
if (ret) {
if (ret != -EIOCBQUEUED) {
io_free_req(req);
io_free_req(req, NULL);
__io_free_req(shadow);
io_cqring_add_event(ctx, s->sqe->user_data, ret);
return 0;
......@@ -2459,7 +2525,7 @@ static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
ret = io_req_set_file(ctx, s, state, req);
if (unlikely(ret)) {
err_req:
io_free_req(req);
io_free_req(req, NULL);
err:
io_cqring_add_event(ctx, s->sqe->user_data, ret);
return;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册