提交 d4be78c6 编写于 作者: J Jens Axboe 提交者: Shile Zhang

io_uring: add support for linked SQE timeouts

commit 2665abfd757fb35a241c6f0b1ebf620e3ffb36fb upstream.

While we have support for generic timeouts, we don't have a way to tie
a timeout to a specific SQE. The generic timeouts simply trigger wakeups
on the CQ ring.

This adds support for IORING_OP_LINK_TIMEOUT. This command is only valid
as a link to a previous command. The timeout specified can be either
relative or absolute, following the same rules as IORING_OP_TIMEOUT. If
the timeout triggers before the dependent command completes, it will
attempt to cancel that command. Likewise, if the dependent command
completes before the timeout triggers, it will cancel the timeout.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
上级 30ab13bc
...@@ -329,6 +329,7 @@ struct io_kiocb { ...@@ -329,6 +329,7 @@ struct io_kiocb {
#define REQ_F_IO_DRAIN 16 /* drain existing IO first */ #define REQ_F_IO_DRAIN 16 /* drain existing IO first */
#define REQ_F_IO_DRAINED 32 /* drain done */ #define REQ_F_IO_DRAINED 32 /* drain done */
#define REQ_F_LINK 64 /* linked sqes */ #define REQ_F_LINK 64 /* linked sqes */
#define REQ_F_LINK_TIMEOUT 128 /* has linked timeout */
#define REQ_F_FAIL_LINK 256 /* fail rest of links */ #define REQ_F_FAIL_LINK 256 /* fail rest of links */
#define REQ_F_SHADOW_DRAIN 512 /* link-drain shadow req */ #define REQ_F_SHADOW_DRAIN 512 /* link-drain shadow req */
#define REQ_F_TIMEOUT 1024 /* timeout request */ #define REQ_F_TIMEOUT 1024 /* timeout request */
...@@ -371,6 +372,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr); ...@@ -371,6 +372,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr);
static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data, static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
long res); long res);
static void __io_free_req(struct io_kiocb *req); static void __io_free_req(struct io_kiocb *req);
static void io_put_req(struct io_kiocb *req, struct io_kiocb **nxtptr);
static struct kmem_cache *req_cachep; static struct kmem_cache *req_cachep;
...@@ -711,9 +713,28 @@ static void __io_free_req(struct io_kiocb *req) ...@@ -711,9 +713,28 @@ static void __io_free_req(struct io_kiocb *req)
kmem_cache_free(req_cachep, req); kmem_cache_free(req_cachep, req);
} }
/*
 * Cancel the hrtimer of a linked timeout request and, if that worked,
 * complete the timeout request as cancelled.
 *
 * @ctx: ring context the completion is posted to
 * @req: the linked timeout request whose timer should be stopped
 *
 * When hrtimer_try_to_cancel() returns -1 nothing is done here and false
 * is returned (NOTE(review): per the hrtimer API, -1 means the timer
 * callback is currently running, so the callback is presumably left to
 * finish the request itself - confirm). For any other return value an
 * -ECANCELED completion is filled in for @req, the CQ ring is committed,
 * REQ_F_LINK is cleared on @req and the request is freed.
 *
 * Returns true if a completion event was posted, false otherwise.
 */
static bool io_link_cancel_timeout(struct io_ring_ctx *ctx,
				   struct io_kiocb *req)
{
	if (hrtimer_try_to_cancel(&req->timeout.timer) == -1)
		return false;

	io_cqring_fill_event(ctx, req->user_data, -ECANCELED);
	io_commit_cqring(ctx);
	/* the timeout is no longer part of a chain once it is being freed */
	req->flags &= ~REQ_F_LINK;
	__io_free_req(req);
	return true;
}
static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr) static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
{ {
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *nxt; struct io_kiocb *nxt;
bool wake_ev = false;
/* /*
* The list should never be empty when we are called here. But could * The list should never be empty when we are called here. But could
...@@ -721,7 +742,7 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr) ...@@ -721,7 +742,7 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
* safe side. * safe side.
*/ */
nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list); nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list);
if (nxt) { while (nxt) {
list_del(&nxt->list); list_del(&nxt->list);
if (!list_empty(&req->link_list)) { if (!list_empty(&req->link_list)) {
INIT_LIST_HEAD(&nxt->link_list); INIT_LIST_HEAD(&nxt->link_list);
...@@ -733,11 +754,23 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr) ...@@ -733,11 +754,23 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
* If we're in async work, we can continue processing the chain * If we're in async work, we can continue processing the chain
* in this context instead of having to queue up new async work. * in this context instead of having to queue up new async work.
*/ */
if (nxtptr && current_work()) if (req->flags & REQ_F_LINK_TIMEOUT) {
wake_ev = io_link_cancel_timeout(ctx, nxt);
/* we dropped this link, get next */
nxt = list_first_entry_or_null(&req->link_list,
struct io_kiocb, list);
} else if (nxtptr && current_work()) {
*nxtptr = nxt; *nxtptr = nxt;
else break;
} else {
io_queue_async_work(req->ctx, nxt); io_queue_async_work(req->ctx, nxt);
break;
}
} }
if (wake_ev)
io_cqring_ev_posted(ctx);
} }
/* /*
...@@ -745,31 +778,61 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr) ...@@ -745,31 +778,61 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
*/ */
static void io_fail_links(struct io_kiocb *req) static void io_fail_links(struct io_kiocb *req)
{ {
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *link; struct io_kiocb *link;
unsigned long flags;
spin_lock_irqsave(&ctx->completion_lock, flags);
while (!list_empty(&req->link_list)) { while (!list_empty(&req->link_list)) {
link = list_first_entry(&req->link_list, struct io_kiocb, list); link = list_first_entry(&req->link_list, struct io_kiocb, list);
list_del(&link->list); list_del_init(&link->list);
trace_io_uring_fail_link(req, link); trace_io_uring_fail_link(req, link);
io_cqring_add_event(req->ctx, link->user_data, -ECANCELED);
__io_free_req(link); if ((req->flags & REQ_F_LINK_TIMEOUT) &&
link->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT) {
io_link_cancel_timeout(ctx, link);
} else {
io_cqring_fill_event(ctx, link->user_data, -ECANCELED);
__io_free_req(link);
}
} }
io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
} }
static void io_free_req(struct io_kiocb *req, struct io_kiocb **nxt) static void io_free_req(struct io_kiocb *req, struct io_kiocb **nxt)
{ {
if (likely(!(req->flags & REQ_F_LINK))) {
__io_free_req(req);
return;
}
/* /*
* If LINK is set, we have dependent requests in this chain. If we * If LINK is set, we have dependent requests in this chain. If we
* didn't fail this request, queue the first one up, moving any other * didn't fail this request, queue the first one up, moving any other
* dependencies to the next request. In case of failure, fail the rest * dependencies to the next request. In case of failure, fail the rest
* of the chain. * of the chain.
*/ */
if (req->flags & REQ_F_LINK) { if (req->flags & REQ_F_FAIL_LINK) {
if (req->flags & REQ_F_FAIL_LINK) io_fail_links(req);
io_fail_links(req); } else if (req->flags & REQ_F_LINK_TIMEOUT) {
else struct io_ring_ctx *ctx = req->ctx;
io_req_link_next(req, nxt); unsigned long flags;
/*
* If this is a timeout link, we could be racing with the
* timeout timer. Grab the completion lock for this case to
* protect against that.
*/
spin_lock_irqsave(&ctx->completion_lock, flags);
io_req_link_next(req, nxt);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
} else {
io_req_link_next(req, nxt);
} }
__io_free_req(req); __io_free_req(req);
...@@ -2447,10 +2510,112 @@ static int io_grab_files(struct io_ring_ctx *ctx, struct io_kiocb *req) ...@@ -2447,10 +2510,112 @@ static int io_grab_files(struct io_ring_ctx *ctx, struct io_kiocb *req)
return ret; return ret;
} }
/*
 * hrtimer callback for a linked timeout request.
 *
 * Under the ring's completion_lock, detach the timeout request from the
 * list it sits on and remember the entry preceding it. If such an entry was
 * found, attempt to cancel it via io_async_cancel_one() keyed on its
 * user_data, and use that call's return value as the timeout's result;
 * otherwise the result is -ETIME. A completion carrying that result is then
 * posted for the timeout request and a reference on it is dropped.
 *
 * Always returns HRTIMER_NORESTART.
 */
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
{
	struct io_kiocb *tmo = container_of(timer, struct io_kiocb,
					    timeout.timer);
	struct io_ring_ctx *ctx = tmo->ctx;
	struct io_kiocb *target = NULL;
	unsigned long irqflags;
	int res;

	spin_lock_irqsave(&ctx->completion_lock, irqflags);

	/*
	 * An empty list is not expected here; it only happens when we race
	 * with the completion of the linked work.
	 */
	if (!list_empty(&tmo->list)) {
		target = list_entry(tmo->list.prev, struct io_kiocb, link_list);
		list_del_init(&tmo->list);
	}

	spin_unlock_irqrestore(&ctx->completion_lock, irqflags);

	if (target)
		res = io_async_cancel_one(ctx,
				(void *) (unsigned long) target->user_data);
	else
		res = -ETIME;

	io_cqring_add_event(ctx, tmo->user_data, res);
	io_put_req(tmo, NULL);
	return HRTIMER_NORESTART;
}
/*
 * Validate and arm the linked timeout @nxt that guards request @req.
 *
 * @req: the request the timeout is linked to; gets REQ_F_LINK_TIMEOUT set
 *       once the timeout SQE has passed validation
 * @nxt: the IORING_OP_LINK_TIMEOUT request whose SQE describes the timeout
 *
 * The SQE must have ioprio, buf_index and off set to zero and len set to 1,
 * and timeout_flags may contain nothing but IORING_TIMEOUT_ABS; otherwise
 * the timeout fails with -EINVAL. The timespec is read from the user
 * pointer in sqe->addr; a failed read fails the timeout with -EFAULT.
 *
 * Returns 0 when the timer was started. On failure the timeout is unlinked
 * and completed with the specific error, and -ECANCELED is returned.
 */
static int io_queue_linked_timeout(struct io_kiocb *req, struct io_kiocb *nxt)
{
const struct io_uring_sqe *sqe = nxt->submit.sqe;
enum hrtimer_mode mode;
struct timespec64 ts;
int ret = -EINVAL;
/* reject SQE fields this opcode does not use; len must be exactly 1 */
if (sqe->ioprio || sqe->buf_index || sqe->len != 1 || sqe->off)
goto err;
/* IORING_TIMEOUT_ABS is the only flag accepted */
if (sqe->timeout_flags & ~IORING_TIMEOUT_ABS)
goto err;
if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr))) {
ret = -EFAULT;
goto err;
}
/* mark the parent as having a linked timeout before the timer is started */
req->flags |= REQ_F_LINK_TIMEOUT;
if (sqe->timeout_flags & IORING_TIMEOUT_ABS)
mode = HRTIMER_MODE_ABS;
else
mode = HRTIMER_MODE_REL;
hrtimer_init(&nxt->timeout.timer, CLOCK_MONOTONIC, mode);
nxt->timeout.timer.function = io_link_timeout_fn;
hrtimer_start(&nxt->timeout.timer, timespec64_to_ktime(ts), mode);
ret = 0;
/* reached on success (ret == 0) as well as on every failure above */
err:
/* drop submission reference */
io_put_req(nxt, NULL);
if (ret) {
struct io_ring_ctx *ctx = req->ctx;
/*
 * Break the link and fail the linked timeout with the specific
 * error; the parent will get failed by the regular submission path.
 */
list_del(&nxt->list);
io_cqring_fill_event(ctx, nxt->user_data, ret);
trace_io_uring_fail_link(req, nxt);
io_commit_cqring(ctx);
/*
 * NOTE(review): second put on nxt - presumably drops the remaining
 * reference so the failed timeout is freed; confirm against
 * io_put_req() refcounting.
 */
io_put_req(nxt, NULL);
/* the caller only ever sees -ECANCELED; the real error went to the CQE */
ret = -ECANCELED;
}
return ret;
}
/*
 * Return the linked timeout attached directly behind @req, if any.
 *
 * Only requests flagged REQ_F_LINK are considered. The first entry on
 * @req's link_list is returned when its SQE opcode is
 * IORING_OP_LINK_TIMEOUT; in every other case (no REQ_F_LINK, empty list,
 * different opcode) NULL is returned.
 */
static inline struct io_kiocb *io_get_linked_timeout(struct io_kiocb *req)
{
	struct io_kiocb *first;

	if (req->flags & REQ_F_LINK) {
		first = list_first_entry_or_null(&req->link_list,
						 struct io_kiocb, list);
		if (first && first->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT)
			return first;
	}

	return NULL;
}
static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req) static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req)
{ {
struct io_kiocb *nxt;
int ret; int ret;
nxt = io_get_linked_timeout(req);
if (unlikely(nxt)) {
ret = io_queue_linked_timeout(req, nxt);
if (ret)
goto err;
}
ret = __io_submit_sqe(ctx, req, NULL, true); ret = __io_submit_sqe(ctx, req, NULL, true);
/* /*
...@@ -2605,6 +2770,10 @@ static void io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, ...@@ -2605,6 +2770,10 @@ static void io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
INIT_LIST_HEAD(&req->link_list); INIT_LIST_HEAD(&req->link_list);
*link = req; *link = req;
} else if (READ_ONCE(s->sqe->opcode) == IORING_OP_LINK_TIMEOUT) {
/* Only valid as a linked SQE */
ret = -EINVAL;
goto err_req;
} else { } else {
io_queue_sqe(ctx, req); io_queue_sqe(ctx, req);
} }
......
...@@ -72,6 +72,7 @@ struct io_uring_sqe { ...@@ -72,6 +72,7 @@ struct io_uring_sqe {
#define IORING_OP_TIMEOUT_REMOVE 12 #define IORING_OP_TIMEOUT_REMOVE 12
#define IORING_OP_ACCEPT 13 #define IORING_OP_ACCEPT 13
#define IORING_OP_ASYNC_CANCEL 14 #define IORING_OP_ASYNC_CANCEL 14
#define IORING_OP_LINK_TIMEOUT 15
/* /*
* sqe->fsync_flags * sqe->fsync_flags
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册