diff --git a/io_uring/Makefile b/io_uring/Makefile index d7cf992c841a381fb50e0e67010abb9200ed7965..6ae4e45a15dbfc2893faa9122d6477f98492eeef 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -5,5 +5,5 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \ sync.o advise.o filetable.o \ openclose.o uring_cmd.o epoll.o \ - statx.o net.o msg_ring.o + statx.o net.o msg_ring.o timeout.o obj-$(CONFIG_IO_WQ) += io-wq.o diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index eea5282b1ca21bfe3a6c625fbcd6c2c31375d4f1..3fc59a22d54e89c456fa8b6014307b3df1492085 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -105,6 +105,7 @@ #include "statx.h" #include "net.h" #include "msg_ring.h" +#include "timeout.h" #define IORING_MAX_ENTRIES 32768 #define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) @@ -288,14 +289,6 @@ struct io_poll_update { bool update_user_data; }; -struct io_timeout_data { - struct io_kiocb *req; - struct hrtimer timer; - struct timespec64 ts; - enum hrtimer_mode mode; - u32 flags; -}; - struct io_cancel { struct file *file; u64 addr; @@ -303,27 +296,6 @@ struct io_cancel { s32 fd; }; -struct io_timeout { - struct file *file; - u32 off; - u32 target_seq; - struct list_head list; - /* head of the link, used by linked timeouts only */ - struct io_kiocb *head; - /* for linked completions */ - struct io_kiocb *prev; -}; - -struct io_timeout_rem { - struct file *file; - u64 addr; - - /* timeout update */ - struct timespec64 ts; - u32 flags; - bool ltimeout; -}; - struct io_rw { /* NOTE: kiocb has the file as the first member, so don't do it here */ struct kiocb kiocb; @@ -388,16 +360,6 @@ struct io_defer_entry { u32 seq; }; -struct io_cancel_data { - struct io_ring_ctx *ctx; - union { - u64 data; - struct file *file; - }; - u32 flags; - int seq; -}; - struct io_op_def { /* needs req->file assigned */ unsigned needs_file : 1; @@ -436,7 +398,6 @@ static const struct io_op_def io_op_defs[]; #define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL) #define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK) -static bool io_disarm_next(struct io_kiocb *req); static void io_uring_del_tctx_node(unsigned long index); static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, struct task_struct *task, @@ -444,7 +405,6 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); static void io_dismantle_req(struct io_kiocb *req); -static void io_queue_linked_timeout(struct io_kiocb *req); static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, struct io_uring_rsrc_update2 *up, unsigned nr_args); @@ -456,9 +416,7 @@ static void io_req_task_queue(struct io_kiocb *req); static void __io_submit_flush_completions(struct io_ring_ctx *ctx); static int io_req_prep_async(struct io_kiocb *req); -static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); static void io_eventfd_signal(struct io_ring_ctx *ctx); -static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags); static struct kmem_cache *req_cachep; @@ -609,9 +567,6 @@ static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked) } } -#define io_for_each_link(pos, head) \ - for (pos = (head); pos; pos = pos->link) - static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) { if (!wq_list_empty(&ctx->submit_state.compl_reqs)) @@ -803,24 +758,6 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) io_ring_submit_unlock(ctx, 
issue_flags); } -static bool io_match_task(struct io_kiocb *head, struct task_struct *task, - bool cancel_all) - __must_hold(&req->ctx->timeout_lock) -{ - struct io_kiocb *req; - - if (task && head->task != task) - return false; - if (cancel_all) - return true; - - io_for_each_link(req, head) { - if (req->flags & REQ_F_INFLIGHT) - return true; - } - return false; -} - static bool io_match_linked(struct io_kiocb *head) { struct io_kiocb *req; @@ -877,13 +814,6 @@ static __cold void io_ring_ctx_ref_free(struct percpu_ref *ref) complete(&ctx->ref_comp); } -static inline bool io_is_timeout_noseq(struct io_kiocb *req) -{ - struct io_timeout *timeout = io_kiocb_to_cmd(req); - - return !timeout->off; -} - static __cold void io_fallback_req_func(struct work_struct *work) { struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, @@ -1120,24 +1050,6 @@ static void io_queue_iowq(struct io_kiocb *req, bool *dont_use) io_queue_linked_timeout(link); } -static void io_kill_timeout(struct io_kiocb *req, int status) - __must_hold(&req->ctx->completion_lock) - __must_hold(&req->ctx->timeout_lock) -{ - struct io_timeout_data *io = req->async_data; - - if (hrtimer_try_to_cancel(&io->timer) != -1) { - struct io_timeout *timeout = io_kiocb_to_cmd(req); - - if (status) - req_set_fail(req); - atomic_set(&req->ctx->cq_timeouts, - atomic_read(&req->ctx->cq_timeouts) + 1); - list_del_init(&timeout->list); - io_req_tw_post_queue(req, status, 0); - } -} - static __cold void io_queue_deferred(struct io_ring_ctx *ctx) { while (!list_empty(&ctx->defer_list)) { @@ -1152,38 +1064,6 @@ static __cold void io_queue_deferred(struct io_ring_ctx *ctx) } } -static __cold void io_flush_timeouts(struct io_ring_ctx *ctx) - __must_hold(&ctx->completion_lock) -{ - u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); - struct io_timeout *timeout, *tmp; - - spin_lock_irq(&ctx->timeout_lock); - list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { - struct io_kiocb *req = cmd_to_io_kiocb(timeout); - u32 events_needed, events_got; - - if (io_is_timeout_noseq(req)) - break; - - /* - * Since seq can easily wrap around over time, subtract - * the last seq at which timeouts were flushed before comparing. - * Assuming not more than 2^31-1 events have happened since, - * these subtractions won't have wrapped, so we can check if - * target is in [last_seq, current_seq] by comparing the two. 
- */ - events_needed = timeout->target_seq - ctx->cq_last_tm_flush; - events_got = seq - ctx->cq_last_tm_flush; - if (events_got < events_needed) - break; - - io_kill_timeout(req, 0); - } - ctx->cq_last_tm_flush = seq; - spin_unlock_irq(&ctx->timeout_lock); -} - static void __io_commit_cqring_flush(struct io_ring_ctx *ctx) { if (ctx->off_timeout_used || ctx->drain_active) { @@ -1585,14 +1465,14 @@ static void __io_req_complete_put(struct io_kiocb *req) } } -static void __io_req_complete_post(struct io_kiocb *req) +void __io_req_complete_post(struct io_kiocb *req) { if (!(req->flags & REQ_F_CQE_SKIP)) __io_fill_cqe_req(req->ctx, req); __io_req_complete_put(req); } -static void io_req_complete_post(struct io_kiocb *req) +void io_req_complete_post(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; @@ -1717,7 +1597,7 @@ static inline void io_dismantle_req(struct io_kiocb *req) io_put_file(req->file); } -static __cold void io_free_req(struct io_kiocb *req) +__cold void io_free_req(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; @@ -1731,96 +1611,6 @@ static __cold void io_free_req(struct io_kiocb *req) spin_unlock(&ctx->completion_lock); } -static inline void io_remove_next_linked(struct io_kiocb *req) -{ - struct io_kiocb *nxt = req->link; - - req->link = nxt->link; - nxt->link = NULL; -} - -static struct io_kiocb *io_disarm_linked_timeout(struct io_kiocb *req) - __must_hold(&req->ctx->completion_lock) - __must_hold(&req->ctx->timeout_lock) -{ - struct io_kiocb *link = req->link; - - if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { - struct io_timeout_data *io = link->async_data; - struct io_timeout *timeout = io_kiocb_to_cmd(link); - - io_remove_next_linked(req); - timeout->head = NULL; - if (hrtimer_try_to_cancel(&io->timer) != -1) { - list_del(&timeout->list); - return link; - } - } - return NULL; -} - -static void io_fail_links(struct io_kiocb *req) - __must_hold(&req->ctx->completion_lock) -{ - struct io_kiocb *nxt, *link = req->link; - bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES; - - req->link = NULL; - while (link) { - long res = -ECANCELED; - - if (link->flags & REQ_F_FAIL) - res = link->cqe.res; - - nxt = link->link; - link->link = NULL; - - trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data, - req->opcode, link); - - if (ignore_cqes) - link->flags |= REQ_F_CQE_SKIP; - else - link->flags &= ~REQ_F_CQE_SKIP; - io_req_set_res(link, res, 0); - __io_req_complete_post(link); - link = nxt; - } -} - -static bool io_disarm_next(struct io_kiocb *req) - __must_hold(&req->ctx->completion_lock) -{ - struct io_kiocb *link = NULL; - bool posted = false; - - if (req->flags & REQ_F_ARM_LTIMEOUT) { - link = req->link; - req->flags &= ~REQ_F_ARM_LTIMEOUT; - if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { - io_remove_next_linked(req); - io_req_tw_post_queue(link, -ECANCELED, 0); - posted = true; - } - } else if (req->flags & REQ_F_LINK_TIMEOUT) { - struct io_ring_ctx *ctx = req->ctx; - - spin_lock_irq(&ctx->timeout_lock); - link = io_disarm_linked_timeout(req); - spin_unlock_irq(&ctx->timeout_lock); - if (link) { - posted = true; - io_req_tw_post_queue(link, -ECANCELED, 0); - } - } - if (unlikely((req->flags & REQ_F_FAIL) && - !(req->flags & REQ_F_HARDLINK))) { - posted |= (req->link != NULL); - io_fail_links(req); - } - return posted; -} - static void __io_req_find_next_prep(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; @@ -2033,7 +1823,7 @@ static void io_req_tw_post(struct io_kiocb *req, bool *locked) io_req_complete_post(req); } -static 
void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags) +void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags) { io_req_set_res(req, res, cflags); req->io_task_work.func = io_req_tw_post; @@ -2057,7 +1847,7 @@ static void io_req_task_submit(struct io_kiocb *req, bool *locked) io_req_complete_failed(req, -EFAULT); } -static void io_req_task_queue_fail(struct io_kiocb *req, int ret) +void io_req_task_queue_fail(struct io_kiocb *req, int ret) { io_req_set_res(req, ret, 0); req->io_task_work.func = io_req_task_cancel; @@ -2076,7 +1866,7 @@ static void io_req_task_queue_reissue(struct io_kiocb *req) io_req_task_work_add(req); } -static void io_queue_next(struct io_kiocb *req) +void io_queue_next(struct io_kiocb *req) { struct io_kiocb *nxt = io_req_find_next(req); @@ -2177,14 +1967,6 @@ static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req) return nxt; } -static inline void io_put_req(struct io_kiocb *req) -{ - if (req_ref_put_and_test(req)) { - io_queue_next(req); - io_free_req(req); - } -} - static unsigned io_cqring_events(struct io_ring_ctx *ctx) { /* See comment at the top of this file */ @@ -2451,7 +2233,7 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res) return false; } -static inline void io_req_task_complete(struct io_kiocb *req, bool *locked) +inline void io_req_task_complete(struct io_kiocb *req, bool *locked) { if (*locked) { req->cqe.flags |= io_put_kbuf(req, 0); @@ -4600,334 +4382,6 @@ static int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } -static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) -{ - struct io_timeout_data *data = container_of(timer, - struct io_timeout_data, timer); - struct io_kiocb *req = data->req; - struct io_timeout *timeout = io_kiocb_to_cmd(req); - struct io_ring_ctx *ctx = req->ctx; - unsigned long flags; - - spin_lock_irqsave(&ctx->timeout_lock, flags); - list_del_init(&timeout->list); - atomic_set(&req->ctx->cq_timeouts, - atomic_read(&req->ctx->cq_timeouts) + 1); - spin_unlock_irqrestore(&ctx->timeout_lock, flags); - - if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS)) - req_set_fail(req); - - io_req_set_res(req, -ETIME, 0); - req->io_task_work.func = io_req_task_complete; - io_req_task_work_add(req); - return HRTIMER_NORESTART; -} - -static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx, - struct io_cancel_data *cd) - __must_hold(&ctx->timeout_lock) -{ - struct io_timeout *timeout; - struct io_timeout_data *io; - struct io_kiocb *req = NULL; - - list_for_each_entry(timeout, &ctx->timeout_list, list) { - struct io_kiocb *tmp = cmd_to_io_kiocb(timeout); - - if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) && - cd->data != tmp->cqe.user_data) - continue; - if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) { - if (cd->seq == tmp->work.cancel_seq) - continue; - tmp->work.cancel_seq = cd->seq; - } - req = tmp; - break; - } - if (!req) - return ERR_PTR(-ENOENT); - - io = req->async_data; - if (hrtimer_try_to_cancel(&io->timer) == -1) - return ERR_PTR(-EALREADY); - timeout = io_kiocb_to_cmd(req); - list_del_init(&timeout->list); - return req; -} - -static int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) - __must_hold(&ctx->completion_lock) -{ - struct io_kiocb *req; - - spin_lock_irq(&ctx->timeout_lock); - req = io_timeout_extract(ctx, cd); - spin_unlock_irq(&ctx->timeout_lock); - - if (IS_ERR(req)) - return PTR_ERR(req); - io_req_task_queue_fail(req, -ECANCELED); - return 0; -} - -static clockid_t 
io_timeout_get_clock(struct io_timeout_data *data) -{ - switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { - case IORING_TIMEOUT_BOOTTIME: - return CLOCK_BOOTTIME; - case IORING_TIMEOUT_REALTIME: - return CLOCK_REALTIME; - default: - /* can't happen, vetted at prep time */ - WARN_ON_ONCE(1); - fallthrough; - case 0: - return CLOCK_MONOTONIC; - } -} - -static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, - struct timespec64 *ts, enum hrtimer_mode mode) - __must_hold(&ctx->timeout_lock) -{ - struct io_timeout_data *io; - struct io_timeout *timeout; - struct io_kiocb *req = NULL; - - list_for_each_entry(timeout, &ctx->ltimeout_list, list) { - struct io_kiocb *tmp = cmd_to_io_kiocb(timeout); - - if (user_data == tmp->cqe.user_data) { - req = tmp; - break; - } - } - if (!req) - return -ENOENT; - - io = req->async_data; - if (hrtimer_try_to_cancel(&io->timer) == -1) - return -EALREADY; - hrtimer_init(&io->timer, io_timeout_get_clock(io), mode); - io->timer.function = io_link_timeout_fn; - hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); - return 0; -} - -static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, - struct timespec64 *ts, enum hrtimer_mode mode) - __must_hold(&ctx->timeout_lock) -{ - struct io_cancel_data cd = { .data = user_data, }; - struct io_kiocb *req = io_timeout_extract(ctx, &cd); - struct io_timeout *timeout = io_kiocb_to_cmd(req); - struct io_timeout_data *data; - - if (IS_ERR(req)) - return PTR_ERR(req); - - timeout->off = 0; /* noseq */ - data = req->async_data; - list_add_tail(&timeout->list, &ctx->timeout_list); - hrtimer_init(&data->timer, io_timeout_get_clock(data), mode); - data->timer.function = io_timeout_fn; - hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode); - return 0; -} - -static int io_timeout_remove_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_timeout_rem *tr = io_kiocb_to_cmd(req); - - if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) - return -EINVAL; - if (sqe->buf_index || sqe->len || sqe->splice_fd_in) - return -EINVAL; - - tr->ltimeout = false; - tr->addr = READ_ONCE(sqe->addr); - tr->flags = READ_ONCE(sqe->timeout_flags); - if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) { - if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1) - return -EINVAL; - if (tr->flags & IORING_LINK_TIMEOUT_UPDATE) - tr->ltimeout = true; - if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) - return -EINVAL; - if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) - return -EFAULT; - if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0) - return -EINVAL; - } else if (tr->flags) { - /* timeout removal doesn't support flags */ - return -EINVAL; - } - - return 0; -} - -static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags) -{ - return (flags & IORING_TIMEOUT_ABS) ? 
HRTIMER_MODE_ABS - : HRTIMER_MODE_REL; -} - -/* - * Remove or update an existing timeout command - */ -static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_timeout_rem *tr = io_kiocb_to_cmd(req); - struct io_ring_ctx *ctx = req->ctx; - int ret; - - if (!(tr->flags & IORING_TIMEOUT_UPDATE)) { - struct io_cancel_data cd = { .data = tr->addr, }; - - spin_lock(&ctx->completion_lock); - ret = io_timeout_cancel(ctx, &cd); - spin_unlock(&ctx->completion_lock); - } else { - enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags); - - spin_lock_irq(&ctx->timeout_lock); - if (tr->ltimeout) - ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); - else - ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); - spin_unlock_irq(&ctx->timeout_lock); - } - - if (ret < 0) - req_set_fail(req); - io_req_set_res(req, ret, 0); - return IOU_OK; -} - -static int __io_timeout_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe, - bool is_timeout_link) -{ - struct io_timeout *timeout = io_kiocb_to_cmd(req); - struct io_timeout_data *data; - unsigned flags; - u32 off = READ_ONCE(sqe->off); - - if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in) - return -EINVAL; - if (off && is_timeout_link) - return -EINVAL; - flags = READ_ONCE(sqe->timeout_flags); - if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK | - IORING_TIMEOUT_ETIME_SUCCESS)) - return -EINVAL; - /* more than one clock specified is invalid, obviously */ - if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) - return -EINVAL; - - INIT_LIST_HEAD(&timeout->list); - timeout->off = off; - if (unlikely(off && !req->ctx->off_timeout_used)) - req->ctx->off_timeout_used = true; - - if (WARN_ON_ONCE(req_has_async_data(req))) - return -EFAULT; - if (io_alloc_async_data(req)) - return -ENOMEM; - - data = req->async_data; - data->req = req; - data->flags = flags; - - if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) - return -EFAULT; - - if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) - return -EINVAL; - - INIT_LIST_HEAD(&timeout->list); - data->mode = io_translate_timeout_mode(flags); - hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); - - if (is_timeout_link) { - struct io_submit_link *link = &req->ctx->submit_state.link; - - if (!link->head) - return -EINVAL; - if (link->last->opcode == IORING_OP_LINK_TIMEOUT) - return -EINVAL; - timeout->head = link->last; - link->last->flags |= REQ_F_ARM_LTIMEOUT; - } - return 0; -} - -static int io_timeout_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - return __io_timeout_prep(req, sqe, false); -} - -static int io_link_timeout_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - return __io_timeout_prep(req, sqe, true); -} - -static int io_timeout(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_timeout *timeout = io_kiocb_to_cmd(req); - struct io_ring_ctx *ctx = req->ctx; - struct io_timeout_data *data = req->async_data; - struct list_head *entry; - u32 tail, off = timeout->off; - - spin_lock_irq(&ctx->timeout_lock); - - /* - * sqe->off holds how many events that need to occur for this - * timeout event to be satisfied. If it isn't set, then this is - * a pure timeout request, sequence isn't used. - */ - if (io_is_timeout_noseq(req)) { - entry = ctx->timeout_list.prev; - goto add; - } - - tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); - timeout->target_seq = tail + off; - - /* Update the last seq here in case io_flush_timeouts() hasn't. 
- * This is safe because ->completion_lock is held, and submissions - * and completions are never mixed in the same ->completion_lock section. - */ - ctx->cq_last_tm_flush = tail; - - /* - * Insertion sort, ensuring the first entry in the list is always - * the one we need first. - */ - list_for_each_prev(entry, &ctx->timeout_list) { - struct io_timeout *nextt = list_entry(entry, struct io_timeout, list); - struct io_kiocb *nxt = cmd_to_io_kiocb(nextt); - - if (io_is_timeout_noseq(nxt)) - continue; - /* nxt.seq is behind @tail, otherwise would've been completed */ - if (off >= nextt->target_seq - tail) - break; - } -add: - list_add(&timeout->list, entry); - data->timer.function = io_timeout_fn; - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); - spin_unlock_irq(&ctx->timeout_lock); - return IOU_ISSUE_SKIP_COMPLETE; -} - static bool io_cancel_cb(struct io_wq_work *work, void *data) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); @@ -4979,7 +4433,7 @@ static int io_async_cancel_one(struct io_uring_task *tctx, return ret; } -static int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd) +int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd) { struct io_ring_ctx *ctx = req->ctx; int ret; @@ -5462,84 +4916,6 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd) return file; } -static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) -{ - struct io_timeout *timeout = io_kiocb_to_cmd(req); - struct io_kiocb *prev = timeout->prev; - int ret = -ENOENT; - - if (prev) { - if (!(req->task->flags & PF_EXITING)) { - struct io_cancel_data cd = { - .ctx = req->ctx, - .data = prev->cqe.user_data, - }; - - ret = io_try_cancel(req, &cd); - } - io_req_set_res(req, ret ?: -ETIME, 0); - io_req_complete_post(req); - io_put_req(prev); - } else { - io_req_set_res(req, -ETIME, 0); - io_req_complete_post(req); - } -} - -static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) -{ - struct io_timeout_data *data = container_of(timer, - struct io_timeout_data, timer); - struct io_kiocb *prev, *req = data->req; - struct io_timeout *timeout = io_kiocb_to_cmd(req); - struct io_ring_ctx *ctx = req->ctx; - unsigned long flags; - - spin_lock_irqsave(&ctx->timeout_lock, flags); - prev = timeout->head; - timeout->head = NULL; - - /* - * We don't expect the list to be empty, that will only happen if we - * race with the completion of the linked work. 
- */ - if (prev) { - io_remove_next_linked(prev); - if (!req_ref_inc_not_zero(prev)) - prev = NULL; - } - list_del(&timeout->list); - timeout->prev = prev; - spin_unlock_irqrestore(&ctx->timeout_lock, flags); - - req->io_task_work.func = io_req_task_link_timeout; - io_req_task_work_add(req); - return HRTIMER_NORESTART; -} - -static void io_queue_linked_timeout(struct io_kiocb *req) -{ - struct io_timeout *timeout = io_kiocb_to_cmd(req); - struct io_ring_ctx *ctx = req->ctx; - - spin_lock_irq(&ctx->timeout_lock); - /* - * If the back reference is NULL, then our linked request finished - * before we got a chance to setup the timer - */ - if (timeout->head) { - struct io_timeout_data *data = req->async_data; - - data->timer.function = io_link_timeout_fn; - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), - data->mode); - list_add_tail(&timeout->list, &ctx->ltimeout_list); - } - spin_unlock_irq(&ctx->timeout_lock); - /* drop submission reference */ - io_put_req(req); -} - static void io_queue_async(struct io_kiocb *req, int ret) __must_hold(&req->ctx->uring_lock) { @@ -8116,31 +7492,6 @@ static __cold void io_ring_exit_work(struct work_struct *work) io_ring_ctx_free(ctx); } -/* Returns true if we found and killed one or more timeouts */ -static __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, - struct task_struct *tsk, bool cancel_all) -{ - struct io_timeout *timeout, *tmp; - int canceled = 0; - - spin_lock(&ctx->completion_lock); - spin_lock_irq(&ctx->timeout_lock); - list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { - struct io_kiocb *req = cmd_to_io_kiocb(timeout); - - if (io_match_task(req, tsk, cancel_all)) { - io_kill_timeout(req, -ECANCELED); - canceled++; - } - } - spin_unlock_irq(&ctx->timeout_lock); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - if (canceled != 0) - io_cqring_ev_posted(ctx); - return canceled != 0; -} - static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { unsigned long index; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 4b46385720c585287b3227c89eadee0eea12afdd..e285e12ccbdbb17e6eb35378e8a7bb160c737e25 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -65,7 +65,8 @@ static inline void io_commit_cqring(struct io_ring_ctx *ctx) } void __io_req_complete(struct io_kiocb *req, unsigned issue_flags); - +void io_req_complete_post(struct io_kiocb *req); +void __io_req_complete_post(struct io_kiocb *req); bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags); void io_cqring_ev_posted(struct io_ring_ctx *ctx); @@ -96,5 +97,15 @@ void io_rsrc_node_switch(struct io_ring_ctx *ctx, bool io_is_uring_fops(struct file *file); bool io_alloc_async_data(struct io_kiocb *req); void io_req_task_work_add(struct io_kiocb *req); +void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags); +void io_req_task_complete(struct io_kiocb *req, bool *locked); +void io_req_task_queue_fail(struct io_kiocb *req, int ret); +int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd); + +void io_free_req(struct io_kiocb *req); +void io_queue_next(struct io_kiocb *req); + +#define io_for_each_link(pos, head) \ + for (pos = (head); pos; pos = pos->link) #endif diff --git a/io_uring/io_uring_types.h b/io_uring/io_uring_types.h index dba72113c59d67d74027e2378d1c1c14a86b90b7..349524907b6bfcb38760a03715d7893004bebd54 100644 --- a/io_uring/io_uring_types.h +++ b/io_uring/io_uring_types.h @@ -488,4 +488,14 @@ struct io_kiocb { struct io_wq_work work; }; +struct 
io_cancel_data { + struct io_ring_ctx *ctx; + union { + u64 data; + struct file *file; + }; + u32 flags; + int seq; +}; + #endif diff --git a/io_uring/timeout.c b/io_uring/timeout.c new file mode 100644 index 0000000000000000000000000000000000000000..5e42bfcd683e2a8a69a4ba7f45fad215b8b715bf --- /dev/null +++ b/io_uring/timeout.c @@ -0,0 +1,634 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/file.h> +#include <linux/io_uring.h> + +#include <trace/events/io_uring.h> + +#include <uapi/linux/io_uring.h> + +#include "io_uring_types.h" +#include "io_uring.h" +#include "refs.h" +#include "timeout.h" + +struct io_timeout { + struct file *file; + u32 off; + u32 target_seq; + struct list_head list; + /* head of the link, used by linked timeouts only */ + struct io_kiocb *head; + /* for linked completions */ + struct io_kiocb *prev; +}; + +struct io_timeout_rem { + struct file *file; + u64 addr; + + /* timeout update */ + struct timespec64 ts; + u32 flags; + bool ltimeout; +}; + +static inline bool io_is_timeout_noseq(struct io_kiocb *req) +{ + struct io_timeout *timeout = io_kiocb_to_cmd(req); + + return !timeout->off; +} + +static inline void io_put_req(struct io_kiocb *req) +{ + if (req_ref_put_and_test(req)) { + io_queue_next(req); + io_free_req(req); + } +} + +static void io_kill_timeout(struct io_kiocb *req, int status) + __must_hold(&req->ctx->completion_lock) + __must_hold(&req->ctx->timeout_lock) +{ + struct io_timeout_data *io = req->async_data; + + if (hrtimer_try_to_cancel(&io->timer) != -1) { + struct io_timeout *timeout = io_kiocb_to_cmd(req); + + if (status) + req_set_fail(req); + atomic_set(&req->ctx->cq_timeouts, + atomic_read(&req->ctx->cq_timeouts) + 1); + list_del_init(&timeout->list); + io_req_tw_post_queue(req, status, 0); + } +} + +__cold void io_flush_timeouts(struct io_ring_ctx *ctx) + __must_hold(&ctx->completion_lock) +{ + u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + struct io_timeout *timeout, *tmp; + + spin_lock_irq(&ctx->timeout_lock); + list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { + struct io_kiocb *req = cmd_to_io_kiocb(timeout); + u32 events_needed, events_got; + + if (io_is_timeout_noseq(req)) + break; + + /* + * Since seq can easily wrap around over time, subtract + * the last seq at which timeouts were flushed before comparing. + * Assuming not more than 2^31-1 events have happened since, + * these subtractions won't have wrapped, so we can check if + * target is in [last_seq, current_seq] by comparing the two.
+ */ + events_needed = timeout->target_seq - ctx->cq_last_tm_flush; + events_got = seq - ctx->cq_last_tm_flush; + if (events_got < events_needed) + break; + + io_kill_timeout(req, 0); + } + ctx->cq_last_tm_flush = seq; + spin_unlock_irq(&ctx->timeout_lock); +} + +static void io_fail_links(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) +{ + struct io_kiocb *nxt, *link = req->link; + bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES; + + req->link = NULL; + while (link) { + long res = -ECANCELED; + + if (link->flags & REQ_F_FAIL) + res = link->cqe.res; + + nxt = link->link; + link->link = NULL; + + trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data, + req->opcode, link); + + if (ignore_cqes) + link->flags |= REQ_F_CQE_SKIP; + else + link->flags &= ~REQ_F_CQE_SKIP; + io_req_set_res(link, res, 0); + __io_req_complete_post(link); + link = nxt; + } +} + +static inline void io_remove_next_linked(struct io_kiocb *req) +{ + struct io_kiocb *nxt = req->link; + + req->link = nxt->link; + nxt->link = NULL; +} + +bool io_disarm_next(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) +{ + struct io_kiocb *link = NULL; + bool posted = false; + + if (req->flags & REQ_F_ARM_LTIMEOUT) { + link = req->link; + req->flags &= ~REQ_F_ARM_LTIMEOUT; + if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { + io_remove_next_linked(req); + io_req_tw_post_queue(link, -ECANCELED, 0); + posted = true; + } + } else if (req->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->timeout_lock); + link = io_disarm_linked_timeout(req); + spin_unlock_irq(&ctx->timeout_lock); + if (link) { + posted = true; + io_req_tw_post_queue(link, -ECANCELED, 0); + } + } + if (unlikely((req->flags & REQ_F_FAIL) && + !(req->flags & REQ_F_HARDLINK))) { + posted |= (req->link != NULL); + io_fail_links(req); + } + return posted; +} + +struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req, + struct io_kiocb *link) + __must_hold(&req->ctx->completion_lock) + __must_hold(&req->ctx->timeout_lock) +{ + struct io_timeout_data *io = link->async_data; + struct io_timeout *timeout = io_kiocb_to_cmd(link); + + io_remove_next_linked(req); + timeout->head = NULL; + if (hrtimer_try_to_cancel(&io->timer) != -1) { + list_del(&timeout->list); + return link; + } + + return NULL; +} + +static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) +{ + struct io_timeout_data *data = container_of(timer, + struct io_timeout_data, timer); + struct io_kiocb *req = data->req; + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->timeout_lock, flags); + list_del_init(&timeout->list); + atomic_set(&req->ctx->cq_timeouts, + atomic_read(&req->ctx->cq_timeouts) + 1); + spin_unlock_irqrestore(&ctx->timeout_lock, flags); + + if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS)) + req_set_fail(req); + + io_req_set_res(req, -ETIME, 0); + req->io_task_work.func = io_req_task_complete; + io_req_task_work_add(req); + return HRTIMER_NORESTART; +} + +static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx, + struct io_cancel_data *cd) + __must_hold(&ctx->timeout_lock) +{ + struct io_timeout *timeout; + struct io_timeout_data *io; + struct io_kiocb *req = NULL; + + list_for_each_entry(timeout, &ctx->timeout_list, list) { + struct io_kiocb *tmp = cmd_to_io_kiocb(timeout); + + if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) && + cd->data != tmp->cqe.user_data) + continue; + if (cd->flags & 
(IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) { + if (cd->seq == tmp->work.cancel_seq) + continue; + tmp->work.cancel_seq = cd->seq; + } + req = tmp; + break; + } + if (!req) + return ERR_PTR(-ENOENT); + + io = req->async_data; + if (hrtimer_try_to_cancel(&io->timer) == -1) + return ERR_PTR(-EALREADY); + timeout = io_kiocb_to_cmd(req); + list_del_init(&timeout->list); + return req; +} + +int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) + __must_hold(&ctx->completion_lock) +{ + struct io_kiocb *req; + + spin_lock_irq(&ctx->timeout_lock); + req = io_timeout_extract(ctx, cd); + spin_unlock_irq(&ctx->timeout_lock); + + if (IS_ERR(req)) + return PTR_ERR(req); + io_req_task_queue_fail(req, -ECANCELED); + return 0; +} + +static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) +{ + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_kiocb *prev = timeout->prev; + int ret = -ENOENT; + + if (prev) { + if (!(req->task->flags & PF_EXITING)) { + struct io_cancel_data cd = { + .ctx = req->ctx, + .data = prev->cqe.user_data, + }; + + ret = io_try_cancel(req, &cd); + } + io_req_set_res(req, ret ?: -ETIME, 0); + io_req_complete_post(req); + io_put_req(prev); + } else { + io_req_set_res(req, -ETIME, 0); + io_req_complete_post(req); + } +} + +static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) +{ + struct io_timeout_data *data = container_of(timer, + struct io_timeout_data, timer); + struct io_kiocb *prev, *req = data->req; + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->timeout_lock, flags); + prev = timeout->head; + timeout->head = NULL; + + /* + * We don't expect the list to be empty, that will only happen if we + * race with the completion of the linked work. 
+ */ + if (prev) { + io_remove_next_linked(prev); + if (!req_ref_inc_not_zero(prev)) + prev = NULL; + } + list_del(&timeout->list); + timeout->prev = prev; + spin_unlock_irqrestore(&ctx->timeout_lock, flags); + + req->io_task_work.func = io_req_task_link_timeout; + io_req_task_work_add(req); + return HRTIMER_NORESTART; +} + +static clockid_t io_timeout_get_clock(struct io_timeout_data *data) +{ + switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { + case IORING_TIMEOUT_BOOTTIME: + return CLOCK_BOOTTIME; + case IORING_TIMEOUT_REALTIME: + return CLOCK_REALTIME; + default: + /* can't happen, vetted at prep time */ + WARN_ON_ONCE(1); + fallthrough; + case 0: + return CLOCK_MONOTONIC; + } +} + +static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, + struct timespec64 *ts, enum hrtimer_mode mode) + __must_hold(&ctx->timeout_lock) +{ + struct io_timeout_data *io; + struct io_timeout *timeout; + struct io_kiocb *req = NULL; + + list_for_each_entry(timeout, &ctx->ltimeout_list, list) { + struct io_kiocb *tmp = cmd_to_io_kiocb(timeout); + + if (user_data == tmp->cqe.user_data) { + req = tmp; + break; + } + } + if (!req) + return -ENOENT; + + io = req->async_data; + if (hrtimer_try_to_cancel(&io->timer) == -1) + return -EALREADY; + hrtimer_init(&io->timer, io_timeout_get_clock(io), mode); + io->timer.function = io_link_timeout_fn; + hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); + return 0; +} + +static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, + struct timespec64 *ts, enum hrtimer_mode mode) + __must_hold(&ctx->timeout_lock) +{ + struct io_cancel_data cd = { .data = user_data, }; + struct io_kiocb *req = io_timeout_extract(ctx, &cd); + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_timeout_data *data; + + if (IS_ERR(req)) + return PTR_ERR(req); + + timeout->off = 0; /* noseq */ + data = req->async_data; + list_add_tail(&timeout->list, &ctx->timeout_list); + hrtimer_init(&data->timer, io_timeout_get_clock(data), mode); + data->timer.function = io_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode); + return 0; +} + +int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_timeout_rem *tr = io_kiocb_to_cmd(req); + + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; + if (sqe->buf_index || sqe->len || sqe->splice_fd_in) + return -EINVAL; + + tr->ltimeout = false; + tr->addr = READ_ONCE(sqe->addr); + tr->flags = READ_ONCE(sqe->timeout_flags); + if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) { + if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1) + return -EINVAL; + if (tr->flags & IORING_LINK_TIMEOUT_UPDATE) + tr->ltimeout = true; + if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) + return -EINVAL; + if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) + return -EFAULT; + if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0) + return -EINVAL; + } else if (tr->flags) { + /* timeout removal doesn't support flags */ + return -EINVAL; + } + + return 0; +} + +static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags) +{ + return (flags & IORING_TIMEOUT_ABS) ? 
HRTIMER_MODE_ABS + : HRTIMER_MODE_REL; +} + +/* + * Remove or update an existing timeout command + */ +int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_timeout_rem *tr = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + int ret; + + if (!(tr->flags & IORING_TIMEOUT_UPDATE)) { + struct io_cancel_data cd = { .data = tr->addr, }; + + spin_lock(&ctx->completion_lock); + ret = io_timeout_cancel(ctx, &cd); + spin_unlock(&ctx->completion_lock); + } else { + enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags); + + spin_lock_irq(&ctx->timeout_lock); + if (tr->ltimeout) + ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); + else + ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); + spin_unlock_irq(&ctx->timeout_lock); + } + + if (ret < 0) + req_set_fail(req); + io_req_set_res(req, ret, 0); + return IOU_OK; +} + +static int __io_timeout_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe, + bool is_timeout_link) +{ + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_timeout_data *data; + unsigned flags; + u32 off = READ_ONCE(sqe->off); + + if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in) + return -EINVAL; + if (off && is_timeout_link) + return -EINVAL; + flags = READ_ONCE(sqe->timeout_flags); + if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK | + IORING_TIMEOUT_ETIME_SUCCESS)) + return -EINVAL; + /* more than one clock specified is invalid, obviously */ + if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) + return -EINVAL; + + INIT_LIST_HEAD(&timeout->list); + timeout->off = off; + if (unlikely(off && !req->ctx->off_timeout_used)) + req->ctx->off_timeout_used = true; + + if (WARN_ON_ONCE(req_has_async_data(req))) + return -EFAULT; + if (io_alloc_async_data(req)) + return -ENOMEM; + + data = req->async_data; + data->req = req; + data->flags = flags; + + if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) + return -EFAULT; + + if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) + return -EINVAL; + + INIT_LIST_HEAD(&timeout->list); + data->mode = io_translate_timeout_mode(flags); + hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); + + if (is_timeout_link) { + struct io_submit_link *link = &req->ctx->submit_state.link; + + if (!link->head) + return -EINVAL; + if (link->last->opcode == IORING_OP_LINK_TIMEOUT) + return -EINVAL; + timeout->head = link->last; + link->last->flags |= REQ_F_ARM_LTIMEOUT; + } + return 0; +} + +int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + return __io_timeout_prep(req, sqe, false); +} + +int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + return __io_timeout_prep(req, sqe, true); +} + +int io_timeout(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + struct io_timeout_data *data = req->async_data; + struct list_head *entry; + u32 tail, off = timeout->off; + + spin_lock_irq(&ctx->timeout_lock); + + /* + * sqe->off holds how many events that need to occur for this + * timeout event to be satisfied. If it isn't set, then this is + * a pure timeout request, sequence isn't used. + */ + if (io_is_timeout_noseq(req)) { + entry = ctx->timeout_list.prev; + goto add; + } + + tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + timeout->target_seq = tail + off; + + /* Update the last seq here in case io_flush_timeouts() hasn't. 
+ * This is safe because ->completion_lock is held, and submissions + * and completions are never mixed in the same ->completion_lock section. + */ + ctx->cq_last_tm_flush = tail; + + /* + * Insertion sort, ensuring the first entry in the list is always + * the one we need first. + */ + list_for_each_prev(entry, &ctx->timeout_list) { + struct io_timeout *nextt = list_entry(entry, struct io_timeout, list); + struct io_kiocb *nxt = cmd_to_io_kiocb(nextt); + + if (io_is_timeout_noseq(nxt)) + continue; + /* nxt.seq is behind @tail, otherwise would've been completed */ + if (off >= nextt->target_seq - tail) + break; + } +add: + list_add(&timeout->list, entry); + data->timer.function = io_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); + spin_unlock_irq(&ctx->timeout_lock); + return IOU_ISSUE_SKIP_COMPLETE; +} + +void io_queue_linked_timeout(struct io_kiocb *req) +{ + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->timeout_lock); + /* + * If the back reference is NULL, then our linked request finished + * before we got a chance to setup the timer + */ + if (timeout->head) { + struct io_timeout_data *data = req->async_data; + + data->timer.function = io_link_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), + data->mode); + list_add_tail(&timeout->list, &ctx->ltimeout_list); + } + spin_unlock_irq(&ctx->timeout_lock); + /* drop submission reference */ + io_put_req(req); +} + +static bool io_match_task(struct io_kiocb *head, struct task_struct *task, + bool cancel_all) + __must_hold(&req->ctx->timeout_lock) +{ + struct io_kiocb *req; + + if (task && head->task != task) + return false; + if (cancel_all) + return true; + + io_for_each_link(req, head) { + if (req->flags & REQ_F_INFLIGHT) + return true; + } + return false; +} + +/* Returns true if we found and killed one or more timeouts */ +__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, + bool cancel_all) +{ + struct io_timeout *timeout, *tmp; + int canceled = 0; + + spin_lock(&ctx->completion_lock); + spin_lock_irq(&ctx->timeout_lock); + list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { + struct io_kiocb *req = cmd_to_io_kiocb(timeout); + + if (io_match_task(req, tsk, cancel_all)) { + io_kill_timeout(req, -ECANCELED); + canceled++; + } + } + spin_unlock_irq(&ctx->timeout_lock); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (canceled != 0) + io_cqring_ev_posted(ctx); + return canceled != 0; +} diff --git a/io_uring/timeout.h b/io_uring/timeout.h new file mode 100644 index 0000000000000000000000000000000000000000..dd7cfb0d936671a6e63a82b2e148d000ee0d9d8f --- /dev/null +++ b/io_uring/timeout.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 + +struct io_timeout_data { + struct io_kiocb *req; + struct hrtimer timer; + struct timespec64 ts; + enum hrtimer_mode mode; + u32 flags; +}; + +struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req, + struct io_kiocb *link); + +static inline struct io_kiocb *io_disarm_linked_timeout(struct io_kiocb *req) +{ + struct io_kiocb *link = req->link; + + if (link && link->opcode == IORING_OP_LINK_TIMEOUT) + return __io_disarm_linked_timeout(req, link); + + return NULL; +} + +__cold void io_flush_timeouts(struct io_ring_ctx *ctx); +int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd); +__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, + bool cancel_all); +void 
io_queue_linked_timeout(struct io_kiocb *req); +bool io_disarm_next(struct io_kiocb *req); + +int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_timeout(struct io_kiocb *req, unsigned int issue_flags); +int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags);
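
Illustrative note (not part of the patch): the code moved into io_uring/timeout.c backs IORING_OP_TIMEOUT, IORING_OP_TIMEOUT_REMOVE and IORING_OP_LINK_TIMEOUT. As a rough userspace sketch of the simplest path, assuming liburing is installed, a pure timeout (sqe->off == 0, the io_is_timeout_noseq() case) is armed by io_timeout(), fires in io_timeout_fn() and completes with -ETIME; error handling is trimmed for brevity:

#include <stdio.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* count == 0: pure timeout, no completion-count trigger */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_timeout(sqe, &ts, 0, 0);
	io_uring_submit(&ring);

	/* the CQE res is expected to be -ETIME once the hrtimer expires */
	if (!io_uring_wait_cqe(&ring, &cqe)) {
		printf("timeout completed, res=%d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}

Linked timeouts (io_queue_linked_timeout()/io_link_timeout_fn()) follow the same pattern, with io_uring_prep_link_timeout() queued immediately after an SQE that carries IOSQE_IO_LINK.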