提交 777f45c0 编写于 作者: P Pavel Begunkov 提交者: Joseph Qi

io_uring: Fix broken links with offloading

commit fb5ccc98782f654778cb8d96ba8a998304f9a51f upstream.

io_sq_thread() processes sqes in batches of 8 without considering links.
As a result, links will be randomly subdivided.

The easiest way to fix it is to call io_get_sqring() inside
io_submit_sqes(), as io_ring_submit() does.

Downsides:
1. This removes the optimisation of not grabbing mm_struct for fixed files.
2. It submits all sqes in one go, without finer-grained scheduling
with cq processing.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
上级 47be7764
...@@ -734,6 +734,14 @@ static unsigned io_cqring_events(struct io_rings *rings) ...@@ -734,6 +734,14 @@ static unsigned io_cqring_events(struct io_rings *rings)
return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head); return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
} }
/*
 * Return how many submission-queue entries lie between the context's
 * cached SQ head and the ring's current SQ tail.
 *
 * Both counters are unsigned, so the subtraction yields the distance
 * between them even after the tail has wrapped around.
 */
static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
{
	unsigned int sq_tail;

	/*
	 * Acquire-load the tail: any later read of an SQ entry must not be
	 * reordered ahead of observing the tail value that covers it.
	 */
	sq_tail = smp_load_acquire(&ctx->rings->sq.tail);

	return sq_tail - ctx->cached_sq_head;
}
/* /*
* Find and free completed poll iocbs * Find and free completed poll iocbs
*/ */
...@@ -2559,8 +2567,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) ...@@ -2559,8 +2567,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
return false; return false;
} }
static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
unsigned int nr, bool has_user, bool mm_fault) bool has_user, bool mm_fault)
{ {
struct io_submit_state state, *statep = NULL; struct io_submit_state state, *statep = NULL;
struct io_kiocb *link = NULL; struct io_kiocb *link = NULL;
...@@ -2574,6 +2582,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, ...@@ -2574,6 +2582,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
} }
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
struct sqe_submit s;
if (!io_get_sqring(ctx, &s))
break;
/* /*
* If previous wasn't linked and we have a linked command, * If previous wasn't linked and we have a linked command,
* that's the end of the chain. Submit the previous link. * that's the end of the chain. Submit the previous link.
...@@ -2583,9 +2596,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, ...@@ -2583,9 +2596,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
link = NULL; link = NULL;
shadow_req = NULL; shadow_req = NULL;
} }
prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0; prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) { if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) {
if (!shadow_req) { if (!shadow_req) {
shadow_req = io_get_req(ctx, NULL); shadow_req = io_get_req(ctx, NULL);
if (unlikely(!shadow_req)) if (unlikely(!shadow_req))
...@@ -2593,18 +2606,18 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, ...@@ -2593,18 +2606,18 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN); shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
refcount_dec(&shadow_req->refs); refcount_dec(&shadow_req->refs);
} }
shadow_req->sequence = sqes[i].sequence; shadow_req->sequence = s.sequence;
} }
out: out:
if (unlikely(mm_fault)) { if (unlikely(mm_fault)) {
io_cqring_add_event(ctx, sqes[i].sqe->user_data, io_cqring_add_event(ctx, s.sqe->user_data,
-EFAULT); -EFAULT);
} else { } else {
sqes[i].has_user = has_user; s.has_user = has_user;
sqes[i].needs_lock = true; s.needs_lock = true;
sqes[i].needs_fixed_file = true; s.needs_fixed_file = true;
io_submit_sqe(ctx, &sqes[i], statep, &link); io_submit_sqe(ctx, &s, statep, &link);
submitted++; submitted++;
} }
} }
...@@ -2619,7 +2632,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, ...@@ -2619,7 +2632,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
static int io_sq_thread(void *data) static int io_sq_thread(void *data)
{ {
struct sqe_submit sqes[IO_IOPOLL_BATCH];
struct io_ring_ctx *ctx = data; struct io_ring_ctx *ctx = data;
struct mm_struct *cur_mm = NULL; struct mm_struct *cur_mm = NULL;
mm_segment_t old_fs; mm_segment_t old_fs;
...@@ -2634,8 +2646,8 @@ static int io_sq_thread(void *data) ...@@ -2634,8 +2646,8 @@ static int io_sq_thread(void *data)
timeout = inflight = 0; timeout = inflight = 0;
while (!kthread_should_park()) { while (!kthread_should_park()) {
bool all_fixed, mm_fault = false; bool mm_fault = false;
int i; unsigned int to_submit;
if (inflight) { if (inflight) {
unsigned nr_events = 0; unsigned nr_events = 0;
...@@ -2655,7 +2667,8 @@ static int io_sq_thread(void *data) ...@@ -2655,7 +2667,8 @@ static int io_sq_thread(void *data)
timeout = jiffies + ctx->sq_thread_idle; timeout = jiffies + ctx->sq_thread_idle;
} }
if (!io_get_sqring(ctx, &sqes[0])) { to_submit = io_sqring_entries(ctx);
if (!to_submit) {
/* /*
* We're polling. If we're within the defined idle * We're polling. If we're within the defined idle
* period, then let us spin without work before going * period, then let us spin without work before going
...@@ -2686,7 +2699,8 @@ static int io_sq_thread(void *data) ...@@ -2686,7 +2699,8 @@ static int io_sq_thread(void *data)
/* make sure to read SQ tail after writing flags */ /* make sure to read SQ tail after writing flags */
smp_mb(); smp_mb();
if (!io_get_sqring(ctx, &sqes[0])) { to_submit = io_sqring_entries(ctx);
if (!to_submit) {
if (kthread_should_park()) { if (kthread_should_park()) {
finish_wait(&ctx->sqo_wait, &wait); finish_wait(&ctx->sqo_wait, &wait);
break; break;
...@@ -2704,19 +2718,8 @@ static int io_sq_thread(void *data) ...@@ -2704,19 +2718,8 @@ static int io_sq_thread(void *data)
ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
} }
i = 0;
all_fixed = true;
do {
if (all_fixed && io_sqe_needs_user(sqes[i].sqe))
all_fixed = false;
i++;
if (i == ARRAY_SIZE(sqes))
break;
} while (io_get_sqring(ctx, &sqes[i]));
/* Unless all new commands are FIXED regions, grab mm */ /* Unless all new commands are FIXED regions, grab mm */
if (!all_fixed && !cur_mm) { if (!cur_mm) {
mm_fault = !mmget_not_zero(ctx->sqo_mm); mm_fault = !mmget_not_zero(ctx->sqo_mm);
if (!mm_fault) { if (!mm_fault) {
use_mm(ctx->sqo_mm); use_mm(ctx->sqo_mm);
...@@ -2724,8 +2727,9 @@ static int io_sq_thread(void *data) ...@@ -2724,8 +2727,9 @@ static int io_sq_thread(void *data)
} }
} }
inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL, to_submit = min(to_submit, ctx->sq_entries);
mm_fault); inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
mm_fault);
/* Commit SQ ring head once we've consumed all SQEs */ /* Commit SQ ring head once we've consumed all SQEs */
io_commit_sqring(ctx); io_commit_sqring(ctx);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册