io_uring: split overflow state into SQ and CQ side

to #26323588

commit ad3eb2c89fb24d14ac81f43eff8e85fece2c934d upstream.

We currently check ->cq_overflow_list from both SQ and CQ context, which causes some bouncing of that cache line. Add separate bits of state for this instead, so that the SQ side can check using its own state, and likewise for the CQ side.

This adds ->sq_check_overflow with the SQ state, and ->cq_check_overflow with the CQ state. If we hit an overflow condition, both of these bits are set. Likewise for overflow flush clear, we clear both bits. For the fast path of just checking if there's an overflow condition on either the SQ or CQ side, we can use our own private bit for this.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Acked-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>

io_uring: split overflow state into SQ and CQ side
to #26323588

commit ad3eb2c89fb24d14ac81f43eff8e85fece2c934d upstream.

We currently check ->cq_overflow_list from both SQ and CQ context, which causes some bouncing of that cache line. Add separate bits of state for this instead, so that the SQ side can check using its own state, and likewise for the CQ side.

This adds ->sq_check_overflow with the SQ state, and ->cq_check_overflow with the CQ state. If we hit an overflow condition, both of these bits are set. Likewise for overflow flush clear, we clear both bits. For the fast path of just checking if there's an overflow condition on either the SQ or CQ side, we can use our own private bit for this.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Acked-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
5e8e604e · Jens Axboe · Joseph Qi · 037118ad · 5e8e604e
隐藏空白更改
内联并排

Showing 1 changed file with 27 additions and 13 deletions

fs/io_uring.c fs/io_uring.c +27 -13

未找到文件。
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -224,13 +224,14 @@ struct io_ring_ctx {
 		unsigned		sq_thread_idle;
 		unsigned		cached_sq_dropped;
 		atomic_t		cached_cq_overflow;
-		struct io_uring_sqe	*sq_sqes;
+		unsigned long		sq_check_overflow;
 		struct list_head	defer_list;
 		struct list_head	timeout_list;
 		struct list_head	cq_overflow_list;
 		wait_queue_head_t	inflight_wait;
+		struct io_uring_sqe	*sq_sqes;
 	} ____cacheline_aligned_in_smp;
 	struct io_rings	*rings;
@@ -272,6 +273,7 @@ struct io_ring_ctx {
 		unsigned		cq_entries;
 		unsigned		cq_mask;
 		atomic_t		cq_timeouts;
+		unsigned long		cq_check_overflow;
 		struct wait_queue_head	cq_wait;
 		struct fasync_struct	*cq_fasync;
 		struct eventfd_ctx	*cq_ev_fd;
@@ -950,6 +952,10 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 	}
 	io_commit_cqring(ctx);
+	if (cqe) {
+		clear_bit(0, &ctx->sq_check_overflow);
+		clear_bit(0, &ctx->cq_check_overflow);
+	}
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 	io_cqring_ev_posted(ctx);
@@ -983,6 +989,10 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
 		WRITE_ONCE(ctx->rings->cq_overflow,
 				atomic_inc_return(&ctx->cached_cq_overflow));
 	} else {
+		if (list_empty(&ctx->cq_overflow_list)) {
+			set_bit(0, &ctx->sq_check_overflow);
+			set_bit(0, &ctx->cq_check_overflow);
+		}
 		refcount_inc(&req->refs);
 		req->result = res;
 		list_add_tail(&req->list, &ctx->cq_overflow_list);
@@ -1285,19 +1295,21 @@ static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
 {
 	struct io_rings *rings = ctx->rings;
-	/*
+	if (test_bit(0, &ctx->cq_check_overflow)) {
-	 * noflush == true is from the waitqueue handler, just ensure we wake
+		/*
-	 * up the task, and the next invocation will flush the entries. We
+		 * noflush == true is from the waitqueue handler, just ensure
-	 * cannot safely to it from here.
+		 * we wake up the task, and the next invocation will flush the
-	 */
+		 * entries. We cannot safely do it from here.
-	if (noflush && !list_empty(&ctx->cq_overflow_list))
+		 */
-		return -1U;
+		if (noflush && !list_empty(&ctx->cq_overflow_list))
+			return -1U;
-	io_cqring_overflow_flush(ctx, false);
+		io_cqring_overflow_flush(ctx, false);
+	}
 	/* See comment at the top of this file */
 	smp_rmb();
-	return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
+	return ctx->cached_cq_tail - READ_ONCE(rings->cq.head);
 }
 static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
@@ -4306,9 +4318,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	bool mm_fault = false;
 	/* if we have a backlog and couldn't flush it all, return BUSY */
-	if (!list_empty(&ctx->cq_overflow_list) &&
+	if (test_bit(0, &ctx->sq_check_overflow)) {
-	    !io_cqring_overflow_flush(ctx, false))
+		if (!list_empty(&ctx->cq_overflow_list) &&
-		return -EBUSY;
+		    !io_cqring_overflow_flush(ctx, false))
+			return -EBUSY;
+	}
 	if (nr > IO_PLUG_THRESHOLD) {
 		io_submit_state_start(&state, nr);