io_uring: fix io_kiocb.flags modification race in IOPOLL mode

to #28736503 commit 65a6543da386838f935d2f03f452c5c0acff2a68 upstream While testing io_uring in arm, we found sometimes io_sq_thread() keeps polling io requests even though there are not inflight io requests in block layer. After some investigations, found a possible race about io_kiocb.flags, see below race codes: 1) in the end of io_write() or io_read() req->flags &= ~REQ_F_NEED_CLEANUP; kfree(iovec); return ret; 2) in io_complete_rw_iopoll() if (res != -EAGAIN) req->flags |= REQ_F_IOPOLL_COMPLETED; In IOPOLL mode, io requests still maybe completed by interrupt, then above codes are not safe, concurrent modifications to req->flags, which is not protected by lock or is not atomic modifications. I also had disassemble io_complete_rw_iopoll() in arm: req->flags |= REQ_F_IOPOLL_COMPLETED; 0xffff000008387b18 <+76>: ldr w0, [x19,#104] 0xffff000008387b1c <+80>: orr w0, w0, #0x1000 0xffff000008387b20 <+84>: str w0, [x19,#104] Seems that the "req->flags |= REQ_F_IOPOLL_COMPLETED;" is load and modification, two instructions, which obviously is not atomic. To fix this issue, add a new iopoll_completed in io_kiocb to indicate whether io request is completed. Signed-off-by: N Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com> Acked-by: N Joseph Qi <joseph.qi@linux.alibaba.com> Signed-off-by: N Jens Axboe <axboe@kernel.dk> Signed-off-by: N Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com> Acked-by: N Joseph Qi <joseph.qi@linux.alibaba.com>

io_uring: fix io_kiocb.flags modification race in IOPOLL mode
to #28736503 commit 65a6543da386838f935d2f03f452c5c0acff2a68 upstream While testing io_uring in arm, we found sometimes io_sq_thread() keeps polling io requests even though there are not inflight io requests in block layer. After some investigations, found a possible race about io_kiocb.flags, see below race codes: 1) in the end of io_write() or io_read() req->flags &= ~REQ_F_NEED_CLEANUP; kfree(iovec); return ret; 2) in io_complete_rw_iopoll() if (res != -EAGAIN) req->flags |= REQ_F_IOPOLL_COMPLETED; In IOPOLL mode, io requests still maybe completed by interrupt, then above codes are not safe, concurrent modifications to req->flags, which is not protected by lock or is not atomic modifications. I also had disassemble io_complete_rw_iopoll() in arm: req->flags |= REQ_F_IOPOLL_COMPLETED; 0xffff000008387b18 <+76>: ldr w0, [x19,#104] 0xffff000008387b1c <+80>: orr w0, w0, #0x1000 0xffff000008387b20 <+84>: str w0, [x19,#104] Seems that the "req->flags |= REQ_F_IOPOLL_COMPLETED;" is load and modification, two instructions, which obviously is not atomic. To fix this issue, add a new iopoll_completed in io_kiocb to indicate whether io request is completed. Signed-off-by: N Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com> Acked-by: N Joseph Qi <joseph.qi@linux.alibaba.com> Signed-off-by: N Jens Axboe <axboe@kernel.dk> Signed-off-by: N Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com> Acked-by: N Joseph Qi <joseph.qi@linux.alibaba.com>
d3101fc3 · Xiaoguang Wang · 9c90c6a4 · d3101fc3
隐藏空白更改
内联并排

Showing with 6 addition and 6 deletion

fs/io_uring.c fs/io_uring.c +6 -6

未找到文件。
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -519,7 +519,6 @@ enum {
 	REQ_F_INFLIGHT_BIT,
 	REQ_F_CUR_POS_BIT,
 	REQ_F_NOWAIT_BIT,
-	REQ_F_IOPOLL_COMPLETED_BIT,
 	REQ_F_LINK_TIMEOUT_BIT,
 	REQ_F_TIMEOUT_BIT,
 	REQ_F_ISREG_BIT,
@@ -564,8 +563,6 @@ enum {
 	REQ_F_CUR_POS		= BIT(REQ_F_CUR_POS_BIT),
 	/* must not punt to workers */
 	REQ_F_NOWAIT		= BIT(REQ_F_NOWAIT_BIT),
-	/* polled IO has completed */
-	REQ_F_IOPOLL_COMPLETED	= BIT(REQ_F_IOPOLL_COMPLETED_BIT),
 	/* has linked timeout */
 	REQ_F_LINK_TIMEOUT	= BIT(REQ_F_LINK_TIMEOUT_BIT),
 	/* timeout request */
@@ -630,6 +627,8 @@ struct io_kiocb {
 	struct io_async_ctx		*io;
 	int				cflags;
 	u8				opcode;
+	/* polled IO has completed */
+	u8				iopoll_completed;
 	u16				buf_index;
@@ -1789,7 +1788,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		 * If we find a request that requires polling, break out
 		 * and complete those lists first, if we have entries there.
 		 */
-		if (req->flags & REQ_F_IOPOLL_COMPLETED) {
+		if (READ_ONCE(req->iopoll_completed)) {
 			list_move_tail(&req->list, &done);
 			continue;
 		}
@@ -1970,7 +1969,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 		req_set_fail_links(req);
 	req->result = res;
 	if (res != -EAGAIN)
-		req->flags |= REQ_F_IOPOLL_COMPLETED;
+		WRITE_ONCE(req->iopoll_completed, 1);
 }
 /*
@@ -2003,7 +2002,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
 	 * For fast devices, IO may have already completed. If it has, add
 	 * it to the front so we find it first.
 	 */
-	if (req->flags & REQ_F_IOPOLL_COMPLETED)
+	if (READ_ONCE(req->iopoll_completed))
 		list_add(&req->list, &ctx->poll_list);
 	else
 		list_add_tail(&req->list, &ctx->poll_list);
@@ -2131,6 +2130,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		kiocb->ki_flags |= IOCB_HIPRI;
 		kiocb->ki_complete = io_complete_rw_iopoll;
 		req->result = 0;
+		req->iopoll_completed = 0;
 	} else {
 		if (kiocb->ki_flags & IOCB_HIPRI)
 			return -EINVAL;