io_uring/io-wq: close io-wq full-stop gap

There is an old problem with io-wq cancellation where requests should be killed and are in io-wq but are not discoverable, e.g. in @next_hashed or @linked vars of io_worker_handle_work(). It adds some unreliability to individual request canellation, but also may potentially get __io_uring_cancel() stuck. For instance: 1) An __io_uring_cancel()'s cancellation round have not found any request but there are some as desribed. 2) __io_uring_cancel() goes to sleep 3) Then workers wake up and try to execute those hidden requests that happen to be unbound. As we already cancel all requests of io-wq there, set IO_WQ_BIT_EXIT in advance, so preventing 3) from executing unbound requests. The workers will initially break looping because of getting a signal as they are threads of the dying/exec()'ing user task. Cc: stable@vger.kernel.org Signed-off-by: N Pavel Begunkov <asml.silence@gmail.com> Link: https://lore.kernel.org/r/abfcf8c54cb9e8f7bfbad7e9a0cc5433cc70bdc2.1621781238.git.asml.silence@gmail.comSigned-off-by: N Jens Axboe <axboe@kernel.dk>

io_uring/io-wq: close io-wq full-stop gap
There is an old problem with io-wq cancellation where requests should be killed and are in io-wq but are not discoverable, e.g. in @next_hashed or @linked vars of io_worker_handle_work(). It adds some unreliability to individual request canellation, but also may potentially get __io_uring_cancel() stuck. For instance: 1) An __io_uring_cancel()'s cancellation round have not found any request but there are some as desribed. 2) __io_uring_cancel() goes to sleep 3) Then workers wake up and try to execute those hidden requests that happen to be unbound. As we already cancel all requests of io-wq there, set IO_WQ_BIT_EXIT in advance, so preventing 3) from executing unbound requests. The workers will initially break looping because of getting a signal as they are threads of the dying/exec()'ing user task. Cc: stable@vger.kernel.org Signed-off-by: N Pavel Begunkov <asml.silence@gmail.com> Link: https://lore.kernel.org/r/abfcf8c54cb9e8f7bfbad7e9a0cc5433cc70bdc2.1621781238.git.asml.silence@gmail.comSigned-off-by: N Jens Axboe <axboe@kernel.dk>
17a91051 · Pavel Begunkov · Jens Axboe · ba5ef6dc · 17a91051 · 17a91051
隐藏空白更改
内联并排

Showing with 16 addition and 12 deletion

fs/io-wq.c fs/io-wq.c +9 -11

fs/io-wq.h fs/io-wq.h +1 -1

fs/io_uring.c fs/io_uring.c +6 -0

未找到文件。
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -979,13 +979,16 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
 	return cwd->wqe->wq == data;
 }

+void io_wq_exit_start(struct io_wq *wq)
+{
+	set_bit(IO_WQ_BIT_EXIT, &wq->state);
+}
+
 static void io_wq_exit_workers(struct io_wq *wq)
 {
 	struct callback_head *cb;
 	int node;

-	set_bit(IO_WQ_BIT_EXIT, &wq->state);
-
 	if (!wq->task)
 		return;

@@ -1020,8 +1023,6 @@ static void io_wq_destroy(struct io_wq *wq)

 	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);

-	io_wq_exit_workers(wq);
-
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
 		struct io_cb_cancel_data match = {
@@ -1036,16 +1037,13 @@ static void io_wq_destroy(struct io_wq *wq)
 	kfree(wq);
 }

-void io_wq_put(struct io_wq *wq)
-{
-	if (refcount_dec_and_test(&wq->refs))
-		io_wq_destroy(wq);
-}
-
 void io_wq_put_and_exit(struct io_wq *wq)
 {
+	WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));
+
 	io_wq_exit_workers(wq);
-	io_wq_put(wq);
+	if (refcount_dec_and_test(&wq->refs))
+		io_wq_destroy(wq);
 }

 static bool io_wq_worker_affinity(struct io_worker *worker, void *data)

--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -122,7 +122,7 @@ struct io_wq_data {
 };

 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
-void io_wq_put(struct io_wq *wq);
+void io_wq_exit_start(struct io_wq *wq);
 void io_wq_put_and_exit(struct io_wq *wq);

 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);

--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9078,6 +9078,9 @@ static void io_uring_cancel_sqpoll(struct io_sq_data *sqd)

 	if (!current->io_uring)
 		return;
+	if (tctx->io_wq)
+		io_wq_exit_start(tctx->io_wq);
+
 	WARN_ON_ONCE(!sqd || sqd->thread != current);

 	atomic_inc(&tctx->in_idle);
@@ -9112,6 +9115,9 @@ void __io_uring_cancel(struct files_struct *files)
 	DEFINE_WAIT(wait);
 	s64 inflight;

+	if (tctx->io_wq)
+		io_wq_exit_start(tctx->io_wq);
+
 	/* make sure overflow events are dropped */
 	atomic_inc(&tctx->in_idle);
 	do {