Merge tag 'io_uring-5.6-2020-03-07' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe: "Here are a few io_uring fixes that should go into this release. This contains: - Removal of (now) unused io_wq_flush() and associated flag (Pavel) - Fix cancelation lockup with linked timeouts (Pavel) - Fix for potential use-after-free when freeing percpu ref for fixed file sets - io-wq cancelation fixups (Pavel)" * tag 'io_uring-5.6-2020-03-07' of git://git.kernel.dk/linux-block: io_uring: fix lockup with timeouts io_uring: free fixed_file_data after RCU grace period io-wq: remove io_wq_flush and IO_WQ_WORK_INTERNAL io-wq: fix IO_WQ_WORK_NO_CANCEL cancellation

Merge tag 'io_uring-5.6-2020-03-07' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe: "Here are a few io_uring fixes that should go into this release. This contains: - Removal of (now) unused io_wq_flush() and associated flag (Pavel) - Fix cancelation lockup with linked timeouts (Pavel) - Fix for potential use-after-free when freeing percpu ref for fixed file sets - io-wq cancelation fixups (Pavel)" * tag 'io_uring-5.6-2020-03-07' of git://git.kernel.dk/linux-block: io_uring: fix lockup with timeouts io_uring: free fixed_file_data after RCU grace period io-wq: remove io_wq_flush and IO_WQ_WORK_INTERNAL io-wq: fix IO_WQ_WORK_NO_CANCEL cancellation
c2003765 · Linus Torvalds · 5dfcc139 · f0e20b89 · c2003765 · c2003765
隐藏空白更改
内联并排

Showing with 38 addition and 47 deletion

fs/io-wq.c fs/io-wq.c +15 -43

fs/io-wq.h fs/io-wq.h +0 -2

fs/io_uring.c fs/io_uring.c +23 -2

未找到文件。
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -502,7 +502,7 @@ static void io_worker_handle_work(struct io_worker *worker)
 		if (worker->mm)
 			work->flags |= IO_WQ_WORK_HAS_MM;

-		if (wq->get_work && !(work->flags & IO_WQ_WORK_INTERNAL)) {
+		if (wq->get_work) {
 			put_work = work;
 			wq->get_work(work);
 		}
@@ -747,6 +747,17 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
 	return true;
 }

+static void io_run_cancel(struct io_wq_work *work)
+{
+	do {
+		struct io_wq_work *old_work = work;
+
+		work->flags |= IO_WQ_WORK_CANCEL;
+		work->func(&work);
+		work = (work == old_work) ? NULL : work;
+	} while (work);
+}
+
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 {
 	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
@@ -760,8 +771,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 	 * It's close enough to not be an issue, fork() has the same delay.
 	 */
 	if (unlikely(!io_wq_can_queue(wqe, acct, work))) {
-		work->flags |= IO_WQ_WORK_CANCEL;
-		work->func(&work);
+		io_run_cancel(work);
 		return;
 	}

@@ -900,8 +910,7 @@ static enum io_wq_cancel io_wqe_cancel_cb_work(struct io_wqe *wqe,
 	spin_unlock_irqrestore(&wqe->lock, flags);

 	if (found) {
-		work->flags |= IO_WQ_WORK_CANCEL;
-		work->func(&work);
+		io_run_cancel(work);
 		return IO_WQ_CANCEL_OK;
 	}

@@ -976,8 +985,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
 	spin_unlock_irqrestore(&wqe->lock, flags);

 	if (found) {
-		work->flags |= IO_WQ_WORK_CANCEL;
-		work->func(&work);
+		io_run_cancel(work);
 		return IO_WQ_CANCEL_OK;
 	}

@@ -1049,42 +1057,6 @@ enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid)
 	return ret;
 }

-struct io_wq_flush_data {
-	struct io_wq_work work;
-	struct completion done;
-};
-
-static void io_wq_flush_func(struct io_wq_work **workptr)
-{
-	struct io_wq_work *work = *workptr;
-	struct io_wq_flush_data *data;
-
-	data = container_of(work, struct io_wq_flush_data, work);
-	complete(&data->done);
-}
-
-/*
- * Doesn't wait for previously queued work to finish. When this completes,
- * it just means that previously queued work was started.
- */
-void io_wq_flush(struct io_wq *wq)
-{
-	struct io_wq_flush_data data;
-	int node;
-
-	for_each_node(node) {
-		struct io_wqe *wqe = wq->wqes[node];
-
-		if (!node_online(node))
-			continue;
-		init_completion(&data.done);
-		INIT_IO_WORK(&data.work, io_wq_flush_func);
-		data.work.flags |= IO_WQ_WORK_INTERNAL;
-		io_wqe_enqueue(wqe, &data.work);
-		wait_for_completion(&data.done);
-	}
-}
-
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 {
 	int ret = -ENOMEM, node;

--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -8,7 +8,6 @@ enum {
 	IO_WQ_WORK_HAS_MM	= 2,
 	IO_WQ_WORK_HASHED	= 4,
 	IO_WQ_WORK_UNBOUND	= 32,
-	IO_WQ_WORK_INTERNAL	= 64,
 	IO_WQ_WORK_CB		= 128,
 	IO_WQ_WORK_NO_CANCEL	= 256,
 	IO_WQ_WORK_CONCURRENT	= 512,
@@ -100,7 +99,6 @@ void io_wq_destroy(struct io_wq *wq);

 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val);
-void io_wq_flush(struct io_wq *wq);

 void io_wq_cancel_all(struct io_wq *wq);
 enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);

--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -191,6 +191,7 @@ struct fixed_file_data {
 	struct llist_head		put_llist;
 	struct work_struct		ref_work;
 	struct completion		done;
+	struct rcu_head			rcu;
 };

 struct io_ring_ctx {
@@ -999,6 +1000,7 @@ static void io_kill_timeout(struct io_kiocb *req)
 	if (ret != -1) {
 		atomic_inc(&req->ctx->cq_timeouts);
 		list_del_init(&req->list);
+		req->flags |= REQ_F_COMP_LOCKED;
 		io_cqring_fill_event(req, 0);
 		io_put_req(req);
 	}
@@ -5329,6 +5331,26 @@ static void io_file_ref_kill(struct percpu_ref *ref)
 	complete(&data->done);
 }

+static void __io_file_ref_exit_and_free(struct rcu_head *rcu)
+{
+	struct fixed_file_data *data = container_of(rcu, struct fixed_file_data,
+							rcu);
+	percpu_ref_exit(&data->refs);
+	kfree(data);
+}
+
+static void io_file_ref_exit_and_free(struct rcu_head *rcu)
+{
+	/*
+	 * We need to order our exit+free call against the potentially
+	 * existing call_rcu() for switching to atomic. One way to do that
+	 * is to have this rcu callback queue the final put and free, as we
+	 * could otherwise have a pre-existing atomic switch complete _after_
+	 * the free callback we queued.
+	 */
+	call_rcu(rcu, __io_file_ref_exit_and_free);
+}
+
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
 	struct fixed_file_data *data = ctx->file_data;
@@ -5341,14 +5363,13 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 	flush_work(&data->ref_work);
 	wait_for_completion(&data->done);
 	io_ring_file_ref_flush(data);
-	percpu_ref_exit(&data->refs);

 	__io_sqe_files_unregister(ctx);
 	nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);
 	for (i = 0; i < nr_tables; i++)
 		kfree(data->table[i].files);
 	kfree(data->table);
-	kfree(data);
+	call_rcu(&data->rcu, io_file_ref_exit_and_free);
 	ctx->file_data = NULL;
 	ctx->nr_user_files = 0;
 	return 0;