io_uring: manage provided buffers strictly ordered

Workloads using provided buffers benefit from using and returning buffers in the right order, and so do TLBs for that matter. Manage the internal buffer list in a straight list, rather than use the head buffer as the insertion node. Use a hashed list for the buffer group IDs instead of xarray, the overhead is much lower this way. xarray provides internal locking and other trickery that is handy for some use cases, but io_uring already locks internally for the buffer manipulation and needs none of that. This is good for about a 2% reduction in overhead, a combination of the improved management and the fact that the workload has an easier time bundling back provided buffers. Signed-off-by: Jens Axboe <axboe@kernel.dk>

io_uring: manage provided buffers strictly ordered
Workloads using provided buffers benefit from using and returning buffers in the right order, and so do TLBs for that matter. Manage the internal buffer list in a straight list, rather than use the head buffer as the insertion node. Use a hashed list for the buffer group IDs instead of xarray, the overhead is much lower this way. xarray provides internal locking and other trickery that is handy for some use cases, but io_uring already locks internally for the buffer manipulation and needs none of that. This is good for about a 2% reduction in overhead, a combination of the improved management and the fact that the workload has an easier time bundling back provided buffers. Signed-off-by: Jens Axboe <axboe@kernel.dk>
dbc7d452 · Jens Axboe · 9aa8dfde · dbc7d452
隐藏空白更改
内联并排

Showing 1 changed file with 92 additions and 62 deletions

fs/io_uring.c fs/io_uring.c +92 -62

未找到文件。
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -264,6 +264,12 @@ struct io_rsrc_data {
 	bool				quiesce;
 };
+struct io_buffer_list {			/* one group of provided buffers */
+	struct list_head list;		/* links this group into a ctx->io_buffers hash bucket */
+	struct list_head buf_list;	/* head of the io_buffer entries queued for this group */
+	__u16 bgid;			/* buffer group ID; the key compared on lookup */
+};
 struct io_buffer {
 	struct list_head list;
 	__u64 addr;
@@ -334,6 +340,8 @@ struct io_ev_fd {
 	struct rcu_head		rcu;
 };
+#define IO_BUFFERS_HASH_BITS	5
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
@@ -386,7 +394,7 @@ struct io_ring_ctx {
 		struct list_head	timeout_list;
 		struct list_head	ltimeout_list;
 		struct list_head	cq_overflow_list;
-		struct xarray		io_buffers;
+		struct list_head	*io_buffers;
 		struct list_head	io_buffers_cache;
 		struct list_head	apoll_cache;
 		struct xarray		personalities;
@@ -1361,10 +1369,25 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
 	return cflags;
 }
+/*
+ * Return the buffer list for group @bgid, or NULL if the hash bucket that
+ * @bgid maps to holds no matching entry. A @bgid of -1U matches the first
+ * entry found in that bucket.
+ */
+static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
+						 unsigned int bgid)
+{
+	const unsigned int slot = hash_32(bgid, IO_BUFFERS_HASH_BITS);
+	struct io_buffer_list *cur;
+	list_for_each_entry(cur, &ctx->io_buffers[slot], list) {
+		if (bgid == -1U || cur->bgid == bgid)
+			return cur;
+	}
+	return NULL;
+}
 static void io_kbuf_recycle(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_buffer *head, *buf;
+	struct io_buffer_list *bl;
+	struct io_buffer *buf;
 	if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
 		return;
@@ -1372,21 +1395,8 @@ static void io_kbuf_recycle(struct io_kiocb *req)
 	lockdep_assert_held(&ctx->uring_lock);
 	buf = req->kbuf;
+	bl = io_buffer_get_list(ctx, buf->bgid);
-	head = xa_load(&ctx->io_buffers, buf->bgid);
+	list_add(&buf->list, &bl->buf_list);
-	if (head) {
-		list_add(&buf->list, &head->list);
-	} else {
-		int ret;
-		INIT_LIST_HEAD(&buf->list);
-		/* if we fail, just leave buffer attached */
-		ret = xa_insert(&ctx->io_buffers, buf->bgid, buf, GFP_KERNEL);
-		if (unlikely(ret < 0))
-			return;
-	}
 	req->flags &= ~REQ_F_BUFFER_SELECTED;
 	req->kbuf = NULL;
 }
@@ -1501,7 +1511,7 @@ static __cold void io_fallback_req_func(struct work_struct *work)
 static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
 	struct io_ring_ctx *ctx;
-	int hash_bits;
+	int i, hash_bits;
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -1528,6 +1538,13 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	/* set invalid range, so io_import_fixed() fails meeting it */
 	ctx->dummy_ubuf->ubuf = -1UL;
+	ctx->io_buffers = kcalloc(1U << IO_BUFFERS_HASH_BITS,
+					sizeof(struct list_head), GFP_KERNEL);
+	if (!ctx->io_buffers)
+		goto err;
+	for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++)
+		INIT_LIST_HEAD(&ctx->io_buffers[i]);
 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
 		goto err;
@@ -1539,7 +1556,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
 	INIT_LIST_HEAD(&ctx->apoll_cache);
 	init_completion(&ctx->ref_comp);
-	xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
 	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->cq_wait);
@@ -1568,6 +1584,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 err:
 	kfree(ctx->dummy_ubuf);
 	kfree(ctx->cancel_hash);
+	kfree(ctx->io_buffers);
 	kfree(ctx);
 	return NULL;
 }
@@ -3351,30 +3368,36 @@ static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
 		mutex_lock(&ctx->uring_lock);
 }
+/*
+ * Set up @bl as an empty buffer list for group @bgid and link it into the
+ * ctx->io_buffers hash bucket selected by @bgid.
+ */
+static void io_buffer_add_list(struct io_ring_ctx *ctx,
+			       struct io_buffer_list *bl, unsigned int bgid)
+{
+	const unsigned int slot = hash_32(bgid, IO_BUFFERS_HASH_BITS);
+	bl->bgid = bgid;
+	INIT_LIST_HEAD(&bl->buf_list);
+	list_add(&bl->list, &ctx->io_buffers[slot]);
+}
 static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
 					  int bgid, unsigned int issue_flags)
 {
 	struct io_buffer *kbuf = req->kbuf;
-	struct io_buffer *head;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_buffer_list *bl;
 	if (req->flags & REQ_F_BUFFER_SELECTED)
 		return kbuf;
-	io_ring_submit_lock(req->ctx, needs_lock);
+	io_ring_submit_lock(ctx, needs_lock);
-	lockdep_assert_held(&req->ctx->uring_lock);
+	lockdep_assert_held(&ctx->uring_lock);
-	head = xa_load(&req->ctx->io_buffers, bgid);
+	bl = io_buffer_get_list(ctx, bgid);
-	if (head) {
+	if (bl && !list_empty(&bl->buf_list)) {
-		if (!list_empty(&head->list)) {
+		kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
-			kbuf = list_last_entry(&head->list, struct io_buffer,
+		list_del(&kbuf->list);
-							list);
-			list_del(&kbuf->list);
-		} else {
-			kbuf = head;
-			xa_erase(&req->ctx->io_buffers, bgid);
-		}
 		if (*len > kbuf->len)
 			*len = kbuf->len;
 		req->flags |= REQ_F_BUFFER_SELECTED;
@@ -4669,8 +4692,8 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
 	return 0;
 }
-static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
+static int __io_remove_buffers(struct io_ring_ctx *ctx,
-			       int bgid, unsigned nbufs)
+			       struct io_buffer_list *bl, unsigned nbufs)
 {
 	unsigned i = 0;
@@ -4679,17 +4702,16 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
 		return 0;
 	/* the head kbuf is the list itself */
-	while (!list_empty(&buf->list)) {
+	while (!list_empty(&bl->buf_list)) {
 		struct io_buffer *nxt;
-		nxt = list_first_entry(&buf->list, struct io_buffer, list);
+		nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
 		list_del(&nxt->list);
 		if (++i == nbufs)
 			return i;
 		cond_resched();
 	}
 	i++;
-	xa_erase(&ctx->io_buffers, bgid);
 	return i;
 }
@@ -4698,7 +4720,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_provide_buf *p = &req->pbuf;
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_buffer *head;
+	struct io_buffer_list *bl;
 	int ret = 0;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
@@ -4707,9 +4729,9 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 	lockdep_assert_held(&ctx->uring_lock);
 	ret = -ENOENT;
-	head = xa_load(&ctx->io_buffers, p->bgid);
+	bl = io_buffer_get_list(ctx, p->bgid);
-	if (head)
+	if (bl)
-		ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
+		ret = __io_remove_buffers(ctx, bl, p->nbufs);
 	if (ret < 0)
 		req_set_fail(req);
@@ -4798,7 +4820,7 @@ static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
 }
 static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
-			  struct io_buffer **head)
+			  struct io_buffer_list *bl)
 {
 	struct io_buffer *buf;
 	u64 addr = pbuf->addr;
@@ -4810,30 +4832,24 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
 			break;
 		buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
 					list);
-		list_del(&buf->list);
+		list_move_tail(&buf->list, &bl->buf_list);
 		buf->addr = addr;
 		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
 		buf->bid = bid;
 		buf->bgid = pbuf->bgid;
 		addr += pbuf->len;
 		bid++;
-		if (!*head) {
-			INIT_LIST_HEAD(&buf->list);
-			*head = buf;
-		} else {
-			list_add_tail(&buf->list, &(*head)->list);
-		}
 		cond_resched();
 	}
-	return i ? i : -ENOMEM;
+	return i ? 0 : -ENOMEM;
 }
 static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_provide_buf *p = &req->pbuf;
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_buffer *head, *list;
+	struct io_buffer_list *bl;
 	int ret = 0;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
@@ -4841,14 +4857,18 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 	lockdep_assert_held(&ctx->uring_lock);
-	list = head = xa_load(&ctx->io_buffers, p->bgid);
+	bl = io_buffer_get_list(ctx, p->bgid);
+	if (unlikely(!bl)) {
-	ret = io_add_buffers(ctx, p, &head);
+		bl = kmalloc(sizeof(*bl), GFP_KERNEL);
-	if (ret >= 0 && !list) {
+		if (!bl) {
-		ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
+			ret = -ENOMEM;
-		if (ret < 0)
+			goto err;
-			__io_remove_buffers(ctx, head, p->bgid, -1U);
+		}
+		io_buffer_add_list(ctx, bl, p->bgid);
 	}
+	ret = io_add_buffers(ctx, p, bl);
+err:
 	if (ret < 0)
 		req_set_fail(req);
 	/* complete before unlock, IOPOLL may need the lock */
@@ -9936,11 +9956,20 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 static void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
-	struct io_buffer *buf;
+	int i;
-	unsigned long index;
+	for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++) {
+		struct list_head *list = &ctx->io_buffers[i];
-	xa_for_each(&ctx->io_buffers, index, buf)
+		while (!list_empty(list)) {
-		__io_remove_buffers(ctx, buf, index, -1U);
+			struct io_buffer_list *bl;
+			bl = list_first_entry(list, struct io_buffer_list, list);
+			__io_remove_buffers(ctx, bl, -1U);
+			list_del(&bl->list);
+			kfree(bl);
+		}
+	}
 	while (!list_empty(&ctx->io_buffers_pages)) {
 		struct page *page;
@@ -10049,6 +10078,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_free_napi_list(ctx);
 	kfree(ctx->cancel_hash);
 	kfree(ctx->dummy_ubuf);
+	kfree(ctx->io_buffers);
 	kfree(ctx);
 }