提交 14b146b6 编写于 作者: P Pavel Begunkov 提交者: Jens Axboe

io_uring: notification completion optimisation

We want to use all optimisations that we have for io_uring requests like
completion batching, memory caching and more but for zc notifications.
Fortunately, notification perfectly fit the request model so we can
overlay them onto struct io_kiocb and use all the infratructure.

Most of the fields of struct io_notif natively fits into io_kiocb, so we
replace struct io_notif with struct io_kiocb carrying struct
io_notif_data in the cmd cache line. Then we adapt io_alloc_notif() to
use io_alloc_req()/io_alloc_req_refill(), and kill leftovers of hand
coded caching. __io_notif_complete_tw() is converted to use io_uring's
tw infra.
Signed-off-by: NPavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/9e010125175e80baf51f0ca63bdc7cc6a4a9fa56.1658913593.git.asml.silence@gmail.comSigned-off-by: NJens Axboe <axboe@kernel.dk>
上级 bd1a3783
...@@ -249,9 +249,6 @@ struct io_ring_ctx { ...@@ -249,9 +249,6 @@ struct io_ring_ctx {
struct xarray io_bl_xa; struct xarray io_bl_xa;
struct list_head io_buffers_cache; struct list_head io_buffers_cache;
/* struct io_notif cache, protected by uring_lock */
struct list_head notif_list;
struct io_hash_table cancel_table_locked; struct io_hash_table cancel_table_locked;
struct list_head cq_overflow_list; struct list_head cq_overflow_list;
struct io_alloc_cache apoll_cache; struct io_alloc_cache apoll_cache;
...@@ -262,10 +259,6 @@ struct io_ring_ctx { ...@@ -262,10 +259,6 @@ struct io_ring_ctx {
struct io_wq_work_list locked_free_list; struct io_wq_work_list locked_free_list;
unsigned int locked_free_nr; unsigned int locked_free_nr;
/* struct io_notif cache protected by completion_lock */
struct list_head notif_list_locked;
unsigned int notif_locked_nr;
const struct cred *sq_creds; /* cred used for __io_sq_thread() */ const struct cred *sq_creds; /* cred used for __io_sq_thread() */
struct io_sq_data *sq_data; /* if using sq thread polling */ struct io_sq_data *sq_data; /* if using sq thread polling */
......
...@@ -321,8 +321,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ...@@ -321,8 +321,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_WQ_LIST(&ctx->locked_free_list); INIT_WQ_LIST(&ctx->locked_free_list);
INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
INIT_WQ_LIST(&ctx->submit_state.compl_reqs); INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
INIT_LIST_HEAD(&ctx->notif_list);
INIT_LIST_HEAD(&ctx->notif_list_locked);
return ctx; return ctx;
err: err:
kfree(ctx->dummy_ubuf); kfree(ctx->dummy_ubuf);
...@@ -2466,7 +2464,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) ...@@ -2466,7 +2464,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots); WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots);
io_notif_cache_purge(ctx);
io_mem_free(ctx->rings); io_mem_free(ctx->rings);
io_mem_free(ctx->sq_sqes); io_mem_free(ctx->sq_sqes);
......
...@@ -946,7 +946,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) ...@@ -946,7 +946,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
struct io_sendzc *zc = io_kiocb_to_cmd(req); struct io_sendzc *zc = io_kiocb_to_cmd(req);
struct io_notif_slot *notif_slot; struct io_notif_slot *notif_slot;
struct io_notif *notif; struct io_kiocb *notif;
struct msghdr msg; struct msghdr msg;
struct iovec iov; struct iovec iov;
struct socket *sock; struct socket *sock;
...@@ -1005,7 +1005,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) ...@@ -1005,7 +1005,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
min_ret = iov_iter_count(&msg.msg_iter); min_ret = iov_iter_count(&msg.msg_iter);
msg.msg_flags = msg_flags; msg.msg_flags = msg_flags;
msg.msg_ubuf = &notif->uarg; msg.msg_ubuf = &io_notif_to_data(notif)->uarg;
msg.sg_from_iter = io_sg_from_iter; msg.sg_from_iter = io_sg_from_iter;
ret = sock_sendmsg(sock, &msg); ret = sock_sendmsg(sock, &msg);
......
...@@ -9,140 +9,79 @@ ...@@ -9,140 +9,79 @@
#include "notif.h" #include "notif.h"
#include "rsrc.h" #include "rsrc.h"
static void __io_notif_complete_tw(struct callback_head *cb) static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
{ {
struct io_notif *notif = container_of(cb, struct io_notif, task_work); struct io_notif_data *nd = io_notif_to_data(notif);
struct io_rsrc_node *rsrc_node = notif->rsrc_node;
struct io_ring_ctx *ctx = notif->ctx; struct io_ring_ctx *ctx = notif->ctx;
if (notif->account_pages && ctx->user) { if (nd->account_pages && ctx->user) {
__io_unaccount_mem(ctx->user, notif->account_pages); __io_unaccount_mem(ctx->user, nd->account_pages);
notif->account_pages = 0; nd->account_pages = 0;
} }
if (likely(notif->task)) { io_req_task_complete(notif, locked);
io_put_task(notif->task, 1);
notif->task = NULL;
}
io_cq_lock(ctx);
io_fill_cqe_aux(ctx, notif->tag, 0, notif->seq, true);
list_add(&notif->cache_node, &ctx->notif_list_locked);
ctx->notif_locked_nr++;
io_cq_unlock_post(ctx);
io_rsrc_put_node(rsrc_node, 1);
percpu_ref_put(&ctx->refs);
} }
static inline void io_notif_complete(struct io_notif *notif) static inline void io_notif_complete(struct io_kiocb *notif)
__must_hold(&notif->ctx->uring_lock)
{ {
__io_notif_complete_tw(&notif->task_work); bool locked = true;
}
static void io_notif_complete_wq(struct work_struct *work)
{
struct io_notif *notif = container_of(work, struct io_notif, commit_work);
io_notif_complete(notif); __io_notif_complete_tw(notif, &locked);
} }
static void io_uring_tx_zerocopy_callback(struct sk_buff *skb, static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
struct ubuf_info *uarg, struct ubuf_info *uarg,
bool success) bool success)
{ {
struct io_notif *notif = container_of(uarg, struct io_notif, uarg); struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
struct io_kiocb *notif = cmd_to_io_kiocb(nd);
if (!refcount_dec_and_test(&uarg->refcnt))
return;
if (likely(notif->task)) {
init_task_work(&notif->task_work, __io_notif_complete_tw);
if (likely(!task_work_add(notif->task, &notif->task_work,
TWA_SIGNAL)))
return;
}
INIT_WORK(&notif->commit_work, io_notif_complete_wq);
queue_work(system_unbound_wq, &notif->commit_work);
}
static void io_notif_splice_cached(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
spin_lock(&ctx->completion_lock);
list_splice_init(&ctx->notif_list_locked, &ctx->notif_list);
ctx->notif_locked_nr = 0;
spin_unlock(&ctx->completion_lock);
}
void io_notif_cache_purge(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
io_notif_splice_cached(ctx);
while (!list_empty(&ctx->notif_list)) { if (refcount_dec_and_test(&uarg->refcnt)) {
struct io_notif *notif = list_first_entry(&ctx->notif_list, notif->io_task_work.func = __io_notif_complete_tw;
struct io_notif, cache_node); io_req_task_work_add(notif);
list_del(&notif->cache_node);
kfree(notif);
} }
} }
static inline bool io_notif_has_cached(struct io_ring_ctx *ctx) struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx,
__must_hold(&ctx->uring_lock)
{
if (likely(!list_empty(&ctx->notif_list)))
return true;
if (data_race(READ_ONCE(ctx->notif_locked_nr) <= IO_NOTIF_SPLICE_BATCH))
return false;
io_notif_splice_cached(ctx);
return !list_empty(&ctx->notif_list);
}
struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx,
struct io_notif_slot *slot) struct io_notif_slot *slot)
__must_hold(&ctx->uring_lock) __must_hold(&ctx->uring_lock)
{ {
struct io_notif *notif; struct io_kiocb *notif;
struct io_notif_data *nd;
if (likely(io_notif_has_cached(ctx))) {
notif = list_first_entry(&ctx->notif_list, if (unlikely(!io_alloc_req_refill(ctx)))
struct io_notif, cache_node); return NULL;
list_del(&notif->cache_node); notif = io_alloc_req(ctx);
} else { notif->opcode = IORING_OP_NOP;
notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT); notif->flags = 0;
if (!notif) notif->file = NULL;
return NULL; notif->task = current;
/* pre-initialise some fields */ io_get_task_refs(1);
notif->ctx = ctx; notif->rsrc_node = NULL;
notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; io_req_set_rsrc_node(notif, ctx, 0);
notif->uarg.callback = io_uring_tx_zerocopy_callback; notif->cqe.user_data = slot->tag;
notif->account_pages = 0; notif->cqe.flags = slot->seq++;
} notif->cqe.res = 0;
notif->seq = slot->seq++; nd = io_notif_to_data(notif);
notif->tag = slot->tag; nd->account_pages = 0;
nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
nd->uarg.callback = io_uring_tx_zerocopy_callback;
/* master ref owned by io_notif_slot, will be dropped on flush */ /* master ref owned by io_notif_slot, will be dropped on flush */
refcount_set(&notif->uarg.refcnt, 1); refcount_set(&nd->uarg.refcnt, 1);
percpu_ref_get(&ctx->refs);
notif->rsrc_node = ctx->rsrc_node;
io_charge_rsrc_node(ctx);
return notif; return notif;
} }
void io_notif_slot_flush(struct io_notif_slot *slot) void io_notif_slot_flush(struct io_notif_slot *slot)
__must_hold(&ctx->uring_lock) __must_hold(&ctx->uring_lock)
{ {
struct io_notif *notif = slot->notif; struct io_kiocb *notif = slot->notif;
struct io_notif_data *nd = io_notif_to_data(notif);
slot->notif = NULL; slot->notif = NULL;
if (WARN_ON_ONCE(in_interrupt()))
return;
/* drop slot's master ref */ /* drop slot's master ref */
if (refcount_dec_and_test(&notif->uarg.refcnt)) if (refcount_dec_and_test(&nd->uarg.refcnt))
io_notif_complete(notif); io_notif_complete(notif);
} }
...@@ -156,18 +95,22 @@ __cold int io_notif_unregister(struct io_ring_ctx *ctx) ...@@ -156,18 +95,22 @@ __cold int io_notif_unregister(struct io_ring_ctx *ctx)
for (i = 0; i < ctx->nr_notif_slots; i++) { for (i = 0; i < ctx->nr_notif_slots; i++) {
struct io_notif_slot *slot = &ctx->notif_slots[i]; struct io_notif_slot *slot = &ctx->notif_slots[i];
struct io_kiocb *notif = slot->notif;
struct io_notif_data *nd;
if (!slot->notif) if (!notif)
continue;
nd = io_kiocb_to_cmd(notif);
slot->notif = NULL;
if (!refcount_dec_and_test(&nd->uarg.refcnt))
continue; continue;
if (WARN_ON_ONCE(slot->notif->task)) notif->io_task_work.func = __io_notif_complete_tw;
slot->notif->task = NULL; io_req_task_work_add(notif);
io_notif_slot_flush(slot);
} }
kvfree(ctx->notif_slots); kvfree(ctx->notif_slots);
ctx->notif_slots = NULL; ctx->notif_slots = NULL;
ctx->nr_notif_slots = 0; ctx->nr_notif_slots = 0;
io_notif_cache_purge(ctx);
return 0; return 0;
} }
...@@ -180,6 +123,8 @@ __cold int io_notif_register(struct io_ring_ctx *ctx, ...@@ -180,6 +123,8 @@ __cold int io_notif_register(struct io_ring_ctx *ctx,
struct io_uring_notification_register reg; struct io_uring_notification_register reg;
unsigned i; unsigned i;
BUILD_BUG_ON(sizeof(struct io_notif_data) > 64);
if (ctx->nr_notif_slots) if (ctx->nr_notif_slots)
return -EBUSY; return -EBUSY;
if (size != sizeof(reg)) if (size != sizeof(reg))
......
...@@ -10,27 +10,10 @@ ...@@ -10,27 +10,10 @@
#define IO_NOTIF_SPLICE_BATCH 32 #define IO_NOTIF_SPLICE_BATCH 32
#define IORING_MAX_NOTIF_SLOTS (1U << 10) #define IORING_MAX_NOTIF_SLOTS (1U << 10)
struct io_notif { struct io_notif_data {
struct file *file;
struct ubuf_info uarg; struct ubuf_info uarg;
struct io_ring_ctx *ctx;
struct io_rsrc_node *rsrc_node;
/* complete via tw if ->task is non-NULL, fallback to wq otherwise */
struct task_struct *task;
/* cqe->user_data, io_notif_slot::tag if not overridden */
u64 tag;
/* see struct io_notif_slot::seq */
u32 seq;
/* hook into ctx->notif_list and ctx->notif_list_locked */
struct list_head cache_node;
unsigned long account_pages; unsigned long account_pages;
union {
struct callback_head task_work;
struct work_struct commit_work;
};
}; };
struct io_notif_slot { struct io_notif_slot {
...@@ -39,7 +22,7 @@ struct io_notif_slot { ...@@ -39,7 +22,7 @@ struct io_notif_slot {
* time and keeps one reference to it. Flush releases the reference and * time and keeps one reference to it. Flush releases the reference and
* lazily replaces it with a new notifier. * lazily replaces it with a new notifier.
*/ */
struct io_notif *notif; struct io_kiocb *notif;
/* /*
* Default ->user_data for this slot notifiers CQEs * Default ->user_data for this slot notifiers CQEs
...@@ -56,13 +39,17 @@ struct io_notif_slot { ...@@ -56,13 +39,17 @@ struct io_notif_slot {
int io_notif_register(struct io_ring_ctx *ctx, int io_notif_register(struct io_ring_ctx *ctx,
void __user *arg, unsigned int size); void __user *arg, unsigned int size);
int io_notif_unregister(struct io_ring_ctx *ctx); int io_notif_unregister(struct io_ring_ctx *ctx);
void io_notif_cache_purge(struct io_ring_ctx *ctx);
void io_notif_slot_flush(struct io_notif_slot *slot); void io_notif_slot_flush(struct io_notif_slot *slot);
struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx, struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx,
struct io_notif_slot *slot); struct io_notif_slot *slot);
static inline struct io_notif *io_get_notif(struct io_ring_ctx *ctx, static inline struct io_notif_data *io_notif_to_data(struct io_kiocb *notif)
{
return io_kiocb_to_cmd(notif);
}
static inline struct io_kiocb *io_get_notif(struct io_ring_ctx *ctx,
struct io_notif_slot *slot) struct io_notif_slot *slot)
{ {
if (!slot->notif) if (!slot->notif)
...@@ -83,16 +70,13 @@ static inline struct io_notif_slot *io_get_notif_slot(struct io_ring_ctx *ctx, ...@@ -83,16 +70,13 @@ static inline struct io_notif_slot *io_get_notif_slot(struct io_ring_ctx *ctx,
static inline void io_notif_slot_flush_submit(struct io_notif_slot *slot, static inline void io_notif_slot_flush_submit(struct io_notif_slot *slot,
unsigned int issue_flags) unsigned int issue_flags)
{ {
if (!(issue_flags & IO_URING_F_UNLOCKED)) {
slot->notif->task = current;
io_get_task_refs(1);
}
io_notif_slot_flush(slot); io_notif_slot_flush(slot);
} }
static inline int io_notif_account_mem(struct io_notif *notif, unsigned len) static inline int io_notif_account_mem(struct io_kiocb *notif, unsigned len)
{ {
struct io_ring_ctx *ctx = notif->ctx; struct io_ring_ctx *ctx = notif->ctx;
struct io_notif_data *nd = io_notif_to_data(notif);
unsigned nr_pages = (len >> PAGE_SHIFT) + 2; unsigned nr_pages = (len >> PAGE_SHIFT) + 2;
int ret; int ret;
...@@ -100,7 +84,7 @@ static inline int io_notif_account_mem(struct io_notif *notif, unsigned len) ...@@ -100,7 +84,7 @@ static inline int io_notif_account_mem(struct io_notif *notif, unsigned len)
ret = __io_account_mem(ctx->user, nr_pages); ret = __io_account_mem(ctx->user, nr_pages);
if (ret) if (ret)
return ret; return ret;
notif->account_pages += nr_pages; nd->account_pages += nr_pages;
} }
return 0; return 0;
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册