Commit 8cb840fc authored by Jens Axboe, committed by Cheng Jian

io_uring: make POLL_ADD/POLL_REMOVE scale better

mainline inclusion
from mainline-5.5-rc1
commit eac406c6
category: feature
bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=27
CVE: NA
---------------------------

One of the obvious use cases for these commands is networking, where
it's not uncommon to have tons of sockets open and polled for. The
current implementation uses a list for insertion and lookup, which works
fine for file-based use cases where the count is usually low, but it
breaks down somewhat for higher numbers of files / sockets. A test case
with 30k sockets being polled for and cancelled takes:

real    0m6.968s
user    0m0.002s
sys     0m6.936s

with the patch it takes:

real    0m0.233s
user    0m0.010s
sys     0m0.176s

If you go to 50k sockets, it gets even more abysmal with the current
code:

real    0m40.602s
user    0m0.010s
sys     0m40.555s

with the patch it takes:

real    0m0.398s
user    0m0.000s
sys     0m0.341s

The change is pretty straightforward: just replace the cancel_list with
a red/black tree (a userspace sketch of the test workload follows the
commit metadata below).
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: yangerkun <yangerkun@huawei.com>
Reviewed-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Parent d3bdf1e8
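The timings above come from arming tens of thousands of poll requests and then cancelling them all again. A userspace reproducer along the following lines exercises the same POLL_ADD/POLL_REMOVE path. It is only a sketch of that workload, not the benchmark the numbers were taken with, and it assumes a liburing release contemporary with this kernel, where io_uring_prep_poll_remove() still takes the original user_data as a pointer (newer liburing releases changed that prototype). Run it under time(1) with the open-file limit raised well above 2 * NR_SOCKS.

/*
 * Sketch of the "arm 30k polls, then cancel them all" workload from the
 * commit message.  Assumes an old-style io_uring_prep_poll_remove() that
 * takes the user_data pointer.
 */
#include <liburing.h>
#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>

#define NR_SOCKS        30000
#define BATCH           128     /* must not exceed the SQ size requested below */

static int fds[NR_SOCKS][2];

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        int i, j, n, ret;

        ret = io_uring_queue_init(256, &ring, 0);
        if (ret < 0) {
                fprintf(stderr, "queue_init: %d\n", ret);
                return 1;
        }

        /* Phase 1: POLL_ADD on one end of every socket pair */
        for (i = 0; i < NR_SOCKS; i += BATCH) {
                for (j = 0; j < BATCH && i + j < NR_SOCKS; j++) {
                        if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds[i + j]) < 0) {
                                perror("socketpair");
                                return 1;
                        }
                        /* BATCH <= SQ size, so get_sqe() cannot fail here */
                        sqe = io_uring_get_sqe(&ring);
                        io_uring_prep_poll_add(sqe, fds[i + j][0], POLLIN);
                        /* user_data is the key POLL_REMOVE looks up later */
                        io_uring_sqe_set_data(sqe, (void *)(unsigned long)(i + j + 1));
                }
                io_uring_submit(&ring);
        }

        /* Phase 2: POLL_REMOVE every pending poll by the same user_data */
        for (i = 0; i < NR_SOCKS; i += BATCH) {
                for (j = 0; j < BATCH && i + j < NR_SOCKS; j++) {
                        sqe = io_uring_get_sqe(&ring);
                        io_uring_prep_poll_remove(sqe,
                                        (void *)(unsigned long)(i + j + 1));
                }
                n = io_uring_submit(&ring);
                /* a successful removal posts a CQE for the remove op and one
                 * (-ECANCELED style completion) for the poll it cancelled */
                for (j = 0; j < 2 * n; j++) {
                        if (io_uring_wait_cqe(&ring, &cqe) < 0)
                                break;
                        io_uring_cqe_seen(&ring, cqe);
                }
        }

        io_uring_queue_exit(&ring);
        return 0;
}

Nothing is ever written to the sockets, so every poll stays pending and lands on the cancel_list / cancel_tree this patch is about; the second phase then hits exactly the lookup path that io_poll_cancel() implements in the diff below.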
@@ -271,7 +271,7 @@ struct io_ring_ctx {
                 * manipulate the list, hence no extra locking is needed there.
                 */
                struct list_head        poll_list;
-               struct list_head        cancel_list;
+               struct rb_root          cancel_tree;
 
                spinlock_t              inflight_lock;
                struct list_head        inflight_list;
@@ -323,7 +323,10 @@ struct io_kiocb {
        struct sqe_submit       submit;
 
        struct io_ring_ctx      *ctx;
-       struct list_head        list;
+       union {
+               struct list_head        list;
+               struct rb_node          rb_node;
+       };
        struct list_head        link_list;
        unsigned int            flags;
        refcount_t              refs;
@@ -433,7 +436,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        init_waitqueue_head(&ctx->wait);
        spin_lock_init(&ctx->completion_lock);
        INIT_LIST_HEAD(&ctx->poll_list);
-       INIT_LIST_HEAD(&ctx->cancel_list);
+       ctx->cancel_tree = RB_ROOT;
        INIT_LIST_HEAD(&ctx->defer_list);
        INIT_LIST_HEAD(&ctx->timeout_list);
        init_waitqueue_head(&ctx->inflight_wait);
@@ -1939,6 +1942,14 @@ static int io_accept(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 #endif
 }
 
+static inline void io_poll_remove_req(struct io_kiocb *req)
+{
+       if (!RB_EMPTY_NODE(&req->rb_node)) {
+               rb_erase(&req->rb_node, &req->ctx->cancel_tree);
+               RB_CLEAR_NODE(&req->rb_node);
+       }
+}
+
 static void io_poll_remove_one(struct io_kiocb *req)
 {
        struct io_poll_iocb *poll = &req->poll;
@@ -1950,17 +1961,17 @@ static void io_poll_remove_one(struct io_kiocb *req)
                io_queue_async_work(req);
        }
        spin_unlock(&poll->head->lock);
-       list_del_init(&req->list);
+       io_poll_remove_req(req);
 }
 
 static void io_poll_remove_all(struct io_ring_ctx *ctx)
 {
+       struct rb_node *node;
        struct io_kiocb *req;
 
        spin_lock_irq(&ctx->completion_lock);
-       while (!list_empty(&ctx->cancel_list)) {
-               req = list_first_entry(&ctx->cancel_list, struct io_kiocb, list);
+       while ((node = rb_first(&ctx->cancel_tree)) != NULL) {
+               req = rb_entry(node, struct io_kiocb, rb_node);
                io_poll_remove_one(req);
        }
        spin_unlock_irq(&ctx->completion_lock);
@@ -1968,14 +1979,22 @@ static void io_poll_remove_all(struct io_ring_ctx *ctx)
 
 static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
 {
+       struct rb_node *p, *parent = NULL;
        struct io_kiocb *req;
 
-       list_for_each_entry(req, &ctx->cancel_list, list) {
-               if (req->user_data != sqe_addr)
-                       continue;
-               io_poll_remove_one(req);
-               return 0;
+       p = ctx->cancel_tree.rb_node;
+       while (p) {
+               parent = p;
+               req = rb_entry(parent, struct io_kiocb, rb_node);
+               if (sqe_addr < req->user_data) {
+                       p = p->rb_left;
+               } else if (sqe_addr > req->user_data) {
+                       p = p->rb_right;
+               } else {
+                       io_poll_remove_one(req);
+                       return 0;
+               }
        }
 
        return -ENOENT;
 }
@@ -2044,7 +2063,7 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
                spin_unlock_irq(&ctx->completion_lock);
                return;
        }
-       list_del_init(&req->list);
+       io_poll_remove_req(req);
        io_poll_complete(req, mask);
        spin_unlock_irq(&ctx->completion_lock);
 
@@ -2078,7 +2097,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
         * for finalizing the request, mark us as having grabbed that already.
         */
        if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
-               list_del(&req->list);
+               io_poll_remove_req(req);
                io_poll_complete(req, mask);
                req->flags |= REQ_F_COMP_LOCKED;
                io_put_req(req);
@@ -2113,6 +2132,25 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
        add_wait_queue(head, &pt->req->poll.wait);
 }
 
+static void io_poll_req_insert(struct io_kiocb *req)
+{
+       struct io_ring_ctx *ctx = req->ctx;
+       struct rb_node **p = &ctx->cancel_tree.rb_node;
+       struct rb_node *parent = NULL;
+       struct io_kiocb *tmp;
+
+       while (*p) {
+               parent = *p;
+               tmp = rb_entry(parent, struct io_kiocb, rb_node);
+               if (req->user_data < tmp->user_data)
+                       p = &(*p)->rb_left;
+               else
+                       p = &(*p)->rb_right;
+       }
+       rb_link_node(&req->rb_node, parent, p);
+       rb_insert_color(&req->rb_node, &ctx->cancel_tree);
+}
+
 static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        struct io_kiocb **nxt)
 {
@@ -2134,6 +2172,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        INIT_IO_WORK(&req->work, io_poll_complete_work);
        events = READ_ONCE(sqe->poll_events);
        poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
+       RB_CLEAR_NODE(&req->rb_node);
 
        poll->head = NULL;
        poll->done = false;
@@ -2166,7 +2205,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                else if (cancel)
                        WRITE_ONCE(poll->canceled, true);
                else if (!poll->done) /* actually waiting for an event */
-                       list_add_tail(&req->list, &ctx->cancel_list);
+                       io_poll_req_insert(req);
                spin_unlock(&poll->head->lock);
        }
        if (mask) { /* no async, we'd stolen it */
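Stripped of the io_uring specifics, the hunks above amount to "insert keyed by user_data, look up by key, erase by key". The same pattern can be sketched in portable userspace C with glibc's tsearch()/tfind()/tdelete(); this is only a hypothetical illustration of the data-structure change, not code from the patch.

#include <search.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* toy stand-in for io_kiocb: only the lookup key matters here */
struct req {
        uint64_t user_data;
};

/* order requests by user_data, the same key io_poll_req_insert() uses */
static int cmp_req(const void *a, const void *b)
{
        const struct req *x = a, *y = b;

        if (x->user_data < y->user_data)
                return -1;
        return x->user_data > y->user_data;
}

int main(void)
{
        void *root = NULL;              /* the "cancel tree" */
        int i, nr = 30000;
        struct req *reqs = calloc(nr, sizeof(*reqs));

        if (!reqs)
                return 1;

        /* "POLL_ADD": insert every request keyed by user_data */
        for (i = 0; i < nr; i++) {
                reqs[i].user_data = i + 1;
                tsearch(&reqs[i], &root, cmp_req);
        }

        /* "POLL_REMOVE": find and erase each request by its key */
        for (i = 0; i < nr; i++) {
                struct req key = { .user_data = (uint64_t)i + 1 };

                if (!tfind(&key, &root, cmp_req)) {
                        fprintf(stderr, "missing key %d\n", i + 1);
                        return 1;
                }
                tdelete(&key, &root, cmp_req);
        }

        printf("inserted and cancelled %d keyed entries\n", nr);
        free(reqs);
        return 0;
}

Replacing the tree with a linked list that is scanned on every cancellation reproduces the O(n)-per-removal behaviour the timings in the commit message show; the tree keeps each lookup at O(log n).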