提交 c9166992 编写于 作者: J Jens Axboe 提交者: Joseph Qi

io_uring: add support for epoll_ctl(2)

to #26323588

commit 3e4827b05d2ac2d377ed136a52829ec46787bf4b upstream.

This adds IORING_OP_EPOLL_CTL, which can perform the same work as the
epoll_ctl(2) system call.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Acked-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
上级 73681652
...@@ -74,6 +74,7 @@ ...@@ -74,6 +74,7 @@
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/fsnotify.h> #include <linux/fsnotify.h>
#include <linux/fadvise.h> #include <linux/fadvise.h>
#include <linux/eventpoll.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h> #include <trace/events/io_uring.h>
...@@ -423,6 +424,14 @@ struct io_madvise { ...@@ -423,6 +424,14 @@ struct io_madvise {
u32 advice; u32 advice;
}; };
/*
 * Per-request state for IORING_OP_EPOLL_CTL. The fields are filled in by
 * io_epoll_ctl_prep() from the submission queue entry and later handed to
 * do_epoll_ctl() by io_epoll_ctl().
 */
struct io_epoll {
/*
 * NOTE(review): not read or written by the epoll prep/issue code; presumably
 * kept first to line up with the file pointer of the other per-opcode request
 * structs that share storage with this one - confirm.
 */
struct file *file;
/* Loaded from sqe->fd; first argument to do_epoll_ctl(). */
int epfd;
/* Loaded from sqe->len; also decides whether 'event' is copied in. */
int op;
/* Loaded from sqe->off; third argument to do_epoll_ctl(). */
int fd;
/*
 * Kernel copy of the user's struct epoll_event (from sqe->addr). Only
 * populated when ep_op_has_event(op) is true.
 */
struct epoll_event event;
};
struct io_async_connect { struct io_async_connect {
struct sockaddr_storage address; struct sockaddr_storage address;
}; };
...@@ -536,6 +545,7 @@ struct io_kiocb { ...@@ -536,6 +545,7 @@ struct io_kiocb {
struct io_files_update files_update; struct io_files_update files_update;
struct io_fadvise fadvise; struct io_fadvise fadvise;
struct io_madvise madvise; struct io_madvise madvise;
struct io_epoll epoll;
}; };
struct io_async_ctx *io; struct io_async_ctx *io;
...@@ -728,6 +738,10 @@ static const struct io_op_def io_op_defs[] = { ...@@ -728,6 +738,10 @@ static const struct io_op_def io_op_defs[] = {
.fd_non_neg = 1, .fd_non_neg = 1,
.file_table = 1, .file_table = 1,
}, },
[IORING_OP_EPOLL_CTL] = {
.unbound_nonreg_file = 1,
.file_table = 1,
},
}; };
static void io_wq_submit_work(struct io_wq_work **workptr); static void io_wq_submit_work(struct io_wq_work **workptr);
...@@ -2608,6 +2622,52 @@ static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2608,6 +2622,52 @@ static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
return io_openat2(req, nxt, force_nonblock); return io_openat2(req, nxt, force_nonblock);
} }
/*
 * Decode an IORING_OP_EPOLL_CTL submission entry into req->epoll.
 *
 * sqe->fd, sqe->len and sqe->off supply the epfd, op and fd values
 * respectively. When the operation carries an event (per ep_op_has_event()),
 * the struct epoll_event that sqe->addr points at is copied in from user
 * space so the issue path does not have to touch user memory again.
 *
 * Returns 0 on success, -EINVAL if sqe->ioprio or sqe->buf_index is set,
 * -EFAULT if the event cannot be copied from user space, or -EOPNOTSUPP
 * when the kernel is built without CONFIG_EPOLL.
 */
static int io_epoll_ctl_prep(struct io_kiocb *req,
			     const struct io_uring_sqe *sqe)
{
#ifdef CONFIG_EPOLL
	struct io_epoll *ie = &req->epoll;
	struct epoll_event __user *uevent;

	/* These SQE fields have no meaning for this opcode. */
	if (sqe->ioprio || sqe->buf_index)
		return -EINVAL;

	ie->epfd = READ_ONCE(sqe->fd);
	ie->op = READ_ONCE(sqe->len);
	ie->fd = READ_ONCE(sqe->off);

	/* Nothing more to fetch for operations that take no event. */
	if (!ep_op_has_event(ie->op))
		return 0;

	uevent = u64_to_user_ptr(READ_ONCE(sqe->addr));
	if (copy_from_user(&ie->event, uevent, sizeof(*uevent)))
		return -EFAULT;

	return 0;
#else
	return -EOPNOTSUPP;
#endif
}
/*
 * Issue a prepared IORING_OP_EPOLL_CTL request.
 *
 * Calls do_epoll_ctl() with the values stashed in req->epoll. If the call
 * reports -EAGAIN while force_nonblock is set, -EAGAIN is returned without
 * posting a completion (presumably so the caller can retry from a context
 * that may block - confirm against the io_issue_sqe() callers). Otherwise
 * the result is posted as the completion value, linked requests are marked
 * failed on a negative result, the request reference is dropped (possibly
 * handing back a next request via @nxt), and 0 is returned.
 *
 * Returns -EOPNOTSUPP when the kernel is built without CONFIG_EPOLL.
 */
static int io_epoll_ctl(struct io_kiocb *req, struct io_kiocb **nxt,
			bool force_nonblock)
{
#ifdef CONFIG_EPOLL
	struct io_epoll *ep = &req->epoll;
	int res = do_epoll_ctl(ep->epfd, ep->op, ep->fd, &ep->event,
			       force_nonblock);

	if (res == -EAGAIN && force_nonblock)
		return -EAGAIN;

	if (res < 0)
		req_set_fail_links(req);

	io_cqring_add_event(req, res);
	io_put_req_find_next(req, nxt);
	return 0;
#else
	return -EOPNOTSUPP;
#endif
}
static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
...@@ -4072,6 +4132,9 @@ static int io_req_defer_prep(struct io_kiocb *req, ...@@ -4072,6 +4132,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_OPENAT2: case IORING_OP_OPENAT2:
ret = io_openat2_prep(req, sqe); ret = io_openat2_prep(req, sqe);
break; break;
case IORING_OP_EPOLL_CTL:
ret = io_epoll_ctl_prep(req, sqe);
break;
default: default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode); req->opcode);
...@@ -4300,6 +4363,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, ...@@ -4300,6 +4363,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
} }
ret = io_openat2(req, nxt, force_nonblock); ret = io_openat2(req, nxt, force_nonblock);
break; break;
case IORING_OP_EPOLL_CTL:
if (sqe) {
ret = io_epoll_ctl_prep(req, sqe);
if (ret)
break;
}
ret = io_epoll_ctl(req, nxt, force_nonblock);
break;
default: default:
ret = -EINVAL; ret = -EINVAL;
break; break;
......
...@@ -112,6 +112,7 @@ enum { ...@@ -112,6 +112,7 @@ enum {
IORING_OP_SEND, IORING_OP_SEND,
IORING_OP_RECV, IORING_OP_RECV,
IORING_OP_OPENAT2, IORING_OP_OPENAT2,
IORING_OP_EPOLL_CTL,
/* this goes last, obviously */ /* this goes last, obviously */
IORING_OP_LAST, IORING_OP_LAST,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册