提交 745a29cc 编写于 作者: J Jens Axboe 提交者: Joseph Qi

io_uring: add IORING_OP_MADVISE

to #26323588

commit c1ca757bd6f4632c510714631ddcc2d13030fe1e upstream.

This adds support for doing madvise(2) through io_uring. We assume that
any operation can block, and hence punt everything async. This could be
improved, but it is hard to make bulletproof. The async punt ensures it's
safe.
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Acked-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
上级 724746b1
...@@ -408,6 +408,13 @@ struct io_fadvise { ...@@ -408,6 +408,13 @@ struct io_fadvise {
u32 advice; u32 advice;
}; };
/*
 * Per-request state for IORING_OP_MADVISE. The addr/len/advice fields are
 * filled in from the SQE by io_madvise_prep() and later handed to
 * do_madvise() by io_madvise().
 */
struct io_madvise {
/*
 * Not referenced by the madvise prep/issue code.
 * NOTE(review): presumably kept so this struct lines up with the other
 * per-opcode structs stored alongside it in struct io_kiocb - confirm.
 */
struct file *file;
/* Value of sqe->addr, passed as the first argument to do_madvise(). */
u64 addr;
/* Value of sqe->len, passed as the second argument to do_madvise(). */
u32 len;
/* Value of sqe->fadvise_advice, passed as the third argument to do_madvise(). */
u32 advice;
};
struct io_async_connect { struct io_async_connect {
struct sockaddr_storage address; struct sockaddr_storage address;
}; };
...@@ -461,6 +468,7 @@ struct io_kiocb { ...@@ -461,6 +468,7 @@ struct io_kiocb {
struct io_close close; struct io_close close;
struct io_files_update files_update; struct io_files_update files_update;
struct io_fadvise fadvise; struct io_fadvise fadvise;
struct io_madvise madvise;
}; };
struct io_async_ctx *io; struct io_async_ctx *io;
...@@ -680,6 +688,10 @@ static const struct io_op_def io_op_defs[] = { ...@@ -680,6 +688,10 @@ static const struct io_op_def io_op_defs[] = {
/* IORING_OP_FADVISE */ /* IORING_OP_FADVISE */
.needs_file = 1, .needs_file = 1,
}, },
{
/* IORING_OP_MADVISE */
.needs_mm = 1,
},
}; };
static void io_wq_submit_work(struct io_wq_work **workptr); static void io_wq_submit_work(struct io_wq_work **workptr);
...@@ -2446,6 +2458,42 @@ static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2446,6 +2458,42 @@ static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
return 0; return 0;
} }
/*
 * Capture the madvise arguments from a submission queue entry into the
 * request's io_madvise state.
 *
 * Returns 0 on success, -EINVAL if ioprio, buf_index or off is set in the
 * SQE, and -EOPNOTSUPP when the kernel is built without both
 * CONFIG_ADVISE_SYSCALLS and CONFIG_MMU.
 */
static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
	struct io_madvise *ma = &req->madvise;

	/* This opcode does not consume these SQE fields; reject them if set. */
	if (sqe->ioprio)
		return -EINVAL;
	if (sqe->buf_index)
		return -EINVAL;
	if (sqe->off)
		return -EINVAL;

	/* Snapshot each argument once from the SQE. */
	ma->addr = READ_ONCE(sqe->addr);
	ma->len = READ_ONCE(sqe->len);
	ma->advice = READ_ONCE(sqe->fadvise_advice);
	return 0;
#else
	return -EOPNOTSUPP;
#endif
}
/*
 * Issue a prepared IORING_OP_MADVISE request.
 *
 * A nonblocking attempt is always refused with -EAGAIN; no nonblocking
 * path is implemented here. Otherwise do_madvise() runs with the saved
 * arguments, its result is posted through io_cqring_add_event(), and the
 * function returns 0. A negative do_madvise() result additionally marks
 * the request via req_set_fail_links().
 *
 * Returns -EOPNOTSUPP when the kernel is built without both
 * CONFIG_ADVISE_SYSCALLS and CONFIG_MMU.
 */
static int io_madvise(struct io_kiocb *req, struct io_kiocb **nxt,
		      bool force_nonblock)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
	const struct io_madvise *args;
	int res;

	if (force_nonblock)
		return -EAGAIN;

	args = &req->madvise;
	res = do_madvise(args->addr, args->len, args->advice);
	if (res < 0)
		req_set_fail_links(req);

	/* Post the completion first, then drop the request reference. */
	io_cqring_add_event(req, res);
	io_put_req_find_next(req, nxt);
	return 0;
#else
	return -EOPNOTSUPP;
#endif
}
static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
if (sqe->ioprio || sqe->buf_index || sqe->addr) if (sqe->ioprio || sqe->buf_index || sqe->addr)
...@@ -3763,6 +3811,9 @@ static int io_req_defer_prep(struct io_kiocb *req, ...@@ -3763,6 +3811,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_FADVISE: case IORING_OP_FADVISE:
ret = io_fadvise_prep(req, sqe); ret = io_fadvise_prep(req, sqe);
break; break;
case IORING_OP_MADVISE:
ret = io_madvise_prep(req, sqe);
break;
default: default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode); req->opcode);
...@@ -3967,6 +4018,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, ...@@ -3967,6 +4018,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
} }
ret = io_fadvise(req, nxt, force_nonblock); ret = io_fadvise(req, nxt, force_nonblock);
break; break;
case IORING_OP_MADVISE:
if (sqe) {
ret = io_madvise_prep(req, sqe);
if (ret)
break;
}
ret = io_madvise(req, nxt, force_nonblock);
break;
default: default:
ret = -EINVAL; ret = -EINVAL;
break; break;
......
...@@ -88,6 +88,7 @@ enum { ...@@ -88,6 +88,7 @@ enum {
IORING_OP_READ, IORING_OP_READ,
IORING_OP_WRITE, IORING_OP_WRITE,
IORING_OP_FADVISE, IORING_OP_FADVISE,
IORING_OP_MADVISE,
/* this goes last, obviously */ /* this goes last, obviously */
IORING_OP_LAST, IORING_OP_LAST,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册