Commit c5d6bd8a authored by Jens Axboe, committed by Joseph Qi

io_uring: add and use struct io_rw for read/writes

to #26323578

commit 9adbd45d6d32ffc1a03f3c51d72cfc69ebfc2ddb upstream.

Put the kiocb in struct io_rw, and add the addr/len for the request as
well. Use the kiocb->private field for the buffer index for fixed reads
and writes.
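
To make this concrete, here is a minimal userspace sketch of the embedding pattern (simplified stand-in types, not the kernel's real definitions): with the kiocb embedded in io_rw inside the request, a completion callback that is handed only the kiocb can recover the whole request via container_of(), which is exactly what the rw.kiocb changes in the diff below rely on.

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel types; illustrative only. */
struct kiocb { void *private; };

struct io_rw {
	struct kiocb kiocb;	/* must stay first, see the NOTE in the diff */
	unsigned long long addr;
	unsigned long long len;
};

struct io_kiocb {
	struct io_rw rw;
	long result;
};

/* Recover the enclosing struct from a pointer to one of its members. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* A completion callback that only sees the kiocb, like ki_complete does. */
static void complete_rw(struct kiocb *kiocb, long res)
{
	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);

	req->result = res;
}

int main(void)
{
	struct io_kiocb req = { .rw = { .addr = 0x1000, .len = 512 } };

	complete_rw(&req.rw.kiocb, 512);
	printf("result=%ld len=%llu\n", req.result, req.rw.len);
	return 0;
}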

Any use of kiocb->ki_filp is flipped to req->file. It's the same thing,
and less confusing.
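
The flip is safe because of a layout invariant the diff depends on: in the io_kiocb union, struct file *file overlays struct io_rw rw, and the kernel's struct kiocb keeps its struct file * as its first member, so req->file and req->rw.kiocb.ki_filp name the same bytes. A sketch of that invariant, again with simplified stand-in types rather than the kernel's (build with -std=c11):

#include <assert.h>
#include <stddef.h>

struct file { int fd; };

struct kiocb {
	struct file *ki_filp;	/* first member, by contract */
	int ki_flags;
};

struct io_rw {
	struct kiocb kiocb;	/* also first, so ki_filp stays at offset 0 */
	unsigned long long addr, len;
};

struct io_kiocb {
	union {
		struct file *file;
		struct io_rw rw;
	};
};

int main(void)
{
	/* The aliasing only holds while both members sit at offset 0. */
	static_assert(offsetof(struct kiocb, ki_filp) == 0, "ki_filp first");
	static_assert(offsetof(struct io_rw, kiocb) == 0, "kiocb first");

	struct file f = { .fd = 3 };
	struct io_kiocb req = { .file = &f };

	/* Same storage, two names. */
	assert(req.rw.kiocb.ki_filp == req.file);
	return 0;
}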
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Acked-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Parent 08ba131e
@@ -332,6 +332,13 @@ struct io_timeout {
 	int flags;
 };
 
+struct io_rw {
+	/* NOTE: kiocb has the file as the first member, so don't do it here */
+	struct kiocb kiocb;
+	u64 addr;
+	u64 len;
+};
+
 struct io_async_connect {
 	struct sockaddr_storage address;
 };
@@ -369,7 +376,7 @@ struct io_async_ctx {
 struct io_kiocb {
 	union {
 		struct file *file;
-		struct kiocb rw;
+		struct io_rw rw;
 		struct io_poll_iocb poll;
 		struct io_accept accept;
 		struct io_sync sync;
@@ -1181,7 +1188,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 
 	ret = 0;
 	list_for_each_entry_safe(req, tmp, &ctx->poll_list, list) {
-		struct kiocb *kiocb = &req->rw;
+		struct kiocb *kiocb = &req->rw.kiocb;
 
 		/*
 		 * Move completed entries to our local list. If we find a
@@ -1327,7 +1334,7 @@ static inline void req_set_fail_links(struct io_kiocb *req)
 
 static void io_complete_rw_common(struct kiocb *kiocb, long res)
 {
-	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
 	if (kiocb->ki_flags & IOCB_WRITE)
 		kiocb_end_write(req);
@@ -1339,7 +1346,7 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
 
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
-	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
 	io_complete_rw_common(kiocb, res);
 	io_put_req(req);
@@ -1347,7 +1354,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 
 static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
 {
-	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 	struct io_kiocb *nxt = NULL;
 
 	io_complete_rw_common(kiocb, res);
@@ -1358,7 +1365,7 @@ static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 {
-	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
 	if (kiocb->ki_flags & IOCB_WRITE)
 		kiocb_end_write(req);
@@ -1392,7 +1399,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
 
 		list_req = list_first_entry(&ctx->poll_list, struct io_kiocb,
 						list);
-		if (list_req->rw.ki_filp != req->rw.ki_filp)
+		if (list_req->file != req->file)
 			ctx->poll_multi_file = true;
 	}
 
@@ -1471,7 +1478,7 @@ static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
 {
 	const struct io_uring_sqe *sqe = req->sqe;
 	struct io_ring_ctx *ctx = req->ctx;
-	struct kiocb *kiocb = &req->rw;
+	struct kiocb *kiocb = &req->rw.kiocb;
 	unsigned ioprio;
 	int ret;
@@ -1520,6 +1527,12 @@ static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
 			return -EINVAL;
 		kiocb->ki_complete = io_complete_rw;
 	}
+
+	req->rw.addr = READ_ONCE(req->sqe->addr);
+	req->rw.len = READ_ONCE(req->sqe->len);
+	/* we own ->private, reuse it for the buffer index */
+	req->rw.kiocb.private = (void *) (unsigned long)
+				READ_ONCE(req->sqe->buf_index);
 	return 0;
 }
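
Aside: the (void *) (unsigned long) double cast above is the usual idiom for carrying a small integer in a pointer-sized field without an int-to-pointer size warning; decoding reverses the casts. A minimal round-trip sketch with a stand-in kiocb, not the kernel's:

#include <assert.h>

struct kiocb { void *private; };

/* Encode: widen the index to unsigned long first, then to void *. */
static void stash_buf_index(struct kiocb *kiocb, unsigned short buf_index)
{
	kiocb->private = (void *)(unsigned long)buf_index;
}

/* Decode: the reverse casts recover the original index. */
static unsigned short fetch_buf_index(const struct kiocb *kiocb)
{
	return (unsigned short)(unsigned long)kiocb->private;
}

int main(void)
{
	struct kiocb k;

	stash_buf_index(&k, 42);
	assert(fetch_buf_index(&k) == 42);
	return 0;
}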
@@ -1553,11 +1566,11 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt,
 		io_rw_done(kiocb, ret);
 }
 
-static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
-			       const struct io_uring_sqe *sqe,
+static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
 			       struct iov_iter *iter)
 {
-	size_t len = READ_ONCE(sqe->len);
+	struct io_ring_ctx *ctx = req->ctx;
+	size_t len = req->rw.len;
 	struct io_mapped_ubuf *imu;
 	unsigned index, buf_index;
 	size_t offset;
@@ -1567,13 +1580,13 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
 	if (unlikely(!ctx->user_bufs))
 		return -EFAULT;
 
-	buf_index = READ_ONCE(sqe->buf_index);
+	buf_index = (unsigned long) req->rw.kiocb.private;
 	if (unlikely(buf_index >= ctx->nr_user_bufs))
 		return -EFAULT;
 
 	index = array_index_nospec(buf_index, ctx->nr_user_bufs);
 	imu = &ctx->user_bufs[index];
-	buf_addr = READ_ONCE(sqe->addr);
+	buf_addr = req->rw.addr;
 
 	/* overflow */
 	if (buf_addr + len < buf_addr)
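
Aside: buf_addr + len < buf_addr is the standard unsigned-wraparound test: if the end of the requested range wraps past the top of the address space, the sum comes out below the base. A self-contained sketch of the idiom:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* True when [buf_addr, buf_addr + len) wraps around the u64 space. */
static bool range_overflows(uint64_t buf_addr, uint64_t len)
{
	return buf_addr + len < buf_addr;
}

int main(void)
{
	assert(!range_overflows(0x1000, 512));
	/* A range ending past UINT64_MAX wraps, so the check trips. */
	assert(range_overflows(UINT64_MAX - 10, 512));
	return 0;
}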
@@ -1630,25 +1643,20 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
 
 static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 			       struct iovec **iovec, struct iov_iter *iter)
 {
-	const struct io_uring_sqe *sqe = req->sqe;
-	void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
-	size_t sqe_len = READ_ONCE(sqe->len);
+	void __user *buf = u64_to_user_ptr(req->rw.addr);
+	size_t sqe_len = req->rw.len;
 	u8 opcode;
 
-	/*
-	 * We're reading ->opcode for the second time, but the first read
-	 * doesn't care whether it's _FIXED or not, so it doesn't matter
-	 * whether ->opcode changes concurrently. The first read does care
-	 * about whether it is a READ or a WRITE, so we don't trust this read
-	 * for that purpose and instead let the caller pass in the read/write
-	 * flag.
-	 */
 	opcode = req->opcode;
 	if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
 		*iovec = NULL;
-		return io_import_fixed(req->ctx, rw, sqe, iter);
+		return io_import_fixed(req, rw, iter);
 	}
 
+	/* buffer index only valid with fixed read/write */
+	if (req->rw.kiocb.private)
+		return -EINVAL;
+
 	if (req->io) {
 		struct io_async_rw *iorw = &req->io->rw;
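
Aside: because every read/write now carries the stashed buffer index, a non-fixed request with a stray non-zero index is rejected with -EINVAL instead of being silently ignored. Roughly, with hypothetical names and simplified types:

#include <errno.h>

enum op { OP_READ, OP_READ_FIXED };

struct kiocb { void *private; };
struct io_rw { struct kiocb kiocb; };

/* Hypothetical validator mirroring the check added to io_import_iovec(). */
static int check_buf_index(enum op opcode, const struct io_rw *rw)
{
	if (opcode == OP_READ_FIXED)
		return 0;		/* fixed ops consume the index */
	if (rw->kiocb.private)
		return -EINVAL;		/* index is meaningless here */
	return 0;
}

int main(void)
{
	struct io_rw rw = { .kiocb = { .private = (void *)1UL } };

	return check_buf_index(OP_READ, &rw) == -EINVAL ? 0 : 1;
}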
@@ -1797,9 +1805,8 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
 		   bool force_nonblock)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
-	struct kiocb *kiocb = &req->rw;
+	struct kiocb *kiocb = &req->rw.kiocb;
 	struct iov_iter iter;
-	struct file *file;
 	size_t iov_count;
 	ssize_t io_size, ret;
 
@@ -1815,9 +1822,8 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
 
 	/* Ensure we clear previously set non-block flag */
 	if (!force_nonblock)
-		req->rw.ki_flags &= ~IOCB_NOWAIT;
+		req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
 
-	file = req->file;
 	io_size = ret;
 	if (req->flags & REQ_F_LINK)
 		req->result = io_size;
@@ -1826,20 +1832,20 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
 	 * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
 	 * we know to async punt it even if it was opened O_NONBLOCK
 	 */
-	if (force_nonblock && !io_file_supports_async(file)) {
+	if (force_nonblock && !io_file_supports_async(req->file)) {
 		req->flags |= REQ_F_MUST_PUNT;
 		goto copy_iov;
 	}
 
 	iov_count = iov_iter_count(&iter);
-	ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
+	ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
 	if (!ret) {
 		ssize_t ret2;
 
-		if (file->f_op->read_iter)
-			ret2 = call_read_iter(file, kiocb, &iter);
+		if (req->file->f_op->read_iter)
+			ret2 = call_read_iter(req->file, kiocb, &iter);
 		else
-			ret2 = loop_rw_iter(READ, file, kiocb, &iter);
+			ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
 
 		/*
 		 * In case of a short read, punt to async. This can happen
@@ -1890,9 +1896,8 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
 		    bool force_nonblock)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
-	struct kiocb *kiocb = &req->rw;
+	struct kiocb *kiocb = &req->rw.kiocb;
 	struct iov_iter iter;
-	struct file *file;
 	size_t iov_count;
 	ssize_t ret, io_size;
 
@@ -1908,9 +1913,8 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
 
 	/* Ensure we clear previously set non-block flag */
 	if (!force_nonblock)
-		req->rw.ki_flags &= ~IOCB_NOWAIT;
+		req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
 
-	file = kiocb->ki_filp;
 	io_size = ret;
 	if (req->flags & REQ_F_LINK)
 		req->result = io_size;
@@ -1930,7 +1934,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
 		goto copy_iov;
 
 	iov_count = iov_iter_count(&iter);
-	ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count);
+	ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
 	if (!ret) {
 		ssize_t ret2;
 
@@ -1942,17 +1946,17 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
 		 * we return to userspace.
 		 */
 		if (req->flags & REQ_F_ISREG) {
-			__sb_start_write(file_inode(file)->i_sb,
+			__sb_start_write(file_inode(req->file)->i_sb,
 						SB_FREEZE_WRITE, true);
-			__sb_writers_release(file_inode(file)->i_sb,
+			__sb_writers_release(file_inode(req->file)->i_sb,
 						SB_FREEZE_WRITE);
 		}
 		kiocb->ki_flags |= IOCB_WRITE;
 
-		if (file->f_op->write_iter)
-			ret2 = call_write_iter(file, kiocb, &iter);
+		if (req->file->f_op->write_iter)
+			ret2 = call_write_iter(req->file, kiocb, &iter);
 		else
-			ret2 = loop_rw_iter(WRITE, file, kiocb, &iter);
+			ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
 
 		if (!force_nonblock || ret2 != -EAGAIN) {
 			kiocb_done(kiocb, ret2, nxt, req->in_async);
 		} else {
@@ -2032,7 +2036,7 @@ static void io_fsync_finish(struct io_wq_work **workptr)
 
 	if (io_req_cancelled(req))
 		return;
-	ret = vfs_fsync_range(req->rw.ki_filp, req->sync.off,
+	ret = vfs_fsync_range(req->file, req->sync.off,
 				end > 0 ? end : LLONG_MAX,
 				req->sync.flags & IORING_FSYNC_DATASYNC);
 	if (ret < 0)
@@ -2098,7 +2102,7 @@ static void io_sync_file_range_finish(struct io_wq_work **workptr)
 
 	if (io_req_cancelled(req))
 		return;
-	ret = sync_file_range(req->rw.ki_filp, req->sync.off, req->sync.len,
+	ret = sync_file_range(req->file, req->sync.off, req->sync.len,
 				req->sync.flags);
 	if (ret < 0)
 		req_set_fail_links(req);