Commit a4c44f2b authored by Jens Axboe, committed by Joseph Qi

io_uring: use fget/fput_many() for file references

commit 9a56a2323dbbd8ed7f380a5af7ae3ff82caa55a6 upstream.

Add a separate io_submit_state structure, to cache some of the things
we need for IO submission.

One such example is file reference batching: through io_submit_state, we
get as many references as the number of sqes we are submitting, and drop
unused ones if we end up switching files. The assumption here is that
we're usually only dealing with one fd, and if there are multiple,
hopefully they are at least somewhat ordered. Could trivially be extended
to cover multiple fds, if needed.

On the completion side we do the same thing, except this is trivially
done just locally in io_iopoll_reap().
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Jeffle Xu <jefflexu@linux.alibaba.com>
Acked-by: Caspar Zhang <caspar@linux.alibaba.com>
Parent bbc4ceed
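
Background for the batching described above: fget_many()/fput_many() take or drop several references to a struct file with a single atomic operation, instead of paying one atomic per IO. A minimal, compilable userspace model of the idea follows; fake_file, ref_get_many() and ref_put_many() are made-up stand-ins for illustration, not kernel API:

#include <assert.h>
#include <stdio.h>

/* Userspace stand-in for a struct file and its usage count. */
struct fake_file {
	long refcount;
};

/* Model of fget_many(): one add covers 'n' future users. */
static void ref_get_many(struct fake_file *f, long n)
{
	f->refcount += n;	/* the kernel pays a single atomic op here */
}

/* Model of fput_many(): one subtract retires 'n' users. */
static void ref_put_many(struct fake_file *f, long n)
{
	f->refcount -= n;
	assert(f->refcount >= 0);
}

int main(void)
{
	struct fake_file f = { .refcount = 1 };	/* ref held by the fd table */

	ref_get_many(&f, 8);	/* submitting 8 sqes against one file */
	/* ... the 8 IOs run and complete ... */
	ref_put_many(&f, 8);	/* completion retires all 8 at once */

	printf("refcount restored to %ld\n", f.refcount);
	return 0;
}

With 8 sqes against one fd, the submission side pays for one increment and the completion side for one decrement, rather than eight of each.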
@@ -165,6 +165,19 @@ struct io_kiocb {
 #define IO_PLUG_THRESHOLD		2
 #define IO_IOPOLL_BATCH			8
 
+struct io_submit_state {
+	struct blk_plug		plug;
+
+	/*
+	 * File reference cache
+	 */
+	struct file		*file;
+	unsigned int		fd;
+	unsigned int		has_refs;
+	unsigned int		used_refs;
+	unsigned int		ios_left;
+};
+
 static struct kmem_cache *req_cachep;
 
 static const struct file_operations io_uring_fops;
@@ -332,9 +345,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			       struct list_head *done)
 {
 	void *reqs[IO_IOPOLL_BATCH];
+	int file_count, to_free;
+	struct file *file = NULL;
 	struct io_kiocb *req;
-	int to_free = 0;
 
+	file_count = to_free = 0;
 	while (!list_empty(done)) {
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
@@ -344,12 +359,28 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		reqs[to_free++] = req;
 		(*nr_events)++;
 
-		fput(req->rw.ki_filp);
+		/*
+		 * Batched puts of the same file, to avoid dirtying the
+		 * file usage count multiple times, if avoidable.
+		 */
+		if (!file) {
+			file = req->rw.ki_filp;
+			file_count = 1;
+		} else if (file == req->rw.ki_filp) {
+			file_count++;
+		} else {
+			fput_many(file, file_count);
+			file = req->rw.ki_filp;
+			file_count = 1;
+		}
+
 		if (to_free == ARRAY_SIZE(reqs))
 			io_free_req_many(ctx, reqs, &to_free);
 	}
 	io_commit_cqring(ctx);
 
+	if (file)
+		fput_many(file, file_count);
+
 	io_free_req_many(ctx, reqs, &to_free);
 }
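
The loop above coalesces puts by run: consecutive completions against the same file accumulate in file_count and are retired with a single fput_many(). The same run-length pattern, extracted into a self-contained userspace sketch (fake_file and ref_put_many() are hypothetical stand-ins, as in the model above):

#include <assert.h>
#include <stddef.h>

struct fake_file { long refcount; };

static void ref_put_many(struct fake_file *f, long n)
{
	f->refcount -= n;
	assert(f->refcount >= 0);
}

/* Drop one reference per completed request, batching runs of the
 * same file into a single ref_put_many() call, like the loop in
 * io_iopoll_complete() above. */
static void reap_completions(struct fake_file **done, size_t nr)
{
	struct fake_file *file = NULL;
	long file_count = 0;

	for (size_t i = 0; i < nr; i++) {
		if (!file) {
			file = done[i];
			file_count = 1;
		} else if (file == done[i]) {
			file_count++;
		} else {
			ref_put_many(file, file_count);
			file = done[i];
			file_count = 1;
		}
	}
	if (file)
		ref_put_many(file, file_count);	/* flush the final run */
}

int main(void)
{
	struct fake_file a = { .refcount = 4 }, b = { .refcount = 2 };
	struct fake_file *done[] = { &a, &a, &a, &b, &b, &a };

	/* Three puts issued in total (a x3, b x2, a x1) instead of six. */
	reap_completions(done, 6);
	assert(a.refcount == 0 && b.refcount == 0);
	return 0;
}

In the worst case (completions alternating between two files) this degenerates to one put per request, no worse than the per-request fput() it replaces.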
@@ -530,6 +561,48 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
 	list_add_tail(&req->list, &ctx->poll_list);
 }
 
+static void io_file_put(struct io_submit_state *state, struct file *file)
+{
+	if (!state) {
+		fput(file);
+	} else if (state->file) {
+		int diff = state->has_refs - state->used_refs;
+
+		if (diff)
+			fput_many(state->file, diff);
+		state->file = NULL;
+	}
+}
+
+/*
+ * Get as many references to a file as we have IOs left in this submission,
+ * assuming most submissions are for one file, or at least that each file
+ * has more than one submission.
+ */
+static struct file *io_file_get(struct io_submit_state *state, int fd)
+{
+	if (!state)
+		return fget(fd);
+
+	if (state->file) {
+		if (state->fd == fd) {
+			state->used_refs++;
+			state->ios_left--;
+			return state->file;
+		}
+		io_file_put(state, NULL);
+	}
+	state->file = fget_many(fd, state->ios_left);
+	if (!state->file)
+		return NULL;
+
+	state->fd = fd;
+	state->has_refs = state->ios_left;
+	state->used_refs = 1;
+	state->ios_left--;
+	return state->file;
+}
+
 /*
  * If we tracked the file through the SCM inflight mechanism, we could support
  * any file. For now, just ensure that anything potentially problematic is done
@@ -548,7 +621,7 @@ static bool io_file_supports_async(struct file *file)
 }
 
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-		      bool force_nonblock)
+		      bool force_nonblock, struct io_submit_state *state)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct kiocb *kiocb = &req->rw;
@@ -560,7 +633,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		return 0;
 
 	fd = READ_ONCE(sqe->fd);
-	kiocb->ki_filp = fget(fd);
+	kiocb->ki_filp = io_file_get(state, fd);
 	if (unlikely(!kiocb->ki_filp))
 		return -EBADF;
 	if (force_nonblock && !io_file_supports_async(kiocb->ki_filp))
@@ -604,7 +677,10 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	}
 	return 0;
 out_fput:
-	fput(kiocb->ki_filp);
+	/* in case of error, we didn't use this file reference. drop it. */
+	if (state)
+		state->used_refs--;
+	io_file_put(state, kiocb->ki_filp);
 	return ret;
 }
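
io_file_get()/io_file_put() above form a one-entry cache: grab ios_left references up front, hand them out while the fd repeats, and return the unused surplus once the fd changes or the batch ends. A compilable userspace model of that accounting, with hypothetical names and a plain integer in place of the kernel's atomic file count:

#include <assert.h>

struct fake_file { int fd; long refcount; };

struct submit_state {
	struct fake_file *file;	/* currently cached file, if any */
	int fd;			/* fd the cache is valid for */
	unsigned int has_refs;	/* references taken up front */
	unsigned int used_refs;	/* references handed out so far */
	unsigned int ios_left;	/* sqes still to submit in this batch */
};

/* Mirror of io_file_put(): return the grabbed-but-unused references. */
static void state_file_put(struct submit_state *state)
{
	if (state->file) {
		state->file->refcount -= state->has_refs - state->used_refs;
		state->file = NULL;
	}
}

/* Mirror of io_file_get(): reuse the cached reference while the fd
 * repeats; otherwise flush the cache and grab ios_left refs at once. */
static struct fake_file *state_file_get(struct submit_state *state,
					struct fake_file *f)
{
	if (state->file && state->fd == f->fd) {
		state->used_refs++;
		state->ios_left--;
		return state->file;
	}
	state_file_put(state);

	f->refcount += state->ios_left;	/* fget_many() analogue */
	state->file = f;
	state->fd = f->fd;
	state->has_refs = state->ios_left;
	state->used_refs = 1;
	state->ios_left--;
	return f;
}

int main(void)
{
	struct fake_file a = { .fd = 3, .refcount = 1 };
	struct fake_file b = { .fd = 4, .refcount = 1 };
	struct submit_state st = { .ios_left = 3 };

	state_file_get(&st, &a);	/* takes 3 refs on a, uses 1 */
	state_file_get(&st, &a);	/* cache hit: uses a 2nd ref */
	state_file_get(&st, &b);	/* flushes a's spare ref, takes 1 on b */
	state_file_put(&st);		/* end of batch: b has no surplus */

	/* IOs still in flight own 2 refs on a and 1 on b; completion
	 * would eventually drop those via fput()/fput_many(). */
	assert(a.refcount == 3 && b.refcount == 2);
	return 0;
}

This also shows why io_prep_rw()'s error path decrements used_refs before calling io_file_put(): the reference handed to the failed request was never consumed, so the rollback makes it part of the surplus. For a 4-sqe batch against one fd, the first get takes 4 references and marks 1 used; if prep then fails, used_refs drops to 0 and io_file_put() returns all 4, leaving the file count exactly where it started. With no state (the async worker path), io_file_put() falls back to a plain fput().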
@@ -650,7 +726,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
 }
 
 static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
-		       bool force_nonblock)
+		       bool force_nonblock, struct io_submit_state *state)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw;
@@ -658,7 +734,7 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
 	struct file *file;
 	ssize_t ret;
 
-	ret = io_prep_rw(req, s->sqe, force_nonblock);
+	ret = io_prep_rw(req, s->sqe, force_nonblock, state);
 	if (ret)
 		return ret;
 	file = kiocb->ki_filp;
@@ -694,7 +770,7 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
 }
 
 static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
-			bool force_nonblock)
+			bool force_nonblock, struct io_submit_state *state)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw;
@@ -702,7 +778,7 @@ static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
 	struct file *file;
 	ssize_t ret;
 
-	ret = io_prep_rw(req, s->sqe, force_nonblock);
+	ret = io_prep_rw(req, s->sqe, force_nonblock, state);
 	if (ret)
 		return ret;
 
 	/* Hold on to the file for -EAGAIN */
@@ -826,7 +902,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 }
 
 static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
-			   const struct sqe_submit *s, bool force_nonblock)
+			   const struct sqe_submit *s, bool force_nonblock,
+			   struct io_submit_state *state)
 {
 	ssize_t ret;
 	int opcode;
@@ -841,10 +918,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		ret = io_nop(req, req->user_data);
 		break;
 	case IORING_OP_READV:
-		ret = io_read(req, s, force_nonblock);
+		ret = io_read(req, s, force_nonblock, state);
 		break;
 	case IORING_OP_WRITEV:
-		ret = io_write(req, s, force_nonblock);
+		ret = io_write(req, s, force_nonblock, state);
 		break;
 	case IORING_OP_FSYNC:
 		ret = io_fsync(req, s->sqe, force_nonblock);
@@ -896,7 +973,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 		s->needs_lock = true;
 
 		do {
-			ret = __io_submit_sqe(ctx, req, s, false);
+			ret = __io_submit_sqe(ctx, req, s, false, NULL);
 			/*
 			 * We can get EAGAIN for polled IO even though we're forcing
 			 * a sync submission from here, since we can't wait for
@@ -920,7 +997,8 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 	kfree(sqe);
 }
 
-static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
+static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
+			 struct io_submit_state *state)
 {
 	struct io_kiocb *req;
 	ssize_t ret;
@@ -935,7 +1013,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
 	req->rw.ki_filp = NULL;
 
-	ret = __io_submit_sqe(ctx, req, s, true);
+	ret = __io_submit_sqe(ctx, req, s, true, state);
 	if (ret == -EAGAIN) {
 		struct io_uring_sqe *sqe_copy;
@@ -956,6 +1034,26 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
 	return ret;
 }
 
+/*
+ * Batched submission is done, ensure local IO is flushed out.
+ */
+static void io_submit_state_end(struct io_submit_state *state)
+{
+	blk_finish_plug(&state->plug);
+	io_file_put(state, NULL);
+}
+
+/*
+ * Start submission side cache.
+ */
+static void io_submit_state_start(struct io_submit_state *state,
+				  struct io_ring_ctx *ctx, unsigned max_ios)
+{
+	blk_start_plug(&state->plug);
+	state->file = NULL;
+	state->ios_left = max_ios;
+}
+
 static void io_commit_sqring(struct io_ring_ctx *ctx)
 {
 	struct io_sq_ring *ring = ctx->sq_ring;
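
io_submit_state_start()/io_submit_state_end() bracket a whole batch, as the next hunk wires into io_ring_submit(). The calling pattern, reduced to a compilable userspace skeleton (names mirror the diff; the block plug and file-cache internals are elided, and a NULL state means no batching, as in the small-batch and async-worker paths):

#include <stdio.h>

#define PLUG_THRESHOLD 2	/* stands in for IO_PLUG_THRESHOLD */

/* Cut-down stand-in for struct io_submit_state. */
struct submit_state {
	unsigned int ios_left;
};

static void submit_state_start(struct submit_state *st, unsigned int max_ios)
{
	/* the kernel also does blk_start_plug() here */
	st->ios_left = max_ios;
}

static void submit_state_end(struct submit_state *st)
{
	/* the kernel does blk_finish_plug() and flushes spare file refs */
	st->ios_left = 0;
}

/* One submission; NULL state means "no batching". */
static int submit_one(struct submit_state *st, unsigned int idx)
{
	if (st)
		st->ios_left--;	/* a real sqe would consume a cached file ref */
	printf("submitted sqe %u (batched: %s)\n", idx, st ? "yes" : "no");
	return 0;
}

int main(void)
{
	struct submit_state state, *statep = NULL;
	unsigned int to_submit = 8;

	if (to_submit > PLUG_THRESHOLD) {
		submit_state_start(&state, to_submit);
		statep = &state;
	}

	for (unsigned int i = 0; i < to_submit; i++)
		submit_one(statep, i);

	if (statep)
		submit_state_end(statep);
	return 0;
}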
@@ -1029,11 +1127,13 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
 
 static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 {
+	struct io_submit_state state, *statep = NULL;
 	int i, ret = 0, submit = 0;
-	struct blk_plug plug;
 
-	if (to_submit > IO_PLUG_THRESHOLD)
-		blk_start_plug(&plug);
+	if (to_submit > IO_PLUG_THRESHOLD) {
+		io_submit_state_start(&state, ctx, to_submit);
+		statep = &state;
+	}
 
 	for (i = 0; i < to_submit; i++) {
 		struct sqe_submit s;
@@ -1044,7 +1144,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 		s.has_user = true;
 		s.needs_lock = false;
 
-		ret = io_submit_sqe(ctx, &s);
+		ret = io_submit_sqe(ctx, &s, statep);
 		if (ret) {
 			io_drop_sqring(ctx);
 			break;
@@ -1054,8 +1154,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 	}
 	io_commit_sqring(ctx);
 
-	if (to_submit > IO_PLUG_THRESHOLD)
-		blk_finish_plug(&plug);
+	if (statep)
+		io_submit_state_end(statep);
 
 	return submit ? submit : ret;
 }
...