提交 b7706b3d 编写于 作者: J Jiufei Xue

ovl: implement async IO routines

A performance regression has been observed since Linux v4.19 when running
an aio test using fio with iodepth 128 on overlayfs: the queue depth of
the device is always 1, which is unexpected.

Investigation showed that commit 16914e6f
("ovl: add ovl_read_iter()") and commit 2a92e07e
("ovl: add ovl_write_iter()") use do_iter_readv_writev() to submit
requests to the real filesystem. Async IOs are converted to sync IOs
there, which causes the performance regression.

So implement async IO for stacked reading and writing.

Changes since v1:
  - add a cleanup helper for completion/error handling
  - handle the case when aio_req allocation failed
Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
上级 7ff6623e
...@@ -14,6 +14,14 @@ ...@@ -14,6 +14,14 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include "overlayfs.h" #include "overlayfs.h"
/*
 * Per-request state for an asynchronous read/write that overlayfs forwards
 * to the real file.
 */
struct ovl_aio_req {
/* kiocb submitted to the real file; the completion callback recovers the request from it via container_of() */
struct kiocb iocb;
/* kiocb originally handed to overlayfs; its ki_pos is updated and its ki_complete is invoked when the request finishes */
struct kiocb *orig_iocb;
/* reference to the real file, dropped with fdput() during cleanup */
struct fd fd;
};
/* Slab cache that struct ovl_aio_req objects are allocated from and freed back to. */
static struct kmem_cache *ovl_aio_request_cachep;
static char ovl_whatisit(struct inode *inode, struct inode *realinode) static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{ {
if (realinode != ovl_inode_upper(inode)) if (realinode != ovl_inode_upper(inode))
...@@ -228,6 +236,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb) ...@@ -228,6 +236,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
return flags; return flags;
} }
/*
 * Release everything held by a stacked AIO request and free it.
 *
 * Invoked from the completion callback, and also directly by the submitting
 * path when the real filesystem did not queue the request (i.e. the return
 * value was not -EIOCBQUEUED).
 *
 * @aio_req: request to tear down; it must not be touched after this returns
 *           because it is handed back to the slab cache.
 */
static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
{
	struct kiocb *real_iocb = &aio_req->iocb;
	struct kiocb *caller_iocb = aio_req->orig_iocb;

	if (real_iocb->ki_flags & IOCB_WRITE) {
		struct inode *ovl_inode = file_inode(caller_iocb->ki_filp);

		/* Balances the file_start_write() taken on the real file. */
		file_end_write(real_iocb->ki_filp);
		/* Refresh the overlay inode's attributes from the real inode. */
		ovl_copyattr(ovl_inode_real(ovl_inode), ovl_inode);
	}

	/* Report the file position reached on the real file to the caller. */
	caller_iocb->ki_pos = real_iocb->ki_pos;

	fdput(aio_req->fd);
	kmem_cache_free(ovl_aio_request_cachep, aio_req);
}
/*
 * Completion callback installed on the kiocb submitted to the real file.
 *
 * @iocb: the kiocb embedded in a struct ovl_aio_req
 * @res:  result passed through unchanged to the caller's ki_complete
 * @res2: secondary result passed through unchanged to the caller's ki_complete
 */
static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
{
	struct ovl_aio_req *req;
	struct kiocb *caller_iocb;

	req = container_of(iocb, struct ovl_aio_req, iocb);

	/* Fetch the caller's kiocb first: the cleanup below frees req. */
	caller_iocb = req->orig_iocb;

	ovl_aio_cleanup_handler(req);
	caller_iocb->ki_complete(caller_iocb, res, res2);
}
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
...@@ -243,14 +278,32 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) ...@@ -243,14 +278,32 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
return ret; return ret;
old_cred = ovl_override_creds(file_inode(file)->i_sb); old_cred = ovl_override_creds(file_inode(file)->i_sb);
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, if (is_sync_kiocb(iocb)) {
ovl_iocb_to_rwf(iocb)); ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
ovl_iocb_to_rwf(iocb));
ovl_file_accessed(file);
fdput(real);
} else {
struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
GFP_NOFS);
if (!aio_req) {
ret = -ENOMEM;
fdput(real);
goto out_revert;
}
aio_req->fd = real;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, real.file);
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
ovl_file_accessed(file);
if (ret != -EIOCBQUEUED)
ovl_aio_cleanup_handler(aio_req);
}
out_revert:
revert_creds(old_cred); revert_creds(old_cred);
ovl_file_accessed(file);
fdput(real);
return ret; return ret;
} }
...@@ -278,15 +331,34 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) ...@@ -278,15 +331,34 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
old_cred = ovl_override_creds(file_inode(file)->i_sb); old_cred = ovl_override_creds(file_inode(file)->i_sb);
file_start_write(real.file); file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, if (is_sync_kiocb(iocb)) {
ovl_iocb_to_rwf(iocb)); ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
file_end_write(real.file); ovl_iocb_to_rwf(iocb));
revert_creds(old_cred); file_end_write(real.file);
/* Update size */
/* Update size */ ovl_copyattr(ovl_inode_real(inode), inode);
ovl_copyattr(ovl_inode_real(inode), inode); fdput(real);
} else {
struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
GFP_NOFS);
if (!aio_req) {
ret = -ENOMEM;
file_end_write(real.file);
fdput(real);
goto out_revert;
}
aio_req->fd = real;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, real.file);
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
if (ret != -EIOCBQUEUED)
ovl_aio_cleanup_handler(aio_req);
}
fdput(real); out_revert:
revert_creds(old_cred);
out_unlock: out_unlock:
inode_unlock(inode); inode_unlock(inode);
...@@ -649,3 +721,19 @@ const struct file_operations ovl_file_operations = { ...@@ -649,3 +721,19 @@ const struct file_operations ovl_file_operations = {
.clone_file_range = ovl_clone_file_range, .clone_file_range = ovl_clone_file_range,
.dedupe_file_range = ovl_dedupe_file_range, .dedupe_file_range = ovl_dedupe_file_range,
}; };
/*
 * Create the slab cache that backs struct ovl_aio_req allocations.
 *
 * Returns 0 on success, or -ENOMEM if the cache could not be created.
 */
int __init ovl_init_aio_request_cache(void)
{
	struct kmem_cache *cache;

	cache = kmem_cache_create("ovl_aio_req", sizeof(struct ovl_aio_req),
				  0, SLAB_HWCACHE_ALIGN, NULL);
	ovl_aio_request_cachep = cache;

	return cache ? 0 : -ENOMEM;
}
/* Destroy the slab cache created by ovl_init_aio_request_cache(). */
void ovl_exit_aio_request_cache(void)
{
kmem_cache_destroy(ovl_aio_request_cachep);
}
...@@ -409,6 +409,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); ...@@ -409,6 +409,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
/* file.c */ /* file.c */
extern const struct file_operations ovl_file_operations; extern const struct file_operations ovl_file_operations;
int __init ovl_init_aio_request_cache(void);
void ovl_exit_aio_request_cache(void);
/* copy_up.c */ /* copy_up.c */
int ovl_copy_up(struct dentry *dentry); int ovl_copy_up(struct dentry *dentry);
......
...@@ -1715,9 +1715,17 @@ static int __init ovl_init(void) ...@@ -1715,9 +1715,17 @@ static int __init ovl_init(void)
if (ovl_inode_cachep == NULL) if (ovl_inode_cachep == NULL)
return -ENOMEM; return -ENOMEM;
err = ovl_init_aio_request_cache();
if (err) {
kmem_cache_destroy(ovl_inode_cachep);
return -ENOMEM;
}
err = register_filesystem(&ovl_fs_type); err = register_filesystem(&ovl_fs_type);
if (err) if (err) {
kmem_cache_destroy(ovl_inode_cachep); kmem_cache_destroy(ovl_inode_cachep);
ovl_exit_aio_request_cache();
}
return err; return err;
} }
...@@ -1732,7 +1740,7 @@ static void __exit ovl_exit(void) ...@@ -1732,7 +1740,7 @@ static void __exit ovl_exit(void)
*/ */
rcu_barrier(); rcu_barrier();
kmem_cache_destroy(ovl_inode_cachep); kmem_cache_destroy(ovl_inode_cachep);
ovl_exit_aio_request_cache();
} }
module_init(ovl_init); module_init(ovl_init);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册