From b7706b3d2369beefbb361adce56697dc9bfc521b Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Thu, 14 Nov 2019 18:48:46 +0800 Subject: [PATCH] ovl: implement async IO routines A performance regression has been observed since Linux v4.19 when running an aio test using fio with iodepth 128 on overlayfs: the queue depth of the device is always 1, which is unexpected. After investigation, it was found that commit 16914e6fc7e1 ("ovl: add ovl_read_iter()") and commit 2a92e07edc5e ("ovl: add ovl_write_iter()") use do_iter_readv_writev() to submit requests to the real filesystem. Async IOs are converted to sync IOs there, which causes the performance regression. So implement async IO for stacked reading and writing. Changes since v1: - add a cleanup helper for completion/error handling - handle the case when aio_req allocation fails Signed-off-by: Jiufei Xue Reviewed-by: Joseph Qi --- fs/overlayfs/file.c | 116 ++++++++++++++++++++++++++++++++++----- fs/overlayfs/overlayfs.h | 2 + fs/overlayfs/super.c | 12 +++- 3 files changed, 114 insertions(+), 16 deletions(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 0bd276e4ccbe..8518efd9dcee 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -14,6 +14,14 @@ #include #include "overlayfs.h" +struct ovl_aio_req { + struct kiocb iocb; + struct kiocb *orig_iocb; + struct fd fd; +}; + +static struct kmem_cache *ovl_aio_request_cachep; + static char ovl_whatisit(struct inode *inode, struct inode *realinode) { if (realinode != ovl_inode_upper(inode)) @@ -228,6 +236,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb) return flags; } +static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) +{ + struct kiocb *iocb = &aio_req->iocb; + struct kiocb *orig_iocb = aio_req->orig_iocb; + + if (iocb->ki_flags & IOCB_WRITE) { + struct inode *inode = file_inode(orig_iocb->ki_filp); + + file_end_write(iocb->ki_filp); + ovl_copyattr(ovl_inode_real(inode), inode); + } + + orig_iocb->ki_pos = iocb->ki_pos; + fdput(aio_req->fd); + 
kmem_cache_free(ovl_aio_request_cachep, aio_req); +} + +static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2) +{ + struct ovl_aio_req *aio_req = container_of(iocb, + struct ovl_aio_req, iocb); + struct kiocb *orig_iocb = aio_req->orig_iocb; + + ovl_aio_cleanup_handler(aio_req); + orig_iocb->ki_complete(orig_iocb, res, res2); +} + static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; @@ -243,14 +278,32 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) return ret; old_cred = ovl_override_creds(file_inode(file)->i_sb); - ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb)); + if (is_sync_kiocb(iocb)) { + ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, + ovl_iocb_to_rwf(iocb)); + ovl_file_accessed(file); + fdput(real); + } else { + struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep, + GFP_NOFS); + if (!aio_req) { + ret = -ENOMEM; + fdput(real); + goto out_revert; + } + + aio_req->fd = real; + aio_req->orig_iocb = iocb; + kiocb_clone(&aio_req->iocb, iocb, real.file); + aio_req->iocb.ki_complete = ovl_aio_rw_complete; + ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); + ovl_file_accessed(file); + if (ret != -EIOCBQUEUED) + ovl_aio_cleanup_handler(aio_req); + } +out_revert: revert_creds(old_cred); - ovl_file_accessed(file); - - fdput(real); - return ret; } @@ -278,15 +331,34 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) old_cred = ovl_override_creds(file_inode(file)->i_sb); file_start_write(real.file); - ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb)); - file_end_write(real.file); - revert_creds(old_cred); - - /* Update size */ - ovl_copyattr(ovl_inode_real(inode), inode); + if (is_sync_kiocb(iocb)) { + ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, + ovl_iocb_to_rwf(iocb)); + file_end_write(real.file); + /* Update size */ + 
ovl_copyattr(ovl_inode_real(inode), inode); + fdput(real); + } else { + struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep, + GFP_NOFS); + if (!aio_req) { + ret = -ENOMEM; + file_end_write(real.file); + fdput(real); + goto out_revert; + } + + aio_req->fd = real; + aio_req->orig_iocb = iocb; + kiocb_clone(&aio_req->iocb, iocb, real.file); + aio_req->iocb.ki_complete = ovl_aio_rw_complete; + ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); + if (ret != -EIOCBQUEUED) + ovl_aio_cleanup_handler(aio_req); + } - fdput(real); +out_revert: + revert_creds(old_cred); out_unlock: inode_unlock(inode); @@ -649,3 +721,19 @@ const struct file_operations ovl_file_operations = { .clone_file_range = ovl_clone_file_range, .dedupe_file_range = ovl_dedupe_file_range, }; + +int __init ovl_init_aio_request_cache(void) +{ + ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req", + sizeof(struct ovl_aio_req), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ovl_aio_request_cachep) + return -ENOMEM; + + return 0; +} + +void ovl_exit_aio_request_cache(void) +{ + kmem_cache_destroy(ovl_aio_request_cachep); +} diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 265bf9cfde08..d0257ca6f251 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -409,6 +409,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); /* file.c */ extern const struct file_operations ovl_file_operations; +int __init ovl_init_aio_request_cache(void); +void ovl_exit_aio_request_cache(void); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 127df4a85c8a..64993ace9f85 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1715,9 +1715,17 @@ static int __init ovl_init(void) if (ovl_inode_cachep == NULL) return -ENOMEM; + err = ovl_init_aio_request_cache(); + if (err) { + kmem_cache_destroy(ovl_inode_cachep); + return -ENOMEM; + } + err = 
register_filesystem(&ovl_fs_type); - if (err) + if (err) { kmem_cache_destroy(ovl_inode_cachep); + ovl_exit_aio_request_cache(); + } return err; } @@ -1732,7 +1740,7 @@ static void __exit ovl_exit(void) */ rcu_barrier(); kmem_cache_destroy(ovl_inode_cachep); - + ovl_exit_aio_request_cache(); } module_init(ovl_init); -- GitLab