提交 5193a88e 编写于 作者: Z Zhang Yi 提交者: Jialin Zhang

ext4: dio take shared inode lock when overwriting preallocated blocks

mainline inclusion
from mainline-v6.3-rc1
commit 240930fb
category: perf
bugzilla: https://gitee.com/openeuler/kernel/issues/I6S63P
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=240930fb7e6b52229bdee5b1423bfeab0002fed2

--------------------------------

In the dio write path, we only take shared inode lock for the case of
aligned overwriting initialized blocks inside EOF. But for overwriting
preallocated blocks, it may only need to split unwritten extents, this
procedure has been protected under i_data_sem lock, it's safe to
release the exclusive inode lock and take shared inode lock.

This could give a significant speed up for multi-threaded writes. Test
on Intel Xeon Gold 6140 and nvme SSD with below fio parameters.

 direct=1
 ioengine=libaio
 iodepth=10
 numjobs=10
 runtime=60
 rw=randwrite
 size=100G

And the test result are:
Before:
 bs=4k       IOPS=11.1k, BW=43.2MiB/s
 bs=16k      IOPS=11.1k, BW=173MiB/s
 bs=64k      IOPS=11.2k, BW=697MiB/s

After:
 bs=4k       IOPS=41.4k, BW=162MiB/s
 bs=16k      IOPS=41.3k, BW=646MiB/s
 bs=64k      IOPS=13.5k, BW=843MiB/s
Signed-off-by: NZhang Yi <yi.zhang@huawei.com>
Reviewed-by: NJan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20221226062015.3479416-1-yi.zhang@huaweicloud.comSigned-off-by: NTheodore Ts'o <tytso@mit.edu>
Conflicts:
	fs/ext4/file.c
	[ 2f632965("iomap: pass a flags argument to iomap_dio_rw")
	  is not applied. ]
Signed-off-by: NZhihao Cheng <chengzhihao1@huawei.com>
Reviewed-by: NZhang Yi <yi.zhang@huawei.com>
Signed-off-by: NJialin Zhang <zhangjialin11@huawei.com>
上级 b66c23ec
...@@ -190,8 +190,9 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len) ...@@ -190,8 +190,9 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
return false; return false;
} }
/* Is IO overwriting allocated and initialized blocks? */ /* Is IO overwriting allocated or initialized blocks? */
static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) static bool ext4_overwrite_io(struct inode *inode,
loff_t pos, loff_t len, bool *unwritten)
{ {
struct ext4_map_blocks map; struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits; unsigned int blkbits = inode->i_blkbits;
...@@ -205,12 +206,15 @@ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) ...@@ -205,12 +206,15 @@ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
blklen = map.m_len; blklen = map.m_len;
err = ext4_map_blocks(NULL, inode, &map, 0); err = ext4_map_blocks(NULL, inode, &map, 0);
if (err != blklen)
return false;
/* /*
* 'err==len' means that all of the blocks have been preallocated, * 'err==len' means that all of the blocks have been preallocated,
* regardless of whether they have been initialized or not. To exclude * regardless of whether they have been initialized or not. We need to
* unwritten extents, we need to check m_flags. * check m_flags to distinguish the unwritten extents.
*/ */
return err == blklen && (map.m_flags & EXT4_MAP_MAPPED); *unwritten = !(map.m_flags & EXT4_MAP_MAPPED);
return true;
} }
static ssize_t ext4_generic_write_checks(struct kiocb *iocb, static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
...@@ -421,11 +425,16 @@ static const struct iomap_dio_ops ext4_dio_write_ops = { ...@@ -421,11 +425,16 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
* - For extending writes case we don't take the shared lock, since it requires * - For extending writes case we don't take the shared lock, since it requires
* updating inode i_disksize and/or orphan handling with exclusive lock. * updating inode i_disksize and/or orphan handling with exclusive lock.
* *
* - shared locking will only be true mostly with overwrites. Otherwise we will * - shared locking will only be true mostly with overwrites, including
* switch to exclusive i_rwsem lock. * initialized blocks and unwritten blocks. For overwrite unwritten blocks
* we protect splitting extents by i_data_sem in ext4_inode_info, so we can
* also release exclusive i_rwsem lock.
*
* - Otherwise we will switch to exclusive i_rwsem lock.
*/ */
static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from, static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
bool *ilock_shared, bool *extend) bool *ilock_shared, bool *extend,
bool *unwritten)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
...@@ -449,7 +458,7 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from, ...@@ -449,7 +458,7 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
* in file_modified(). * in file_modified().
*/ */
if (*ilock_shared && (!IS_NOSEC(inode) || *extend || if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
!ext4_overwrite_io(inode, offset, count))) { !ext4_overwrite_io(inode, offset, count, unwritten))) {
if (iocb->ki_flags & IOCB_NOWAIT) { if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN; ret = -EAGAIN;
goto out; goto out;
...@@ -481,7 +490,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -481,7 +490,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t offset = iocb->ki_pos; loff_t offset = iocb->ki_pos;
size_t count = iov_iter_count(from); size_t count = iov_iter_count(from);
const struct iomap_ops *iomap_ops = &ext4_iomap_ops; const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
bool extend = false, unaligned_io = false; bool extend = false, unaligned_io = false, unwritten = false;
bool ilock_shared = true; bool ilock_shared = true;
/* /*
...@@ -524,7 +533,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -524,7 +533,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ext4_buffered_write_iter(iocb, from); return ext4_buffered_write_iter(iocb, from);
} }
ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend); ret = ext4_dio_write_checks(iocb, from,
&ilock_shared, &extend, &unwritten);
if (ret <= 0) if (ret <= 0)
return ret; return ret;
...@@ -568,7 +578,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -568,7 +578,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
ext4_journal_stop(handle); ext4_journal_stop(handle);
} }
if (ilock_shared) if (ilock_shared && !unwritten)
iomap_ops = &ext4_iomap_overwrite_ops; iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
is_sync_kiocb(iocb) || unaligned_io || extend); is_sync_kiocb(iocb) || unaligned_io || extend);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册