提交 5b2a4093 编写于 作者: Y yangerkun 提交者: Xie XiuQi

Revert "ext4: remove EXT4_STATE_DIOREAD_LOCK flag"

euler inclusion
category: perf
bugzilla: 12801
CVE: NA
---------------------------

This reverts commit 1d39834f.
Signed-off-by: Nyangerkun <yangerkun@huawei.coom>
Reviewed-by: NMiao Xie <miaoxie@huawei.com>
Signed-off-by: NYang Yingliang <yangyingliang@huawei.com>
上级 98887c58
......@@ -1545,6 +1545,8 @@ enum {
EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
EXT4_STATE_NEWENTRY, /* File just added to dir */
EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
nolocking */
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
......@@ -3220,6 +3222,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
}
/*
* Disable DIO read nolock optimization, so new dioreaders will be forced
* to grab i_mutex
*/
static inline void ext4_inode_block_unlocked_dio(struct inode *inode)
{
ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
smp_mb();
}
static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
{
smp_mb();
ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
}
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
/* For ioend & aio unwritten conversion wait queues */
......
......@@ -4802,6 +4802,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
/* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
/* Preallocate the range including the unaligned edges */
......@@ -4812,7 +4813,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
round_down(offset, 1 << blkbits)) >> blkbits,
new_size, flags);
if (ret)
goto out_mutex;
goto out_dio;
}
......@@ -4836,7 +4837,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
ret = ext4_update_disksize_before_punch(inode, offset, len);
if (ret) {
up_write(&EXT4_I(inode)->i_mmap_sem);
goto out_mutex;
goto out_dio;
}
/* Now release the pages and zero block aligned part of pages */
truncate_pagecache_range(inode, start, end - 1);
......@@ -4846,10 +4847,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
flags);
up_write(&EXT4_I(inode)->i_mmap_sem);
if (ret)
goto out_mutex;
goto out_dio;
}
if (!partial_begin && !partial_end)
goto out_mutex;
goto out_dio;
/*
* In worst case we have to writeout two nonadjacent unwritten
......@@ -4862,7 +4863,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
ext4_std_error(inode->i_sb, ret);
goto out_mutex;
goto out_dio;
}
inode->i_mtime = inode->i_ctime = current_time(inode);
......@@ -4887,6 +4888,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
ext4_handle_sync(handle);
ext4_journal_stop(handle);
out_dio:
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
inode_unlock(inode);
return ret;
......@@ -4974,9 +4977,11 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
}
/* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
ext4_inode_resume_unlocked_dio(inode);
if (ret)
goto out;
......@@ -5499,6 +5504,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
}
/* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
/*
......@@ -5580,6 +5586,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ext4_journal_stop(handle);
out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
inode_unlock(inode);
return ret;
......@@ -5652,6 +5659,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
}
/* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
/*
......@@ -5758,6 +5766,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
ext4_journal_stop(handle);
out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
inode_unlock(inode);
return ret;
......
......@@ -4294,6 +4294,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
}
/* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
/*
......@@ -4371,6 +4372,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
ext4_journal_stop(handle);
out_dio:
up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
inode_unlock(inode);
return ret;
......@@ -5612,7 +5614,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
*/
if (orphan) {
if (!ext4_should_journal_data(inode)) {
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
ext4_inode_resume_unlocked_dio(inode);
} else
ext4_wait_for_tail_page_commit(inode);
}
......@@ -6112,6 +6116,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return -EROFS;
/* Wait for all existing dio workers */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
/*
......@@ -6127,6 +6132,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
err = filemap_write_and_wait(inode->i_mapping);
if (err < 0) {
up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
return err;
}
}
......@@ -6149,6 +6155,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
if (err < 0) {
jbd2_journal_unlock_updates(journal);
percpu_up_write(&sbi->s_journal_flag_rwsem);
ext4_inode_resume_unlocked_dio(inode);
return err;
}
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
......@@ -6160,6 +6167,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
if (val)
up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
/* Finally we can mark the inode as dirty. */
......
......@@ -154,6 +154,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
goto err_out;
/* Wait for all existing dio workers */
ext4_inode_block_unlocked_dio(inode);
ext4_inode_block_unlocked_dio(inode_bl);
inode_dio_wait(inode);
inode_dio_wait(inode_bl);
......@@ -252,6 +254,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
err_out:
up_write(&EXT4_I(inode)->i_mmap_sem);
journal_err_out:
ext4_inode_resume_unlocked_dio(inode);
ext4_inode_resume_unlocked_dio(inode_bl);
unlock_two_nondirectories(inode, inode_bl);
iput(inode_bl);
return err;
......
......@@ -603,6 +603,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
lock_two_nondirectories(orig_inode, donor_inode);
/* Wait for all existing dio workers */
ext4_inode_block_unlocked_dio(orig_inode);
ext4_inode_block_unlocked_dio(donor_inode);
inode_dio_wait(orig_inode);
inode_dio_wait(donor_inode);
......@@ -693,6 +695,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
ext4_ext_drop_refs(path);
kfree(path);
ext4_double_up_write_data_sem(orig_inode, donor_inode);
ext4_inode_resume_unlocked_dio(orig_inode);
ext4_inode_resume_unlocked_dio(donor_inode);
unlock_two_nondirectories(orig_inode, donor_inode);
return ret;
......
......@@ -101,13 +101,15 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
* i_data_sem (rw)
*
* truncate:
* sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
* sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
* i_data_sem (rw)
* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
* i_mmap_rwsem (w) -> page lock
* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
* transaction start -> i_data_sem (rw)
*
* direct IO:
* sb_start_write -> i_mutex -> mmap_sem
* sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
* transaction start -> i_data_sem (rw)
*
* writepages:
* transaction start -> page lock(s) -> i_data_sem (rw)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册