diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2ddf7833350dd1a0155a52a5ccf12837ed8c2da5..520beefdb216ef8a69a1249f0f5f93f6217421fe 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1545,6 +1545,8 @@ enum { EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ EXT4_STATE_NEWENTRY, /* File just added to dir */ + EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read + nolocking */ EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */ @@ -3220,6 +3222,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); } +/* + * Disable DIO read nolock optimization, so new dioreaders will be forced + * to grab i_mutex + */ +static inline void ext4_inode_block_unlocked_dio(struct inode *inode) +{ + ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); + smp_mb(); +} +static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) +{ + smp_mb(); + ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); +} + #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) /* For ioend & aio unwritten conversion wait queues */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 72a361d5ef7477c6687820b9d7f05164d1e37f65..2874fad5139a6b397f1acd950c27d4ddcb1ae845 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4802,6 +4802,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, flags |= EXT4_GET_BLOCKS_KEEP_SIZE; /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); /* Preallocate the range including the unaligned edges */ @@ -4812,7 +4813,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, round_down(offset, 1 << blkbits)) >> blkbits, new_size, flags); if (ret) - goto out_mutex; + goto out_dio; } @@ -4836,7 +4837,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, ret = ext4_update_disksize_before_punch(inode, offset, len); if (ret) { up_write(&EXT4_I(inode)->i_mmap_sem); - goto out_mutex; + goto out_dio; } /* Now release the pages and zero block aligned part of pages */ truncate_pagecache_range(inode, start, end - 1); @@ -4846,10 +4847,10 @@ static long ext4_zero_range(struct file *file, loff_t offset, flags); up_write(&EXT4_I(inode)->i_mmap_sem); if (ret) - goto out_mutex; + goto out_dio; } if (!partial_begin && !partial_end) - goto out_mutex; + goto out_dio; /* * In worst case we have to writeout two nonadjacent unwritten @@ -4862,7 +4863,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (IS_ERR(handle)) { ret = PTR_ERR(handle); ext4_std_error(inode->i_sb, ret); - goto out_mutex; + goto out_dio; } inode->i_mtime = inode->i_ctime = current_time(inode); @@ -4887,6 +4888,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, ext4_handle_sync(handle); ext4_journal_stop(handle); +out_dio: + ext4_inode_resume_unlocked_dio(inode); out_mutex: inode_unlock(inode); return ret; @@ -4974,9 +4977,11 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) } /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); + ext4_inode_resume_unlocked_dio(inode); if (ret) goto out; @@ -5499,6 +5504,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) } /* Wait for existing dio to complete */ + ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); /* @@ -5580,6 +5586,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ext4_journal_stop(handle); out_mmap: up_write(&EXT4_I(inode)->i_mmap_sem); + ext4_inode_resume_unlocked_dio(inode); out_mutex: inode_unlock(inode); return ret; @@ -5652,6 +5659,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) } /* Wait for existing dio to complete */ + ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); /* @@ -5758,6 +5766,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) ext4_journal_stop(handle); out_mmap: up_write(&EXT4_I(inode)->i_mmap_sem); + ext4_inode_resume_unlocked_dio(inode); out_mutex: inode_unlock(inode); return ret; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2c43c5b92229d57509c91ce38d1a9c5660c3e485..81ce6bf2dacfecb5dd2b44af66d4f98af34c66e1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4294,6 +4294,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); /* @@ -4371,6 +4372,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) ext4_journal_stop(handle); out_dio: up_write(&EXT4_I(inode)->i_mmap_sem); + ext4_inode_resume_unlocked_dio(inode); out_mutex: inode_unlock(inode); return ret; @@ -5612,7 +5614,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) */ if (orphan) { if (!ext4_should_journal_data(inode)) { + ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); + ext4_inode_resume_unlocked_dio(inode); } else ext4_wait_for_tail_page_commit(inode); } @@ -6112,6 +6116,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) return -EROFS; /* Wait for all existing dio workers */ + ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); /* @@ -6127,6 +6132,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) err = filemap_write_and_wait(inode->i_mapping); if (err < 0) { up_write(&EXT4_I(inode)->i_mmap_sem); + ext4_inode_resume_unlocked_dio(inode); return err; } } @@ -6149,6 +6155,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) if (err < 0) { jbd2_journal_unlock_updates(journal); percpu_up_write(&sbi->s_journal_flag_rwsem); + ext4_inode_resume_unlocked_dio(inode); return err; } ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); @@ -6160,6 +6167,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) if (val) up_write(&EXT4_I(inode)->i_mmap_sem); + ext4_inode_resume_unlocked_dio(inode); /* Finally we can mark the inode as dirty. */ diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index eb8ca8d80885ad917d9892a26c467dbae6a436e9..80bee0bfbc9f6ad08a0995aad9295a5a2b197cb6 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -154,6 +154,8 @@ static long swap_inode_boot_loader(struct super_block *sb, goto err_out; /* Wait for all existing dio workers */ + ext4_inode_block_unlocked_dio(inode); + ext4_inode_block_unlocked_dio(inode_bl); inode_dio_wait(inode); inode_dio_wait(inode_bl); @@ -252,6 +254,8 @@ static long swap_inode_boot_loader(struct super_block *sb, err_out: up_write(&EXT4_I(inode)->i_mmap_sem); journal_err_out: + ext4_inode_resume_unlocked_dio(inode); + ext4_inode_resume_unlocked_dio(inode_bl); unlock_two_nondirectories(inode, inode_bl); iput(inode_bl); return err; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 2f5be02fc6f6a2194f45adb5429f658c2d0a184a..2afa4c432850971b6ff98b69a20516b3dc4b71ea 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -603,6 +603,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, lock_two_nondirectories(orig_inode, donor_inode); /* Wait for all existing dio workers */ + ext4_inode_block_unlocked_dio(orig_inode); + ext4_inode_block_unlocked_dio(donor_inode); inode_dio_wait(orig_inode); inode_dio_wait(donor_inode); @@ -693,6 +695,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, ext4_ext_drop_refs(path); kfree(path); ext4_double_up_write_data_sem(orig_inode, donor_inode); + ext4_inode_resume_unlocked_dio(orig_inode); + ext4_inode_resume_unlocked_dio(donor_inode); unlock_two_nondirectories(orig_inode, donor_inode); return ret; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a1cf7d68b4f068ed48859115ced0c3c3094d0454..852bfe3675dd927fdca9564c46a2c6e57b525844 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -101,13 +101,15 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, * i_data_sem (rw) * * truncate: - * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock - * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start -> - * i_data_sem (rw) + * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) -> + * i_mmap_rwsem (w) -> page lock + * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) -> + * transaction start -> i_data_sem (rw) * * direct IO: - * sb_start_write -> i_mutex -> mmap_sem - * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw) + * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem + * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> + * transaction start -> i_data_sem (rw) * * writepages: * transaction start -> page lock(s) -> i_data_sem (rw)