diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2399f56d8a544f6891716d0599b94c730fffc00c..6e0fd98c6bd9e72508071d22a40f417412f64f74 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -934,7 +934,8 @@ struct btrfs_fs_info { spinlock_t delayed_iput_lock; struct list_head delayed_iputs; - struct mutex cleaner_delayed_iput_mutex; + atomic_t nr_delayed_iputs; + wait_queue_head_t delayed_iputs_wait; /* this protects tree_mod_seq_list */ spinlock_t tree_mod_seq_lock; @@ -3282,6 +3283,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root); int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); void btrfs_add_delayed_iput(struct inode *inode); void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info); +int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info); int btrfs_prealloc_file_range(struct inode *inode, int mode, u64 start, u64 num_bytes, u64 min_size, loff_t actual_len, u64 *alloc_hint); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4047867473e16ead5445ed2589525687b229aec2..8c0038de73ee2ebdabf3d890159088a5c0a3608e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1717,9 +1717,7 @@ static int cleaner_kthread(void *arg) goto sleep; } - mutex_lock(&fs_info->cleaner_delayed_iput_mutex); btrfs_run_delayed_iputs(fs_info); - mutex_unlock(&fs_info->cleaner_delayed_iput_mutex); again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&fs_info->cleaner_mutex); @@ -2676,7 +2674,6 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->delete_unused_bgs_mutex); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); - mutex_init(&fs_info->cleaner_delayed_iput_mutex); seqlock_init(&fs_info->profiles_lock); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); @@ -2698,6 +2695,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->defrag_running, 0); atomic_set(&fs_info->qgroup_op_seq, 0); atomic_set(&fs_info->reada_works_cnt, 0); + atomic_set(&fs_info->nr_delayed_iputs, 0); 
atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->sb = sb; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; @@ -2775,6 +2773,7 @@ int open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->transaction_blocked_wait); init_waitqueue_head(&fs_info->async_submit_wait); + init_waitqueue_head(&fs_info->delayed_iputs_wait); INIT_LIST_HEAD(&fs_info->pinned_chunks); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 994c71c9eb7051a947a404558a3b7ecc95fc4b3c..f72935646fb154903abb0fd5f6863ad98194e117 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4279,10 +4279,14 @@ int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) /* * The cleaner kthread might still be doing iput * operations. Wait for it to finish so that - * more space is released. + * more space is released. We don't need to + * explicitly run the delayed iputs here because + * the commit_transaction would have woken up + * the cleaner. */ - mutex_lock(&fs_info->cleaner_delayed_iput_mutex); - mutex_unlock(&fs_info->cleaner_delayed_iput_mutex); + ret = btrfs_wait_on_delayed_iputs(fs_info); + if (ret) + return ret; goto again; } else { btrfs_end_transaction(trans); @@ -4967,9 +4971,8 @@ static void flush_space(struct btrfs_fs_info *fs_info, * bunch of pinned space, so make sure we run the iputs before * we do our pinned bytes check below. 
*/ - mutex_lock(&fs_info->cleaner_delayed_iput_mutex); btrfs_run_delayed_iputs(fs_info); - mutex_unlock(&fs_info->cleaner_delayed_iput_mutex); + btrfs_wait_on_delayed_iputs(fs_info); ret = may_commit_transaction(fs_info, space_info); break; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4a0da2d7758b96bf9c0027d2281b9ef35eb1c119..a443645cf8153f57f902e4f7ff686b97f54682ec 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3256,6 +3256,7 @@ void btrfs_add_delayed_iput(struct inode *inode) if (atomic_add_unless(&inode->i_count, -1, 1)) return; + atomic_inc(&fs_info->nr_delayed_iputs); spin_lock(&fs_info->delayed_iput_lock); ASSERT(list_empty(&binode->delayed_iput)); list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); @@ -3276,11 +3277,32 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) list_del_init(&inode->delayed_iput); spin_unlock(&fs_info->delayed_iput_lock); iput(&inode->vfs_inode); + if (atomic_dec_and_test(&fs_info->nr_delayed_iputs)) + wake_up(&fs_info->delayed_iputs_wait); spin_lock(&fs_info->delayed_iput_lock); } spin_unlock(&fs_info->delayed_iput_lock); } +/** + * btrfs_wait_on_delayed_iputs - wait on the delayed iputs to be done running + * @fs_info: the fs_info for this fs + * Return: -EINTR if we were killed, 0 if nothing's pending + * + * This will wait on any delayed iputs that are currently running with KILLABLE + * set. Once they are all done running we will return, unless we are killed in + * which case we return -EINTR. This helps in user operations like fallocate etc + * that might get blocked on the iputs. + */ +int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info) +{ + int ret = wait_event_killable(fs_info->delayed_iputs_wait, + atomic_read(&fs_info->nr_delayed_iputs) == 0); + if (ret) + return -EINTR; + return 0; +} + /* * This creates an orphan entry for the given inode in case something goes wrong * in the middle of an unlink.