提交 24f8ebe9 编写于 作者: C Chris Mason
......@@ -3997,7 +3997,7 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
* We make the other tasks wait for the flush only when we can flush
* all things.
*/
if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) {
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
flushing = true;
space_info->flush = 1;
}
......@@ -5560,7 +5560,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
int empty_cluster = 2 * 1024 * 1024;
struct btrfs_space_info *space_info;
int loop = 0;
int index = 0;
int index = __get_raid_index(data);
int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
bool found_uncached_bg = false;
......@@ -6788,11 +6788,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
&wc->flags[level]);
if (ret < 0) {
btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
return ret;
}
BUG_ON(wc->refs[level] == 0);
if (wc->refs[level] == 1) {
btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
return 1;
}
}
......
......@@ -171,6 +171,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
return 0;
if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
test_bit(EXTENT_FLAG_LOGGING, &next->flags))
return 0;
if (extent_map_end(prev) == next->start &&
prev->flags == next->flags &&
prev->bdev == next->bdev &&
......@@ -256,7 +260,8 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
if (!em)
goto out;
list_move(&em->list, &tree->modified_extents);
if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
list_move(&em->list, &tree->modified_extents);
em->generation = gen;
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
em->mod_start = em->start;
......@@ -281,6 +286,13 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
}
/**
 * clear_em_logging - drop the logging flag from an extent map
 * @tree:	extent map tree that @em may be linked into
 * @em:		extent map whose EXTENT_FLAG_LOGGING bit is cleared
 *
 * Clears EXTENT_FLAG_LOGGING in @em->flags and, when @em is still marked
 * as being in a tree (@em->in_tree), asks try_merge_map() to merge it with
 * its neighbours.
 *
 * NOTE(review): the callers visible in this change invoke this with
 * tree->lock held for writing -- confirm that is a requirement.
 */
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
{
	clear_bit(EXTENT_FLAG_LOGGING, &em->flags);

	/* A map that is not linked into the tree has nothing to merge with. */
	if (!em->in_tree)
		return;

	try_merge_map(tree, em);
}
/**
* add_extent_mapping - add new extent map to the extent tree
* @tree: tree to insert new map in
......
......@@ -69,6 +69,7 @@ void free_extent_map(struct extent_map *em);
int __init extent_map_init(void);
void extent_map_exit(void);
int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len);
#endif
......@@ -460,8 +460,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
if (!contig)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
if (!contig && (offset >= ordered->file_offset + ordered->len ||
offset < ordered->file_offset)) {
if (offset >= ordered->file_offset + ordered->len ||
offset < ordered->file_offset) {
unsigned long bytes_left;
sums->len = this_sum_bytes;
this_sum_bytes = 0;
......
......@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
struct btrfs_key key;
struct btrfs_ioctl_defrag_range_args range;
int num_defrag;
int index;
int ret;
/* get the inode */
key.objectid = defrag->root;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
key.offset = (u64)-1;
index = srcu_read_lock(&fs_info->subvol_srcu);
inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
if (IS_ERR(inode_root)) {
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
return PTR_ERR(inode_root);
ret = PTR_ERR(inode_root);
goto cleanup;
}
if (btrfs_root_refs(&inode_root->root_item) == 0) {
ret = -ENOENT;
goto cleanup;
}
key.objectid = defrag->ino;
......@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
if (IS_ERR(inode)) {
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
return PTR_ERR(inode);
ret = PTR_ERR(inode);
goto cleanup;
}
srcu_read_unlock(&fs_info->subvol_srcu, index);
/* do a chunk of defrag */
clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
......@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
iput(inode);
return 0;
cleanup:
srcu_read_unlock(&fs_info->subvol_srcu, index);
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
return ret;
}
/*
......@@ -1595,9 +1609,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
if (err < 0 && num_written > 0)
num_written = err;
}
out:
if (sync)
atomic_dec(&BTRFS_I(inode)->sync_writers);
out:
sb_end_write(inode->i_sb);
current->backing_dev_info = NULL;
return num_written ? num_written : err;
......@@ -2242,6 +2257,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
if (lockend <= lockstart)
lockend = lockstart + root->sectorsize;
lockend--;
len = lockend - lockstart + 1;
len = max_t(u64, len, root->sectorsize);
......@@ -2308,9 +2324,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
}
}
*offset = start;
free_extent_map(em);
break;
if (!test_bit(EXTENT_FLAG_PREALLOC,
&em->flags)) {
*offset = start;
free_extent_map(em);
break;
}
}
}
......
......@@ -1862,11 +1862,13 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *info;
int ret = 0;
int ret;
bool re_search = false;
spin_lock(&ctl->tree_lock);
again:
ret = 0;
if (!bytes)
goto out_lock;
......@@ -1879,17 +1881,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1, 0);
if (!info) {
/* the tree logging code might be calling us before we
* have fully loaded the free space rbtree for this
* block group. So it is possible the entry won't
* be in the rbtree yet at all. The caching code
* will make sure not to put it in the rbtree if
* the logging code has pinned it.
/*
* If we found a partial bit of our free space in a
* bitmap but then couldn't find the other part this may
* be a problem, so WARN about it.
*/
WARN_ON(re_search);
goto out_lock;
}
}
re_search = false;
if (!info->bitmap) {
unlink_free_space(ctl, info);
if (offset == info->offset) {
......@@ -1935,8 +1937,10 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
}
ret = remove_from_bitmap(ctl, info, &offset, &bytes);
if (ret == -EAGAIN)
if (ret == -EAGAIN) {
re_search = true;
goto again;
}
BUG_ON(ret); /* logic error */
out_lock:
spin_unlock(&ctl->tree_lock);
......
......@@ -88,7 +88,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
};
static int btrfs_setsize(struct inode *inode, loff_t newsize);
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
......@@ -2478,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
continue;
}
nr_truncate++;
/* 1 for the orphan item deletion. */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
ret = btrfs_orphan_add(trans, inode);
btrfs_end_transaction(trans, root);
if (ret)
goto out;
ret = btrfs_truncate(inode);
} else {
nr_unlink++;
......@@ -3665,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
block_end - cur_offset, 0);
if (IS_ERR(em)) {
err = PTR_ERR(em);
em = NULL;
break;
}
last_byte = min(extent_map_end(em), block_end);
......@@ -3748,16 +3761,27 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
return err;
}
static int btrfs_setsize(struct inode *inode, loff_t newsize)
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
loff_t oldsize = i_size_read(inode);
loff_t newsize = attr->ia_size;
int mask = attr->ia_valid;
int ret;
if (newsize == oldsize)
return 0;
/*
* The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
* special case where we need to update the times despite not having
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
if (newsize > oldsize) {
truncate_pagecache(inode, oldsize, newsize);
ret = btrfs_cont_expand(inode, oldsize, newsize);
......@@ -3783,9 +3807,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags);
/*
* 1 for the orphan item we're going to add
* 1 for the orphan item deletion.
*/
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
/*
* We need to do this in case we fail at _any_ point during the
* actual truncate. Once we do the truncate_setsize we could
* invalidate pages which forces any outstanding ordered io to
* be instantly completed which will give us extents that need
* to be truncated. If we fail to get an orphan inode down we
* could have left over extents that were never meant to live,
* so we need to guarantee from this point on that everything
* will be consistent.
*/
ret = btrfs_orphan_add(trans, inode);
btrfs_end_transaction(trans, root);
if (ret)
return ret;
/* we don't support swapfiles, so vmtruncate shouldn't fail */
truncate_setsize(inode, newsize);
ret = btrfs_truncate(inode);
if (ret && inode->i_nlink)
btrfs_orphan_del(NULL, inode);
}
return ret;
......@@ -3805,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
err = btrfs_setsize(inode, attr->ia_size);
err = btrfs_setsize(inode, attr);
if (err)
return err;
}
......@@ -5586,10 +5635,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
return em;
if (em) {
/*
* if our em maps to a hole, there might
* actually be delalloc bytes behind it
* if our em maps to
* - a hole or
* - a pre-alloc extent,
* there might actually be delalloc bytes behind it.
*/
if (em->block_start != EXTENT_MAP_HOLE)
if (em->block_start != EXTENT_MAP_HOLE &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
return em;
else
hole_em = em;
......@@ -5671,6 +5723,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
*/
em->block_start = hole_em->block_start;
em->block_len = hole_len;
if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
} else {
em->start = range_start;
em->len = found;
......@@ -6929,11 +6983,9 @@ static int btrfs_truncate(struct inode *inode)
/*
* 1 for the truncate slack space
* 1 for the orphan item we're going to add
* 1 for the orphan item deletion
* 1 for updating the inode.
*/
trans = btrfs_start_transaction(root, 4);
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out;
......@@ -6944,12 +6996,6 @@ static int btrfs_truncate(struct inode *inode)
min_size);
BUG_ON(ret);
ret = btrfs_orphan_add(trans, inode);
if (ret) {
btrfs_end_transaction(trans, root);
goto out;
}
/*
* setattr is responsible for setting the ordered_data_close flag,
* but that is only tested during the last file release. That
......@@ -7018,12 +7064,6 @@ static int btrfs_truncate(struct inode *inode)
ret = btrfs_orphan_del(trans, inode);
if (ret)
err = ret;
} else if (ret && inode->i_nlink > 0) {
/*
* Failed to do the truncate, remove us from the in memory
* orphan list.
*/
ret = btrfs_orphan_del(NULL, inode);
}
if (trans) {
......@@ -7545,41 +7585,61 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
*/
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
{
struct list_head *head = &root->fs_info->delalloc_inodes;
struct btrfs_inode *binode;
struct inode *inode;
struct btrfs_delalloc_work *work, *next;
struct list_head works;
struct list_head splice;
int ret = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
INIT_LIST_HEAD(&works);
INIT_LIST_HEAD(&splice);
again:
spin_lock(&root->fs_info->delalloc_lock);
while (!list_empty(head)) {
binode = list_entry(head->next, struct btrfs_inode,
list_splice_init(&root->fs_info->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
binode = list_entry(splice.next, struct btrfs_inode,
delalloc_inodes);
list_del_init(&binode->delalloc_inodes);
inode = igrab(&binode->vfs_inode);
if (!inode)
list_del_init(&binode->delalloc_inodes);
continue;
list_add_tail(&binode->delalloc_inodes,
&root->fs_info->delalloc_inodes);
spin_unlock(&root->fs_info->delalloc_lock);
if (inode) {
work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
if (!work) {
ret = -ENOMEM;
goto out;
}
list_add_tail(&work->list, &works);
btrfs_queue_worker(&root->fs_info->flush_workers,
&work->work);
work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
if (unlikely(!work)) {
ret = -ENOMEM;
goto out;
}
list_add_tail(&work->list, &works);
btrfs_queue_worker(&root->fs_info->flush_workers,
&work->work);
cond_resched();
spin_lock(&root->fs_info->delalloc_lock);
}
spin_unlock(&root->fs_info->delalloc_lock);
list_for_each_entry_safe(work, next, &works, list) {
list_del_init(&work->list);
btrfs_wait_and_free_delalloc_work(work);
}
spin_lock(&root->fs_info->delalloc_lock);
if (!list_empty(&root->fs_info->delalloc_inodes)) {
spin_unlock(&root->fs_info->delalloc_lock);
goto again;
}
spin_unlock(&root->fs_info->delalloc_lock);
/* the filemap_flush will queue IO into the worker threads, but
* we have to make sure the IO is actually started and that
* ordered extents get created before we return
......@@ -7592,11 +7652,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
atomic_read(&root->fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&root->fs_info->async_submit_draining);
return 0;
out:
list_for_each_entry_safe(work, next, &works, list) {
list_del_init(&work->list);
btrfs_wait_and_free_delalloc_work(work);
}
if (!list_empty_careful(&splice)) {
spin_lock(&root->fs_info->delalloc_lock);
list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
spin_unlock(&root->fs_info->delalloc_lock);
}
return ret;
}
......
......@@ -1339,7 +1339,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
return -EINPROGRESS;
mnt_drop_write_file(file);
return -EINVAL;
}
mutex_lock(&root->fs_info->volume_mutex);
......@@ -1362,6 +1363,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
printk(KERN_INFO "btrfs: resizing devid %llu\n",
(unsigned long long)devid);
}
device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
if (!device) {
printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
......@@ -1369,9 +1371,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
ret = -EINVAL;
goto out_free;
}
if (device->fs_devices && device->fs_devices->seeding) {
if (!device->writeable) {
printk(KERN_INFO "btrfs: resizer unable to apply on "
"seeding device %llu\n",
"readonly device %llu\n",
(unsigned long long)devid);
ret = -EINVAL;
goto out_free;
......@@ -1443,8 +1446,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
mnt_drop_write_file(file);
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
mnt_drop_write_file(file);
return ret;
}
......@@ -2095,13 +2098,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
if (err)
goto out_dput;
/* check if subvolume may be deleted by a non-root user */
err = btrfs_may_delete(dir, dentry, 1);
if (err)
goto out_dput;
}
/* check if subvolume may be deleted by a user */
err = btrfs_may_delete(dir, dentry, 1);
if (err)
goto out_dput;
if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL;
goto out_dput;
......@@ -2183,19 +2186,20 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
struct btrfs_ioctl_defrag_range_args *range;
int ret;
if (btrfs_root_readonly(root))
return -EROFS;
ret = mnt_want_write_file(file);
if (ret)
return ret;
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
return -EINPROGRESS;
mnt_drop_write_file(file);
return -EINVAL;
}
ret = mnt_want_write_file(file);
if (ret) {
atomic_set(&root->fs_info->mutually_exclusive_operation_running,
0);
return ret;
if (btrfs_root_readonly(root)) {
ret = -EROFS;
goto out;
}
switch (inode->i_mode & S_IFMT) {
......@@ -2247,8 +2251,8 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
ret = -EINVAL;
}
out:
mnt_drop_write_file(file);
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
mnt_drop_write_file(file);
return ret;
}
......@@ -2263,7 +2267,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
return -EINPROGRESS;
return -EINVAL;
}
mutex_lock(&root->fs_info->volume_mutex);
......@@ -2300,7 +2304,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
mnt_drop_write_file(file);
return -EINPROGRESS;
return -EINVAL;
}
mutex_lock(&root->fs_info->volume_mutex);
......@@ -2316,8 +2320,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
mnt_drop_write_file(file);
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
mnt_drop_write_file(file);
return ret;
}
......@@ -3437,8 +3441,8 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
struct btrfs_balance_control *bctl;
bool need_unlock; /* for mut. excl. ops lock */
int ret;
int need_to_clear_lock = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
......@@ -3447,14 +3451,61 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
if (ret)
return ret;
mutex_lock(&fs_info->volume_mutex);
again:
if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
need_unlock = true;
goto locked;
}
/*
* mut. excl. ops lock is locked. Three possibilities:
* (1) some other op is running
* (2) balance is running
* (3) balance is paused -- special case (think resume)
*/
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl) {
/* this is either (2) or (3) */
if (!atomic_read(&fs_info->balance_running)) {
mutex_unlock(&fs_info->balance_mutex);
if (!mutex_trylock(&fs_info->volume_mutex))
goto again;
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl &&
!atomic_read(&fs_info->balance_running)) {
/* this is (3) */
need_unlock = false;
goto locked;
}
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
goto again;
} else {
/* this is (2) */
mutex_unlock(&fs_info->balance_mutex);
ret = -EINPROGRESS;
goto out;
}
} else {
/* this is (1) */
mutex_unlock(&fs_info->balance_mutex);
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
ret = -EINVAL;
goto out;
}
locked:
BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
if (arg) {
bargs = memdup_user(arg, sizeof(*bargs));
if (IS_ERR(bargs)) {
ret = PTR_ERR(bargs);
goto out;
goto out_unlock;
}
if (bargs->flags & BTRFS_BALANCE_RESUME) {
......@@ -3474,13 +3525,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
bargs = NULL;
}
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
if (fs_info->balance_ctl) {
ret = -EINPROGRESS;
goto out_bargs;
}
need_to_clear_lock = 1;
bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
if (!bctl) {
......@@ -3501,11 +3549,17 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
}
do_balance:
ret = btrfs_balance(bctl, bargs);
/*
* bctl is freed in __cancel_balance or in free_fs_info if
* restriper was paused all the way until unmount
* Ownership of bctl and mutually_exclusive_operation_running
* goes to btrfs_balance. bctl is freed in __cancel_balance,
* or, if restriper was paused all the way until unmount, in
* free_fs_info. mutually_exclusive_operation_running is
* cleared in __cancel_balance.
*/
need_unlock = false;
ret = btrfs_balance(bctl, bargs);
if (arg) {
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
......@@ -3513,12 +3567,12 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
out_bargs:
kfree(bargs);
out:
if (need_to_clear_lock)
atomic_set(&root->fs_info->mutually_exclusive_operation_running,
0);
out_unlock:
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
if (need_unlock)
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
out:
mnt_drop_write_file(file);
return ret;
}
......@@ -3698,6 +3752,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
goto drop_write;
}
if (!sa->qgroupid) {
ret = -EINVAL;
goto out;
}
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
......
......@@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
* if the disk i_size is already at the inode->i_size, or
* this ordered extent is inside the disk i_size, we're done
*/
if (disk_i_size == i_size || offset <= disk_i_size) {
if (disk_i_size == i_size)
goto out;
/*
* We still need to update disk_i_size if outstanding_isize is greater
* than disk_i_size.
*/
if (offset <= disk_i_size &&
(!ordered || ordered->outstanding_isize <= disk_i_size))
goto out;
}
/*
* walk backward from this ordered extent to disk_i_size.
......@@ -870,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
break;
if (test->file_offset >= i_size)
break;
if (test->file_offset >= disk_i_size) {
if (entry_end(test) > disk_i_size) {
/*
* we don't update disk_i_size now, so record this
* undealt i_size. Or we will not know the real
......
......@@ -379,6 +379,13 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
ret = add_relation_rb(fs_info, found_key.objectid,
found_key.offset);
if (ret == -ENOENT) {
printk(KERN_WARNING
"btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
(unsigned long long)found_key.objectid,
(unsigned long long)found_key.offset);
ret = 0; /* ignore the error */
}
if (ret)
goto out;
next2:
......@@ -956,17 +963,28 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
quota_root = fs_info->quota_root;
if (!quota_root)
return -EINVAL;
/* check if there are no relations to this qgroup */
spin_lock(&fs_info->qgroup_lock);
qgroup = find_qgroup_rb(fs_info, qgroupid);
if (qgroup) {
if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
spin_unlock(&fs_info->qgroup_lock);
return -EBUSY;
}
}
spin_unlock(&fs_info->qgroup_lock);
ret = del_qgroup_item(trans, quota_root, qgroupid);
spin_lock(&fs_info->qgroup_lock);
del_qgroup_rb(quota_root->fs_info, qgroupid);
spin_unlock(&fs_info->qgroup_lock);
return ret;
......
......@@ -580,20 +580,29 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
int corrected = 0;
struct btrfs_key key;
struct inode *inode = NULL;
struct btrfs_fs_info *fs_info;
u64 end = offset + PAGE_SIZE - 1;
struct btrfs_root *local_root;
int srcu_index;
key.objectid = root;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key);
if (IS_ERR(local_root))
fs_info = fixup->root->fs_info;
srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
local_root = btrfs_read_fs_root_no_name(fs_info, &key);
if (IS_ERR(local_root)) {
srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
return PTR_ERR(local_root);
}
key.type = BTRFS_INODE_ITEM_KEY;
key.objectid = inum;
key.offset = 0;
inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL);
inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
if (IS_ERR(inode))
return PTR_ERR(inode);
......@@ -606,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
}
if (PageUptodate(page)) {
struct btrfs_fs_info *fs_info;
if (PageDirty(page)) {
/*
* we need to write the data to the defect sector. the
......@@ -3180,18 +3188,25 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
u64 physical_for_dev_replace;
u64 len;
struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
int srcu_index;
key.objectid = root;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
local_root = btrfs_read_fs_root_no_name(fs_info, &key);
if (IS_ERR(local_root))
if (IS_ERR(local_root)) {
srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
return PTR_ERR(local_root);
}
key.type = BTRFS_INODE_ITEM_KEY;
key.objectid = inum;
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
if (IS_ERR(inode))
return PTR_ERR(inode);
......
......@@ -1814,8 +1814,10 @@ static int name_cache_insert(struct send_ctx *sctx,
(unsigned long)nce->ino);
if (!nce_head) {
nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
if (!nce_head)
if (!nce_head) {
kfree(nce);
return -ENOMEM;
}
INIT_LIST_HEAD(nce_head);
ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
......
......@@ -267,7 +267,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
function, line, errstr);
return;
}
trans->transaction->aborted = errno;
ACCESS_ONCE(trans->transaction->aborted) = errno;
__btrfs_std_error(root->fs_info, function, line, errno, NULL);
}
/*
......
......@@ -333,12 +333,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
&root->fs_info->trans_block_rsv,
num_bytes, flush);
if (ret)
return ERR_PTR(ret);
goto reserve_fail;
}
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
return ERR_PTR(-ENOMEM);
if (!h) {
ret = -ENOMEM;
goto alloc_fail;
}
/*
* If we are JOIN_NOLOCK we're already committing a transaction and
......@@ -365,11 +367,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
if (ret < 0) {
/* We must get the transaction if we are JOIN_NOLOCK. */
BUG_ON(type == TRANS_JOIN_NOLOCK);
if (type < TRANS_JOIN_NOLOCK)
sb_end_intwrite(root->fs_info->sb);
kmem_cache_free(btrfs_trans_handle_cachep, h);
return ERR_PTR(ret);
goto join_fail;
}
cur_trans = root->fs_info->running_transaction;
......@@ -410,6 +408,19 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
if (!current->journal_info && type != TRANS_USERSPACE)
current->journal_info = h;
return h;
join_fail:
if (type < TRANS_JOIN_NOLOCK)
sb_end_intwrite(root->fs_info->sb);
kmem_cache_free(btrfs_trans_handle_cachep, h);
alloc_fail:
if (num_bytes)
btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
num_bytes);
reserve_fail:
if (qgroup_reserved)
btrfs_qgroup_free(root, qgroup_reserved);
return ERR_PTR(ret);
}
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
......@@ -1468,7 +1479,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
goto cleanup_transaction;
}
if (cur_trans->aborted) {
/* Stop the commit early if ->aborted is set */
if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
goto cleanup_transaction;
}
......@@ -1574,6 +1586,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
/* ->aborted might be set after the previous check, so check it */
if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
goto cleanup_transaction;
}
/*
* the reloc mutex makes sure that we stop
* the balancing code from coming in and moving
......@@ -1657,6 +1674,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
goto cleanup_transaction;
}
/*
* The tasks which save the space cache and inode cache may also
* update ->aborted, check it.
*/
if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
mutex_unlock(&root->fs_info->tree_log_mutex);
mutex_unlock(&root->fs_info->reloc_mutex);
goto cleanup_transaction;
}
btrfs_prepare_extent_commit(trans, root);
cur_trans = root->fs_info->running_transaction;
......
......@@ -3357,6 +3357,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
if (skip_csum)
return 0;
if (em->compress_type) {
csum_offset = 0;
csum_len = block_len;
}
/* block start is already adjusted for the file extent offset. */
ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
em->block_start + csum_offset,
......@@ -3410,13 +3415,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
em = list_entry(extents.next, struct extent_map, list);
list_del_init(&em->list);
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
/*
* If we had an error we just need to delete everybody from our
* private list.
*/
if (ret) {
clear_em_logging(tree, em);
free_extent_map(em);
continue;
}
......@@ -3424,8 +3429,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
write_unlock(&tree->lock);
ret = log_one_extent(trans, inode, root, em, path);
free_extent_map(em);
write_lock(&tree->lock);
clear_em_logging(tree, em);
free_extent_map(em);
}
WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock);
......
......@@ -1431,7 +1431,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
}
} else {
ret = btrfs_get_bdev_and_sb(device_path,
FMODE_READ | FMODE_EXCL,
FMODE_WRITE | FMODE_EXCL,
root->fs_info->bdev_holder, 0,
&bdev, &bh);
if (ret)
......@@ -2615,7 +2615,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
chunk_used = btrfs_block_group_used(&cache->item);
user_thresh = div_factor_fine(cache->key.offset, bargs->usage);
if (bargs->usage == 0)
user_thresh = 0;
else if (bargs->usage > 100)
user_thresh = cache->key.offset;
else
user_thresh = div_factor_fine(cache->key.offset,
bargs->usage);
if (chunk_used < user_thresh)
ret = 0;
......@@ -2960,6 +2967,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
unset_balance_control(fs_info);
ret = del_balance_item(fs_info->tree_root);
BUG_ON(ret);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
}
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
......@@ -3139,8 +3148,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
out:
if (bctl->flags & BTRFS_BALANCE_RESUME)
__cancel_balance(fs_info);
else
else {
kfree(bctl);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
}
return ret;
}
......@@ -3157,7 +3168,6 @@ static int balance_kthread(void *data)
ret = btrfs_balance(fs_info->balance_ctl, NULL);
}
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
......@@ -3180,7 +3190,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
return 0;
}
WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
if (IS_ERR(tsk))
return PTR_ERR(tsk);
......@@ -3234,6 +3243,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
btrfs_balance_sys(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
......@@ -3497,7 +3508,7 @@ struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
{ 1, 1, 2, 2, 2, 2 /* raid1 */ },
{ 1, 2, 1, 1, 1, 2 /* dup */ },
{ 1, 1, 0, 2, 1, 1 /* raid0 */ },
{ 1, 1, 0, 1, 1, 1 /* single */ },
{ 1, 1, 1, 1, 1, 1 /* single */ },
};
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册